但我无法获取数据，现在我有这个代码：
import scrapy
from scrapy import Selector
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from logzero import logfile, logger
class ScrapeTableSpider(scrapy.Spider):
    """Spider that prints the rows of a JavaScript-rendered ranking table.

    Scrapy only schedules the crawl via ``start_urls``; the table itself is
    fetched with a headless Chrome driver inside ``parse``, because the page
    at ``RANKING_URL`` is loaded through Selenium rather than read from the
    Scrapy response.
    """

    name = "scrape-table"
    allowed_domains = ["toscrape.com"]
    start_urls = ['http://quotes.toscrape.com']

    # Page that is opened in the browser for every parsed response.
    RANKING_URL = (
        "https://www.timeshighereducation.com/world-university-rankings/2021/"
        "world-ranking#!/page/0/length/25/sort_by/scores_overall/"
        "sort_order/asc/cols/scores"
    )
    # Matches the element(s) whose id contains "datatable-1".
    TABLE_XPATH = '//*[contains(@id,"datatable-1")]'
    # Seconds to wait for the first data cell before giving up.
    RENDER_TIMEOUT = 10

    def start_requests(self):
        """Yield one request per entry in ``start_urls``, handled by ``parse``."""
        for url in self.start_urls:
            yield scrapy.Request(url=url, callback=self.parse)

    def parse(self, response):
        """Open ``RANKING_URL`` in headless Chrome and print each table row.

        Each row is printed as a list holding the text of its ``td``/``th``
        cells. ``response`` is not read; it only triggers the browser run.

        Raises:
            selenium.common.exceptions.TimeoutException: if no ``td`` cell
                appears under the table within ``RENDER_TIMEOUT`` seconds.
        """
        options = webdriver.ChromeOptions()
        options.add_argument("headless")
        desired_capabilities = options.to_capabilities()
        driver = webdriver.Chrome('C:/chromedriver', desired_capabilities=desired_capabilities)
        try:
            driver.get(self.RANKING_URL)
            # Wait explicitly for a data cell instead of relying on an
            # implicit wait. NOTE(review): assumes data rows use <td> while
            # the header uses <th> - confirm against the live markup.
            WebDriverWait(driver, self.RENDER_TIMEOUT).until(
                EC.presence_of_element_located((By.XPATH, self.TABLE_XPATH + "//td"))
            )
            # find_elements (plural) returns a list; the singular form
            # returns one WebElement, which cannot be iterated.
            for row in driver.find_elements(By.XPATH, self.TABLE_XPATH + "//tr"):
                data = [
                    cell.text
                    for cell in row.find_elements(By.XPATH, ".//*[self::td or self::th]")
                ]
                print(data)
        finally:
            # Always shut the browser down so no Chrome process is leaked.
            driver.quit()
任何有关如何从表中获取数据的见解将不胜感激。
繁花不似锦
慕慕森
相关分类