使用 Scrapy 从 Business Insider 抓取股票详细信息

XPATH版本

    def parse(self, response):
        rows = response.xpath('//*[@id="index-list-container"]/div[2]/table/tr')
        for row in rows:
            yield{
                'name' : row.xpath('td[1]/a/text()').extract(),
                'price':row.xpath('td[2]/text()[1]').extract(),
                'pct':row.xpath('td[5]/span[2]/text()').extract(),
                'datetime':row.xpath('td[7]/span[2]/text()').extract(),
            }

CSS版本

    def parse(self, response):
        table = response.css('div#index-list-container table.table-small')
        rows = table.css('tr')
        for row in rows:
            name = row.css("a::text").get()
            high_low = row.css('td:nth-child(2)::text').get()
            date_time = row.css('td:nth-child(7) span:nth-child(2) ::text').get()
            yield {
                'name' : name,
                'high_low': high_low,
                'date_time' : date_time
            }

结果

{"high_low": "\r\n146.44", "name": "3M", "date_time": "05/26/2020 04:15:11 PM UTC-0400"},
{"high_low": "\r\n42.22", "name": "AO Smith", "date_time": "05/26/2020 04:15:11 PM UTC-0400"},
{"high_low": "\r\n91.47", "name": "Abbott Laboratories", "date_time": "05/26/2020 04:15:11 PM UTC-0400"},
{"high_low": "\r\n92.10", "name": "AbbVie", "date_time": "05/26/2020 04:15:11 PM UTC-0400"},
{"high_low": "\r\n193.71", "name": "Accenture", "date_time": "05/26/2020 04:15:11 PM UTC-0400"},
{"high_low": "\r\n73.08", "name": "Activision Blizzard", "date_time": "05/25/2020 08:00:00 PM UTC-0400"},
{"high_low": "\r\n385.26", "name": "Adobe", "date_time": "05/25/2020 08:00:00 PM UTC-0400"},
{"high_low": "\r\n133.48", "name": "Advance Auto Parts", "date_time": "05/26/2020 04:15:11 PM UTC-0400"},

使用 Scrapy 从 Business Insider 抓取股票详细信息

2回答