Scrapy:如何使用 meta 在方法之间传递 item

我是 Scrapy 和 Python 的新手,我想把 parse_quotes 中的 item['author'] 传递给下一个解析方法 parse_bio。


我尝试了 request.meta 和 response.meta 方法,如scrapy文档中所示,但没有成功。请参阅下面的代码。


import scrapy

from tutorial.items import QuotesItem


class QuotesSpider(scrapy.Spider):
    """Log in to quotes.toscrape.com, scrape quotes and follow author pages.

    Flow: ``parse`` submits the login form, ``parse_quotes`` yields one item
    per quote and requests the linked author page, and ``parse_bio`` yields
    the author details together with the author name that ``parse_quotes``
    handed over through ``Request.meta``.
    """

    name = "quotes"

    start_urls = [
        'http://quotes.toscrape.com/login',
        #'http://quotes.toscrape.com/page/2',
    ]

    # Scraping a site with login.
    # Important: cookies must stay enabled to keep the login session alive.
    custom_settings = {'COOKIES_ENABLED': True}

    def parse(self, response):
        """Submit the login form found in ``response``.

        Returns a ``FormRequest`` whose response is handled by
        ``parse_quotes``.
        """
        return scrapy.FormRequest.from_response(
            response,
            formdata={'username': 'john', 'password': 'secret'},
            callback=self.parse_quotes,
        )

    def parse_quotes(self, response):
        """Yield one item per quote, a request per author link, and the next page.

        The author name is attached to each author-page request via ``meta``
        so that ``parse_bio`` can read it back from ``response.meta``.
        """
        for sel in response.css('div.quote'):
            item = QuotesItem()
            item['text'] = sel.css('span.text::text').get()
            item['author'] = sel.css('small.author::text').get()
            item['tags'] = sel.css('div.tags a.tag::text').getall()
            item['quotelink'] = sel.css(
                'small.author ~ a[href*="goodreads.com"]::attr(href)'
            ).get()
            item['author_bio_link'] = sel.css('.author + a')
            yield item

            # Vertical crawling: follow the author detail link(s) and carry
            # the author name along with the request.
            # NOTE(review): Scrapy's duplicate filter presumably drops repeat
            # requests for an author page already scheduled - confirm whether
            # one bio item per author is the desired outcome.
            for a in item['author_bio_link']:
                yield response.follow(
                    a,
                    callback=self.parse_bio,
                    meta={'author': item['author']},
                )

        # Horizontal crawling: the "next" link is part of the quotes listing,
        # so pagination is followed from here rather than from parse_bio.
        for a in response.css('li.next a'):
            yield response.follow(a, callback=self.parse_quotes)

    def parse_bio(self, response):
        """Yield the author details scraped from an author page.

        ``item['author']`` is the value stored under ``meta['author']`` by
        ``parse_quotes`` (``None`` if the request carried no such key).
        """
        item = QuotesItem()
        item['author'] = response.meta.get('author')
        # Only the first two ``p span`` text nodes are kept
        # (presumably birth date and birth place - verify against the page).
        item['author_born'] = response.css('p span::text').getall()[:2]
        bio = response.css('div.author-description ::text').get()
        # Guard against a missing description instead of failing on None.
        item['author_bio'] = bio.strip() if bio is not None else None
        yield item

我希望在 parse_bio 中获取从 parse_quotes 传递过来的 item['author']。


料青山看我应如是
浏览 206回答 1
1回答

侃侃尔雅

我建议你这样使用 meta:

def parse_quotes(self, response):
    for sel in response.css('div.quote'):
        item = QuotesItem()
        item['text'] = sel.css('span.text::text').get()
        item['author'] = sel.css('small.author::text').get()
        item['tags'] = sel.css('div.tags a.tag::text').getall()
        item['quotelink'] = sel.css('small.author ~ a[href*="goodreads.com"]::attr(href)').get()
        item['author_bio_link'] = sel.css('.author + a')
        yield item

        # follow the detail links @ shortcut
        # vertical crawling
        for a in item['author_bio_link']:
            yield response.follow(a, self.parse_bio,
                                  meta={'author': item['author']})  # <- you set it here

def parse_bio(self, response):
    item = QuotesItem()
    item['author_born'] = response.css('p span::text').getall()
    item['author_born'] = item['author_born'][:2]
    item['author_data'] = response.meta.get('author')  # <- you get it here
    item['author_bio'] = response.css('div.author-description ::text').get().strip()
    yield item

    # follow pagination links @ shortcut
    # horizontal crawling
    for a in response.css('li.next a'):
        yield response.follow(a, callback = self.parse_quotes)
打开App,查看更多内容
随时随地看视频慕课网APP

相关分类

Python