问答详情
源自:-

已经拿到了下一页的链接,但爬虫就是没有翻页

# -*- coding: utf-8 -*-

import scrapy

from Lz.items import LzItem



class LzSpiderSpider(scrapy.Spider):
    """Crawl the listing pages under ``xxgk.linzhang.gov.cn/zxxxgk/``.

    Every table row inside the ``lzgk_wenjianlist`` container is emitted as
    an ``LzItem`` carrying ``title`` and ``times``; the pager is then
    followed so that subsequent listing pages go through ``parse`` as well.
    """

    name = 'Lz_spider'
    allowed_domains = ['xxgk.linzhang.gov.cn']
    start_urls = ['http://xxgk.linzhang.gov.cn/zxxxgk/index_1.html']

    def parse(self, response):
        """Yield one item per listing row, then a request for the next page.

        Args:
            response: The downloaded listing page.

        Yields:
            ``LzItem`` objects (``title`` from the first cell's link text,
            ``times`` from the fourth cell's text; either may be ``None``
            when the row lacks that cell), followed by at most one
            ``scrapy.Request`` for the next listing page.
        """
        rows = response.xpath("//div[@class='lzgk_wenjianlist']/table//tr")
        for content in rows:
            lz_item = LzItem()
            lz_item['title'] = content.xpath("./td[1]/a/text()").extract_first()
            lz_item['times'] = content.xpath("./td[4]/text()").extract_first()
            yield lz_item

        # Prefer locating the "next page" anchor by its label: a purely
        # positional lookup points at a different anchor as soon as the
        # number of links in the pager changes between pages.
        # NOTE(review): assumes the pager labels that anchor "下一页" --
        # confirm against the live markup.
        next_link = response.xpath(
            "//div[@class='page']/a[contains(normalize-space(.), '下一页')]/@href"
        ).extract_first()
        if not next_link:
            # Fall back to the original positional lookup.
            next_link = response.xpath(
                "//div[@class='page']/a[7]/@href"
            ).extract_first()

        # Pagers sometimes use javascript: pseudo-links, which are not
        # fetchable URLs, so skip them instead of building a bogus request.
        if next_link and not next_link.strip().lower().startswith('javascript:'):
            # urljoin resolves relative, "./"-prefixed, root-relative and
            # absolute hrefs against the current page URL; gluing the href
            # onto a fixed base string only works for the plain relative case.
            yield scrapy.Request(response.urljoin(next_link.strip()),
                                 callback=self.parse)


提问者:慕粉1472488217 2020-03-29 19:56

个回答