拿到了下一页的链接，但是爬虫没有翻页。
# -*- coding: utf-8 -*-
import scrapy
from Lz.items import LzItem
class LzSpiderSpider(scrapy.Spider):
    """Spider that walks the listing pages under xxgk.linzhang.gov.cn/zxxxgk/.

    For every table row found inside ``div.lzgk_wenjianlist`` it yields an
    ``LzItem`` with ``title`` and ``times`` populated, then follows the
    pagination link of the page (if one is present) using the same callback.
    """

    name = 'Lz_spider'
    allowed_domains = ['xxgk.linzhang.gov.cn']
    start_urls = ['http://xxgk.linzhang.gov.cn/zxxxgk/index_1.html']

    def parse(self, response):
        """Extract the items of one listing page and schedule the next page.

        Args:
            response: the downloaded listing page.

        Yields:
            One ``LzItem`` per table row, followed by at most one
            ``scrapy.Request`` for the next listing page.
        """
        rows = response.xpath("//div[@class='lzgk_wenjianlist']/table//tr")
        for row in rows:
            lz_item = LzItem()
            # Title is the text of the link in the first cell; the value
            # stored under 'times' is the text of the fourth cell.
            lz_item['title'] = row.xpath("./td[1]/a/text()").extract_first()
            lz_item['times'] = row.xpath("./td[4]/text()").extract_first()
            yield lz_item

        # NOTE(review): the pagination link is picked purely by position (the
        # 7th <a> inside div.page) -- presumably the "next page" anchor;
        # confirm against the real markup, since the position may shift
        # between pages.
        next_link = response.xpath(
            "//div[@class='page']/a[7]/@href"
        ).extract_first()
        if next_link:
            # Resolve the href against the current page URL instead of gluing
            # it onto a hard-coded prefix: this yields the same URL for a bare
            # relative href ("index_2.html") and also handles root-relative or
            # absolute hrefs, which plain concatenation would corrupt.
            yield scrapy.Request(response.urljoin(next_link), callback=self.parse)