# -*- coding: utf-8 -*-
import scrapy
from douban.items import DoubanItem
class DoubanSpiderSpider(scrapy.Spider):
    """Spider that walks the Douban Top 250 movie list and yields one item per movie.

    Starting from ``start_urls``, every list page is handed to :meth:`parse`,
    which emits a ``DoubanItem`` for each movie entry and then schedules the
    next page (if the page advertises one).
    """

    # Spider name
    name = 'douban_spider'
    # Must match the host used in start_urls; otherwise the follow-up
    # pagination requests are discarded as off-site.
    allowed_domains = ['movie.douban.com']
    start_urls = ['https://movie.douban.com/top250']

    def parse(self, response):
        """Extract all movie entries from one list page and follow pagination.

        Args:
            response: The downloaded list page.

        Yields:
            A ``DoubanItem`` per movie entry found on the page, followed by a
            ``scrapy.Request`` for the next list page when a next link exists.
        """
        movic_list = response.xpath(
            "//div[@class='article']//ol[@class='grid_view']/li"
        )
        for i_item in movic_list:
            douban_item = DoubanItem()
            douban_item['serial_number'] = i_item.xpath(
                ".//div[@class='item']//em/text()"
            ).extract_first()
            douban_item['movic_name'] = i_item.xpath(
                ".//div[@class='info']/div[@class='hd']/a/span[1]/text()"
            ).extract_first()

            # The paragraph consists of several text nodes, so take all of
            # them (not just the first). Whitespace inside each node is
            # removed, empty nodes are skipped, and the remaining fragments
            # are joined with a single space.
            content = i_item.xpath(
                ".//div[@class='info']//div[@class='bd']/p[1]/text()"
            ).extract()
            fragments = ("".join(i_content.split()) for i_content in content)
            douban_item['introduce'] = " ".join(
                fragment for fragment in fragments if fragment
            )

            douban_item['star'] = i_item.xpath(
                ".//span[@class='rating_num']/text()"
            ).extract_first()
            douban_item['evaluate'] = i_item.xpath(
                ".//div[@class='star']//span[4]/text()"
            ).extract_first()
            douban_item["des"] = i_item.xpath(
                ".//p[@class='quote']/span/text()"
            ).extract_first()

            self.logger.debug("Parsed item: %s", douban_item)
            yield douban_item

        # The next-page href is relative to the current page, so resolve it
        # against the response URL instead of concatenating onto a fixed base.
        next_link = response.xpath(
            "//span[@class='next']/link/@href"
        ).extract_first()
        if next_link:
            yield scrapy.Request(response.urljoin(next_link), callback=self.parse)