# -*- coding: utf-8 -*-
import scrapy
from douban.items import DoubanItem
class DoubanSpiderSpider(scrapy.Spider):
    """Spider that walks the movie entries on the Douban Top 250 page.

    The spider starts from ``start_urls`` and, for each response, selects
    the ``<li>`` nodes of the movie list and prints each selector.
    """

    # Name of this spider.
    name = "douban_spider"
    # Domains the spider is allowed to crawl.
    allowed_domains = ["movie.douban.com"]
    # Entry URL(s); these are handed to the scheduler.
    start_urls = ['https://movie.douban.com/top250']

    def parse(self, response):
        """Select every movie ``<li>`` in the response and print it.

        Args:
            response: The downloaded page; only its ``xpath`` method is used.
        """
        # The expression must not end with a trailing "/": a location path
        # ending in "/" is not valid XPath and makes ``xpath()`` raise.
        # The list class is ``grid_view`` (the previous ``gril_view`` looked
        # like a typo and would match nothing) -- NOTE(review): confirm
        # against the live page markup.
        movie_list = response.xpath(
            "//div[@class='article']//ol[@class='grid_view']/li"
        )
        for i_item in movie_list:
            # TODO: build a DoubanItem per movie, e.g. read the rank with
            # ".//div[@class='item']//em/text()" and extract_first();
            # presumably DoubanItem declares a 'serial_number' field --
            # verify against douban/items.py before enabling.
            print(i_item)
# Note: the environment used is Python 3.7.