Scrapy：如何使用 meta 在方法之间传递 item

我是 Scrapy 和 Python 的新手，我试图将 parse_quotes 中的 item['author'] 传递给下一个解析方法 parse_bio。我尝试了 request.meta 和 response.meta 方法，如 Scrapy 文档中所示，但没有成功。请参阅下面的代码。

import scrapy
from tutorial.items import QuotesItem

class QuotesSpider(scrapy.Spider):
    name = "quotes"
    start_urls = [
        'http://quotes.toscrape.com/login',
        #'http://quotes.toscrape.com/page/2',
    ]

    # Scraping a site with login
    # Important: Cookie settings must be "True" to keep the login session alive
    custom_settings = {'COOKIES_ENABLED': True}

    def parse(self, response):
        return scrapy.FormRequest.from_response(
            response,
            formdata={'username': 'john', 'password': 'secret'},
            callback=self.parse_quotes
        )

    def parse_quotes(self, response):
        for sel in response.css('div.quote'):
            item = QuotesItem()
            item['text'] = sel.css('span.text::text').get()
            item['author'] = sel.css('small.author::text').get()
            item['tags'] = sel.css('div.tags a.tag::text').getall()
            item['quotelink'] = sel.css('small.author ~ a[href*="goodreads.com"]::attr(href)').get()
            item['author_bio_link'] = sel.css('.author + a')
            yield item

            # follow the detail links @ shortcut
            # vertical crawling
            for a in item['author_bio_link']:
                yield response.follow(a, callback = self.parse_bio)

    def parse_bio(self, response):
        item = QuotesItem()
        item['author_born'] = response.css('p span::text').getall()
        item['author_born'] = item['author_born'][:2]
        item['author_bio'] = response.css('div.author-description ::text').get().strip()
        yield item

        # follow pagination links @ shortcut
        # horizontal crawling
        for a in response.css('li.next a'):
            yield response.follow(a, callback = self.parse_quotes)

我希望在 parse_bio 中获取从 parse_quotes 传递过来的 item['author']。

查看完整描述

1 回答

侃侃尔雅

TA贡献1801条经验获得超15个赞

我建议你这样使用meta：

def parse_quotes(self, response):
    """Yield one item per quote and follow each quote's author-bio link.

    The author name extracted for a quote is attached to the follow-up
    request through ``meta`` so the bio callback can read it back from
    ``response.meta``.
    """
    for sel in response.css('div.quote'):
        item = QuotesItem()
        item['text'] = sel.css('span.text::text').get()
        item['author'] = sel.css('small.author::text').get()
        item['tags'] = sel.css('div.tags a.tag::text').getall()
        item['quotelink'] = sel.css('small.author ~ a[href*="goodreads.com"]::attr(href)').get()
        # Stored as selectors (not URL strings); they are followed below.
        item['author_bio_link'] = sel.css('.author + a')
        yield item

        # follow the detail links @ shortcut
        # vertical crawling
        # This loop lives inside the per-quote loop so that every request
        # carries the author of the quote it was found in.
        for a in item['author_bio_link']:
            yield response.follow(a, self.parse_bio,
                                  meta={'author': item['author']})  # <- you set it here

def parse_bio(self, response):
    """Yield an item built from an author page, then follow "next" links.

    The author name is read from ``response.meta['author']``; it is
    ``None`` when the request that led here did not set that key.
    """
    item = QuotesItem()
    # Keep only the first two matched text nodes.
    item['author_born'] = response.css('p span::text').getall()[:2]
    # NOTE(review): presumably QuotesItem declares an 'author_data' field -
    # confirm against tutorial.items.
    item['author_data'] = response.meta.get('author')  # <- you get it here
    # default='' keeps .strip() from raising AttributeError when the
    # description selector matches nothing.
    item['author_bio'] = response.css('div.author-description ::text').get(default='').strip()
    yield item

    # follow pagination links @ shortcut
    # horizontal crawling
    # NOTE(review): confirm that 'li.next a' is expected on the pages this
    # callback receives, rather than on the quote listing pages.
    for a in response.css('li.next a'):
        yield response.follow(a, callback=self.parse_quotes)

反对回复 2021-12-29

热搜

最近搜索清空

Scrapy：如何使用元在方法之间传递项目

Scrapy：如何使用元在方法之间传递项目

1 回答

添加回答