我在 Scrapy 中创建了一个爬虫(Spider):

items.py:

from scrapy.item import Item, Field

class dns_shopItem(Item):
    # define the fields for your item here like:
    # name = Field()
    id = Field()
    idd = Field()

dns_shop_spider.py:

from scrapy.contrib.spiders import CrawlSpider, Rule
from scrapy.contrib.linkextractors.sgml import SgmlLinkExtractor
from scrapy.contrib.loader.processor import TakeFirst
from scrapy.contrib.loader import XPathItemLoader
from scrapy.selector import HtmlXPathSelector
from dns_shop.items import dns_shopItem

class dns_shopLoader(XPathItemLoader):
    default_output_processor = TakeFirst()

class dns_shopSpider(CrawlSpider):
    name = "dns_shop_spider"
    allowed_domains = ["www.playground.ru"]
    start_urls = ["http://www.playground.ru/files/stalker_clear_sky/"]
    rules = (
        Rule(SgmlLinkExtractor(allow=('/files/s_t_a_l_k_e_r_chistoe_nebo')), follow=True),
        Rule(SgmlLinkExtractor(allow=('/files/s_t_a_l_k_e_r_chistoe_nebo')), callback='parse_item'),
    )

    def parse_item(self, response):
        hxs = HtmlXPathSelector(response)
        l = dns_shopLoader(dns_shopItem(), hxs)
        l.add_xpath('id', "/html/body/table[2]/tbody/tr[5]/td[2]/table/tbody/tr/td/div[6]/h1/text()")
        l.add_xpath('idd', "//html/body/table[2]/tbody/tr[5]/td[2]/table/tbody/tr/td/div[6]/h1/text()")
        return l.load_item()

运行以下命令:

scrapy crawl dns_shop_spider -o scarped_data_utf8.csv -t csv

日志显示 Scrapy 已经遍历了所有需要的 URL,但是启动爬虫后,为什么抓取到的数据没有写入指定的文件?可能是什么问题?
添加回答
举报
0/150
提交
取消