items.py

```python
import scrapy

class News163Item(scrapy.Item):
    title = scrapy.Field()
    url = scrapy.Field()
    source = scrapy.Field()
    content = scrapy.Field()
```

news_spider.py

```python
# coding:utf-8
from scrapy.contrib.linkextractors import LinkExtractor
from scrapy.contrib.spiders import CrawlSpider, Rule

class ExampleSpider(CrawlSpider):
    name = "news"
    allowed_Domains = ["news.163.com"]
    start_urls = ['http://news.163.com/']
    rules = [Rule(LinkExtractor(allow=r"/14/12\d+/\d+/*"), 'parse_news')]

    def parse_news(self, response):
        news = News163Item()
        news['title'] = response.xpath("//*[@id="h1title"]/text()").extract()
        news['source'] = response.xpath("//*[@id="ne_article_source"]/text()").extract()
        news['content'] = response.xpath("//*[@id="endText"]/text()").extract()
        news['url'] = response.url
        return news
```

cd 进入所在目录后,命令行执行:scrapy crawl news -o news163.json

会跳出如下错误:

```
Traceback (most recent call last):
  File "/usr/bin/scrapy", line 9, in
    load_entry_point('Scrapy==0.24.4', 'console_scripts', 'scrapy')()
  File "/usr/lib/pymodules/python2.7/scrapy/cmdline.py", line 143, in execute
    _run_print_help(parser, _run_command, cmd, args, opts)
  File "/usr/lib/pymodules/python2.7/scrapy/cmdline.py", line 89, in _run_print_help
    func(*a, **kw)
  File "/usr/lib/pymodules/python2.7/scrapy/cmdline.py", line 150, in _run_command
    cmd.run(args, opts)
  File "/usr/lib/pymodules/python2.7/scrapy/commands/crawl.py", line 57, in run
    crawler = self.crawler_process.create_crawler()
  File "/usr/lib/pymodules/python2.7/scrapy/crawler.py", line 87, in create_crawler
    self.crawlers[name] = Crawler(self.settings)
  File "/usr/lib/pymodules/python2.7/scrapy/crawler.py", line 25, in __init__
    self.spiders = spman_cls.from_crawler(self)
  File "/usr/lib/pymodules/python2.7/scrapy/spidermanager.py", line 35, in from_crawler
    sm = cls.from_settings(crawler.settings)
  File "/usr/lib/pymodules/python2.7/scrapy/spidermanager.py", line 31, in from_settings
    return cls(settings.getlist('SPIDER_MODULES'))
  File "/usr/lib/pymodules/python2.7/scrapy/spidermanager.py", line 22, in __init__
    for module in walk_modules(name):
  File "/usr/lib/pymodules/python2.7/scrapy/utils/misc.py", line 68, in walk_modules
    submod = import_module(fullpath)
  File "/usr/lib/python2.7/importlib/__init__.py", line 37, in import_module
    __import__(name)
  File "/home/gao/news/news/spiders/news_spider.py", line 15
    news['title']=response.xpath("//*[@id="h1title"]/text()").extract()
                                          ^
SyntaxError: invalid syntax
```

请问是哪里出错了?python 新手,scrapy 也是最近才用的,很生疏,求指点。谢谢!

@捏造的信仰 的回答,但是更改过之后,还是有错误。

```
2014-12-02 20:13:02+0800 [news] ERROR: Spider error processing
Traceback (most recent call last):
  File "/usr/lib/python2.7/dist-packages/twisted/internet/base.py", line 824, in runUntilCurrent
    call.func(*call.args, **call.kw)
  File "/usr/lib/python2.7/dist-packages/twisted/internet/task.py", line 638, in _tick
    taskObj._oneWorkUnit()
  File "/usr/lib/python2.7/dist-packages/twisted/internet/task.py", line 484, in _oneWorkUnit
    result = next(self._iterator)
  File "/usr/lib/pymodules/python2.7/scrapy/utils/defer.py", line 57, in
    work = (callable(elem, *args, **named) for elem in iterable)
------
  File "/usr/lib/pymodules/python2.7/scrapy/utils/defer.py", line 96, in iter_errback
    yield next(it)
  File "/usr/lib/pymodules/python2.7/scrapy/contrib/spidermiddleware/offsite.py", line 26, in process_spider_output
    for x in result:
  File "/usr/lib/pymodules/python2.7/scrapy/contrib/spidermiddleware/referer.py", line 22, in
    return (_set_referer(r) for r in result or ())
  File "/usr/lib/pymodules/python2.7/scrapy/contrib/spidermiddleware/urllength.py", line 33, in
    return (r for r in result or () if _filter(r))
  File "/usr/lib/pymodules/python2.7/scrapy/contrib/spidermiddleware/depth.py", line 50, in
    return (r for r in result or () if _filter(r))
  File "/usr/lib/pymodules/python2.7/scrapy/contrib/spiders/crawl.py", line 67, in _parse_response
    cb_res = callback(response, **cb_kwargs) or ()
  File "/home/gao/news/news/spiders/news_spider.py", line 14, in parse_news
    news = News163Item()
exceptions.NameError: global name 'News163Item' is not defined
```

请问这又是什么原因呢?
2 回答

慕田峪7331174
TA贡献1828条经验 获得超13个赞
字符串外部使用的是双引号,在双引号内部还需要使用引号的话,可以使用单引号。例如:news['title'] = response.xpath("//*[@id='h1title']/text()").extract()

慕桂英546537
TA贡献1848条经验 获得超10个赞
字符串中的引号没有转义,导致的语法错误。应该改为:news['title'] = response.xpath("//*[@id=\"h1title\"]/text()").extract() 下面几行也是同样的问题。
添加回答
举报
0/150
提交
取消