global name 'BeautifulSoup' is not defined
# coding: utf8
"""Turn one downloaded HTML page into follow-up links and title/summary data."""
import re

try:
    import urlparse  # Python 2 module name
except ImportError:
    # Python 3 moved urljoin into urllib.parse; keep the old name bound so
    # the rest of the module reads the same on both versions.
    from urllib import parse as urlparse

from bs4 import BeautifulSoup

# Anchors whose href contains "/item/" are the links collected for crawling.
_ITEM_HREF_RE = re.compile(r"/item/(.*)")


class HtmlParser(object):
    """Extract new URLs and page data from a single HTML document."""

    def parse(self, page_url, html_cont):
        """Parse ``html_cont`` and return ``(new_urls, new_data)``.

        ``page_url`` is the address the content was downloaded from; it is
        used to resolve relative links and is stored in the returned data.

        Returns ``None`` (not a tuple) when either argument is ``None``, so a
        caller that unpacks the result must check for that case first.
        """
        if page_url is None or html_cont is None:
            return None

        soup = BeautifulSoup(html_cont, 'html.parser', from_encoding='utf-8')
        new_urls = self._get_new_urls(page_url, soup)
        new_data = self._get_new_data(page_url, soup)
        return new_urls, new_data

    def _get_new_urls(self, page_url, soup):
        """Return the set of absolute URLs for every ``/item/`` link in ``soup``."""
        links = soup.find_all('a', href=_ITEM_HREF_RE)
        # urljoin resolves relative hrefs against the page they were found on.
        return {urlparse.urljoin(page_url, link['href']) for link in links}

    def _get_new_data(self, page_url, soup):
        """Return a dict with the page's ``url``, ``title`` and ``summary``.

        The title is the text of the ``<h1>`` inside
        ``<dd class="lemmaWgt-lemmaTitle-title">``; the summary is the text of
        ``<div class="lemma-summary">``. A page that lacks either node gets an
        empty string for that field instead of raising ``AttributeError``.
        """
        res_data = {'url': page_url}

        title_box = soup.find('dd', class_="lemmaWgt-lemmaTitle-title")
        # find() returns None when nothing matches, so guard before descending.
        title_node = title_box.find("h1") if title_box is not None else None
        res_data['title'] = self._text_or_empty(title_node)

        summary_node = soup.find('div', class_="lemma-summary")
        res_data['summary'] = self._text_or_empty(summary_node)
        return res_data

    @staticmethod
    def _text_or_empty(node):
        """Return ``node.get_text()``, or an empty string when ``node`` is None."""
        return node.get_text() if node is not None else u''
在 html_parser 里面已经加了 from bs4 import BeautifulSoup,但是还是报错。神奇的是,前面单独测试 BeautifulSoup 的程序跑起来没有问题。有大虾能帮忙看下是怎么回事吗?
错误如下:
NameError                                 Traceback (most recent call last)
/Users/yang/PythonSource/pachong/spider_main.py in <module>()
     34     root_url = "https://baike.baidu.com/item/Python/407313?fr=aladdin"
     35     obj_spider = SpiderMain()
---> 36     obj_spider.craw(root_url)
     37

/Users/yang/PythonSource/pachong/spider_main.py in craw(self, root_url)
     21     html_cont = self.downloader.download(new_url)
     22     print new_url
---> 23     new_urls, new_data = self.parser.parse(new_url, html_cont)
     24     self.urls.add_new_urls(new_urls)
     25     self.outputer.collect_data(new_data)

/Users/yang/PythonSource/pachong/html_parser.py in parse(self, page_url, html_cont)
     31     title_node = soup.find('dd',class_= "lemmaWgt-lemmaTitle-title").find("h1")
     32     res_data['title'] = title_node.get_text()
---> 33
     34     summary_node = soup.find('div', class_ = "lemma-summary")
     35     res_data['summary'] = summary_node.get_text()

NameError: global name 'BeautifulSoup' is not defined