我的代码是用pycharm敲的,为啥到运行的时候报了这个错,求大神解答,谢谢
我感觉没啥错啊,都是对着老师的代码敲得,哭泣
我感觉没啥错啊,都是对着老师的代码敲得,哭泣
2017-09-17
# 载入url管理器,解析器,下载器,输出器模块 from SpiderBaike import url_manager, html_parser, html_downloader, html_output # 定义爬虫类 class Spider(object): def __init__(self): # url管理器类等生成实例对象 self.url_manager = url_manager.UrlManager() self.parser = html_parser.Parser() self.downloader = html_downloader.Downloader() self.output = html_output.Output() def crawl(self, root_url): count = 1 self.url_manager.add(root_url) while self.url_manager.has_new(): try: new_url = self.url_manager.get_url() print(count) print(new_url) html_cont = self.downloader.download(new_url) new_urls, data = self.parser.parse(new_url, html_cont) self.output.get_data(data) self.url_manager.add_list(new_urls) if count == 10: break count = count + 1 except: print('Craw failed') if __name__ == '__main__': root_url = 'https://baike.baidu.com/item/Python' obj_spider = Spider() obj_spider.crawl(root_url)发现自己哪里错了,错把__init__ 写成了 __int__了,楼主你应该和我的问题差不多,看看你的__init__方法吧
# 载入url管理器,解析器,下载器,输出器模块 from SpiderBaike import url_manager, html_parser, html_downloader, html_output # 定义爬虫类 class Spider(object): def __int__(self): # url管理器类等生成实例对象 self.m_url_manager = url_manager.UrlManager() self.m_parser = html_parser.Parser() self.m_downloader = html_downloader.Downloader() self.m_output = html_output.Output() def crawl(self, root_url): print(root_url) # count = 1 self.m_url_manager.add(root_url) # while self.m_url_manager.has_new(): # try: # new_url = self.m_url_manager.get_url() # print(count) # print(new_url) # html_cont = self.m_downloader.download(new_url) # new_urls, data = self.m_parser.parse(new_url, html_cont) # self.m_output.get_data(data) # self.m_url_manager.add_list(new_urls) # if count == 10: # break # count = count + 1 # except: # print('Craw failed') if __name__ == '__main__': root_url = 'https://baike.baidu.com/item/Python' obj_spider = Spider() obj_spider.crawl(root_url)
我的和你一样报错,怎么解决?
举报