class UrlManager(object):
    """Track which URLs still need to be visited and which were handed out.

    Two sets are maintained:

    * ``new_urls`` -- URLs that have been added but not yet returned by
      ``get_new_url``.
    * ``old_urls`` -- URLs that ``get_new_url`` has already returned.

    A URL is never stored in both sets, and a URL already present in
    either set is ignored when it is added again.
    """

    def __init__(self):
        # NOTE: this method was previously misspelled ``__init____``, so it
        # never ran as the constructor and neither set was created; the
        # first call to ``add_new_url`` then failed with AttributeError.
        self.new_urls = set()
        self.old_urls = set()

    def add_new_url(self, url):
        """Queue a single URL unless it is None or already known."""
        if url is None:
            return
        if url not in self.new_urls and url not in self.old_urls:
            self.new_urls.add(url)

    def add_new_urls(self, urls):
        """Queue every URL in ``urls``; None or an empty collection is a no-op."""
        if not urls:
            return
        for url in urls:
            self.add_new_url(url)

    def has_new_url(self):
        """Return True while at least one URL is waiting in ``new_urls``."""
        return bool(self.new_urls)

    def get_new_url(self):
        """Remove one pending URL, record it in ``old_urls`` and return it.

        ``set.pop`` picks an arbitrary element, so no ordering is implied.

        Raises:
            KeyError: if ``new_urls`` is empty; check ``has_new_url`` first.
        """
        new_url = self.new_urls.pop()
        self.old_urls.add(new_url)
        return new_url


# Failure originally reported with this class (caused by the misspelled
# constructor name ``__init____``):
#
# Traceback (most recent call last):
#   File "E:\java\imooc\baike_spider\spider_main.py", line 38, in <module>
#     obj_spider.craw(root_url)
#   File "E:\java\imooc\baike_spider\spider_main.py", line 13, in craw
#     self.urls.add_new_url(root_url)
#   File "E:\java\imooc\baike_spider\url_manager.py", line 9, in add_new_url
#     if url not in self.new_urls and url not in self.old_urls:
# AttributeError: 'UrlManager' object has no attribute 'new_urls'
添加回答
举报
0/150
提交
取消