html_parser 里有一句代码报错,我解决不了:
def _get_new_urls(self, page_url, soup):
    """Collect the absolute URLs of all entry links found in a parsed page.

    An entry link is an ``<a>`` tag whose ``href`` matches
    ``/view/<digits>.htm`` (for example ``/view/123.htm``).

    Args:
        page_url: URL of the page that ``soup`` was parsed from; relative
            hrefs are resolved against it.
        soup: Parsed document exposing ``find_all(name, href=pattern)`` and
            yielding tags that support ``tag['href']`` (presumably a
            BeautifulSoup object -- confirm against the caller).

    Returns:
        A set of absolute URL strings; empty when no link matches.
    """
    # Imported locally so this method does not depend on the file's import
    # block; falls back to the Python 2 module name when needed.
    try:
        from urllib.parse import urljoin
    except ImportError:  # Python 2
        from urlparse import urljoin

    # The attribute is "href" (not "herf") and the public API is re.compile
    # (not the private re._compile).
    links = soup.find_all('a', href=re.compile(r"/view/\d+\.htm"))

    # hrefs on the page are site-relative, so resolve each one against the
    # page URL; the set removes duplicate links.
    return {urljoin(page_url, link['href']) for link in links}