soup.find()总是提示我SyntaxError: non-default argument follows default argument
import re
import urllib.parse
import urllib.request
from urllib.parse import urljoin

from bs4 import BeautifulSoup


class HtmlParser(object):
    """Extract follow-up links and title/summary data from a fetched page."""

    def _get_new_urls(self, page_url, soup):
        """Collect the absolute URLs of entry links found in the page.

        Args:
            page_url: URL of the page being parsed; relative hrefs are
                resolved against it.
            soup: Parsed document exposing ``find_all`` (a BeautifulSoup
                object when called from ``parse``).

        Returns:
            A set of absolute URL strings, one per matching ``<a>`` tag.
        """
        print("get_new_urls")
        new_urls = set()
        # Only anchors whose href matches "/view/<digits>.htm" are followed.
        links = soup.find_all('a', href=re.compile(r"/view/\d+\.htm"))
        for link in links:
            new_full_url = urllib.parse.urljoin(page_url, link['href'])
            # Add to the result set; the original called .add() on the href
            # string itself, which raised AttributeError on the first link.
            new_urls.add(new_full_url)
        return new_urls

    def _get_new_dataa(self, page_url, soup):
        """Alternate data extractor that looks nodes up via ``class_=``.

        Not called by ``parse``. It extracts the same fields as
        ``_get_new_data`` but prints different trace messages.

        Args:
            page_url: URL of the page being parsed.
            soup: Parsed document exposing ``find``.

        Returns:
            A dict with the keys ``'url'``, ``'title'`` and ``'summary'``.

        Raises:
            AttributeError: If the title or summary node is not in the page.
        """
        print("in parse def _get_new_data")
        res_data = {}

        # url
        res_data['url'] = page_url

        # <dd class="lemmaWgt-lemmaTitle-title"><h1>Python</h1>
        title_node = soup.find('dd', class_="lemmaWgt-lemmaTitle-title").find("h1")
        res_data['title'] = title_node.get_text()

        # <div class="lemma-summary" label-module="lemmaSummary">
        summary_node = soup.find('div', class_="lemma-summary")
        res_data['summary'] = summary_node.get_text()

        print ("get_over")
        return res_data

    def _get_new_data(self, page_url, soup):
        """Extract the page URL, title text and summary text.

        Args:
            page_url: URL of the page being parsed.
            soup: Parsed document exposing ``find``.

        Returns:
            A dict with the keys ``'url'``, ``'title'`` and ``'summary'``.

        Raises:
            AttributeError: If the title or summary node is not in the page.
        """
        print("get_new_data")
        res_data = {}
        res_data['url'] = page_url

        title_node = soup.find(
            'dd', attrs={"class": "lemmaWgt-lemmaTitle-title"}).find('h1')
        res_data['title'] = title_node.get_text()

        summary_node = soup.find('div', attrs={"class": "lemma-summary"})
        res_data['summary'] = summary_node.get_text()
        return res_data

    def parse(self, page_url, html_cont):
        """Parse downloaded HTML into new links and page data.

        Args:
            page_url: URL the content was downloaded from.
            html_cont: Raw HTML content of the page.

        Returns:
            A ``(new_urls, new_data)`` tuple, or ``None`` when either
            argument is ``None``.
        """
        print("parse")
        if page_url is None or html_cont is None:
            return

        soup = BeautifulSoup(html_cont, 'html.parser', from_encoding='utf-8')
        new_urls = self._get_new_urls(page_url, soup)
        new_data = self._get_new_data(page_url, soup)
        return new_urls, new_data
然后运行时提示如下错误:
C:\Users\Administrator\AppData\Local\Programs\Python\Python35\python.exe D:/要用的/python/text/spider_main.py Traceback (most recent call last): File "D:/要用的/python/text/spider_main.py", line 1, in <module> from text import url_manager, html_downloader, html_parser, html_outputer File "D:\要用的\python\text\html_parser.py", line 1, in <module> from bs4 import BeautifulSoup File "C:\Users\Administrator\AppData\Roaming\Python\Python35\site-packages\bs4\__init__.py", line 35, in <module> from .builder import builder_registry, ParserRejectedMarkup File "C:\Users\Administrator\AppData\Roaming\Python\Python35\site-packages\bs4\builder\__init__.py", line 7, in <module> from bs4.element import ( File "C:\Users\Administrator\AppData\Roaming\Python\Python35\site-packages\bs4\element.py", line 1273 def find(self, name: object = None, attrs: object = {}, recursive: object = True, text: object = None, ^ SyntaxError: non-default argument follows default argument
我不知道我的 find 函数哪里错了,提示我要添加变量,但不知道该怎么弄。有没有人可以帮我一下?麻烦了,很急。