输出乱码可尝试做以下修改:
# Fix for garbled (mojibake) output: write the HTML file as UTF-8 and
# declare that charset in the document head so browsers decode it correctly.
fout = open('output.html', 'w', encoding='utf-8') # add encoding='utf-8' here
# Add this line right after fout.write("<html>"):
fout.write("<head><meta http-equiv='content-type' content='text/html;charset=utf-8'></head>")
# Same encoding fix: open the output file as UTF-8 and emit a matching
# charset declaration in the document head.
charset_meta = "<head><meta http-equiv='content-type' content='text/html;charset=utf-8'></head>"
fout = open('output.html', 'w', encoding='utf-8')
# This goes right after fout.write("<html>"):
fout.write(charset_meta)
2018-04-08
# Demonstrate finding an <a> tag whose href matches a regular expression.
print("正则匹配")
ill_pattern = re.compile(r"ill")
# The original notes run the same lookup twice; preserve that.
for _ in range(2):
    link_node = soup.find('a', href=ill_pattern)
    print(link_node.name, link_node['href'], link_node.get_text())
2018-04-04
# Demonstrate finding an <a> tag by its exact href value.
print("获取lacie的链接")
lacie_url = 'http://example.com/lacie'
# The original notes run the same lookup twice; preserve that.
for _ in range(2):
    link_node = soup.find('a', href=lacie_url)
    print(link_node.name, link_node['href'], link_node.get_text())
2018-04-04
# Parse the document and print every <a> link (tag name, href, text).
# NOTE(review): from_encoding only applies when html_doc is bytes; if it is
# already a str, bs4 ignores it with a warning — confirm against the caller.
soup = BeautifulSoup(html_doc, 'html.parser', from_encoding='utf-8')
# The original notes print the link list twice; preserve that.
for _ in range(2):
    print("获取所有的链接")
    links = soup.find_all('a')
    for link in links:
        print(link.name, link['href'], link.get_text())
2018-04-04