# coding: utf8
"""Beautiful Soup demo: look up nodes by tag, by attribute, by regex and by class.

Parses a small HTML sample and prints, for each lookup, the matched node's
tag name, its ``href`` (for links) and its text content.
"""
import re

from bs4 import BeautifulSoup

html_doc = """
<html><head><title>The Dormouse's story</title></head>
<body>
<p class="title"><b>The Dormouse's story</b></p>

<p class="story">Once upon a time there were three little sisters; and their names were
<a href="http://example.com/elsie" class="sister" id="link1">Elsie</a>,
<a href="http://example.com/lacie" class="sister" id="link2">Lacie</a> and
<a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>;
and they lived at the bottom of a well.</p>

<p class="story">...</p>
"""


def describe_link(node):
    """Return "<tag name> <href> <text>" for an <a> node."""
    return '%s %s %s' % (node.name, node['href'], node.get_text())


# from_encoding is only consulted when the markup is a byte string
# (which is the case for a plain str literal under Python 2).
soup = BeautifulSoup(html_doc, 'html.parser', from_encoding='utf-8')

# Every <a> element in the document.
print('获取所有连接')
for link in soup.find_all('a'):
    print(describe_link(link))

# The single <a> whose href equals the given URL exactly.
print('获取lacie的连接')
link_node = soup.find('a', href='http://example.com/lacie')
print(describe_link(link_node))

# The first <a> whose href matches the regular expression.
print('正则匹配')
link_node = soup.find('a', href=re.compile(r"ill"))
print(describe_link(link_node))

# The paragraph carrying class="title".
# FIX: the original searched for an <a> tag with class "title"; no such link
# exists (the class is on a <p>), so find() returned None and the following
# attribute access raised AttributeError.
print('p段落文字')
p_node = soup.find('p', class_="title")
print('%s %s' % (p_node.name, p_node.get_text()))

# ---------------------------------------------------------------------------
# Original report (translated): "It raises an error, as follows" -- output of
# the script before the fix above was applied:
#
#   获取所有连接
#   a http://example.com/elsie Elsie
#   a http://example.com/lacie Lacie
#   a http://example.com/tillie Tillie
#   获取lacie的连接
#   a http://example.com/lacie Lacie
#   正则匹配
#   a http://example.com/tillie Tillie
#   p段落文字
#   Traceback (most recent call last):
#     File "C:\Users\Administrator\workspace\2.7\66\test_bs4.py", line 34, in <module>
#       print p_node.name, p_node.get_text()
#   AttributeError: 'NoneType' object has no attribute 'name'
# ---------------------------------------------------------------------------
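# Forum page UI text captured with the post (not part of the script):
# 添加回答 (Add answer)
# 举报 (Report)
# 0/150 (character counter)
# 提交 (Submit)
# 取消 (Cancel)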