import requests
from bs4 import BeautifulSoup
import lxml
# url='http://www.qiushibaike.com/'
# Browser-like User-Agent sent with every request made by this script.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36'}

# Listing-page URLs for pages 1 through 35 (range's upper bound is exclusive).
urls = [
    'http://www.qiushibaike.com/8hr/page/{}/?s=4940923'.format(page)
    for page in range(1, 36)
]
def get_substance(url, data=None):
    """Download one listing page and print every post found on it.

    Args:
        url: Address of the page to fetch.
        data: Unused. Kept only so existing callers that pass it keep working.

    Returns:
        None. Each post is printed as a dict with the keys ``'name'``,
        ``'numberd'`` and ``'content'``; every value is a list of the
        whitespace-stripped strings inside the matched element.

    Raises:
        requests.exceptions.RequestException: If the request fails or the
            server does not answer within the timeout.
    """
    # Without a timeout a single stalled connection would block the crawl
    # indefinitely.
    web_data = requests.get(url, headers=headers, timeout=10)
    # Hand the already-decoded text to the parser. Re-encoding it to bytes
    # first would make BeautifulSoup guess the encoding again, and that guess
    # may follow a charset declared in the page rather than the real bytes.
    soup = BeautifulSoup(web_data.text, 'lxml')
    names = soup.select('.author > a > h2')
    numberds = soup.select('.stats-vote > .number')
    contents = soup.select('.content > span')
    # zip() stops at the shortest of the three result lists, so any unmatched
    # trailing elements are ignored.
    for name, numberd, content in zip(names, numberds, contents):
        record = {
            'name': list(name.stripped_strings),
            'numberd': list(numberd.stripped_strings),
            'content': list(content.stripped_strings),
        }
        print(record)
# Scrape every listing page in turn; each call prints the posts it finds.
for single_url in urls:
    get_substance(single_url)
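# NOTE: the remaining lines were UI text from the web page this script was
# copied from, not Python code. They are kept here as comments so the file
# still parses:
#   "Add answer" / "Report" / "0/150" / "Submit" / "Cancel"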