#coding:utf8
from bs4 import BeautifulSoup
import urllib2
import re
import csv
def _clean_text(text):
    """Return *text* with newlines, carriage returns and spaces removed."""
    return text.replace('\n', '').replace('\r', '').replace(' ', '')


def get_attractions(url, data=None):
    """Fetch one listing page and return its title/message pairs as rows.

    The page is downloaded, decoded as GB2312 and parsed with BeautifulSoup.
    Every ``<div class="title">`` is paired positionally with the
    ``<div class="message">`` at the same index (``zip`` drops any unmatched
    trailing elements).

    Args:
        url: Address of the page to download.
        data: Ignored. Kept only so existing callers that pass it keep
            working; the function never reads it.

    Returns:
        A list of ``[title, content]`` rows (one per matched pair) with
        newlines, carriage returns and spaces stripped from each field.
        The list is empty when the page has no matching elements.

    Raises:
        urllib2.URLError: If the page cannot be fetched.
        UnicodeDecodeError: If the body contains bytes that are not valid
            GB2312 (the decode is strict).
    """
    response = urllib2.urlopen(url)
    try:
        web_data = response.read().decode('gb2312')
    finally:
        # Release the connection even if reading or decoding fails.
        response.close()

    soup = BeautifulSoup(web_data, 'html.parser')
    titles = soup.find_all('div', class_="title")
    messages = soup.find_all('div', class_="message")

    rows = []
    for d_title, d_content in zip(titles, messages):
        row = [_clean_text(d_title.get_text()),
               _clean_text(d_content.get_text())]
        # Echo each cleaned field, as the original script did for progress.
        for field in row:
            print(field)
        rows.append(row)
    # Return every record, not just the last cleaned string, so the result
    # can be handed straight to csv.writer.writerows().
    return rows
def write_data(data, name):
    """Write *data* to the CSV file *name*, replacing any existing file.

    Text is stored as UTF-8. On Python 2 the ``csv`` module cannot handle
    ``unicode`` objects containing non-ASCII characters, so such cells are
    encoded to UTF-8 byte strings before being written. On Python 3 the file
    is opened in text mode with ``newline=''`` as the ``csv`` module requires.

    Args:
        data: Iterable of rows; each row is an iterable of cell values.
        name: Path of the CSV file to create (truncated if it exists).
    """
    import sys

    if sys.version_info[0] < 3:
        # Python 2: csv wants a binary file and byte-string cells.
        handle = open(name, 'wb')

        def encode(cell):
            if isinstance(cell, unicode):
                return cell.encode('utf-8')
            return cell
    else:
        # Python 3: csv wants a text file; newline='' keeps its own "\r\n".
        handle = open(name, 'w', newline='', encoding='utf-8')

        def encode(cell):
            return cell

    with handle as f:
        f_csv = csv.writer(f)
        for row in data:
            f_csv.writerow([encode(cell) for cell in row])
# Base listing page (not used by the loop below; kept for reference).
url = 'http://liuyan.people.com.cn/list.php?fid=733'
# Pages 1 through 12 of the listing.
urls = ['http://liuyan.people.com.cn/list.php?fid=733&display=&total=112&page={}'.format(i) for i in range(1, 13)]

# Collect the results of every page first and write the file once:
# write_data() opens the target in a truncating mode, so calling it inside
# the loop would leave only the last page's data in liuyan.csv.
all_rows = []
for single_url in urls:
    datas = get_attractions(single_url)
    all_rows.extend(datas)
write_data(all_rows, 'liuyan.csv')
添加回答
举报
0/150
提交
取消