from bs4 import BeautifulSoup
import requests
import time
info = []
url = 'http://cq.xiaozhu.com/search-duanzufang-p1-0/'
url_save = 'http://cq.xiaozhu.com/fangzi/4783366714.html'
urls = ['http://cq.xiaozhu.com/search-duanzufang-p{}-0/'.format(str(i)) for i in range(1, 14)]
def get_fav(url, data=None):
    # Fetch one search-result page and print the link of every listing on it.
    web_data = requests.get(url)
    time.sleep(2)
    soup = BeautifulSoup(web_data.text, 'lxml')
    webs = soup.select('#page_list > ul > li > a[target="_blank"]')
    if data is None:
        for web in webs:
            print(web.get('href'))
def get_info(url_1, data=None):
    # Fetch one listing page and print its title, address, price, main photo,
    # and the host's avatar and name.
    web_data = requests.get(url_1)
    soup = BeautifulSoup(web_data.text, 'lxml')
    titles = soup.select('body > div.wrap.clearfix.con_bg > div.con_l > div.pho_info > h4 > em')
    sites = soup.select('body > div.wrap.clearfix.con_bg > div.con_l > div.pho_info > p')
    prices = soup.select('#pricePart > div.day_l > span')
    images = soup.select('#curBigImage')
    avatars = soup.select('#floatRightBox > div.js_box.clearfix > div.member_pic > a > img')
    names = soup.select('#floatRightBox > div.js_box.clearfix > div.w_240 > h6 > a')
    sexs = soup.select('#floatRightBox > div.js_box.clearfix > div.member_pic > div')
    if data is None:
        for title, site, price, image, avatar, name, sex in zip(titles, sites, prices, images, avatars, names, sexs):
            data = {
                'title': title.get_text(),
                'site': site.get('title'),
                'price': price.get_text(),
                'image': image.get('src'),
                'avatar': avatar.get('src'),
                'name': name.get_text(),
                # note: 'sex' is selected above but never stored in this dict
            }
            print(data)
for single in urls:
    get_fav(single)

When run, the expected result should be that for every rental link the listing name, price, location, the host's photo, the host's ID, and similar details are printed.
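As written, though, the loop only ever calls get_fav, which just prints the listing links; get_info is never called, so no listing details appear. A minimal sketch of how the two pieces could be wired together, assuming the selectors above still match xiaozhu.com's markup (the get_href_list helper name is made up for illustration):

def get_href_list(list_url):
    # Hypothetical helper: collect the detail-page links from one search-result page
    # instead of printing them, so they can be passed on to get_info().
    web_data = requests.get(list_url)
    time.sleep(2)  # pause between requests to avoid hammering the site
    soup = BeautifulSoup(web_data.text, 'lxml')
    links = soup.select('#page_list > ul > li > a[target="_blank"]')
    return [link.get('href') for link in links]

if __name__ == '__main__':
    for list_url in urls:
        for detail_url in get_href_list(list_url):
            get_info(detail_url)  # prints one dict of listing details per rental

Alternatively, get_fav itself could call get_info(web.get('href')) in its inner loop instead of printing the link.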