"""Download JPEG images from the paginated listing at www.meizitu.com."""

import os
import re
import urllib.request

# Originally reported failure of this script:
#   UnicodeDecodeError: 'utf-8' codec can't decode byte 0xc3 in position 255:
#   invalid continuation byte
# It was raised by the unconditional ``.decode('utf-8')`` calls on the fetched
# pages; ``decode_html`` below replaces them with a fallback chain.
# NOTE(review): the site presumably serves a GB-family encoding - confirm.

USER_AGENT = (
    'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
    '(KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 '
    'SE 2.X MetaSr 1.0'
)

# Marker that precedes the current page number in the listing page.
PAGE_MARKER = 'thisclass'

# An ``img src=`` attribute whose quoted value reaches a ``.jpg`` suffix.
# The character class stops at quotes and angle brackets so that a match can
# never run past the end of its own attribute into a following tag.
IMG_SRC_RE = re.compile(r'img src=["\']([^"\'<>]*?\.jpg)')


def url_open(url):
    """Fetch *url* with a browser-like User-Agent and return the raw bytes."""
    req = urllib.request.Request(url)
    req.add_header('User-Agent', USER_AGENT)
    # Open the prepared request (not the bare URL) so the header is actually
    # sent, and close the connection as soon as the body has been read.
    with urllib.request.urlopen(req) as response:
        return response.read()


def decode_html(raw):
    """Decode page bytes to text, trying UTF-8 first and then GB18030.

    GB18030 is a superset of GBK/GB2312. If neither codec accepts the data,
    the bytes are decoded as UTF-8 with undecodable bytes replaced, so this
    function never raises UnicodeDecodeError.
    """
    for encoding in ('utf-8', 'gb18030'):
        try:
            return raw.decode(encoding)
        except UnicodeDecodeError:
            continue
    return raw.decode('utf-8', errors='replace')


def extract_current_page(html):
    """Return the text that follows the ``thisclass`` marker in *html*.

    The returned string starts 12 characters after the beginning of the
    marker and ends just before the next ``<``.

    Raises:
        ValueError: if the marker is not present in *html*.
    """
    marker_pos = html.find(PAGE_MARKER)
    if marker_pos == -1:
        raise ValueError('page marker %r not found in HTML' % PAGE_MARKER)
    # NOTE(review): the offset 12 is kept from the original script; it assumes
    # three characters sit between the marker and the number - confirm
    # against the live markup.
    start = marker_pos + 12
    end = html.find('<', start)
    return html[start:end]


def extract_img_addrs(html):
    """Return every quoted ``img src`` value in *html* up to its ``.jpg``."""
    return IMG_SRC_RE.findall(html)


def build_page_urls(base_url, newest, pages):
    """Return up to *pages* listing URLs, counting down from page *newest*.

    Page numbers never go below 1, so fewer than *pages* URLs are returned
    when *newest* is smaller than *pages*.
    """
    oldest = max(newest - pages, 0)
    return [
        base_url + 'more_' + str(number) + '.html'
        for number in range(newest, oldest, -1)
    ]


def get_page(url):
    """Fetch *url* and return its current page number as a string."""
    return extract_current_page(decode_html(url_open(url)))


def find_imgs(url):
    """Fetch *url* and return the list of JPEG addresses found in it."""
    return extract_img_addrs(decode_html(url_open(url)))


def sava_imgs(folder, img_addrs):
    """Download every address in *img_addrs* into the current directory.

    *folder* is accepted for interface compatibility but is not used here:
    files are written relative to the working directory, which
    ``download_mm`` switches to the target folder before calling this.
    """
    for each in img_addrs:
        filename = each.split('/')[-1]
        print(filename)
        with open(filename, 'wb') as f:
            f.write(url_open(each))


def download_mm(folder='ooxx', pages=10):
    """Download the images of the newest *pages* listing pages into *folder*.

    The folder is created if needed and becomes the working directory.
    """
    # exist_ok lets the script be re-run without failing on the existing folder.
    os.makedirs(folder, exist_ok=True)
    os.chdir(folder)

    url = 'http://www.meizitu.com/a/'
    newest = int(get_page(url))

    # Step down one page at a time; subtracting the loop index from a running
    # counter (as the original did) skips pages: n, n-1, n-3, n-6, ...
    for page_url in build_page_urls(url, newest, pages):
        sava_imgs(folder, find_imgs(page_url))


if __name__ == '__main__':
    download_mm()
添加回答
举报
0/150
提交
取消