"""Fetch a Huya category page and print its streamers ranked by their number."""
import re
from urllib import request


class Spider():
    """Scrape streamer names and their displayed numbers from a Huya page.

    The pipeline run by :meth:`go` is: fetch the HTML, cut it into one
    fragment per streamer card, extract name/number from each fragment,
    clean the values, sort by number (descending) and print a ranking.

    NOTE(review): the regular expressions below assume each card starts with
    ``<span class="txt">`` and contains an ``<i class="nick" ...>`` element
    and a ``<span class="num">`` element -- confirm against the live page,
    whose markup may change at any time.
    """

    url = 'https://www.huya.com/g/lol'
    # A card runs from one '<span class="txt">' to the next one (or to the
    # end of the document).  Stopping at the first '</span>' instead would
    # cut the fragment before any nested '<span class="num">…</span>' ends.
    root_pattern = r'<span class="txt">([\s\S]*?)(?=<span class="txt">|\Z)'
    # Accept any attributes (e.g. a non-empty title) and capture the text.
    name_pattern = r'<i class="nick"[^>]*>([\s\S]*?)</i>'
    number_pattern = r'<span class="num">([\s\S]*?)</span>'
    # Used to drop markup nested inside a captured value.
    tag_pattern = r'<[^>]+>'

    def __fetch_content(self):
        """Download ``Spider.url`` and return the body decoded as UTF-8."""
        # The context manager closes the HTTP response even on errors.
        with request.urlopen(Spider.url) as response:
            return response.read().decode('utf-8')

    def __analysis(self, htmls):
        """Extract raw name/number matches from the page HTML.

        Args:
            htmls: The full page source as a string.

        Returns:
            A list of dicts ``{'name': [...], 'number': [...]}`` holding the
            non-empty ``re.findall`` results for each card.  Fragments that
            lack a name or a number are skipped.
        """
        anchors = []
        for fragment in re.findall(Spider.root_pattern, htmls):
            name = re.findall(Spider.name_pattern, fragment)
            number = re.findall(Spider.number_pattern, fragment)
            if name and number:
                anchors.append({'name': name, 'number': number})
        return anchors

    @staticmethod
    def __strip_tags(text):
        """Return *text* without HTML tags and surrounding whitespace."""
        return re.sub(Spider.tag_pattern, '', text).strip()

    def __refine(self, anchors):
        """Reduce each raw anchor to plain ``name`` / ``number`` strings.

        Args:
            anchors: The list produced by ``__analysis``.

        Returns:
            A lazy iterable of ``{'name': str, 'number': str}`` dicts built
            from the first match of each field.
        """
        clean = self.__strip_tags
        return map(
            lambda anchor: {
                'name': clean(anchor['name'][0]),
                'number': clean(anchor['number'][0]),
            },
            anchors,
        )

    def __soft(self, anchors):
        """Return the anchors sorted by numeric value, largest first."""
        return sorted(anchors, key=self.__soft_seed, reverse=True)

    def __soft_seed(self, anchor):
        """Convert an anchor's ``number`` text into a float sort key.

        The first decimal number in the text is parsed; if the text contains
        '万' (ten thousand) the value is multiplied by 10000.  Text without
        any digits yields ``0.0`` so such entries sort last.
        """
        text = anchor['number'].replace(',', '')
        found = re.search(r'\d+(?:\.\d+)?', text)
        if found is None:
            return 0.0
        number = float(found.group())
        if '万' in text:
            number *= 10000
        return number

    def __show(self, anchors):
        """Print one ``rankN : name number`` line per anchor, in order."""
        for rank, anchor in enumerate(anchors, start=1):
            print('rank' + str(rank) + ' : ' + anchor['name']
                  + ' ' + anchor['number'])

    def go(self):
        """Run the whole fetch -> parse -> clean -> sort -> print pipeline."""
        htmls = self.__fetch_content()
        anchors = self.__analysis(htmls)
        anchors = list(self.__refine(anchors))
        anchors = self.__soft(anchors)
        self.__show(anchors)


if __name__ == '__main__':
    # Only hit the network when executed as a script, not on import.
    spider = Spider()
    spider.go()
添加回答
举报
0/150
提交
取消