# -*- coding: utf-8 -*-
"""Log in to zhihu.com with e-mail, password and a manually typed captcha.

Python 2 script (urllib2 / cookielib).  The captcha image is saved to
``captcha.gif`` so the user can read it and type it at the prompt.
"""
import urllib
import urllib2
import cookielib
import os
import re
import time

_url_zhihu = 'http://www.zhihu.com'
_url_email_login = _url_zhihu + '/login/email'
_captcha_site = _url_zhihu + '/captcha.gif'

# Browser-like headers installed on the opener after the first page load.
# 'Content-Length' is deliberately NOT listed: urllib2 computes it for POST
# bodies, and a fixed value would also be sent on body-less GET requests
# (such as the captcha download) made through the same opener.
_header = {
    'Host': 'www.zhihu.com',
    'Origin': 'http://www.zhihu.com',
    'Pragma': 'no-cache',
    'Referer': 'http://www.zhihu.com/',
    'User-Agent': ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_4) '
                   'AppleWebKit/537.36 (KHTML, like Gecko) '
                   'Chrome/45.0.2454.93 Safari/537.36'),
    'X-Requested-With': 'XMLHttpRequest',
    'Accept': '*/*',
    'Accept-Language': 'zh-CN,zh;q=0.8,en;q=0.6',
    'Cache-Control': 'no-cache',
    'Connection': 'keep-alive',
}

# Non-greedy by construction: stop at the closing quote of the value so the
# match cannot run on to a later quote on the same line.
_XSRF_RE = re.compile(r'name="_xsrf"\s*value="([^"]*)"')


def getXSRF(data):
    """Extract the ``_xsrf`` token from an HTML page.

    (BeautifulSoup would be an alternative to the regular expression.)

    :param data: HTML text of the page.
    :returns: the token string.
    :raises IndexError: if no ``_xsrf`` field is present in ``data``.
    """
    match = _XSRF_RE.search(data)
    if match is None:
        raise IndexError('no _xsrf field found in the page')
    return match.group(1)


def get_xsrf_opener(head):
    """Build a cookie-aware opener and fetch the ``_xsrf`` token.

    The home page is requested first so that the session cookies land in the
    opener's cookie jar; the token is also written to the file ``xsrf``.

    :param head: dict of header name -> value to install on the opener.
    :returns: ``(xsrf, opener)``.
    """
    cookie = cookielib.CookieJar()
    handler = urllib2.HTTPCookieProcessor(cookie)
    opener = urllib2.build_opener(handler)
    response = opener.open(_url_zhihu)
    xsrf = getXSRF(response.read())
    with open('xsrf', 'w') as f:
        f.write(xsrf)
    opener.addheaders = list(head.items())
    return xsrf, opener


def get_captcha_url():
    """Return the captcha URL with a millisecond timestamp as cache-buster."""
    # The timestamp must go into the query string; appending it directly to
    # '/captcha.gif' would produce a different (non-existent) path.
    return '%s?r=%d' % (_captcha_site, int(time.time() * 1000))


def get_captcha(url, opener=None):
    """Download the captcha image to ``captcha.gif``.

    :param url: captcha image URL.
    :param opener: the cookie-carrying opener that will later perform the
        login.  It must be the same one: the server ties the captcha to the
        session cookie, so an image fetched in another session never matches.
        When omitted, a plain ``urllib2.urlopen`` is used (old behaviour).
    """
    fetch = opener.open if opener is not None else urllib2.urlopen
    response = fetch(url)
    try:
        image = response.read()
    finally:
        response.close()
    with open('captcha.gif', 'wb') as f:
        f.write(image)


def login(opener, xsrf='', email='', password='', captcha='', rememberme='true'):
    """POST the login form and return the raw response body.

    :param opener: opener returned by :func:`get_xsrf_opener`.
    :param xsrf: token returned by :func:`get_xsrf_opener`.
    :param email: account e-mail.
    :param password: account password.
    :param captcha: text the user read from ``captcha.gif``.
    :param rememberme: value of the form's ``rememberme`` field.
    :returns: response body as returned by the server.
    """
    values = {
        'email': email,
        'password': password,
        'captcha': captcha,
        'rememberme': rememberme,
        '_xsrf': xsrf,
    }
    data = urllib.urlencode(values)
    request = urllib2.Request(_url_email_login, data)
    response = opener.open(request)
    return response.read()


def _init():
    """Main program: prompt for credentials and captcha, then log in."""
    email = raw_input('email:')
    password = raw_input('password:')
    xsrf, opener = get_xsrf_opener(_header)
    # Fetch the captcha through the same opener (same cookies) as the login.
    get_captcha(get_captcha_url(), opener)
    captcha = raw_input('captcha:')
    f = login(opener, xsrf, email, password, captcha)
    print(f)


if __name__ == '__main__':
    _init()

# ----------------------------------------
# Original question, kept for context. Output returned by the script as first
# posted (captcha fetched with a bare urllib2.urlopen):
# {"r":1,"errcode":1991829,"data":{"captcha":"\u9a8c\u8bc1\u7801\u9519\u8bef"},"msg":"\u9a8c\u8bc1\u7801\u9519\u8bef"}
# The message (the \u-escaped text decodes to "captcha error") says the
# captcha is wrong, although a captcha was passed in -- why does the login
# still fail? Any pointers from Python experts would be appreciated, thanks.
2 回答
森林海
TA贡献2011条经验 获得超2个赞
OCR 图片识别库:Tesseract OCR;Python 封装的接口:pytesseract。来个示例:

    #!/usr/bin/env python
    # -*- coding: utf-8 -*-
    try:
        import Image
    except ImportError:
        from PIL import Image
    import pytesseract

    print(pytesseract.image_to_string(Image.open('demo.jpeg')))

我试了一下,知乎的验证码挺难识别出来,不过可以训练 Tesseract OCR,让它达到更高的识别率。下面是一些参考的文章:《Tesseract OCR 初探》、《验证码识别工具 - tesseract》
呼如林
TA贡献1798条经验 获得超3个赞
你这样是不行的:每次请求的验证码都是随机的,你自己模拟的请求和你在页面上的请求不是同一个请求(不在同一个会话里),验证码肯定是不一样的。可以试一下用 OCR 工具识别验证码,并且在登录时带上获取验证码时的 cookie。
添加回答
举报
0/150
提交
取消