首页 > 手记 > 基于 Python 的新浪微博模拟登录

基于 Python 的新浪微博模拟登录

标签：

Python

主文件如下：

#coding=utf-8

import requests

import urllib

import urllib2

import cookielib

import WeiboEncode

import WeiboSearch

import time

import re

import random

import httplib

class WeiboLogin:
    """Simulated browser login to Sina Weibo through the login.sina.com.cn SSO URLs.

    The flow is: install a cookie-aware urllib2 opener, fetch the prelogin
    data (server time, nonce, RSA public key, rsakv), POST the encoded
    credentials, then follow the redirect URL found in the response.
    """

    def __init__(self, user, pwd, enableProxy = False):
        """Store the credentials and the endpoints used by the login flow.

        Args:
            user: account name (the sample script passes an e-mail address).
            pwd: account password in plain text.
            enableProxy: whether to route HTTP traffic through the proxy
                configured in EnableCookie; off by default.
        """
        print("Initializing WeiboLogin...")
        self.userName = user
        self.passWord = pwd
        self.enableProxy = enableProxy
        self.serverUrl = "http://login.sina.com.cn/sso/prelogin.php?entry=weibo&callback=sinaSSOController.preloginCallBack&su=&rsakt=mod&client=ssologin.js(v1.4.11)&_=1379834957683"
        self.loginUrl = "http://login.sina.com.cn/sso/login.php?client=ssologin.js(v1.4.11)"
        # The User-Agent header identifies the client as a desktop Firefox browser.
        self.postHeader = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; rv:24.0) Gecko/20100101 Firefox/24.0'}

    def Login(self):
        """Run the whole login flow.

        Returns:
            True when the redirect URL could be extracted and opened,
            False when the prelogin data or the redirect could not be obtained.
        """
        self.EnableCookie(self.enableProxy)  # cookie / proxy configuration

        # First step of the login: fetch the values needed to encode the password.
        serverInfo = self.GetServerTime()
        if serverInfo is None:
            # GetServerTime reports failure by returning None; unpacking it
            # would raise TypeError instead of reporting a failed login.
            print('Login error!')
            return False
        serverTime, nonce, pubkey, rsakv = serverInfo

        # Encode the user name and the password into the POST body.
        postData = WeiboEncode.PostEncode(self.userName, self.passWord, serverTime, nonce, pubkey, rsakv)
        print("Post data length:\n%d" % len(postData))

        req = urllib2.Request(self.loginUrl, postData, self.postHeader)
        print("Posting request...")
        result = urllib2.urlopen(req)
        text = result.read()

        try:
            # Parse the redirect target (the page the login jumps to) and visit it.
            loginUrl = WeiboSearch.sRedirectData(text)
            urllib2.urlopen(loginUrl)
        except Exception:
            print('Login error!')
            return False

        print('Login sucess!')
        return True

    def EnableCookie(self, enableProxy):
        """Install a global urllib2 opener with cookie support (and a proxy if asked).

        Args:
            enableProxy: when true, HTTP requests go through the hard-coded
                proxy address below.
        """
        cookiejar = cookielib.LWPCookieJar()
        # HTTPCookieProcessor keeps the cookies it receives in the given jar.
        cookie_support = urllib2.HTTPCookieProcessor(cookiejar)

        if enableProxy:
            proxy_support = urllib2.ProxyHandler({'http':'59.59.100.123:8118'})
            # build_opener returns an OpenerDirector chaining the given handlers.
            opener = urllib2.build_opener(proxy_support, cookie_support, urllib2.HTTPHandler)
            print("Proxy enabled")
        else:
            opener = urllib2.build_opener(cookie_support, urllib2.HTTPHandler)

        # Make every later urllib2.urlopen call use this opener.
        urllib2.install_opener(opener)

    def GetServerTime(self):
        """Fetch the prelogin data used to encode the password.

        Returns:
            A (serverTime, nonce, pubkey, rsakv) tuple, or None when the
            response could not be parsed.
        """
        print("Getting server time and nonce...")
        serverData = urllib2.urlopen(self.serverUrl).read()
        print(serverData)

        try:
            serverTime, nonce, pubkey, rsakv = WeiboSearch.sServerData(serverData)
            return serverTime, nonce, pubkey, rsakv
        except Exception:
            print('Get server time & nonce error!')
            return None

def fetch_weibo(id, filename):
    """Download a user's Weibo page without the API and append it to a file.

    The raw page is appended first; then, for each chunk delimited by
    "WB_detail", the text captured after a nick-name attribute is appended
    on its own line. Per the original author's note only the first few
    posts are present in the page.

    Args:
        id: user id as a string; it is concatenated onto the profile URL.
        filename: path of the output file, opened in append mode.
    """
    myurl = 'http://weibo.com/u/' + id
    nickPattern = re.compile(r'nick-name=\".+?\">\\n +(.+?)<')

    # The context manager closes the file even when the download or a write fails.
    with open(filename, 'a') as target:
        line = urllib2.urlopen(myurl).read()
        target.write(line)

        # NOTE(review): the original indentation was lost; the parsing below is
        # assumed to belong to the branch that found the marker - confirm.
        if re.search(r'\"WB_detail', line):
            print("success")
            for fraction in re.split(r'\"WB_detail\"', line):
                matchObj = nickPattern.search(fraction)
                if matchObj:
                    target.write(matchObj.group(1))
                    target.write("\n")

def fetchqueryresult():
    """Collect user ids from the Weibo people-search result pages.

    Pages 37 through 50 of the search URL are fetched; every id found after
    "ids=" is printed and appended, one per line, to the file named
    "filename". The function pauses a little over two seconds between pages.
    """
    # People-search URL; the page number is appended for each request.
    myurl = "http://s.weibo.com/user/&auth=ord&age=22y&gender=women&region=custom:33:1&page="
    idPattern = re.compile(r'ids\=(\d+?)\\')

    # The context manager closes the output file even if a request fails.
    with open("filename", 'a') as target:
        for i in range(37, 51):  # first page, last page + 1
            # The page number must be part of the URL, otherwise every
            # iteration downloads the same page.
            line = urllib2.urlopen(myurl + str(i)).read()

            matchObj = idPattern.search(line)
            while matchObj:
                userId = matchObj.group(1)
                print(userId)
                target.write(userId)
                target.write("\n")
                # Continue after the last occurrence of this id so that the
                # same id is not reported again.
                line = re.split(userId, line)[-1]
                matchObj = idPattern.search(line)

            print(i)
            time.sleep(2 + random.random())

def getjson():
    """Call the Weibo user_timeline API and print the response text.

    Per the original author's note this returns the post list of the
    logged-in user. The request carries the `source` parameter as its
    query string and a browser-like User-Agent header.
    """
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; rv:24.0) Gecko/20100101 Firefox/24.0'}
    url = "https://api.weibo.com/2/statuses/user_timeline.json"
    your_param = {'source': '1675437817'}  # request parameters

    # requests appends params to the URL as a query string, e.g. a base URL
    # http://s.weibo.com/weibo/s with {'Refer': 'sina_index'} becomes
    # http://s.weibo.com/weibo/s?Refer=sina_index.
    # The headers were built but never sent in the original; pass them along.
    result = requests.get(url, params=your_param, headers=headers)

    print(result.text)

if __name__ == '__main__':
    # This branch runs only when the file is executed as the main program;
    # when the file is imported, __name__ is the module's name instead.
    weiboLogin = WeiboLogin('tanglie23@163.com', 'XXXXXXXX')  # account e-mail, password

    if weiboLogin.Login():
        print("登陆成功！")
        # NOTE(review): the original indentation was lost; the API call is
        # assumed to run only after a successful login - confirm.
        myurl = "http://api.weibo.com/2/statuses/timeline_batch.json?source=1675437817&uids=5029941840"
        htmlContent = urllib2.urlopen(myurl).read()
        print(htmlContent)

另外要用到的两个模块是 WeiboSearch.py 和 WeiboEncode.py。

WeiboEncode.py代码如下：

#coding=utf-8

import urllib

import base64

import rsa

import binascii

def PostEncode(userName, passWord, serverTime, nonce, pubkey, rsakv):
    """Build the URL-encoded POST body for the SSO login request.

    The user name is encoded by GetUserName (base64) and the password by
    get_pwd (RSA); the remaining form fields are fixed values or are
    copied from the prelogin data.

    Returns:
        The form fields encoded with urllib.urlencode.
    """
    formFields = {
        'entry': 'weibo',
        'gateway': '1',
        'from': '',
        'savestate': '7',
        'userticket': '1',
        'ssosimplelogin': '1',
        'vsnf': '1',
        'vsnval': '',
        'su': GetUserName(userName),
        'service': 'miniblog',
        'servertime': serverTime,
        'nonce': nonce,
        'pwencode': 'rsa2',
        'sp': get_pwd(passWord, serverTime, nonce, pubkey),
        'encoding': 'UTF-8',
        'prelt': '115',
        'rsakv': rsakv,
        'url': 'http://weibo.com/ajaxlogin.php?framelogin=1&callback=parent.sinaSSOController.feedBackUrlCallBack',
        'returntype': 'META'
    }
    return urllib.urlencode(formFields)

def GetUserName(userName):
    """Encode the user name for the `su` login field.

    The name is percent-quoted and then base64-encoded. b64encode is used
    instead of encodestring because encodestring inserts a newline after
    every 76 output characters, which left embedded newlines in the result
    for long names.

    Args:
        userName: the account name as a string.

    Returns:
        The base64 text as a native string without any newline.
    """
    # Local import so the function works where quote lives in urllib
    # (Python 2) as well as where it lives in urllib.parse (Python 3).
    try:
        from urllib import quote
    except ImportError:
        from urllib.parse import quote

    quoted = quote(userName)
    if not isinstance(quoted, bytes):
        # quote() output is plain ASCII, so this conversion cannot fail.
        quoted = quoted.encode('ascii')

    encoded = base64.b64encode(quoted)
    if not isinstance(encoded, str):
        encoded = encoded.decode('ascii')
    return encoded

def get_pwd(password, servertime, nonce, pubkey):
    """RSA-encrypt the password for the login form and return it hex-encoded.

    Args:
        password: plain-text password.
        servertime: server time from the prelogin data.
        nonce: nonce from the prelogin data.
        pubkey: RSA modulus as a hexadecimal string.

    Returns:
        The ciphertext converted to hexadecimal digits.
    """
    modulus = int(pubkey, 16)
    publicKey = rsa.PublicKey(modulus, 65537)  # build the public key

    # Plain-text layout, per the original author taken from the site's
    # JavaScript encryption file: "<servertime>\t<nonce>\n<password>".
    plainText = ''.join([str(servertime), '\t', str(nonce), '\n', str(password)])

    cipherText = rsa.encrypt(plainText, publicKey)
    return binascii.b2a_hex(cipherText)

WeiboSearch.py代码如下：

#coding=utf-8

import re

import json

def sServerData(serverData):
    """Extract the server time, nonce, public key and rsakv from prelogin data.

    The response is expected to wrap a JSON object in parentheses
    (a callback call); the text between the first "(" and the last ")" is
    decoded as JSON.

    Args:
        serverData: text of the prelogin response.

    Returns:
        A (serverTime, nonce, pubkey, rsakv) tuple; serverTime is converted
        to a string, the other values are returned as decoded.

    Raises:
        ValueError: the response has no parenthesised payload, or the
            payload is not valid JSON.
        KeyError: one of the four fields is missing from the payload.
    """
    found = re.search(r'\((.*)\)', serverData)
    if found is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError("no parenthesised JSON payload in server data: %r" % (serverData[:80],))

    data = json.loads(found.group(1))
    serverTime = str(data['servertime'])
    nonce = data['nonce']
    pubkey = data['pubkey']
    rsakv = data['rsakv']

    print("Server time is: %s" % serverTime)
    print("Nonce is: %s" % nonce)
    return serverTime, nonce, pubkey, rsakv

def sRedirectData(text):
    """Extract the redirect URL from a page that calls location.replace(...).

    Args:
        text: body of the login response.

    Returns:
        The quoted argument of the first location.replace call.

    Raises:
        ValueError: the text contains no location.replace call.
    """
    found = re.search(r'location\.replace\([\'"](.*?)[\'"]\)', text)
    if found is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError("no location.replace redirect found in login response")

    loginUrl = found.group(1)
    print('loginUrl: %s' % loginUrl)
    return loginUrl

目前该爬虫可以自动登录，以及调用新浪微博的普通API。但是批量取回他人的微博需要高级授权，目前正在申请。

点击查看更多内容

为 TA 点赞

若觉得本文不错，就分享一下吧！

评论

评论

共同学习，写下你的评论

评论加载中...

展开查看更多评论

作者其他优质文章

正在加载中

holdtom

手记
篇

粉丝

240

获赞与收藏

992

关注作者，订阅最新文章

阅读免费教程

Python 办公自动化教程

17个小节 25700 869

Python 算法入门教程

15个小节 27419 1071

Python 进阶应用教程

38个小节 65723 1031

推荐

评论

收藏

共同学习，写下你的评论



感谢您的支持，我会继续努力的～

扫码打赏，你说多少就多少

赞赏金额会直接到老师账户

支付方式

打开微信扫一扫，即可进行扫码打赏哦

今天注册有机会得

100积分直接送

付费专栏免费学

大额优惠券免费领

立即参与放弃机会

点击
抽奖

慕课手记新用户专享福利

恭喜你，你的运气太好了，居然抽中了 100个积分！

恭喜你，抽中了价值元的专栏！

太棒了，直接落到你账户里！

积分商城里的罗技鼠标、机械键盘、
Kindle 阅读器、小米平衡车
Apple iPad （10.2英寸）、大额优惠券
在等着你去兑换了噢

作者：

免费赠送

兑换码：1111222211 复制

优惠券可用于购买实战课、体系课
无门槛使用

先去看看，有什么好东西马上兑换我爱学习，选课去


热搜

最近搜索清空

基于python的新浪微博模拟登陆

阅读免费教程