微博模拟登录

因公司需求,需要爬取微博相关大V的账号以及相关信息。一开始是直接爬取,发现在没有登录的情况下只可以爬取10条数据,所以就涉及到模拟登录。

import requests
import json
import base64
from lxml import etree
from bs4 import BeautifulSoup
import time

def login(username, password):
    """Log in through the Sina SSO endpoint and return the session used.

    The username is base64-encoded and sent as the ``su`` form field; the
    password is placed in the ``sp`` field unchanged. The response body is
    decoded as GBK and parsed as JSON; a ``retcode`` of ``"0"`` is treated
    as success.

    Args:
        username: Account name as plain text.
        password: Account password, sent as given.

    Returns:
        The ``requests.Session`` that performed the login request. It is
        returned whether or not the login succeeded; on success its cookies
        are additionally copied into the session's default ``cookie``
        header. The outcome is reported on stdout.

    Raises:
        requests.exceptions.RequestException: If the HTTP request fails or
            times out.
        UnicodeDecodeError: If the response body is not valid GBK.
        ValueError: If the response body is not valid JSON.
    """
    # Keep the caller's value intact; only the form field carries base64.
    encoded_username = base64.b64encode(username.encode('utf-8')).decode('utf-8')
    postData = {
        "entry": "sso",
        "gateway": "1",
        "from": "null",
        "savestate": "30",
        "useticket": "0",
        "pagerefer": "",
        "vsnf": "1",
        "su": encoded_username,
        "service": "sso",
        "sp": password,
        "sr": "1440*900",
        "encoding": "UTF-8",
        "cdult": "3",
        "domain": "sina.com.cn",
        "prelt": "0",
        "returntype": "TEXT",
    }
    loginURL = r'https://login.sina.com.cn/sso/login.php?client=ssologin.js(v1.4.15)'
    session = requests.Session()
    # requests never times out by default; bound the wait so an
    # unresponsive server cannot hang the caller forever.
    res = session.post(loginURL, data=postData, timeout=30)
    jsonStr = res.content.decode('gbk')
    info = json.loads(jsonStr)
    # Compare via str() so a numeric 0 is accepted as well as "0", and use
    # .get() so a reply without "retcode" counts as a failed login.
    if str(info.get("retcode")) == "0":
        print("登录成功")
        # Copy the session cookies into the default headers. Per the
        # original author's note this step is required, otherwise later
        # API calls made with this session fail.
        cookie_pairs = session.cookies.get_dict()
        session.headers["cookie"] = "; ".join(
            key + "=" + value for key, value in cookie_pairs.items()
        )
    else:
        # Fall back to the return code when the server sends no "reason";
        # the one-element tuple keeps %-formatting safe for any value type.
        reason = info.get("reason", info.get("retcode"))
        print("登录失败,原因: %s" % (reason,))
    return session

# Script entry point: log in with placeholder credentials (replace with a
# real account) and keep the resulting session for further requests.
if __name__ == "__main__":
    session = login('your account', 'your password')


猜你喜欢

转载自blog.csdn.net/Alis_xt/article/details/56666203