Pythonクローラー学習(18):人人網へのCookieログイン

import requests
from lxml import etree
from codeClass import YDMHttp


# Wrapper around the Yundama captcha-recognition platform client.
path = 'code.jpg'
def getCodeText(imgPath, codeType):
    """Recognise a captcha image through the YDMHttp client.

    Args:
        imgPath: Path of the captcha image file passed to ``decode``.
        codeType: Captcha type ID passed to ``decode`` (e.g. 1004 means
            four alphanumeric characters; pricing differs per type, see
            http://www.yundama.com/price.html).

    Returns:
        The recognition result returned by ``YDMHttp.decode``, or ``None``
        when the user name is still the ``'username'`` placeholder.
    """
    # Platform account credentials.
    account_name = '********'
    account_password = '******'

    # Software ID and software key; both are required developer parameters,
    # obtained from "My Software" in the developer console.
    software_id = 9812
    software_key = '3dfbf90******0d982ffb1c93'

    # Recognition timeout, in seconds.
    wait_seconds = 20

    # Guard: a placeholder user name means the settings were never filled in.
    if account_name == 'username':
        print('请设置好相关参数再测试')
        return None

    client = YDMHttp(account_name, account_password, software_id, software_key)

    # Log in to the platform.
    uid = client.login()
    print('uid: %s' % uid)

    # Query the remaining account balance.
    balance = client.balance()
    print('余额: %s' % balance)

    # Run the recognition: image path, captcha type ID, timeout (seconds).
    cid, result = client.decode(imgPath, codeType, wait_seconds)
    print('cid: %s, result: %s' % (cid, result))
    return result


# Create a single session object; cookies set by the server are stored on it
# and sent automatically with every later request made through it.
session = requests.session()

# 1. Capture and recognise the captcha image.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.87 Safari/537.36'
}
url = "http://www.renren.com/SysHome.do"

# Fetch the login page and the captcha through the session (not bare
# requests.get) so the captcha is issued to the same cookie jar that later
# submits the login form.
page_text = session.get(url=url, headers=headers).text
tree = etree.HTML(page_text)
code_img_srcs = tree.xpath('//*[@id="verifyPic_login"]/@src')
if not code_img_srcs:
    # Fail with a clear message instead of an opaque IndexError.
    raise RuntimeError('captcha image (id="verifyPic_login") not found on the login page')
code_img_src = code_img_srcs[0]
code_img_data = session.get(url=code_img_src, headers=headers).content
with open(path, 'wb') as fp:
    fp.write(code_img_data)

# Recognise the captcha with the captcha-recognition platform.
result = getCodeText(path, 5000)
print(result)

# 2. Send the POST request (simulated login).
login_url = 'http://www.renren.com/ajaxLogin/login?1=1&uniqueTimestamp=2020021132191'
data = {
    'email': '144******[email protected]',
    'icode': result,
    'origURL': 'http://www.renren.com/home',
    'domain': 'renren.com',
    'key_id': '1',
    'captcha_type': 'web_login',
    'password': '7d279330f75******a27929432e3928dee4697792f3b0',
    'rkey': '8a9fbb66f55b09f87d64c8111173b047',
    'f': 'http%3A%2F%2Fwww.renren.com%2F422267891%2Fprofile'
}
# Post the login form through the session so the cookies in the response
# are kept on the session.
response = session.post(url=login_url, headers=headers, data=data)
print(response.status_code)

# 3. Fetch the current user's profile page.
# NOTE: requesting this URL with a bare requests.get sends no cookies.
# The manual alternative is to copy the Cookie header from a packet-capture
# tool into the request headers; the session makes that unnecessary.
detail_url = "http://www.renren.com/422******91/profile"
# Send the GET through the cookie-carrying session.
detai_page_test = session.get(url=detail_url, headers=headers).text
with open('bobo.html', 'w', encoding='utf-8') as fp:
    fp.write(detai_page_test)
オリジナル記事を23件公開 · いいね 0 · 閲覧数 662

おすすめ

転載: blog.csdn.net/haimian_baba/article/details/104752053