011 python接口 bs4提取结果

'''
时间:2018/11/03
功能:bs4提取结果
目录:
    一: 登录拉勾网
'''
一: 登录拉勾网
# coding:utf-8
import requests
import re
from bs4 import BeautifulSoup
import urllib3
import hashlib
urllib3.disable_warnings()
import sys

class LoginLgw():
    '''
    Log in to Lagou (passport.lagou.com) through a caller-supplied session.

    The session object must provide ``headers.update``, ``get`` and ``post``;
    the script entry point passes in a ``requests`` session.
    '''

    def __init__(self, s):
        '''
        :param s: session object used for every request made by this instance
        '''
        self.s = s

    def getToeknCode(self):
        '''
        Fetch the login page and extract the anti-forgery token and code.

        :return: {"X_Anti_Forge_Token": "xxx", "X_Anti_Forge_Code": "xxx"};
                 both values are empty strings when they cannot be extracted
        '''
        # Update the session headers before requesting the login page.
        url = "https://passport.lagou.com/login/login.html"
        head = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:44.0) Gecko/20100101 Firefox/44.0",
        }
        self.s.headers.update(head)

        # Download and parse the login page (certificate verification is off).
        data = self.s.get(url, verify=False)
        soup = BeautifulSoup(data.content, "html.parser", from_encoding="utf-8")

        # Start from the "not found" result so the failure path returns it as-is.
        tokenCode = {'X_Anti_Forge_Token': "", 'X_Anti_Forge_Code': ""}
        try:
            # The values are read from the second <script> element of the page.
            t = soup.find_all("script")[1].get_text()
            token = re.findall(r"Token = '(.+?)'", t)[0]
            code = re.findall(r"Code = '(.+?)'", t)[0]
        except IndexError:
            # Raised when there are fewer than two <script> tags or when a
            # pattern has no match. Anything else (e.g. KeyboardInterrupt) is
            # deliberately left to propagate instead of being swallowed.
            print("Get Faild")
            return tokenCode

        tokenCode['X_Anti_Forge_Token'] = token
        tokenCode['X_Anti_Forge_Code'] = code
        return tokenCode

    def encryptPwd(self, passwd):
        '''
        Hash the password the way the login form expects it.

        :param passwd: plain-text password
        :return:       32-character hex digest: md5("veenike" + md5(passwd) + "veenike")
        '''
        passwd = hashlib.md5(passwd.encode("utf-8")).hexdigest()    # first md5 pass
        passwd = "veenike" + passwd + "veenike"                     # "veenike": value hard-coded in the site's JS file
        passwd = hashlib.md5(passwd.encode("utf-8")).hexdigest()    # second md5 pass
        return passwd

    def login(self, user, password):
        '''
        Log in to Lagou.

        :param user:        user name
        :param password:    plain-text password (hashed here before sending)
        :return:            decoded JSON response of the login request, or
                            None when the response body is not valid JSON
        '''
        # Fetch the anti-forgery token and code.
        tokenCode = self.getToeknCode()
        print(tokenCode)

        # Hash the password.
        password = self.encryptPwd(password)
        # NOTE(review): this prints the exact credential value sent to the
        # server; consider removing it outside of a demo.
        print(password)

        # Update the session headers for the login request.
        url = "https://passport.lagou.com/login/login.json"
        head = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:44.0) Gecko/20100101 Firefox/44.0",
            "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
            "X-Requested-With": "XMLHttpRequest",
            # NOTE(review): these header names are spelled "Anit" while the
            # dict keys use "Anti" - presumably this is what the site expects;
            # confirm before changing.
            "X-Anit-Forge-Token": tokenCode['X_Anti_Forge_Token'],
            "X-Anit-Forge-Code": tokenCode['X_Anti_Forge_Code'],
            "Referer": "https://passport.lagou.com/login/login.html",
        }
        self.s.headers.update(head)

        # Submit the login form.
        body = {
            "isValidate": 'true',
            "username": user,
            "password": password,
            "request_form_verifyCode": "",
            "submit": "",
        }
        r = self.s.post(url, data=body, verify=False)
        print(r.text)
        try:
            return r.json()
        except ValueError:
            # A body that is not valid JSON makes json() raise ValueError
            # (or a subclass of it); other errors are left to propagate.
            print("login faild: %s" % r.text)
            return None

if __name__ == "__main__":
    # Demo run: a single session is handed to LoginLgw, which uses it for both
    # the login-page fetch and the login POST. Replace the placeholder
    # credentials with real ones.
    session = requests.session()
    LoginLgw(session).login("user", "password")
{'X_Anti_Forge_Token': 'a2724c9e-40be-493b-84a9-227a3cafe955', 'X_Anti_Forge_Code': '77786694'}
d45ef25791078e956e6915ba194d776a
{"content":{"rows":[]},"message":"操作成功","state":1,"submitCode":76585064,"submitToken":"1e756b35-4bbe-4853-b1b8-767042f86771"}
 

猜你喜欢

转载自www.cnblogs.com/huafan/p/9900261.html
今日推荐