A Python crawler that scrapes an app and wraps the result in an API endpoint, using flask-restful

#Environment python3.6

# Target: the "worry-free IOU" app

# Fetch the user's personal information using the account's phone number and password (my own phone number is masked in the code)

The crawler code:

# coding:utf8
import hashlib
import time
import urllib3
import json


# Default headers sent with every request; the User-Agent is an Android
# browser string ending in "jinjiedao".
headers = {
    'Accept': ' */*',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'zh-CN,en-US;q=0.8',
    'User-Agent': (
        'Mozilla/5.0 (Linux; Android 6.0.1; Redmi 3S Build/MMB29M; wv) '
        'AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 '
        'Chrome/53.0.2785.49 Mobile MQQBrowser/6.2 TBS/043632 '
        'Safari/537.36 jinjiedao'
    ),
}
# Raw payloads as returned by the API, keyed by request name.
origin_data = {}
# Fields extracted from the raw payloads.
result = {}
# Shared urllib3 connection pool used for every request.
http = urllib3.PoolManager()
# Silence urllib3's warnings.
urllib3.disable_warnings()

# get token
def login(phone, password):
    """Log in with a phone number and password and return the session token.

    The password is sent as the hex digest of MD5 applied twice: the MD5 of
    the hex digest of the MD5 of the UTF-8 encoded password. The payload
    returned by ``excute`` is also stored in ``origin_data['login']``.

    :param phone: account phone number (str); also appended to the Referer URL.
    :param password: plain-text account password (str).
    :return: the value stored under ``'token'`` in the login payload.
    :raises KeyError: if the payload has no ``'token'`` entry, e.g. because
        ``excute`` returned the error response of a rejected login.
    """
    login_url = 'https://www.gushistory.com/jjdApi/user/login'
    referer = 'https://www.gushistory.com/jjdapp/html/register/login.html?telephone=' + phone

    first_pass = hashlib.md5(password.encode('utf-8')).hexdigest()
    hashed_pwd = hashlib.md5(first_pass.encode('utf-8')).hexdigest()
    data = {'c_telephone': phone, 'c_pwd': hashed_pwd}

    # The HTTP header is spelled 'Referer'; the former key 'refer' is not a
    # header servers recognise. It is set only for this request so the
    # phone-bearing URL does not linger in the shared headers afterwards.
    headers['Referer'] = referer
    try:
        response = excute(login_url, 'GET', data)
    finally:
        headers.pop('Referer', None)

    origin_data['login'] = response
    if 'token' not in response:
        # Same exception type as the bare lookup, but with an explanation.
        raise KeyError('token (login failed: no token in the login response)')
    return response['token']

# Get the information after login
def getAccountInfo(token):
    """Fetch the home-page info for ``token`` and extract the account summary.

    The raw payload returned by ``excute`` is stored in
    ``origin_data['account']``. The extracted fields are returned as a new
    dict and mirrored into the module-level ``result``.

    :param token: value sent as the ``token`` request field.
    :return: dict with ``balance``, ``wait_repay_amt``, ``wait_receive``,
        ``lender_cnt``, ``phone``, ``name`` and, only when the payload
        provides it, ``id_card``.
    :raises KeyError: if the payload lacks ``userInfo``, ``borrowInfo`` or
        one of the mandatory fields read below.
    """
    account_url = 'https://www.gushistory.com/jjdApi/user/getHomepageInfo'

    # The HTTP header is spelled 'Referer' (the former key 'refer' is not a
    # recognised header); it is set only for the duration of this request.
    headers['Referer'] = 'https://www.gushistory.com/jjdapp/html/index.html'
    try:
        response = excute(account_url, 'GET', {'token': token})
    finally:
        headers.pop('Referer', None)

    origin_data['account'] = response
    user_info = response['userInfo']

    # Build a fresh dict per call: filling the shared ``result`` in place let
    # an ``id_card`` from an earlier account survive into a later result.
    info = {
        'balance': user_info['n_left_amt'],  # balance
        'wait_repay_amt': user_info['n_to_repay_amt'],  # amount to be repaid
        'wait_receive': user_info['n_to_receive_amt'],  # amount to be received
        # presumably the number of lenders (key ends in "_cnt") - confirm
        'lender_cnt': response['borrowInfo']['n_lender_cnt'],
        'phone': user_info['c_telephone'],  # phone number
        # name; falls back to a fixed marker when the payload has none
        'name': user_info.get('c_name', 'Not real name'),
    }
    if 'c_id_card_no' in user_info:  # ID number
        info['id_card'] = user_info['c_id_card_no']

    # Keep the module-level ``result`` in sync for code that reads it directly.
    result.clear()
    result.update(info)
    return info


def excute(url, method, data):
    """Send one request through the shared pool and decode the JSON reply.

    A millisecond ``timestamp`` field is added to the request fields.

    :param url: full URL to request.
    :param method: HTTP method name, e.g. ``'GET'``.
    :param data: dict of request fields, or ``None`` for no extra fields.
        The dict is copied, so the caller's object is left untouched.
    :return: ``response['object']`` when the decoded reply has ``code`` equal
        to 200; otherwise the whole decoded reply with an added
        ``'error': 'Processing failed'`` entry.
    """
    # Copy instead of mutating the caller's dict when adding the timestamp.
    fields = dict(data) if data is not None else {}
    fields['timestamp'] = int(round(time.time() * 1000))

    r = http.request(url=url, method=method, headers=headers, fields=fields)
    response = json.loads(r.data.decode('utf-8'))

    # .get(): a reply without 'code' is treated as a failure rather than
    # raising KeyError.
    if response.get('code') == 200:
        return response['object']
    response['error'] = 'Processing failed'
    return response




if __name__ == "__main__":
    # Demo run; the phone number is masked and the password is a placeholder.
    phone = '1516842****'
    pwd = '123456'
    token = login(phone, pwd)
    # Print the extracted data as JSON. The return value used to be
    # discarded, so running the script produced no output at all.
    print(json.dumps(getAccountInfo(token), ensure_ascii=False, indent=2))

Running the script yields the account data as JSON.



The above is the crawler part. Next, write the API interface template:

from flask import Flask, g
from flask_restful import reqparse, Api, Resource
from flask_httpauth import HTTPTokenAuth


# Flask related variable declaration
app = Flask(__name__)
# flask-restful wrapper around the app. This must be ``Api`` bound to the
# name ``api``: ``Fire`` is never imported, and the route registration
# further down calls ``api.add_resource``.
api = Api(app)

# RESTfulAPI parameter parsing -- put / post parameter parsing
# Both fields are mandatory strings; ``help`` is the message reported when
# one of them is missing.
parser_put = reqparse.RequestParser()
parser_put.add_argument("user", type=str, required=True, help="need user data")
parser_put.add_argument("pwd", type=str, required=True, help="need pwd data")

#This part is the crawler function
def to_do(arg1, args2):
    """Placeholder for the crawler entry point.

    Both arguments are ignored and a fixed string is returned.
    """
    placeholder = "this is info"
    return placeholder

# Manipulate (post/get) resource list
class TodoList(Resource):
    """Resource answering POST requests that carry ``user`` and ``pwd``."""

    def post(self):
        """
        Parse ``user`` and ``pwd`` from the request and return the ``to_do`` result.

        Example: curl http://127.0.0.1:5000/users -X POST -d "user=<phone>&pwd=<password>"

        Returns the pair ``({"info": <to_do result>}, 201)``.
        """
        parsed = parser_put.parse_args()
        payload = {"info": to_do(parsed['user'], parsed['pwd'])}

        # Respond with status 201, as the handler always has.
        status = 201
        return payload, status


# Register TodoList under the "/users" path; per the article, the address is
# http://127.0.0.1:5000/users
api.add_resource(TodoList, "/users")


# Start the Flask server only when this file is run as a script.
# NOTE(review): debug=True turns on Flask's debug mode - confirm this is
# only ever used for local development.
if __name__ == "__main__":
    app.run(debug=True)


Next, insert the crawler part into the template. The complete code is as follows:

from flask import Flask, g
from flask_restful import reqparse, Api, Resource
from flask_httpauth import HTTPTokenAuth
import hashlib
import time
import urllib3
import json


# Flask related variable declaration
app = Flask(__name__)
# flask-restful wrapper around the app. This must be ``Api`` bound to the
# name ``api``: ``Fire`` is never imported, and the route registration
# further down calls ``api.add_resource``.
api = Api(app)

# RESTfulAPI parameter parsing -- put / post parameter parsing
# Both fields are mandatory strings; ``help`` is the message reported when
# one of them is missing.
parser_put = reqparse.RequestParser()
parser_put.add_argument("user", type=str, required=True, help="need user data")
parser_put.add_argument("pwd", type=str, required=True, help="need pwd data")

# Default headers sent with every request; the User-Agent is an Android
# browser string ending in "jinjiedao".
headers = {
    'Accept': ' */*',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'zh-CN,en-US;q=0.8',
    'User-Agent': (
        'Mozilla/5.0 (Linux; Android 6.0.1; Redmi 3S Build/MMB29M; wv) '
        'AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 '
        'Chrome/53.0.2785.49 Mobile MQQBrowser/6.2 TBS/043632 '
        'Safari/537.36 jinjiedao'
    ),
}
# Raw payloads as returned by the API, keyed by request name.
origin_data = {}
# Fields extracted from the raw payloads.
result = {}
# Shared urllib3 connection pool used for every request.
http = urllib3.PoolManager()
# Silence urllib3's warnings.
urllib3.disable_warnings()


# get token
def login(phone, password):
    """Log in with a phone number and password and return the session token.

    The password is sent as the hex digest of MD5 applied twice: the MD5 of
    the hex digest of the MD5 of the UTF-8 encoded password. The payload
    returned by ``excute`` is also stored in ``origin_data['login']``.

    :param phone: account phone number (str); also appended to the Referer URL.
    :param password: plain-text account password (str).
    :return: the value stored under ``'token'`` in the login payload.
    :raises KeyError: if the payload has no ``'token'`` entry, e.g. because
        ``excute`` returned the error response of a rejected login.
    """
    login_url = 'https://www.gushistory.com/jjdApi/user/login'
    referer = 'https://www.gushistory.com/jjdapp/html/register/login.html?telephone=' + phone

    first_pass = hashlib.md5(password.encode('utf-8')).hexdigest()
    hashed_pwd = hashlib.md5(first_pass.encode('utf-8')).hexdigest()
    data = {'c_telephone': phone, 'c_pwd': hashed_pwd}

    # The HTTP header is spelled 'Referer'; the former key 'refer' is not a
    # header servers recognise. It is set only for this request so one
    # caller's phone-bearing URL does not linger in the shared headers.
    headers['Referer'] = referer
    try:
        response = excute(login_url, 'GET', data)
    finally:
        headers.pop('Referer', None)

    origin_data['login'] = response
    if 'token' not in response:
        # Same exception type as the bare lookup, but with an explanation.
        raise KeyError('token (login failed: no token in the login response)')
    return response['token']


# Get the information after login
def getAccountInfo(token):
    """Fetch the home-page info for ``token`` and extract the account summary.

    The raw payload returned by ``excute`` is stored in
    ``origin_data['account']``. The extracted fields are returned as a new
    dict and mirrored into the module-level ``result``.

    :param token: value sent as the ``token`` request field.
    :return: dict with ``balance``, ``wait_repay_amt``, ``wait_receive``,
        ``lender_cnt``, ``phone``, ``name`` and, only when the payload
        provides it, ``id_card``.
    :raises KeyError: if the payload lacks ``userInfo``, ``borrowInfo`` or
        one of the mandatory fields read below.
    """
    account_url = 'https://www.gushistory.com/jjdApi/user/getHomepageInfo'

    # The HTTP header is spelled 'Referer' (the former key 'refer' is not a
    # recognised header); it is set only for the duration of this request.
    headers['Referer'] = 'https://www.gushistory.com/jjdapp/html/index.html'
    try:
        response = excute(account_url, 'GET', {'token': token})
    finally:
        headers.pop('Referer', None)

    origin_data['account'] = response
    user_info = response['userInfo']

    # Build a fresh dict per call: filling the shared ``result`` in place let
    # an ``id_card`` from one user's request survive into the response for a
    # later user whose payload has no ID number.
    info = {
        'balance': user_info['n_left_amt'],  # balance
        'wait_repay_amt': user_info['n_to_repay_amt'],  # amount to be repaid
        'wait_receive': user_info['n_to_receive_amt'],  # amount to be received
        # presumably the number of lenders (key ends in "_cnt") - confirm
        'lender_cnt': response['borrowInfo']['n_lender_cnt'],
        'phone': user_info['c_telephone'],  # phone number
        # name; falls back to a fixed marker when the payload has none
        'name': user_info.get('c_name', 'Not real name'),
    }
    if 'c_id_card_no' in user_info:  # ID number
        info['id_card'] = user_info['c_id_card_no']

    # Keep the module-level ``result`` in sync for code that reads it directly.
    result.clear()
    result.update(info)
    return info


def excute(url, method, data):
    """Send one request through the shared pool and decode the JSON reply.

    A millisecond ``timestamp`` field is added to the request fields.

    :param url: full URL to request.
    :param method: HTTP method name, e.g. ``'GET'``.
    :param data: dict of request fields, or ``None`` for no extra fields.
        The dict is copied, so the caller's object is left untouched.
    :return: ``response['object']`` when the decoded reply has ``code`` equal
        to 200; otherwise the whole decoded reply with an added
        ``'error': 'Processing failed'`` entry.
    """
    # Copy instead of mutating the caller's dict when adding the timestamp.
    fields = dict(data) if data is not None else {}
    fields['timestamp'] = int(round(time.time() * 1000))

    r = http.request(url=url, method=method, headers=headers, fields=fields)
    response = json.loads(r.data.decode('utf-8'))

    # .get(): a reply without 'code' is treated as a failure rather than
    # raising KeyError.
    if response.get('code') == 200:
        return response['object']
    response['error'] = 'Processing failed'
    return response


def to_do(phone, password):
    """Log in with the given credentials and return the extracted account data.

    :param phone: account phone number, passed to ``login``.
    :param password: plain-text password, passed to ``login``.
    :return: whatever ``getAccountInfo`` returns for the obtained token.
    """
    return getAccountInfo(login(phone, password))




# Manipulate (post/get) resource list
class TodoList(Resource):
    """Resource that looks up account data for posted credentials."""

    def post(self):
        """
        Look up the account data for the posted ``user`` and ``pwd``.

        Example: curl http://127.0.0.1:5000/users -X POST -d "user=<phone>&pwd=<password>"

        Returns ``({"info": <account data>}, 201)`` on success, or
        ``({"error": <message>}, 502)`` when the upstream service did not
        return the expected data (for example after a rejected login).
        """
        args = parser_put.parse_args()

        try:
            account = to_do(args['user'], args['pwd'])
        except KeyError:
            # The crawler indexes straight into the upstream payload, so a
            # missing key means the upstream call did not deliver the
            # expected data. Report that instead of an unhandled 500.
            return {"error": "login failed or unexpected upstream response"}, 502

        # Success keeps the status code the handler has always returned.
        return {"info": account}, 201


# Register TodoList under the "/users" path.
api.add_resource(TodoList, "/users")


# Start the Flask server only when this file is run as a script.
# NOTE(review): debug=True turns on Flask's debug mode - confirm this is
# only ever used for local development.
if __name__ == "__main__":
    app.run(debug=True)


This is the result of testing the endpoint with Postman, which simulates the client's request


Guess you like

Origin http://43.154.161.224:23101/article/api/json?id=325560045&siteId=291194637