#环境python3.6
# 爬取APP 无忧借条
# 通过用户账号密码爬取用户个人信息(我这账号密码已打码)
爬虫部分代码
# coding:utf8
"""Crawler for the JinJieDao ("无忧借条") app: log in and fetch account info."""
import hashlib
import time
import urllib3
import json

# Request headers sent with every call (values kept exactly as captured).
headers = {'Accept': ' */*',
           'Accept-Encoding': 'gzip, deflate',
           'Accept-Language': 'zh-CN,en-US;q=0.8',
           'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0.1; Redmi 3S Build/MMB29M; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/53.0.2785.49 Mobile MQQBrowser/6.2 TBS/043632 Safari/537.36 jinjiedao'}
origin_data = {}  # raw responses of the most recent crawl
result = {}  # extracted fields of the most recent crawl
http = urllib3.PoolManager()  # shared HTTP connection pool
urllib3.disable_warnings()  # silence urllib3 warnings


class ApiError(KeyError):
    """Raised when a remote response lacks a field the crawler needs.

    Subclasses ``KeyError`` because that is what the unchecked dictionary
    lookups raised before, so existing ``except KeyError`` handlers still work.
    The human-readable message is available as ``exc.args[0]``.
    """


def _require(payload, key, step):
    """Return ``payload[key]`` or raise ``ApiError`` naming the failed step."""
    if not isinstance(payload, dict) or key not in payload:
        raise ApiError('{} failed: {!r} missing from response'.format(step, key))
    return payload[key]


def login(phone, password):
    """Log in and return the session token.

    Args:
        phone: Account phone number.
        password: Plain-text password; it is MD5-hashed twice before sending.

    Returns:
        The ``token`` value of the login response.

    Raises:
        ApiError: If the response carries no ``token`` (e.g. login rejected).
    """
    login_url = 'https://www.gushistory.com/jjdApi/user/login'
    # NOTE(review): the standard HTTP header is "Referer"; the original key
    # 'refer' is kept unchanged because the server's expectation is unknown.
    headers['refer'] = 'https://www.gushistory.com/jjdapp/html/register/login.html?telephone=' + phone
    first_pass = hashlib.md5(password.encode(encoding='utf-8')).hexdigest()
    second_pass = hashlib.md5(first_pass.encode(encoding='utf-8')).hexdigest()
    data = {'c_telephone': phone, 'c_pwd': second_pass}
    response = excute(login_url, 'GET', data)
    origin_data['login'] = response
    return _require(response, 'token', 'login')


def getAccountInfo(token):
    """Fetch the home-page info for ``token`` and extract the account fields.

    Args:
        token: Session token returned by ``login``.

    Returns:
        A new dict with ``balance``, ``wait_repay_amt``, ``wait_receive``,
        ``lender_cnt``, ``phone``, ``name`` and, when present, ``id_card``.
        The module-level ``result`` is reset to the same content.

    Raises:
        ApiError: If ``userInfo`` or ``borrowInfo`` is missing from the response.
    """
    account_url = 'https://www.gushistory.com/jjdApi/user/getHomepageInfo'
    headers['refer'] = 'https://www.gushistory.com/jjdapp/html/index.html'
    response = excute(account_url, 'GET', {'token': token})
    origin_data['account'] = response
    user_info = _require(response, 'userInfo', 'getHomepageInfo')
    borrow_info = _require(response, 'borrowInfo', 'getHomepageInfo')
    info = {
        'balance': user_info['n_left_amt'],  # balance
        'wait_repay_amt': user_info['n_to_repay_amt'],  # amount to repay
        'wait_receive': user_info['n_to_receive_amt'],  # amount to receive
        # Labelled "lent amount" by the original author; the field name
        # suggests a lender count - TODO confirm.
        'lender_cnt': borrow_info['n_lender_cnt'],
        'phone': user_info['c_telephone'],  # phone number
        # '未实名' ("not real-name verified") is the fallback display name.
        'name': user_info.get('c_name', '未实名'),
    }
    if 'c_id_card_no' in user_info:  # ID card number
        info['id_card'] = user_info['c_id_card_no']
    # Replace rather than merge, so fields from a previous account
    # (e.g. a stale id_card) cannot survive into this one.
    result.clear()
    result.update(info)
    return info


def excute(url, method, data):
    """Send one request and decode the JSON reply.

    Args:
        url: Endpoint URL.
        method: HTTP method name, e.g. ``'GET'``.
        data: Request fields, or ``None`` for no fields. Not modified.

    Returns:
        ``response['object']`` when the reply's ``code`` is 200; otherwise the
        whole reply dict with an added ``'error'`` entry.
    """
    # Copy so the timestamp we add does not leak into the caller's dict.
    fields = dict(data) if data is not None else {}
    fields['timestamp'] = int(round(time.time() * 1000))  # milliseconds
    r = http.request(url=url, method=method, headers=headers, fields=fields)
    response = json.loads(r.data.decode('utf-8'))
    if response['code'] == 200:
        return response['object']
    response['error'] = '处理失败'
    return response


if __name__ == "__main__":
    phone = '1516842****'
    pwd = '123456'
    token = login(phone, pwd)
    # Show the extracted data; previously the result was silently discarded.
    print(json.dumps(getAccountInfo(token), ensure_ascii=False))
运行完成后,得到的结果为 JSON 数据
以上是爬虫部分,接下来先把API接口模板写好:
from flask import Flask, g
from flask_restful import reqparse, Api, Resource
from flask_httpauth import HTTPTokenAuth

# Flask application and RESTful API objects
app = Flask(__name__)
api = Api(app)

# Request-argument parser for PUT / POST bodies: both fields are required
parser_put = reqparse.RequestParser()
parser_put.add_argument("user", type=str, required=True, help="need user data")
parser_put.add_argument("pwd", type=str, required=True, help="need pwd data")
#这部分是爬虫功能
def to_do(arg1, args2):
    """Placeholder for the crawler entry point.

    Args:
        arg1: First argument; the resource handler passes the ``user`` value.
        args2: Second argument; the resource handler passes the ``pwd`` value.

    Returns:
        The fixed string ``"this is info"``; both arguments are ignored.
    """
    return "this is info"
# 操作(post / get)资源列表
class TodoList(Resource):
    """REST resource that runs ``to_do`` for the submitted credentials."""

    def post(self):
        """Handle a POST carrying the required ``user`` and ``pwd`` fields.

        Example:
            curl http://127.0.0.1:5000/users -X POST -d "user=<phone>&pwd=<password>"

        Returns:
            A ``(body, status)`` tuple: ``{"info": <to_do result>}`` and 201.
        """
        # parse_args() rejects the request if "user" or "pwd" is missing.
        args = parser_put.parse_args()
        user = args['user']
        pwd = args['pwd']
        info = {"info": to_do(user, pwd)}
        # 201 is kept as in the original so existing clients see no change.
        return info, 201
# 设置路由,即路由地址为http://127.0.0.1:5000/users
# Expose TodoList under the /users path.
api.add_resource(TodoList, "/users")

if __name__ == "__main__":
    # NOTE: debug=True turns on Flask's debugger and reloader; use it for
    # local development only, never on a publicly reachable host.
    app.run(debug=True)
接下来将爬虫部分嵌入到模板里面,完整代码如下:
from flask import Flask, g
from flask_restful import reqparse, Api, Resource
from flask_httpauth import HTTPTokenAuth
import hashlib
import time
import urllib3
import json

# Flask application and RESTful API objects
app = Flask(__name__)
api = Api(app)

# Request-argument parser for PUT / POST bodies: both fields are required
parser_put = reqparse.RequestParser()
parser_put.add_argument("user", type=str, required=True, help="need user data")
parser_put.add_argument("pwd", type=str, required=True, help="need pwd data")

# Request headers sent with every upstream call (values kept exactly as captured).
headers = {'Accept': ' */*',
           'Accept-Encoding': 'gzip, deflate',
           'Accept-Language': 'zh-CN,en-US;q=0.8',
           'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0.1; Redmi 3S Build/MMB29M; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/53.0.2785.49 Mobile MQQBrowser/6.2 TBS/043632 Safari/537.36 jinjiedao'}
origin_data = {}  # raw responses of the most recent crawl
result = {}  # extracted fields of the most recent crawl
http = urllib3.PoolManager()  # shared HTTP connection pool
urllib3.disable_warnings()  # silence urllib3 warnings


class ApiError(KeyError):
    """Raised when an upstream response lacks a field the crawler needs.

    Subclasses ``KeyError`` because that is what the unchecked dictionary
    lookups raised before, so existing ``except KeyError`` handlers still work.
    The human-readable message is available as ``exc.args[0]``.
    """


def _require(payload, key, step):
    """Return ``payload[key]`` or raise ``ApiError`` naming the failed step."""
    if not isinstance(payload, dict) or key not in payload:
        raise ApiError('{} failed: {!r} missing from response'.format(step, key))
    return payload[key]


def login(phone, password):
    """Log in and return the session token.

    Args:
        phone: Account phone number.
        password: Plain-text password; it is MD5-hashed twice before sending.

    Returns:
        The ``token`` value of the login response.

    Raises:
        ApiError: If the response carries no ``token`` (e.g. login rejected).
    """
    login_url = 'https://www.gushistory.com/jjdApi/user/login'
    # NOTE(review): the standard HTTP header is "Referer"; the original key
    # 'refer' is kept unchanged because the server's expectation is unknown.
    headers['refer'] = 'https://www.gushistory.com/jjdapp/html/register/login.html?telephone=' + phone
    first_pass = hashlib.md5(password.encode(encoding='utf-8')).hexdigest()
    second_pass = hashlib.md5(first_pass.encode(encoding='utf-8')).hexdigest()
    data = {'c_telephone': phone, 'c_pwd': second_pass}
    response = excute(login_url, 'GET', data)
    origin_data['login'] = response
    return _require(response, 'token', 'login')


def getAccountInfo(token):
    """Fetch the home-page info for ``token`` and extract the account fields.

    Args:
        token: Session token returned by ``login``.

    Returns:
        A new dict with ``balance``, ``wait_repay_amt``, ``wait_receive``,
        ``lender_cnt``, ``phone``, ``name`` and, when present, ``id_card``.
        The module-level ``result`` is reset to the same content.

    Raises:
        ApiError: If ``userInfo`` or ``borrowInfo`` is missing from the response.
    """
    account_url = 'https://www.gushistory.com/jjdApi/user/getHomepageInfo'
    headers['refer'] = 'https://www.gushistory.com/jjdapp/html/index.html'
    response = excute(account_url, 'GET', {'token': token})
    origin_data['account'] = response
    user_info = _require(response, 'userInfo', 'getHomepageInfo')
    borrow_info = _require(response, 'borrowInfo', 'getHomepageInfo')
    info = {
        'balance': user_info['n_left_amt'],  # balance
        'wait_repay_amt': user_info['n_to_repay_amt'],  # amount to repay
        'wait_receive': user_info['n_to_receive_amt'],  # amount to receive
        # Labelled "lent amount" by the original author; the field name
        # suggests a lender count - TODO confirm.
        'lender_cnt': borrow_info['n_lender_cnt'],
        'phone': user_info['c_telephone'],  # phone number
        # '未实名' ("not real-name verified") is the fallback display name.
        'name': user_info.get('c_name', '未实名'),
    }
    if 'c_id_card_no' in user_info:  # ID card number
        info['id_card'] = user_info['c_id_card_no']
    # Build a fresh dict per call and replace the shared one: merging into the
    # module-level dict let one user's id_card show up in a later user's reply.
    result.clear()
    result.update(info)
    return info


def excute(url, method, data):
    """Send one upstream request and decode the JSON reply.

    Args:
        url: Endpoint URL.
        method: HTTP method name, e.g. ``'GET'``.
        data: Request fields, or ``None`` for no fields. Not modified.

    Returns:
        ``response['object']`` when the reply's ``code`` is 200; otherwise the
        whole reply dict with an added ``'error'`` entry.
    """
    # Copy so the timestamp we add does not leak into the caller's dict.
    fields = dict(data) if data is not None else {}
    fields['timestamp'] = int(round(time.time() * 1000))  # milliseconds
    r = http.request(url=url, method=method, headers=headers, fields=fields)
    response = json.loads(r.data.decode('utf-8'))
    if response['code'] == 200:
        return response['object']
    response['error'] = '处理失败'
    return response


def to_do(phone, password):
    """Log in with the given credentials and return the extracted account info.

    Raises:
        ApiError: Propagated from ``login`` / ``getAccountInfo``.
    """
    token = login(phone, password)
    return getAccountInfo(token)


class TodoList(Resource):
    """REST resource that runs the crawler for the submitted credentials."""

    def post(self):
        """Handle a POST carrying the required ``user`` and ``pwd`` fields.

        Example:
            curl http://127.0.0.1:5000/users -X POST -d "user=<phone>&pwd=<password>"

        Returns:
            ``({"info": <account info>}, 201)`` on success, or
            ``({"error": <message>}, 502)`` when the upstream call fails.
        """
        # parse_args() rejects the request if "user" or "pwd" is missing.
        args = parser_put.parse_args()
        try:
            info = {"info": to_do(args['user'], args['pwd'])}
        except ApiError as exc:
            # Report the upstream failure instead of an unhandled 500.
            return {"error": exc.args[0]}, 502
        # 201 is kept as in the original so existing clients see no change.
        return info, 201


# Expose TodoList under the /users path.
api.add_resource(TodoList, "/users")

if __name__ == "__main__":
    # NOTE: debug=True turns on Flask's debugger and reloader; use it for
    # local development only, never on a publicly reachable host.
    app.run(debug=True)
这是用postman测试工具模拟浏览器请求,得出的结果