Python crawler, what about request parameter encryption?

Table of contents

Background introduction:

Destination URL:

Page Analysis:

The idea of reverse analysis of encryption parameters

Code:

code_js.js

JS way to achieve

python code implementation

Summarize:

                                   I'm Zheng Yin looking forward to your attention


Background introduction:

Hello everyone, my name is Zheng Yin.

When we request an interface, we sometimes find that a request parameter is encrypted. What should we do? Today, I will introduce two ways to generate the encrypted request parameter ourselves: one is to debug the site's JS, reverse-engineer it, and run it; the other is to re-implement the same logic in Python.

Destination URL:

https://www.oklink.com/zh-cn/btc/tx-list?limit=20&pageNum=1

Page Analysis:

Determining the Data Interface Address

Determine whether request headers and request parameters are encrypted

Determine if the returned data is encrypted

The idea of reverse analysis of encryption parameters

Locate the js file where the encrypted parameters are located

open js in sources panel

Determine where encryption parameters are located

Breakpoint, refresh the page

Determine the location of the getApiKey method

rewrite js

Complete the js code and change it into a function

run js

Code:

code_js.js

/**
 * Build the API-key token (the Python callers send it as the `x-apiKey` header).
 *
 * Combines the rotated API key with the obfuscated current timestamp.
 *
 * @returns {string} Base64 text produced by comb().
 */
function getApiKey() {
    // Current time in milliseconds since the epoch.
    var now = new Date().getTime();
    var rotatedKey = encryptApiKey();
    var maskedTime = encryptTime(now);
    return comb(rotatedKey, maskedTime);
}
/**
 * encryptApiKey: rotate the fixed API key left by 8 characters.
 *
 * @returns {string} The key with its first 8 characters moved to the end.
 */
function encryptApiKey() {
    var key = "a2c903cc-b31e-4547-9299-b6d07b7631ab";
    // Everything after the first 8 characters, followed by those 8 characters.
    return key.slice(8) + key.slice(0, 8);
}
/**
 * encryptTime: obfuscate a millisecond timestamp.
 *
 * Adds 1111111111111 to the timestamp and appends three random digits.
 *
 * @param {number} t Millisecond timestamp.
 * @returns {string} The shifted timestamp followed by three random digits.
 */
function encryptTime(t) {
    var shifted = String(1 * t + 1111111111111);
    var suffix = "";
    // Append three random digits, one Math.random() call per digit.
    for (var i = 0; i < 3; i++) {
        suffix += parseInt(10 * Math.random(), 10);
    }
    return shifted + suffix;
}
/**
 * comb: join the two parts with "|" and Base64-encode the result.
 *
 * @param {string} t Left part (the rotated API key when called from getApiKey).
 * @param {string} e Right part (the obfuscated timestamp when called from getApiKey).
 * @returns {string} Base64 encoding of `t + "|" + e`.
 */
function comb(t, e) {
    var joined = String(t) + "|" + String(e);
    return btoa(joined);
}

// Call the function to run it
// console.log(getApiKey())

JS way to achieve

import requests
import json
import execjs
from jsonpath import jsonpath
import time


# Page 1: https://www.oklink.com/api/explorer/v1/btc/transactionsNoRestrict?t=1657362656709&limit=20&offset=0
# Page 2: https://www.oklink.com/api/explorer/v1/btc/transactionsNoRestrict?t=1657362632969&limit=20&offset=20
# Page 3: https://www.oklink.com/api/explorer/v1/btc/transactionsNoRestrict?t=1657362567236&limit=20&offset=40
# Page 4: https://www.oklink.com/api/explorer/v1/btc/transactionsNoRestrict?t=1657362796076&limit=20&offset=60

def parse(offset: int) -> None:
    """Fetch one page of BTC transactions and print selected fields.

    Runs ``getApiKey`` from ``./code_js.js`` through execjs to build the
    ``x-apiKey`` header, requests the transactions endpoint, and prints the
    ``hash``, ``blockHeight``, ``inputsValue`` and ``fee`` values found
    anywhere in the JSON response.

    Args:
        offset: Value of the ``offset`` query parameter; the page size
            (``limit``) is fixed at 20.
    """
    # Target page: 'https://www.oklink.com/zh-cn/btc/tx-list?limit=20&pageNum=1'
    with open('./code_js.js', 'r', encoding='utf-8') as f:
        js_code = f.read()
    # compile() loads the JS source; call() invokes its getApiKey function.
    api_key = execjs.compile(js_code).call('getApiKey')
    header = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36",
        'x-apiKey': api_key
    }
    # t is the current timestamp in milliseconds. Truncate with int()
    # rather than slicing the float's string form: the number of decimals
    # printed for a float varies, so a fixed slice can cut integer digits.
    t = str(int(time.time() * 1000))
    data = {
        't': t,
        'limit': 20,
        'offset': offset
    }
    # Data interface address.
    url = 'https://www.oklink.com/api/explorer/v1/btc/transactionsNoRestrict'
    # A timeout keeps the script from hanging forever on a stalled connection.
    res = requests.get(url, headers=header, params=data, timeout=10).text
    # Convert the JSON text into Python objects.
    dict_data = json.loads(res)
    # Data extraction.
    # Transaction hash
    hash_list = jsonpath(dict_data, "$..hash")
    # Block height
    blockHeight_list = jsonpath(dict_data, "$..blockHeight")
    # Amount (BTC)
    inputsValue_list = jsonpath(dict_data, "$..inputsValue")
    # Fee (BTC)
    fee_list = jsonpath(dict_data, "$..fee")
    print(hash_list, blockHeight_list, inputsValue_list, fee_list)


# Crawl 3 pages: offsets 0, 20 and 40, reported as pages 1 to 3.
for i, offset in enumerate(range(0, 60, 20), start=1):
    print(f'正在爬取第{i}页')
    parse(offset)

python code implementation

import base64
import random

import requests
import json
from jsonpath import jsonpath
import time

def getApiKey() -> str:
    """Build the value sent in the ``x-apiKey`` request header.

    Returns:
        The Base64 text produced by ``comb`` from the rotated API key and
        the obfuscated millisecond timestamp.
    """
    # 13-digit millisecond timestamp. Truncate with int() instead of
    # slicing str(float): the number of decimals in a float's string form
    # varies, so a fixed slice can drop integer digits.
    t = int(time.time() * 1000)
    e = encryptApiKey()
    t = encryptTime(t)
    return comb(e, t)


def encryptApiKey():
    """Return the fixed API key rotated left by 8 characters.

    The first 8 characters of the key are moved to the end, leaving the
    remaining 28 characters in front.
    """
    key = "a2c903cc-b31e-4547-9299-b6d07b7631ab"
    head, tail = key[:8], key[8:]
    return tail + head


def encryptTime(t: int) -> str:
    """Obfuscate a millisecond timestamp.

    Adds 1111111111111 to ``t`` and appends three random digits in the
    order r, n, o, matching the JavaScript ``encryptTime`` this function
    ports.

    Args:
        t: Millisecond timestamp.

    Returns:
        The shifted timestamp as a string, followed by three random digits.
    """
    shifted = str(t + 1111111111111)
    # r, n, o are random digits in the range 0-9.
    r = random.randint(0, 9)
    n = random.randint(0, 9)
    o = random.randint(0, 9)
    # Append in generation order (r, n, o), as the JS version does.
    return f"{shifted}{r}{n}{o}"


def comb(t, e):
    """Join ``t`` and ``e`` with ``|`` and return the Base64 text of the result.

    Args:
        t: Left part of the token (a string).
        e: Right part of the token (a string).

    Returns:
        The Base64 encoding of ``t + "|" + e`` as a ``str``.
    """
    joined = "|".join((t, e))
    encoded = base64.b64encode(bytes(joined, "utf-8"))
    return str(encoded, "utf-8")


def parse(offse: int) -> None:
    """Fetch one page of BTC transactions and print selected fields.

    Builds the ``x-apiKey`` header with ``getApiKey``, requests the
    transactions endpoint, and prints the ``hash``, ``blockHeight``,
    ``inputsValue`` and ``fee`` values found anywhere in the JSON response.

    Args:
        offse: Value of the ``offset`` query parameter; the page size
            (``limit``) is fixed at 20. The parameter name is kept as-is
            so existing callers keep working.
    """
    apikey = getApiKey()
    print(apikey)
    header = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36",
        'x-apiKey': apikey
    }
    # t is the current timestamp in milliseconds. Truncate with int()
    # rather than slicing the float's string form: the number of decimals
    # printed for a float varies, so a fixed slice can cut integer digits.
    t = str(int(time.time() * 1000))
    data = {
        't': t,
        'limit': 20,
        'offset': offse
    }
    # Data interface address.
    url = 'https://www.oklink.com/api/explorer/v1/btc/transactionsNoRestrict'
    # A timeout keeps the script from hanging forever on a stalled connection.
    res = requests.get(url, headers=header, params=data, timeout=10).text
    # Convert the JSON text into Python objects.
    dict_data = json.loads(res)
    # Data extraction.
    # Transaction hash
    hash_list = jsonpath(dict_data, "$..hash")
    # Block height
    blockHeight_list = jsonpath(dict_data, "$..blockHeight")
    # Amount (BTC)
    inputsValue_list = jsonpath(dict_data, "$..inputsValue")
    # Fee (BTC)
    fee_list = jsonpath(dict_data, "$..fee")
    print(hash_list, blockHeight_list, inputsValue_list, fee_list)


# Crawl 3 pages. range(1, 4) yields pages 1, 2 and 3; the previous
# range(1, 3) stopped after page 2.
for i in range(1, 4):
    print(f'正在爬取第{i}页')
    # Each page holds 20 records, so page i starts at offset (i - 1) * 20.
    offset = (i - 1) * 20
    parse(offset)

Summarize:

Through this case, we learned how to determine the address of the data interface, how to find which JS file the encrypted parameter is generated in, and how the JS debugging process works. The encrypted request parameter can be produced either by running the reverse-engineered JS or with pure Python. The Python implementation simply rewrites the JS functions in Python.

                                   I'm Zheng Yin, looking forward to your attention

Guess you like

Origin blog.csdn.net/m0_69043821/article/details/125759430