【爬虫Practice5】爬取网易云音乐评论

  这里以爬取网易云音乐某一首歌的评论为例。爬取歌词等内容的方法完全相同,只是请求的参数不一样。比较难的地方是参数的加密和解密过程,需要先弄清楚。
评论、歌词等数据都是通过 Ajax 请求加载的,但请求中携带的参数(params 和 encSecKey)是经过加密的。

import requests
import json
from fake_useragent import UserAgent
from Crypto.Cipher import AES 
from base64 import b64encode

if __name__ == "__main__":
    
    # Comment API endpoint. The literal must not contain quote characters of
    # its own: with stray quotes the string no longer starts with "https://"
    # and the request cannot be sent.
    url = "https://music.163.com/weapi/comment/resource/comments/get?csrf_token="
    # 1. Plain-text request parameters (they are encrypted before being sent).
    data = {
        'rid': "R_SO_4_1294378245",
        'threadId': "R_SO_4_1294378245",
        'pageNo': "1",
        'pageSize': "20",
        'cursor': "-1",
        'offset': "0",
        'orderType': "1",
        'csrf_token': "",
    }
    # 2. Inputs of the encryption step that yields `params` and the key.
    # NOTE(review): e and f are not used anywhere in this script; presumably
    # they are the RSA public exponent / modulus the site uses to derive
    # encSecKey from i -- confirm against the site's JavaScript.
    e = '010001'
    f = "00e0b509f6259df8642dbc35662901477df22677ec152b5ff68ace615bb7b725152b3ab17a876aea8a5aa76d2e417629ec4ee341f56135fccf695280104e0312ecbda92557c93870114af6c9d05c4f7f0c3685b7a46bee255932575cce10b424d813cfe4875d3e82047b97ddef52741d546b8e289dc6935b3ece0462db0a22b8e7"
    # AES key used for the first encryption pass.
    g = "0CoJUm6Qyw8W8jud"
    # AES key used for the second encryption pass. The value of i has to be
    # looked up, because both params and the key are generated from it.
    i = "6mGGm0cVaHLsaC79"
    
    # 20.生成16倍数的数据长度
    def to_16(data):
        """Pad *data* so that its UTF-8 encoding is a multiple of 16 bytes.

        The pad consists of ``pad`` copies of ``chr(pad)`` where ``pad`` is in
        1..16; a full 16-character pad is appended when the input is already
        aligned (or empty).

        The pad size is derived from the encoded byte length rather than the
        character count, because the caller encrypts ``data.encode("utf-8")``
        and non-ASCII characters occupy more than one byte.

        Args:
            data: Text to pad.

        Returns:
            The padded text.
        """
        pad = 16 - len(data.encode("utf-8")) % 16
        return data + chr(pad) * pad
    
    # 21.加密算法
    # Initialisation vector shared by every AES-CBC pass.
    iv = "0102030405060708"
    def EncryptAES(data, key):
        """Encrypt *data* with AES-CBC under *key* and return base64 text.

        Args:
            data: Plain text; it is padded with ``to_16`` before encryption.
            key: AES key as text; it is UTF-8 encoded before use.

        Returns:
            The ciphertext, base64-encoded and decoded to ``str``.
        """
        # The cipher works on bytes only, so key and IV are encoded first.
        cipher = AES.new(key.encode("utf-8"), AES.MODE_CBC, iv=iv.encode("utf-8"))
        # The encrypted content must be a multiple of 16 in length.
        padded = to_16(data).encode("utf-8")
        ciphertext = cipher.encrypt(padded)
        # Raw ciphertext is not valid UTF-8, hence the base64 step before
        # turning it into text.
        return b64encode(ciphertext).decode("utf-8")
 
    # 22.获取params
    def get_encText(data, g, i):
        """Build the ``params`` value by encrypting *data* twice.

        Args:
            data: Text to encrypt (the JSON-serialised request parameters).
            g: Key for the first ``EncryptAES`` pass.
            i: Key for the second pass, applied to the first pass's output.

        Returns:
            The output of the second ``EncryptAES`` pass.
        """
        return EncryptAES(EncryptAES(data, g), i)
    
    # 23.获取key    
    def get_encSecKey():
        """Return the fixed ``encSecKey`` value sent with the request."""
        enc_sec_key = "1c8b259c6995466fc7a408070cabd721764f997eade0871dbda28cb552fed9fe00ef96e0e7d1f68db1b5768f882bd4639aa6fd50ae92f0916acdb3f2e9cc1588ec738858b4ca61720cda8e01ddeb158aeac244063cdbc500d5880b59dfbfb13f9e4d38166db22d3c87cf03b286968415e5db7a366a490eb65c8da9de0e98fba9"
        return enc_sec_key
    
    # 3.发送请求
    # Form fields actually posted: the doubly encrypted parameters plus the
    # matching key.
    encrypt_data = {
        'params': get_encText(data=json.dumps(data), g=g, i=i),
        'encSecKey': get_encSecKey(),
    }
    # A random User-Agent string is generated for each run.
    headers = {
        'User-Agent': UserAgent().random,
    }
    # Without a timeout requests waits indefinitely on a stalled connection,
    # so bound the wait explicitly.
    res = requests.post(url=url, data=encrypt_data, headers=headers, timeout=10).content
    print(res)
    # The body is raw bytes, so it is written in binary mode.
    with open("./cloudmusic.json", 'wb') as fp:
        fp.write(res)

猜你喜欢

转载自blog.csdn.net/lily_i/article/details/121880201