网易云音乐歌曲评论爬虫

上一篇文章我已经教大家用一种比较偷懒的方式爬取网易云歌曲链接，接下来我们继续用这种方式爬取网易云的评论、歌单、专辑和搜索结果。这一篇主要讲网易云评论的爬取。

打开要爬的歌曲网页 , 右键检查 , 进入network选项卡 , 刷新页面
在这里插入图片描述
在这里插入图片描述
通过上一篇的分析我们知道，构造网易云音乐的 POST 参数只需要改变一个变量，其他参数都是固定的。下面继续使用之前的方法，看看可变的那个参数是什么：

d函数进入 d => {"rid":"R_SO_4_1342798229","offset":"0","total":"true","limit":"20","csrf_token":""}  e=>010001 f=>00e0b509f6259df8642dbc35662901477df22677ec152b5ff68ace615bb7b725152b3ab17a876aea8a5aa76d2e417629ec4ee341f56135fccf695280104e0312ecbda92557c93870114af6c9d05c4f7f0c3685b7a46bee255932575cce10b424d813cfe4875d3e82047b97ddef52741d546b8e289dc6935b3ece0462db0a22b8e7  g=>0CoJUm6Qyw8W8jud
a函数进入 a => 16
b函数进入 a => {"rid":"R_SO_4_1342798229","offset":"0","total":"true","limit":"20","csrf_token":""}  b=>0CoJUm6Qyw8W8jud
b函数进入 a => gHPVST6jk9PvEgIDjnZzQm5eol/ky4Ziny9q0k+2OJZ8nridUSTC/qZT9OPaG347Nzk60/ZG/6a8rRCQTf/49+EZtiKnkvi24KuSdDKia2sKLeFF28Zyj5GZBwWC260A  b=>i8hkVttH5BgdJMYw
c函数进入 a => i8hkVttH5BgdJMYw  b=>010001 c=>00e0b509f6259df8642dbc35662901477df22677ec152b5ff68ace615bb7b725152b3ab17a876aea8a5aa76d2e417629ec4ee341f56135fccf695280104e0312ecbda92557c93870114af6c9d05c4f7f0c3685b7a46bee255932575cce10b424d813cfe4875d3e82047b97ddef52741d546b8e289dc6935b3ece0462db0a22b8e7

encSecKey:"4db476c4059d59a35dbabe6c1e1588ffffc80d964f9da8b709056b193b0d72d4535a15cbb52287efd39addb51f9db0a059af317be1878235bccc38d77580a91416c791dccd199374251a10c0f1244a53a5cbc7499a36af0ae1a73a3c00a50bb99c4f79b1985aaac0856aa6268089ee379de3be2a249908676b46c25e4564561d"
encText:"6ZNWq6L+AjYiujItwQE2lTCLJc2x/C5Z+hjodS9UaOZrpNyoKJXdMoFFrpnoU4JaEqSNs/ir5nv/0ajZWFxZ9HiasEY76XbDH4WvmvaoLfuZE69cHFUbhVpKUY7eS8cOJniKK82Blc0aVsgiad02qWx5WP4kdpIq7OtRtwIK0ZGNsn4RE6rtvBcFYwEj3zlr"

可变参数是进入d函数的d参数 :
{"rid":"R_SO_4_1342798229","offset":"0","total":"true","limit":"20","csrf_token":""}

接下来就是最核心的一步了 , 构造params参数 (以PHP为例):

// Request payload for the comments endpoint; this is the only value that varies per request.
$songData = '{"rid":"R_SO_4_1342798229","offset":"0","total":"true","limit":"20","csrf_token":""}';
// Fixed parameters: the first-pass AES key and the IV shared by both passes.
$v1 = '0CoJUm6Qyw8W8jud';
$v2 = '0102030405060708';
// Second-pass AES key: reuses cui5UlB0Hp5Osovk from the previous article.
$v3 = 'cui5UlB0Hp5Osovk';

// $options is an int bitmask, so pass 0 (default padding, Base64-encoded output)
// instead of the bool false, which only works through implicit coercion.
$songData = openssl_encrypt($songData, 'aes-128-cbc', $v1, 0, $v2);
if ($songData === false) {
    // Stop here rather than feeding false into the second pass.
    throw new RuntimeException('First AES pass failed: ' . openssl_error_string());
}

// The Base64 text from the first pass is encrypted again to obtain "params".
$params = openssl_encrypt($songData, 'aes-128-cbc', $v3, 0, $v2);
if ($params === false) {
    throw new RuntimeException('Second AES pass failed: ' . openssl_error_string());
}

参数都已经获取了，下一步就是构造请求了:

// Request headers. CURLOPT_HTTPHEADER expects a list of "Name: value" strings;
// a name => value map would send only the bare values as malformed header lines.
$header = array(
#    'Referer: https://music.163.com/song?id=1342798229',
    'User-Agent: Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36',
    'Host: music.163.com',
);
// Fixed encSecKey value sent alongside "params".
// NOTE(review): presumably the RSA-encrypted form of the fixed second-pass AES key — confirm against the previous article.
$encSecKey = "b62d8aeac7f4cd0a2653997c76ec40103504a6b3ccd6df45230ba0c9ffba53f21574676726c37a4c3910d394e21e9a71604b084f4e1105d6c4db7e7da1af9d4e2357fb80ebcdeb13d8bc268288a5d9ae23bec681c1f15fa7eb9a03c9229e9a94efd887e46b332f4c3421567bf9236cf847cd7708b4d39a03bff95cb194879635";

$curl = curl_init("https://music.163.com/weapi/v1/resource/comments/R_SO_4_1342798229?csrf_token=");
if ($curl === false) {
    throw new RuntimeException('Unable to initialise cURL');
}

curl_setopt_array($curl, array(
    CURLOPT_HEADER         => false,
    // NOTE: certificate verification is turned off, as in the original snippet;
    // acceptable for a local experiment only.
    CURLOPT_SSL_VERIFYPEER => false,
    CURLOPT_RETURNTRANSFER => true,
    CURLOPT_POST           => true,
    CURLOPT_HTTPHEADER     => $header,
    // $params is the doubly AES-encrypted payload built in the previous step.
    CURLOPT_POSTFIELDS     => http_build_query(array("params" => $params, "encSecKey" => $encSecKey)),
));

$response = curl_exec($curl);
if ($response === false) {
    // echo false would print nothing; surface the transport error instead.
    throw new RuntimeException('Request failed: ' . curl_error($curl));
}

echo $response;

结果:
在这里插入图片描述
好了评论爬取到了 , 下面贴上全部代码:

// Complete script: encrypts the comment-list query twice with AES-128-CBC and
// POSTs it to the NetEase Cloud Music comments endpoint, then prints the raw response.

// First-pass AES key and the IV shared by both encryption passes.
define("PARAME1" , "0CoJUm6Qyw8W8jud");
define("PARAME2" , "0102030405060708");

// The song id appears in both the payload and the URL; derive both from one value
// so they cannot drift apart.
$songId     = '1342798229';
$resourceId = 'R_SO_4_' . $songId;

// Encodes to {"rid":"R_SO_4_1342798229","offset":"0","total":"true","limit":"20","csrf_token":""}
$songData = json_encode(array(
    'rid'        => $resourceId,
    'offset'     => '0',
    'total'      => 'true',
    'limit'      => '20',
    'csrf_token' => '',
));

$header = array(
#    'Referer'         => 'https://music.163.com/song?id=1342798229',
    'User-Agent'      => 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36',
    "Host"            => "music.163.com"  ,
);
// CURLOPT_HTTPHEADER needs a list of "Name: value" strings, so flatten the map.
$header = array_map(function($k,$v){
    return $k.": ".$v ;
} , array_keys($header) , $header);

// Second-pass AES key.
$a_back = "cui5UlB0Hp5Osovk" ;

// $options is an int bitmask: 0 selects default padding and Base64-encoded output.
$songData = openssl_encrypt($songData, 'aes-128-cbc', PARAME1 , 0, PARAME2);
if ($songData === false) {
    throw new RuntimeException('First AES pass failed: ' . openssl_error_string());
}
// The Base64 text from the first pass is encrypted again to obtain "params".
$params = openssl_encrypt($songData, 'aes-128-cbc', $a_back , 0, PARAME2);
if ($params === false) {
    throw new RuntimeException('Second AES pass failed: ' . openssl_error_string());
}

// Fixed encSecKey value sent alongside "params".
// NOTE(review): presumably the RSA-encrypted form of $a_back — confirm against the previous article.
$encSecKey = "b62d8aeac7f4cd0a2653997c76ec40103504a6b3ccd6df45230ba0c9ffba53f21574676726c37a4c3910d394e21e9a71604b084f4e1105d6c4db7e7da1af9d4e2357fb80ebcdeb13d8bc268288a5d9ae23bec681c1f15fa7eb9a03c9229e9a94efd887e46b332f4c3421567bf9236cf847cd7708b4d39a03bff95cb194879635";

$curl = curl_init("https://music.163.com/weapi/v1/resource/comments/" . $resourceId . "?csrf_token=");
if ($curl === false) {
    throw new RuntimeException('Unable to initialise cURL');
}

curl_setopt($curl , CURLOPT_HEADER , false);
// NOTE: certificate verification is turned off, as in the original listing;
// acceptable for a local experiment only.
curl_setopt($curl , CURLOPT_SSL_VERIFYPEER , false);
curl_setopt($curl , CURLOPT_RETURNTRANSFER , true);
curl_setopt($curl , CURLOPT_POST , true);
curl_setopt($curl , CURLOPT_HTTPHEADER , $header);
curl_setopt($curl , CURLOPT_POSTFIELDS , http_build_query(array("params" => $params , "encSecKey" => $encSecKey)));

$response = curl_exec($curl);
if ($response === false) {
    // echo false would print nothing; surface the transport error instead.
    throw new RuntimeException('Request failed: ' . curl_error($curl));
}

echo $response;

猜你喜欢

转载自blog.csdn.net/weixin_43999566/article/details/87812985