# Crawler: fetching Weibo mobile-site comments page by page (a recursion exercise)

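# The comment URL takes a max_id parameter, and the max_id required for the
# next request is the value returned in the previous response. The pages
# therefore have to be followed one after another, which naturally suggests
# recursion. The hard part of recursion is getting the termination condition
# right: in this case it is max_id == 0, which means there is no next page.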

import json
from urllib.parse import parse_qsl, urlencode, urlsplit, urlunsplit

import requests
from lxml import etree

# Module-level state for the crawl.
# html_contents collects the parsed responses that down() appends to it.
html_contents = list()
# down() binds its own local max_id, so this module-level value stays 0.
max_id = 0
def _with_max_id(url, max_id):
    """Return *url* with its ``max_id`` query parameter set to *max_id*."""
    parts = urlsplit(url)
    query = [
        (key, value)
        for key, value in parse_qsl(parts.query, keep_blank_values=True)
        if key != "max_id"
    ]
    query.append(("max_id", str(max_id)))
    return urlunsplit(parts._replace(query=urlencode(query)))


def down(url):
    """Fetch a comment page and every page after it, collecting the responses.

    Each response is expected to be JSON carrying ``data.max_id``. A non-zero
    value is the cursor for the next page; 0 marks the last page. Every parsed
    payload is appended, as a dict, to the module-level ``html_contents`` list.
    The raw body of each page and each non-zero ``max_id`` are printed as
    progress output.

    Pages are followed in a loop instead of one recursive call per page, so a
    long comment thread cannot exhaust the interpreter's recursion limit.

    Args:
        url: URL of the first page to request. Later pages reuse this URL with
            only the ``max_id`` query parameter replaced, so the crawl stays
            on whatever post ``url`` points at.

    Returns:
        0 if the first page was already the last one, otherwise 1.

    Raises:
        json.JSONDecodeError: If a response body is not valid JSON.
        KeyError: If a response does not contain ``data.max_id``.
        requests.exceptions.RequestException: On network failure or timeout.
    """
    request_timeout = 10  # seconds; without a timeout requests can block forever
    headers = {
        # Media ranges must not contain spaces around "/".
        "accept": "application/json, text/plain, */*",
        "upgrade-insecure-requests": "1",
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.80 Safari/537.36",
        # NOTE(review): hard-coded session cookie; this looks like a real login
        # session and should probably come from configuration - confirm.
        "cookie": "_T_WM=74b5406b79cd18adabbcaac40f997914; WEIBOCN_FROM=1110006030; MLOGIN=1; SSOLoginState=1546235890; ALF=1548827890; SCF=Arj6zmmKiOmQAk_IgSYwafWcdI6LlAtTIuAWJCXnxyWffuZOwcMEjITykhpkEIjdpvk1Tl-MAFRtjZPwLBkKg7w.; SUB=_2A25xLd-iDeRhGeBG41IS9yzJzD2IHXVS0eHqrDV6PUNbktAKLRHTkW1NQeU4KyxGbCrkBPK46ssmM7owlLLmzyNw; SUBP=0033WrSXqPxfM725Ws9jqgMF55529P9D9WF6hmlpjTzkNkQzFAuzj21D5JpX5KMhUgL.FoqR1h50S0zfS022dJLoIp7LxKML1KBLBKnLxKqL1hnLBoMXShBfehzRe0eX; SUHB=03oFS1TMqpmO_Q; M_WEIBOCN_PARAMS=oid%3D4323281584327025%26luicode%3D20000174%26lfid%3D4323281584327025%26uicode%3D20000061%26fid%3D4323281584327025",
    }

    page_url = url
    followed_next_page = 0
    while True:
        html = requests.get(page_url, headers=headers, timeout=request_timeout).text
        print(html)

        # Parse once and reuse; look up the cursor before storing so that a
        # malformed page is not recorded.
        payload = json.loads(html)
        max_id = payload['data']['max_id']
        # Store every page in the same shape (a plain dict).
        html_contents.append(payload)

        if max_id == 0:
            # No further page.
            return followed_next_page

        print(max_id)
        followed_next_page = 1
        page_url = _with_max_id(page_url, max_id)

# Start from the first comment page (its URL carries no max_id yet), then show
# the status code returned by down() followed by the collected responses.
start_url = "https://m.weibo.cn/comments/hotflow?id=4323281584327025&mid=4323281584327025&max_id_type=0"
crawl_status = down(start_url)
print(crawl_status)
print(html_contents)

# You may also like

# Reposted from blog.csdn.net/weixin_42357472/article/details/85465713