# Python 微博

import requests
import json

from pyquery import PyQuery as pq
from requests.exceptions import RequestException
from urllib.parse import urlencode

# Endpoint prefix; get_page() appends the url-encoded query string to it.
baseurl = r'https://m.weibo.cn/api/container/getIndex?'
# HTTP headers sent with every request issued by get_page().
# NOTE(review): the 'Cookie' value below looks like a logged-in session
# credential committed to source; it has presumably expired by now. Consider
# loading it from the environment or a config file instead -- TODO confirm.
headers = {
    'Host': 'm.weibo.cn',
    'Referer': 'https://m.weibo.cn/',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.92 Safari/537.36',
    # Marks the request as an XMLHttpRequest call.
    'X-Requested-With': 'XMLHttpRequest',
    'Cookie': '_T_WM=65019e0551c9474bba50a69be2d3dbb4; WEIBOCN_FROM=1110006030; SUB=_2A25xD0FNDeRhGedI6VsX8CbKzjiIHXVS8G8FrDV6PUJbkdAKLXLdkW1NV7SoWBaZBYmweMOFNkQH_EP5Zy0y8fVl; SUHB=0bt9r23eqHFDgd; SCF=AqwIzfEA7gATPtlezF5tWi4fx_p5qdGdi8KondPe32IDUXkT25uUbNNnmcA_LbmdsD60CNM4axNFW4wvgBlrTW0.; SSOLoginState=1544237343; MLOGIN=1; M_WEIBOCN_PARAMS=lfid%3D1076032286908003%26luicode%3D20000174%26uicode%3D20000174'
}

def get_page(since_id, timeout=10):
    """Fetch one page of the feed and return its decoded JSON body.

    Args:
        since_id: Value sent as the ``since_id`` query parameter.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` may block indefinitely.

    Returns:
        The decoded JSON payload when the server answers with status 200
        and a valid JSON body; ``None`` on any request failure, non-200
        status, or undecodable body (a message is printed in those cases).
    """
    # Build the request URL from the fixed container id and the page cursor.
    params = {
        'containerid': '102803',
        'openApp': '0',
        'since_id': since_id,
    }
    url = baseurl + urlencode(params)

    try:
        response = requests.get(url, headers=headers, timeout=timeout)
        if response.status_code != 200:
            print('Error: unexpected status code', response.status_code)
            return None
        return response.json()
    except (requests.RequestException, ValueError) as e:
        # RequestException covers connection errors, timeouts and the like;
        # ValueError covers a body that is not valid JSON on requests
        # versions whose JSON error is not a RequestException subclass.
        print('Error:', e.args)
        return None
def parse_page(json):
    """Yield a summary dict for every post contained in an API response.

    Args:
        json: Decoded response as returned by ``get_page`` (a dict), or a
            falsy value when the request failed. The parameter name shadows
            the ``json`` module inside this function; it is kept for
            backward compatibility with existing callers.

    Yields:
        dict: One entry per card that carries an ``mblog`` object, with the
        keys ``id``, ``text`` (markup stripped to plain text),
        ``attitudes``, ``comments`` and ``reposts``.
    """
    if not json:
        return

    # A payload may lack 'data' or 'cards'; treat that as "no posts"
    # instead of failing on None.get(...).
    data = json.get('data') or {}
    cards = data.get('cards') or []

    for card in cards:
        # Not every card wraps a post; cards without 'mblog' are skipped.
        mblog = card.get('mblog') if isinstance(card, dict) else None
        if not mblog:
            continue

        raw_text = mblog.get('text')
        yield {
            'id': mblog.get('id'),
            # Only hand non-empty markup to PyQuery.
            'text': pq(raw_text).text() if raw_text else '',
            'attitudes': mblog.get('attitudes_count'),
            'comments': mblog.get('comments_count'),
            'reposts': mblog.get('reposts_count'),
        }
       
def main():
    """Fetch pages 1 through 10 and print every post parsed from them."""
    first_page, last_page = 1, 10
    for since_id in range(first_page, last_page + 1):
        # parse_page is a generator, so posts are printed as they are parsed.
        for weibo in parse_page(get_page(since_id)):
            print(weibo)


if __name__ == "__main__":
    main()

# 猜你喜欢
#
# 转载自 blog.csdn.net/weixin_41767230/article/details/84893069