爬取微博(Ajax)

#coding:utf-8
from urllib.parse import urlencode
import requests
# Endpoint prefix of the mobile Weibo Ajax API; the url-encoded query string
# is appended directly after the trailing '?'.
base_url = 'https://m.weibo.cn/api/container/getIndex?'

# Request headers sent with every API call.
headers = {
    'Host': 'm.weibo.cn',
    'Referer': 'https://m.weibo.cn/u/2830678474',
    # HTTP header names use hyphens: with the previous 'User_Agent' key no
    # real User-Agent header was set, so the client's default agent was sent.
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.186 Safari/537.36',
    # Marks the request as an Ajax (XMLHttpRequest) call.
    'X-Requested-With': 'XMLHttpRequest',
}
def get_page(page):
    """Fetch one page of the getIndex container listing for uid 2830678474.

    Args:
        page: Page number sent as the ``page`` query parameter.

    Returns:
        The decoded JSON body when the server answers with HTTP 200.
        ``None`` when the request fails, the status is not 200, or the
        body is not valid JSON.
    """
    params = {
        'type': 'uid',
        'value': '2830678474',
        'containerid': '1076032830678474',
        'page': page,
    }
    url = base_url + urlencode(params)
    try:
        # Without a timeout a stalled connection would block forever.
        response = requests.get(url, headers=headers, timeout=10)
    except requests.RequestException as e:
        # RequestException also covers timeouts, not only connection errors.
        print('Error', e.args)
        return None
    if response.status_code != 200:
        return None
    try:
        return response.json()
    except ValueError as e:
        # Body was not valid JSON (e.g. an HTML error or login page).
        print('Error', e.args)
        return None
import json

# json = get_page(1)
# items = json['data']['cards'][2]['mblog']
# print(items['attitudes_count'])


def parse_page(json):
    """Yield a summary dict for the post held in the third card of a page.

    Args:
        json: Decoded API response (a dict), or ``None`` when the fetch
            failed. The parameter name shadows the ``json`` module inside
            this function; it is kept for compatibility with callers.

    Yields:
        A dict with the keys ``text``, ``attitudes`` and ``comments`` taken
        from ``json['data']['cards'][2]['mblog']``. Nothing is yielded when
        the response is empty or that path is missing, instead of raising.
    """
    if not json:
        return
    cards = (json.get('data') or {}).get('cards') or []
    # The post of interest sits at index 2; shorter listings have none.
    if len(cards) < 3:
        return
    items = cards[2].get('mblog')
    if not items:
        # Card without an 'mblog' payload carries no post to report.
        return
    yield {
        'text': items['text'],
        'attitudes': items['attitudes_count'],
        'comments': items['comments_count'],
    }
if __name__ == '__main__':
    # Crawl pages 1 through 3 and print every parsed post summary.
    for page in range(1, 4):
        # Pass the loop variable: the previous hard-coded get_page(2)
        # fetched page 2 three times and printed the same post repeatedly.
        page_data = get_page(page)
        if page_data is None:
            # Fetch failed or returned a non-200 status; skip this page.
            continue
        for result in parse_page(page_data):
            print(result)
{'text': 'ICLR 2018最佳论文AMSGrad能够取代Adam吗<a data-url="http://t.cn/RuJq5Xf" target="_blank" href="https://weibo.cn/sinaurl/blocked15c8ebdb?luicode=10000011&lfid=1076032830678474&u=https%3A%2F%2Fmp.weixin.qq.com%2Fs%2FSXo5toCaqXrK7kuBLPB-SA&ep=GdZZaxlA5%252C2830678474%252CGdZZaxlA5%252C2830678474" class=""><span class="url-icon"><img src="//h5.sinaimg.cn/upload/2015/09/25/3/timeline_card_small_web_default.png"></span></i><span class="surl-text">网页链接</a> \u200b\u200b\u200b', 'attitudes': 2, 'comments': 0}
{'text': 'ICLR 2018最佳论文AMSGrad能够取代Adam吗<a data-url="http://t.cn/RuJq5Xf" target="_blank" href="https://weibo.cn/sinaurl/blocked15c8ebdb?luicode=10000011&lfid=1076032830678474&u=https%3A%2F%2Fmp.weixin.qq.com%2Fs%2FSXo5toCaqXrK7kuBLPB-SA&ep=GdZZaxlA5%252C2830678474%252CGdZZaxlA5%252C2830678474" class=""><span class="url-icon"><img src="//h5.sinaimg.cn/upload/2015/09/25/3/timeline_card_small_web_default.png"></span></i><span class="surl-text">网页链接</a> \u200b\u200b\u200b', 'attitudes': 2, 'comments': 0}
{'text': 'ICLR 2018最佳论文AMSGrad能够取代Adam吗<a data-url="http://t.cn/RuJq5Xf" target="_blank" href="https://weibo.cn/sinaur


猜你喜欢

转载自blog.csdn.net/qq_34000894/article/details/80326292