import requests
import json
from pyquery import PyQuery as pq
from requests.exceptions import RequestException
from urllib.parse import urlencode
# Endpoint of the m.weibo.cn container API. It ends with '?' so that an
# url-encoded query string can be appended directly.
baseurl = r'https://m.weibo.cn/api/container/getIndex?'

# HTTP headers sent with every request.
# NOTE(review): the Cookie value looks like a logged-in browser session that
# was pasted in verbatim; it will presumably expire and is sensitive, so it
# should probably be loaded from configuration instead -- confirm.
headers = {
    'Host': 'm.weibo.cn',
    'Referer': 'https://m.weibo.cn/',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.92 Safari/537.36',
    'X-Requested-With': 'XMLHttpRequest',
    'Cookie': '_T_WM=65019e0551c9474bba50a69be2d3dbb4; WEIBOCN_FROM=1110006030; SUB=_2A25xD0FNDeRhGedI6VsX8CbKzjiIHXVS8G8FrDV6PUJbkdAKLXLdkW1NV7SoWBaZBYmweMOFNkQH_EP5Zy0y8fVl; SUHB=0bt9r23eqHFDgd; SCF=AqwIzfEA7gATPtlezF5tWi4fx_p5qdGdi8KondPe32IDUXkT25uUbNNnmcA_LbmdsD60CNM4axNFW4wvgBlrTW0.; SSOLoginState=1544237343; MLOGIN=1; M_WEIBOCN_PARAMS=lfid%3D1076032286908003%26luicode%3D20000174%26uicode%3D20000174',
}
def get_page(since_id, timeout=10):
    """Fetch one page from the container API and return its decoded JSON.

    Args:
        since_id: Value sent as the ``since_id`` query parameter.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        The decoded JSON body when the server answers with HTTP 200,
        otherwise ``None`` (non-200 status, any network-level failure, or a
        body that is not valid JSON).
    """
    # Build the URL: fixed container id plus the caller-supplied since_id.
    params = {
        'containerid': '102803',
        'openApp': '0',
        'since_id': since_id,
    }
    url = baseurl + urlencode(params)

    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except RequestException as e:
        # RequestException is the base class of ConnectionError, Timeout,
        # etc., so every transport failure is reported the same way.
        print('Error:', e.args)
        return None

    if response.status_code != 200:
        return None

    try:
        return response.json()
    except ValueError as e:
        # requests raises a ValueError subclass when the body is not JSON
        # (for example an HTML error or login page).
        print('Error:', e.args)
        return None
def parse_page(json):
    """Yield a summary dict for each post found in an API response.

    Args:
        json: Decoded API response (a dict), or a falsy value such as
            ``None`` when the fetch failed. The parameter name shadows the
            ``json`` module inside this function; it is kept for backward
            compatibility with existing callers.

    Yields:
        dict: One dict per card that carries an ``mblog`` entry, with the
        keys ``id``, ``text`` (markup stripped to plain text),
        ``attitudes``, ``comments`` and ``reposts``.
    """
    if not json:
        return

    # 'data' or 'cards' may be absent or null (e.g. an error payload);
    # treat both as "no cards" rather than crashing on None.get(...).
    data = json.get('data') or {}
    cards = data.get('cards') or []

    for card in cards:
        mblog = card.get('mblog')
        if not mblog:
            # Not every card wraps a post; skip the ones that do not.
            continue

        raw_text = mblog.get('text')
        yield {
            'id': mblog.get('id'),
            # Only hand non-empty markup to the HTML parser.
            'text': pq(raw_text).text() if raw_text else '',
            'attitudes': mblog.get('attitudes_count'),
            'comments': mblog.get('comments_count'),
            'reposts': mblog.get('reposts_count'),
        }
def main():
    """Fetch ten pages and print every post parsed from them.

    The integers 1 through 10 are passed to ``get_page`` one after another;
    each response is fed to ``parse_page`` and every yielded dict is printed.
    """
    first_page, last_page = 1, 10
    page = first_page
    while page <= last_page:
        for weibo in parse_page(get_page(page)):
            print(weibo)
        page += 1
# Start the crawl only when this file is executed as a script, so that
# importing it has no side effects.
if __name__ == "__main__":
    main()
# Tags: python, Weibo
# You may also like
# Reposted from blog.csdn.net/weixin_41767230/article/details/84893069
# Today's recommendations
# Weekly ranking