python生成可视化数据(matplotlib)进阶版

上期我们讲到如何用matplotlib模块将表格里的数据转换成可视化的折线图,但是,这里会有一个问题,表格里的数据是死的,是我随手创建的。在这一期,我将讲解如何通过requests+matplotlib等编写一个真实数据的可视化内容。

今天就来统计一下CSDN网站上作者总榜前20名作者的粉丝数吧

首先,第一步,通过requests模块获取相应的数据

我们先确定我们需要什么数据:1. 作者名称   2. 作者粉丝数

一,通过fiddler抓包,找到哪个接口有我们想要的数据

二,将抓取到的接口通过python进行请求

这一步我们将抓取到的接口通过requests模块请求出来,然后获取数据。我将作者名称作为折线图的x轴,将粉丝数作为y轴。

import requests


# Request headers copied from a browser session captured with Fiddler.
#
# The cookie is assembled from adjacent string literals (one cookie pair per
# line) so that it stays a single-line header value: a raw line break inside
# a quoted string is a syntax error, and a newline inside a header value is
# not valid either. The first pair carries its `ssxmod_itna=` name so every
# entry is a well-formed `name=value` pair.
#
# NOTE: the cookie holds session identifiers (UserToken, csrfToken, ...) that
# belong to one logged-in account and expire; replace it with the cookie from
# your own browser session rather than reusing this one.
# NOTE(review): "br" is advertised in accept-encoding; requests can only
# decode brotli responses when a brotli package is installed — confirm, or
# drop "br" if the JSON body fails to decode.
headers = {
    "accept": "application/json, text/plain, */*",
    "accept-encoding": "gzip, deflate, br",
    "cookie": (
        "ssxmod_itna=YqAxBD2DcDy0nqBPxPrDkDuAWIMuih7Y8B+exGXpYDZDiqAPGhDC33/DDIbZxP1K7m47KdDROGUaxH+GhhxEQBoPxPEtRweGLDmKDyRi+ieDxaq0rD74irDDxD3Db4QDSDWKD9D0RMSgEHCDYPDElKDYxDrXpKDRxi7DDydzx07DQyk8Auk3SpwBOE1OAY4YlKD9hoDsEDL13OLXfws3FSE2QDqDB+bl+GtS0L5oW73Ex0koq0OShysGOoXRxBzPF4xEWiekOie7hiKY70reATKHWKP+XLS4wWqY7zgKaxNBqDWxH2EpQGDD; "
        "ssxmod_itna2=YqAxBD2DcDy0nqBPxPrDkDuAWIMuih7Y8B8DA=njrD/iKK7DFouDYvOeKD7Bu82kKGFKQuii+YIaYg0pY5IOIiQ6zbZBaw4wt6McX8dI=Sb0mrszl8uO6II1g6u5biy7PQ5sxsn/i1PGlgyaneBiKWv+YIuby77hq1BQitmzAoKbC2moDxqrS2ideQcTKhnPd=WsiRr=uQ8KKrqb7Qu1YrobSh3EM6ozY0qxXYlP60oR+oMCyxI7ifORO24cG15FUbe66iqOkTKbNLPIaz6C41Ram5pgr1vXbogMHcdhYULZ0StZl5ImSwrjWsIRs0c7Wmzjn0iAOFbUU44I4=7BbfpeM4kGRgKbt0YLIhzSy4x7eowlFRDpBzKFbWoi0RwAT3NKdPR=iFCGu8+xd2bWBmoUoUBxroGrP4FKDKixDKk7eA7Y7oqdKc7FSB5XA3W6GOiY27facL763ewLSDK33qGPHqixXGjqG6Upxifh0DEcBKnGDyifRa5PiYOAXSYqYdXWaNK/iV0x4IfdGX5SqEPKYY55iGyGEbIEk9qSBqD7=DYFbTlepMFgrTIQmB2e5pI5cLXrCvQ44b4zxe44K0MmqRsk=iI=Qzhxi4NSLCatNQGr61X=GDD=; "
        "UserName=m0_58002043; "
        "UserInfo=0cdc4e7b0ce247eaabce856bc36b72ea; "
        "UserToken=0cdc4e7b0ce247eaabce856bc36b72ea; "
        "UserNick=%E6%B5%8B%E8%AF%95%E5%B0%8F%E8%80%81%E5%BC%9F; "
        "AU=B56; "
        "UN=m0_58002043; "
        "BT=1635586769322; "
        "p_uid=U010000; "
        "Hm_up_6bcd52f51e9b3dce32bec4a3997715ac=%7B%22islogin%22%3A%7B%22value%22%3A%221%22%2C%22scope%22%3A1%7D%2C%22isonline%22%3A%7B%22value%22%3A%221%22%2C%22scope%22%3A1%7D%2C%22isvip%22%3A%7B%22value%22%3A%220%22%2C%22scope%22%3A1%7D%2C%22uid_%22%3A%7B%22value%22%3A%22m0_58002043%22%2C%22scope%22%3A1%7D%7D; "
        "Hm_ct_6bcd52f51e9b3dce32bec4a3997715ac=6525*1*10_16997883450-1635586744973-543391!5744*1*m0_58002043; "
        "__gads=ID=578a42a6d4b79dcb-223f08007ace0042:T=1635586782:RT=1635586782:S=ALNI_MY68FM8_Tx2RprvFECbZR_DFbK4DQ; "
        "dc_session_id=10_1640346723536.350387; "
        "c_first_ref=www.baidu.com; "
        "c_first_page=https%3A//www.csdn.net/; "
        "c_segment=15; "
        "dc_sid=9417d87cc2063e06526df83adb1b4a49; "
        "Hm_lvt_6bcd52f51e9b3dce32bec4a3997715ac=1639746041,1639746140,1639746147,1640346727; "
        "firstDie=1; "
        "log_Id_click=55; "
        "csrfToken=5uUA7u5wvhEl4aQV3lAyjiMP; "
        "c_pref=https%3A//www.csdn.net/; "
        "c_ref=https%3A//blog.csdn.net/m0_58002043%3Fspm%3D1000.2115.3001.5343; "
        "Hm_lpvt_6bcd52f51e9b3dce32bec4a3997715ac=1640346748; "
        "log_Id_view=209; "
        "c_page_id=default; "
        "dc_tos=r4mbnj; "
        "log_Id_pv=79"
    ),
    "referer": "https://blog.csdn.net/rank/list/total?spm=1001.2014.3001.5476",
    "sec-ch-ua": '"Chromium";v="94", "Google Chrome";v="94", ";Not A Brand";v="99"',
    "sec-ch-ua-mobile": "?0",
    "sec-ch-ua-platform": '"Windows"',
    "sec-fetch-dest": "empty",
    "sec-fetch-mode": "cors",
    "sec-fetch-site": "same-origin",
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.81 Safari/537.36",
}
# Query the CSDN overall author ranking endpoint (page 0, 20 entries).
# The timeout keeps the script from hanging forever on a stalled connection.
response = requests.get(
    url="https://blog.csdn.net/phoenix/web/blog/all-rank?page=0&pageSize=20",
    headers=headers,
    timeout=10,
)
# Fail with a clear HTTP error instead of trying to parse an error page as JSON.
response.raise_for_status()

# Decode the JSON body once and reuse it; every .json() call re-parses the body.
payload = response.json()
rank_items = payload["data"]["allRankListItem"]
print(payload)
print(rank_items)

x = [item["nickName"] for item in rank_items]   # author names (x axis)
y = [item["fansCount"] for item in rank_items]  # follower counts (y axis)

print(x)
print(y)



运行结果:
['涛歌依旧', '一个处女座的程序猿', 'yuanmeng001', 'SAP剑客', '兔老大RabbitMQ', 'AI视觉网奇', '沉默王二', 'dog250', '不吃西红柿丶', '冰 河', '迂者-贺利坚', 'OkidoGreen', 'IT1995', '汪子熙', 'LiveEveryDay', '布客飞龙', 'caimouse', '梦想橡皮擦', '跨链技术践行者', 'chengqiuming']
[171187, 632363, 15973, 17619, 95776, 10853, 313483, 23555, 87750, 124028, 31309, 6264, 5826, 3068, 3102, 5767, 12960, 132325, 17685, 2465]

三,将抓取到的数据通过matplotlib模块转换为折线图

我们这里直接将我们获取到的内容传递给我们的matplotlib即可,看下实现代码吧

import requests
import matplotlib.pyplot as plt

# Request headers copied from a browser session captured with Fiddler.
#
# The cookie is assembled from adjacent string literals (one cookie pair per
# line) so that it stays a single-line header value: a raw line break inside
# a quoted string is a syntax error, and a newline inside a header value is
# not valid either.
#
# NOTE: the cookie holds session identifiers (UserToken, csrfToken, ...) that
# belong to one logged-in account and expire; replace it with the cookie from
# your own browser session rather than reusing this one.
# NOTE(review): "br" is advertised in accept-encoding; requests can only
# decode brotli responses when a brotli package is installed — confirm, or
# drop "br" if the JSON body fails to decode.
headers = {
    "accept": "application/json, text/plain, */*",
    "accept-encoding": "gzip, deflate, br",
    "cookie": (
        "ssxmod_itna=YqAxBD2DcDy0nqBPxPrDkDuAWIMuih7Y8B+exGXpYDZDiqAPGhDC33/DDIbZxP1K7m47KdDROGUaxH+GhhxEQBoPxPEtRweGLDmKDyRi+ieDxaq0rD74irDDxD3Db4QDSDWKD9D0RMSgEHCDYPDElKDYxDrXpKDRxi7DDydzx07DQyk8Auk3SpwBOE1OAY4YlKD9hoDsEDL13OLXfws3FSE2QDqDB+bl+GtS0L5oW73Ex0koq0OShysGOoXRxBzPF4xEWiekOie7hiKY70reATKHWKP+XLS4wWqY7zgKaxNBqDWxH2EpQGDD; "
        "ssxmod_itna2=YqAxBD2DcDy0nqBPxPrDkDuAWIMuih7Y8B8DA=njrD/iKK7DFouDYvOeKD7Bu82kKGFKQuii+YIaYg0pY5IOIiQ6zbZBaw4wt6McX8dI=Sb0mrszl8uO6II1g6u5biy7PQ5sxsn/i1PGlgyaneBiKWv+YIuby77hq1BQitmzAoKbC2moDxqrS2ideQcTKhnPd=WsiRr=uQ8KKrqb7Qu1YrobSh3EM6ozY0qxXYlP60oR+oMCyxI7ifORO24cG15FUbe66iqOkTKbNLPIaz6C41Ram5pgr1vXbogMHcdhYULZ0StZl5ImSwrjWsIRs0c7Wmzjn0iAOFbUU44I4=7BbfpeM4kGRgKbt0YLIhzSy4x7eowlFRDpBzKFbWoi0RwAT3NKdPR=iFCGu8+xd2bWBmoUoUBxroGrP4FKDKixDKk7eA7Y7oqdKc7FSB5XA3W6GOiY27facL763ewLSDK33qGPHqixXGjqG6Upxifh0DEcBKnGDyifRa5PiYOAXSYqYdXWaNK/iV0x4IfdGX5SqEPKYY55iGyGEbIEk9qSBqD7=DYFbTlepMFgrTIQmB2e5pI5cLXrCvQ44b4zxe44K0MmqRsk=iI=Qzhxi4NSLCatNQGr61X=GDD=; "
        "UserName=m0_58002043; "
        "UserInfo=0cdc4e7b0ce247eaabce856bc36b72ea; "
        "UserToken=0cdc4e7b0ce247eaabce856bc36b72ea; "
        "UserNick=%E6%B5%8B%E8%AF%95%E5%B0%8F%E8%80%81%E5%BC%9F; "
        "AU=B56; "
        "UN=m0_58002043; "
        "BT=1635586769322; "
        "p_uid=U010000; "
        "Hm_up_6bcd52f51e9b3dce32bec4a3997715ac=%7B%22islogin%22%3A%7B%22value%22%3A%221%22%2C%22scope%22%3A1%7D%2C%22isonline%22%3A%7B%22value%22%3A%221%22%2C%22scope%22%3A1%7D%2C%22isvip%22%3A%7B%22value%22%3A%220%22%2C%22scope%22%3A1%7D%2C%22uid_%22%3A%7B%22value%22%3A%22m0_58002043%22%2C%22scope%22%3A1%7D%7D; "
        "Hm_ct_6bcd52f51e9b3dce32bec4a3997715ac=6525*1*10_16997883450-1635586744973-543391!5744*1*m0_58002043; "
        "__gads=ID=578a42a6d4b79dcb-223f08007ace0042:T=1635586782:RT=1635586782:S=ALNI_MY68FM8_Tx2RprvFECbZR_DFbK4DQ; "
        "dc_session_id=10_1640346723536.350387; "
        "c_first_ref=www.baidu.com; "
        "c_first_page=https%3A//www.csdn.net/; "
        "c_segment=15; "
        "dc_sid=9417d87cc2063e06526df83adb1b4a49; "
        "Hm_lvt_6bcd52f51e9b3dce32bec4a3997715ac=1639746041,1639746140,1639746147,1640346727; "
        "firstDie=1; "
        "log_Id_click=55; "
        "csrfToken=5uUA7u5wvhEl4aQV3lAyjiMP; "
        "c_pref=https%3A//www.csdn.net/; "
        "c_ref=https%3A//blog.csdn.net/m0_58002043%3Fspm%3D1000.2115.3001.5343; "
        "Hm_lpvt_6bcd52f51e9b3dce32bec4a3997715ac=1640346748; "
        "log_Id_view=209; "
        "c_page_id=default; "
        "dc_tos=r4mbnj; "
        "log_Id_pv=79"
    ),
    "referer": "https://blog.csdn.net/rank/list/total?spm=1001.2014.3001.5476",
    "sec-ch-ua": '"Chromium";v="94", "Google Chrome";v="94", ";Not A Brand";v="99"',
    "sec-ch-ua-mobile": "?0",
    "sec-ch-ua-platform": '"Windows"',
    "sec-fetch-dest": "empty",
    "sec-fetch-mode": "cors",
    "sec-fetch-site": "same-origin",
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.81 Safari/537.36",
}
# Query the CSDN overall author ranking endpoint (page 0, 20 entries).
# The timeout keeps the script from hanging forever on a stalled connection.
response = requests.get(
    url="https://blog.csdn.net/phoenix/web/blog/all-rank?page=0&pageSize=20",
    headers=headers,
    timeout=10,
)
# Fail with a clear HTTP error instead of trying to parse an error page as JSON.
response.raise_for_status()

# Decode the JSON body once and reuse it; every .json() call re-parses the body.
payload = response.json()
rank_items = payload["data"]["allRankListItem"]
print(payload)
print(rank_items)

x = [item["nickName"] for item in rank_items]   # author names (x axis)
y = [item["fansCount"] for item in rank_items]  # follower counts (y axis)

print(x)
print(y)




# Configure text rendering in one step:
#  - 'font.sans-serif': use the SimHei font so the Chinese author names are
#    drawn as text instead of garbled glyphs (the font must be installed).
#  - 'axes.unicode_minus': disable the Unicode minus sign, which the original
#    listing turns off alongside the font switch.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})

# Use a wide canvas: with 20 author names on the x axis, a default-sized
# figure would make the labels overlap.
canvas_size = (30, 8)
fig = plt.figure(figsize=canvas_size)

# Author names go on the x axis, follower counts on the y axis.
plt.plot(x, y)
plt.show()

看下实现效果吧

可以支持内容放大的

ctrl+鼠标滑轮可以将图放大

这里我们可以看到,粉丝数最多的是“一个处女座的程序猿”哦

同时,也可以用柱状图来显示,只要更改一个函数就行:把 plt.plot(x,y) 换成 plt.bar(x,y) 即可

 

看下柱状图效果吧

今天的分享就到这里,我们下次再进行内容进阶吧,欢迎关注哦,我们一起成长学习,拜拜。

猜你喜欢

转载自blog.csdn.net/m0_58002043/article/details/122121885