Take the current novel coronavirus pneumonia outbreak as an example:
we crawl web data from a real-time epidemic tracking platform, using Tencent News here.
Steps
- Open the Tencent News live epidemic page in a browser
- Use the browser's built-in developer tools to capture the data requests
- Analyze the captured data to work out how each piece of data is obtained
- Start coding
The following modules are required (if you do not know how to install one, search Baidu for "python xx module how to install" and follow a tutorial):
- requests module
- re module
- json module
Getting to the point
From step 3 we learn that the worldwide total case counts are obtained with a GET request to:
https://view.inews.qq.com/g2/getOnsInfo?name=wuwei_ww_global_vars&callback
Looking at the data returned by this request, it is clearly a piece of JSON, so we can parse it with the json module
and then process the data.
The code is as follows:
import requests
import re
import json
# Result lists filled by the script; each starts empty.
country = []  # country
area = []  # area (the original comment labels this "town")
city = []  # city
dead = []  # death count
confirm = []  # confirmed-case count
suspect = []  # suspected-case count
heal = []  # healed count

# Tencent News endpoints queried with HTTP GET.
# Global case totals.
Get_China = r"https://view.inews.qq.com/g2/getOnsInfo?name=wuwei_ww_global_vars&callback"
# Regional data.
Get_City = r"https://view.inews.qq.com/g2/getOnsInfo?name=wuwei_ww_area_datas&callback"
# Regional data (counts endpoint).
Get_City_V1 = r"https://view.inews.qq.com/g2/getOnsInfo?name=wuwei_ww_area_counts&callback"
def GetHtmlText(url):
    """Fetch ``url`` with an HTTP GET and return the response body as text.

    The response encoding is set from ``apparent_encoding`` before the body
    is decoded.

    Args:
        url: Address to request.

    Returns:
        The decoded response body, or the literal string ``"Error"`` when
        the request fails or the server answers with an HTTP error status.
    """
    try:
        res = requests.get(url, timeout=30)
        res.raise_for_status()
    except requests.RequestException:
        # Only network/HTTP failures map to the sentinel. A bare ``except``
        # would also swallow KeyboardInterrupt and genuine programming errors.
        return "Error"
    res.encoding = res.apparent_encoding
    return res.text
# Download the global-totals payload at module level; GetHtmlText returns the
# string "Error" instead of raising when the request fails.
China = GetHtmlText(Get_China)
def GetTextCenter(Text, TextLeft, TextRight):
    """Return the part of ``Text`` between ``TextLeft`` and ``TextRight``.

    The first occurrence of ``TextLeft`` is located, then the first
    occurrence of ``TextRight`` after it; the text strictly between the two
    is returned.

    Args:
        Text: String to search.
        TextLeft: Delimiter that precedes the wanted text.
        TextRight: Delimiter that follows the wanted text.

    Returns:
        The text between the delimiters, or ``""`` when either delimiter
        cannot be found.
    """
    start = Text.find(TextLeft)
    if start == -1:
        # Without this guard the -1 from find() would be added to
        # len(TextLeft) and the slice would begin at an unrelated offset.
        return ""
    start += len(TextLeft)
    end = Text.find(TextRight, start)
    if end == -1:
        # Without this guard Text[:-1] would silently drop the last character.
        return ""
    return Text[start:end]
# Unwrap the global-totals response: decode the outer JSON, take its "data"
# string, extract the {...} objects embedded in it and decode the first one.
City_Count_json = json.loads(
    re.findall(r"{[^}]+}", json.loads(China)["data"])[0]
)

# Read the summary fields as strings so they can be joined into the report.
(recentTime, confirmCount, suspectCount,
 deadCount, cure, hintWords) = (
    str(City_Count_json[key])
    for key in ("recentTime", "confirmCount", "suspectCount",
                "deadCount", "cure", "hintWords")
)

# Print update time, confirmed, dead, suspected, healed and the latest hint.
print("".join([
    "更新时间:", recentTime, "\n",
    "确诊人数为:", confirmCount, "人\n",
    "死亡人数为:", deadCount, "人\n",
    "疑似人数为:", suspectCount, "人\n",
    "治愈人数为:", cure, "人\n",
    "最新消息:", hintWords, "\n",
]))

# Fetch the per-region counts and split the "data" string into one JSON
# object string per entry.
City_json = GetHtmlText(Get_City_V1)
City_Data = re.findall(r"{[^}]+}", json.loads(City_json)["data"])
def GetCityData(CitysJson):
    """Decode per-region JSON strings into the module-level result lists.

    Each entry is parsed with ``json.loads`` and its "country", "area",
    "city", "dead", "confirm", "suspect" and "heal" values are appended, in
    that order, to the module lists of the same names.

    Args:
        CitysJson: Sequence of JSON object strings.

    Returns:
        The number of entries in ``CitysJson``.
    """
    total = len(CitysJson)
    # The lists are only appended to, never rebound, so no ``global``
    # declaration is needed.
    targets = (
        ("country", country),
        ("area", area),
        ("city", city),
        ("dead", dead),
        ("confirm", confirm),
        ("suspect", suspect),
        ("heal", heal),
    )
    for raw in CitysJson:
        record = json.loads(raw)
        for key, sink in targets:
            sink.append(record[key])
    return total
# Fill the result lists, then print one line per record. Records for "中国"
# also show the area, and the city when it is non-empty.
length = GetCityData(City_Data)
for n in range(length):
    row = "国家:" + str(country[n]) + "\t"
    if country[n] == "中国":
        row += "地区:" + str(area[n]) + "\t"
        if city[n] != "":
            row += "城市:" + str(city[n]) + "\t"
    row += "确诊:" + str(confirm[n]) + "例\t" + "治愈:" + str(heal[n]) + "例\t" + "死亡:" + str(dead[n]) + "例\n"
    print(row)