Not all of us were caught off guard
A month and a half ago, Wuhan was at the center of the world's attention. At night along the river, a light show lit up 889 buildings in sequence across a 13-kilometer stretch of the riverside, their lights interweaving over the city. Wuhan has never lacked vitality, and today, without a doubt, the people of Wuhan — and many more standing with them — will carry the city through this critical moment!
Project Design features:
- Crawl epidemic time-series data and store it in MySQL
- Analyze the data and generate an epidemic report
- Use itchat to send the analysis report to friends and relatives
- Build a live data dashboard with Django
- Use Tableau for data analysis
Take a look at the final result
This first installment shares the crawler portion of the project, which is about 80% complete.
Next we introduce crawling epidemic data
Target site: DXY (丁香医生 / "Dingxiang Doctor")
import re
import time
import json
import datetime
import requests
import pymysql
import pandas as pd
class VirusSupervise(object):
    """Scrape COVID-19 epidemic statistics from the DXY (丁香医生) mobile page,
    reshape them with pandas, and persist them into a MySQL table.
    """

    def __init__(self):
        # Mobile page that embeds the per-province statistics as an inline JS blob.
        self.url = 'https://3g.dxy.cn/newh5/view/pneumonia?scene=2&clicktime=1579582238&enterid=1579582238&from=timeline&isappinstalled=0'
        self.all_data = list()       # accumulated per-city rows (filled by deep_spider)
        self.host_ip = "127.0.0.1"   # your MySQL server address
        self.host_user = "root"
        # BUG FIX: pymysql requires the password to be a str (it calls .encode()
        # on it); the original passed the int 123456, which raises at connect time.
        self.password = "123456"     # your MySQL password

    def request_page(self):
        """Fetch the page and return the embedded per-province data.

        :return: list of dicts, one per province, as parsed from the
                 ``window.getAreaStat`` JS assignment in the page.
        """
        res = requests.get(self.url)
        # BUG FIX: the original set 'utf - 8', which is not a valid codec name
        # and would make requests fall back to chardet-style guessing.
        res.encoding = 'utf-8'
        # Raw string so the regex escapes are not interpreted by Python first.
        pat0 = re.compile(r'window.getAreaStat = ([\s\S]*?)</script>')
        data_list = pat0.findall(res.text)
        data = data_list[0].replace('}catch(e){}', '')
        # SECURITY FIX: the payload is a plain JSON array, so parse it with
        # json.loads instead of eval() — never eval remote, untrusted content.
        data = json.loads(data)
        return data

    def deep_spider(self, data, province_name):
        """Flatten one province's per-city records into ``self.all_data``.

        Each appended row is
        ``[city, confirmed, cured, dead, province, today, HH:MM:SS]``.

        :param data: iterable of city dicts (keys ``cityName``,
                     ``confirmedCount``, ``curedCount``, ``deadCount``)
        :param province_name: short name of the province these cities belong to
        :return: None (mutates ``self.all_data`` in place)
        """
        for temp_data in data:
            self.all_data.append([temp_data["cityName"], temp_data["confirmedCount"], temp_data["curedCount"],
                                  temp_data["deadCount"], province_name, datetime.date.today(),
                                  datetime.datetime.now().strftime('%H:%M:%S')])

    def filtration_data(self):
        """Fetch, filter and reshape the scraped data.

        Prints a per-province summary DataFrame and returns the full
        per-city DataFrame.

        :return: pandas.DataFrame with columns 城市/确诊/治愈/死亡/省份/日期/时间
        """
        temp_data = self.request_page()
        province_short_names, confirmed_counts, cured_counts, dead_counts = list(), list(), list(), list()
        for i in temp_data:
            province_short_names.append(i['provinceShortName'])  # province
            confirmed_counts.append(i['confirmedCount'])         # confirmed
            cured_counts.append(i['curedCount'])                 # cured
            dead_counts.append(i['deadCount'])                   # dead
            # Drill down into the per-city records of this province.
            self.deep_spider(i['cities'], i["provinceShortName"])
        data_all = pd.DataFrame(self.all_data, columns=["城市", "确诊", "治愈", "死亡", "省份", "日期", "时间"])
        # print(data_all[data_all["省份"] == "陕西"])
        df = pd.DataFrame()
        df['省份'] = province_short_names
        df['确诊'] = confirmed_counts
        df['治愈'] = cured_counts
        df['死亡'] = dead_counts
        print(df)
        # data_all.to_csv("疫情数据_1.csv", encoding="utf_8_sig")
        return data_all

    def insert_wis_sql(self):
        """Insert the freshly scraped rows into the MySQL table ``all_data``.

        Every row of a run is tagged with a monotonically increasing ``cycle``
        number (last cycle in the table + 1) so scraping runs can be told apart.
        Connection and cursor are always closed, even on error.
        """
        data = self.filtration_data()
        coon = pymysql.connect(host=self.host_ip, user=self.host_user, password=self.password,
                               database="epidemic_data", charset="utf8")
        try:
            number = int(pd.read_sql("select cycle from all_data order by id DESC limit 1",
                                     coon)["cycle"].to_list()[0]) + 1
            print("正在向阿里云服务器插入数据: ", number)
            cursor = coon.cursor()  # 创建事务
            try:
                sql = "insert into all_data(cityName, confirmedCount, curedCount, deadCount, province_name, " \
                      "date_info, detail_time, cycle) values(%s, %s, %s, %s, %s, %s, %s, %s)"
                print("正在插入数据...")
                for cityName, confirmedCount, curedCount, deadCount, province_name, date_info, detail_time in zip(
                        data["城市"], data["确诊"], data["治愈"], data["死亡"],
                        data["省份"], data["日期"], data["时间"]):
                    cursor.execute(sql, (cityName, confirmedCount, curedCount, deadCount,
                                         province_name, date_info, detail_time, number))
                coon.commit()
                print("数据插入完成...")
            finally:
                cursor.close()
        finally:
            coon.close()
if __name__ == '__main__':
    # Run one full scrape-and-store cycle against the configured MySQL server.
    supervisor = VirusSupervise()
    supervisor.insert_wis_sql()
If you do not have a MySQL server running, call sup.filtration_data() directly and use pandas' to_csv to save the collected data instead. If you want to scrape on a schedule, wrap the program in a loop with a sleep between cycles.