Python novel coronavirus outbreak statistics: automatic crawling + report sending + data dashboard (Part 1: the crawler)


It caught everyone off guard.

A month and a half ago, Wuhan was still that bold, free-spirited Wuhan. Its 165 rivers flowed day and night; along 13 kilometers of riverbank, the 889 buildings of the light show lit up one after another, their lights crisscrossing in the dark. Wuhan has never lacked life, but today, without question, the people of Wuhan, and many more beyond, are accompanying the city through its most difficult moment!

Project Design features:

  1. Periodically crawl the epidemic data and store it in MySQL
  2. Analyse the data and generate an epidemic report
  3. Send the analysis report to friends and family with itchat
  4. Build a data dashboard with Django
  5. Use Tableau for data analysis
Take a look at the final result:

The project is about 80% complete, and I'm sharing the first part with everyone right away.

Next, let's walk through crawling the epidemic data.

Target URL: DXY (丁香医生)

import re
import time
import json
import datetime
import requests
import pymysql
import pandas as pd


class VirusSupervise(object):
    def __init__(self):
        self.url = 'https://3g.dxy.cn/newh5/view/pneumonia?scene=2&clicktime=1579582238&enterid=1579582238&from=timeline&isappinstalled=0'
        self.all_data = list()
        self.host_ip = "127.0.0.1"  # your MySQL server address
        self.host_user = "root"
        self.password = "123456"  # your MySQL password (pymysql expects a string)

    def request_page(self):
        """
        请求页面数据
        """
        res = requests.get(self.url)
        res.encoding = 'utf - 8'
        pat0 = re.compile('window.getAreaStat = ([\s\S]*?)</script>')
        data_list = pat0.findall(res.text)
        data = data_list[0].replace('}catch(e){}', '')
        data = eval(data)
        return data

    def deep_spider(self, data, province_name):
        """
        深度提取出标签里详细的数据
        :param data:
        :param province_name:
        :return:
        """
        for temp_data in data:
            self.all_data.append([temp_data["cityName"], temp_data["confirmedCount"], temp_data["curedCount"],
                                  temp_data["deadCount"], province_name, datetime.date.today(),
                                  datetime.datetime.now().strftime('%H:%M:%S')])

    def filtration_data(self):
        """
        过滤数据
        """
        temp_data = self.request_page()
        province_short_names, confirmed_counts, cured_counts, dead_counts = list(), list(), list(), list()
        for i in temp_data:
            province_short_names.append(i['provinceShortName'])  # province
            confirmed_counts.append(i['confirmedCount'])  # confirmed
            cured_counts.append(i['curedCount'])  # cured
            dead_counts.append(i['deadCount'])  # deaths
            self.deep_spider(i['cities'], i["provinceShortName"])  # parse the city-level details into self.all_data

        # Chinese column names: 城市=city, 确诊=confirmed, 治愈=cured, 死亡=deaths, 省份=province, 日期=date, 时间=time
        data_all = pd.DataFrame(self.all_data, columns=["城市", "确诊", "治愈", "死亡", "省份", "日期", "时间"])
        # print(data_all[data_all["省份"] == "陕西"])
        df = pd.DataFrame()
        df['省份'] = province_short_names
        df['确诊'] = confirmed_counts
        df['治愈'] = cured_counts
        df['死亡'] = dead_counts
        print(df)
        # data_all.to_csv("疫情数据_1.csv", encoding="utf_8_sig")
        return data_all

    def insert_wis_sql(self):
        data = self.filtration_data()

        coon = pymysql.connect(host=self.host_ip, user=self.host_user, password=self.password, database="epidemic_data",
                                    charset="utf8")
        # "cycle" numbers each crawl run; continue from the last value stored in the table
        number = int(pd.read_sql("select cycle from all_data order by id DESC limit 1", coon)["cycle"].to_list()[0]) + 1
        print("Inserting data into the Aliyun server, cycle:", number)
        cursor = coon.cursor()  # create a cursor
        sql = "insert into all_data(cityName, confirmedCount, curedCount, deadCount, province_name, " \
              "date_info, detail_time, cycle) values(%s, %s, %s, %s, %s, %s, %s, %s)"

        print("正在插入数据...")
        for cityName, confirmedCount, curedCount, deadCount, province_name, date_info, detail_time in zip(data["城市"],
                        data["确诊"], data["治愈"], data["死亡"], data["省份"], data["日期"], data["时间"]):
            cursor.execute(sql, (cityName, confirmedCount, curedCount, deadCount, province_name, date_info, detail_time, number))
            coon.commit()
        print("数据插入完成...")
        cursor.close()
        coon.close()


if __name__ == '__main__':
    sup = VirusSupervise()
    sup.insert_wis_sql()
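
The script above assumes a MySQL database called epidemic_data already exists and contains an all_data table with an auto-increment id and a cycle column that numbers each crawl run. The original post does not show the schema; a minimal sketch, with column types that are assumptions inferred from the INSERT statement above, might look like this:

import pymysql

# Hypothetical schema for the all_data table the crawler writes to.
# Column names come from the INSERT statement above; the types are assumptions.
create_sql = """
CREATE TABLE IF NOT EXISTS all_data (
    id INT AUTO_INCREMENT PRIMARY KEY,
    cityName VARCHAR(50),
    confirmedCount INT,
    curedCount INT,
    deadCount INT,
    province_name VARCHAR(50),
    date_info DATE,
    detail_time TIME,
    cycle INT
)
"""

conn = pymysql.connect(host="127.0.0.1", user="root", password="123456",
                       database="epidemic_data", charset="utf8")
with conn.cursor() as cursor:
    cursor.execute(create_sql)
conn.commit()
conn.close()

Note that insert_wis_sql reads the latest cycle value before inserting, so the table needs at least one existing row (or that line needs a fallback) before the very first run.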

If you don't have a MySQL server, just run sup.filtration_data() and save the result with pandas' to_csv to finish the data collection. If you want to crawl on a schedule, wrap the program in a loop with a sleep between cycles, as in the sketch below.
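
A minimal sketch of that scheduled run, assuming it replaces the __main__ block at the bottom of the script (the one-hour interval and the CSV file name are placeholders):

import time

if __name__ == '__main__':
    sup = VirusSupervise()
    while True:
        # without a MySQL server: collect the data and dump it to CSV
        data_all = sup.filtration_data()
        data_all.to_csv("疫情数据_1.csv", encoding="utf_8_sig")
        # with MySQL available, call sup.insert_wis_sql() here instead
        time.sleep(60 * 60)  # sleep one hour between crawl cycles (interval is a placeholder)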

Show results:


If you have any questions, feel free to ask. If you found this helpful, please give it a like!

Origin blog.csdn.net/qq_42768234/article/details/104131630