python_将爬取的百度地图迁徙json数据写成csv矩阵

爬取百度地图迁徙数据的方法请参考这篇文章:
python_爬虫_百度地图迁徙_迁入地来源_迁出目的地

将json数据处理成excel请参考这篇文章:
python_将爬取的百度地图迁徙json数据写入到excel

原始数据格式:

"jsonp_1584195671576_1286958({"errno":0,"errmsg":"SUCCESS","data":{"list":[{"province_name":"山东省","value":42.64},{"province_name":"河南省","value":24.15},

{"province_name":"青海省","value":0.02},{"province_name":"新疆维吾尔自治区","value":0.01}]}})"

处理成功的数据格式,矩阵的格式可用于机器学习研究
矩阵数据参考格式

import datetime
import os
import re

from utils.read_write import readTXT, writeOneJSON, eachFile, writeOneCSV

# Work inside the directory holding the downloaded migration JSON files.
os.chdir(r'D:\data\百度迁徙大数据\最新城市省份流入流出数据\json')

# City-code table: one "<city_id>,<city_name>,..." record per line.
lines = readTXT('D:\project\jianguiyuan\data\BaiduMap_cityCode_1102.txt')

# Header row for the matrices: placeholder 0 in column 0, then the
# 326 city names taken from records 1..326 of the code table.
title = [0] + [record.split(',')[1] for record in lines[1:327]]

def writeTitle(riqi):
    """Write the header row (city names) into both matrix CSVs for date *riqi*.

    Reads the module-level ``title`` header and ``dir`` output directory.
    """
    for prefix in ('各城市迁入矩阵', '各城市迁出矩阵'):
        writeOneCSV(title, dir + prefix + "_" + riqi + '.csv')
    # Province-level variants (disabled in the original as well):
    # writeOneCSV(title,dir+'各省份迁入矩阵'+ "_" + riqi +'.csv')
    # writeOneCSV(title,dir+'各省份迁出矩阵'+ "_" + riqi +'.csv')

# 先将数据下载为json文件
# Pre-compiled patterns for pulling ("city_name", "value") pairs out of one
# backslash-stripped jsonp payload (compiled once, not per file as before).
_PAT_NAME = re.compile('{"city_name":"(.*?)","province_name":".*?","value":.*?}')
_PAT_VALUE = re.compile('{"city_name":".*?","province_name":".*?","value":(.*?)}')


def _matrix_row(filename, label):
    """Parse one downloaded jsonp file into a 327-wide matrix row.

    Returns ``[label, v_1, ..., v_326]`` where ``v_k`` is the migration value
    (as float) for the city at column ``k`` of the global ``title`` header,
    or int 0 when that city does not appear in the file.
    """
    # The raw download has escaped quotes; strip the backslashes first.
    payload = readTXT(filename)[0].replace('\\', '')
    names = _PAT_NAME.findall(payload)
    values = _PAT_VALUE.findall(payload)
    # Dict lookup replaces the original O(n^2) nested scan + list.index.
    col_of = {name: col for col, name in enumerate(title)}
    value_at = {}
    for name, value in zip(names, values):
        col = col_of.get(name)
        if col is not None and col >= 1:
            # setdefault keeps the FIRST occurrence, matching the original
            # combine.index() semantics if a city name ever repeats.
            value_at.setdefault(col, float(value))
    # int 0 (not 0.0) for missing cells, exactly as the original wrote.
    return [label] + [value_at.get(col, 0) for col in range(1, 327)]


# 先将数据下载为json文件 (data must already be downloaded as json files)
def city_range(n, riqi):
    """Append one row per city (table index n..326) to the inflow and
    outflow matrix CSVs for date string *riqi* (YYYYMMDD).

    Reads module-level ``lines`` (city-code table) and ``dir`` (output dir).
    """
    for i in range(n, 327):
        # City-code record is "<id>,<name>,..."; we key the files on the name.
        city = lines[i].split(',')[1]
        qianru = _matrix_row("城市迁入_" + city + "_" + riqi + ".json", city)
        qianchu = _matrix_row("城市迁出_" + city + "_" + riqi + ".json", city)
        writeOneCSV(qianru, dir + '各城市迁入矩阵' + "_" + riqi + '.csv')
        writeOneCSV(qianchu, dir + '各城市迁出矩阵' + "_" + riqi + '.csv')


def _date_strings(start, end):
    """Yield every calendar date from *start* to *end* inclusive as 'YYYYMMDD'."""
    day = start
    one = datetime.timedelta(days=1)
    while day <= end:
        yield day.strftime('%Y%m%d')
        day += one


def date_change(date):
    """Build the matrices for every date from *date* (YYYYMMDD int) through
    2020-03-27.

    Uses real calendar iteration instead of integer ranges, which fixes the
    original off-by-one that silently skipped 2020-01-31 (``range(date,
    20200131)`` excludes its upper bound) and removes the hand-maintained
    month-length boundaries the original comment warned about.
    """
    start = datetime.datetime.strptime(str(date), '%Y%m%d').date()
    end = datetime.date(2020, 3, 27)  # last date the original processed
    for riqi in _date_strings(start, end):
        print(riqi)
        writeTitle(riqi)
        city_range(1, riqi)
        print("大吉大利,今晚吃鸡啊!")


if __name__ == '__main__':
    # Output directory for the generated CSV matrices.
    # NOTE(review): `dir` shadows the builtin of the same name; writeTitle()
    # and city_range() read it as a module-level global, so renaming it here
    # would have to be coordinated with those functions.
    dir = 'D:\data\人口数据\百度迁徙大数据\最新城市省份流入流出数据\矩阵\\'
    date_change(20200101)

其中的参考文件请移步到我的下载
我的下载

如需帮忙处理数据和爬取数据请私聊我。。。

猜你喜欢

转载自blog.csdn.net/qq_30803353/article/details/106455903