python_将爬取的百度地图迁徙json数据写入到excel

爬取百度地图迁徙数据的方法请参考这篇文章:
python_爬虫_百度地图迁徙_迁入地来源_迁出目的地

import os
import re
from urllib import request
import xlwt

from utils.read_write import readTXT, writeOneJSON, eachFile

# Switch the working directory to the folder holding the crawled JSON files.
# city_range() passes bare file names to readTXT, which presumably resolves
# them relative to this directory -- confirm against utils.read_write.
os.chdir(r'D:\data\百度迁徙大数据\最新城市省份流入流出数据\json')

# Build the cell style used when writing to the Excel sheet.
def set_style(name, height, bold=False):
    """Return an ``xlwt.XFStyle`` carrying a font with the given settings.

    Args:
        name: Font family name, e.g. ``'Times New Roman'``.
        height: Value assigned unchanged to the font's ``height`` attribute.
        bold: Whether the font is bold. Defaults to ``False``.

    Returns:
        A new ``xlwt.XFStyle`` whose font has colour index 4 and the
        requested name, height and weight.
    """
    cell_font = xlwt.Font()
    cell_font.height = height
    cell_font.color_index = 4
    cell_font.bold = bold
    cell_font.name = name

    cell_style = xlwt.XFStyle()
    cell_style.font = cell_font
    return cell_style

# Module-level workbook and sheet shared by city_range() and date_change().
f = xlwt.Workbook()
# cell_overwrite_ok=True lets the same cell be written more than once.
sheet2 = f.add_sheet(u'sheet2', cell_overwrite_ok=True)

# Header row: four groups of three columns (name, owning city, share) for
# city inflow, city outflow, province inflow and province outflow.
row0 = [u'迁入城市', u'所在城市', u'占比',
        u'迁出城市', u'所在城市', u'占比',
        u'迁入省份', u'所在城市', u'占比',
        u'迁出省份', u'所在城市', u'占比']

# Build the bold header style once instead of once per header cell.
header_style = set_style('Times New Roman', 200, True)
for col, title in enumerate(row0):
    sheet2.write(0, col, title, header_style)

# Style applied to every data cell.
default = set_style('Times New Roman', 220)

# Load the city-code text file; city_range() indexes the result and splits
# each entry on ','. A raw string is used so the backslashes of the Windows
# path are not parsed as (invalid) escape sequences.
lines = readTXT(r'D:\project\jianguiyuan\data\BaiduMap_cityCode_1102.txt')

# Patterns that pull the name and the value out of each record of the saved
# migration JSON (after backslashes have been stripped from the text).
_CITY_NAME_RE = re.compile(
    r'{"city_name":"(.*?)","province_name":".*?","value":.*?}')
_CITY_VALUE_RE = re.compile(
    r'{"city_name":".*?","province_name":".*?","value":(.*?)}')
# NOTE(review): unlike the city pattern, the group below sits outside the
# double quotes, so captured province names keep their surrounding quotes.
# Kept exactly as before -- confirm whether that is intended.
_PROVINCE_NAME_RE = re.compile(r'{"province_name":(.*?),"value":.*?}')
_PROVINCE_VALUE_RE = re.compile(r'{"province_name":".*?","value":(.*?)}')

# One entry per column group of the sheet:
# (JSON file-name prefix, first sheet column, name pattern, value pattern).
_SECTIONS = (
    ("城市迁入_", 0, _CITY_NAME_RE, _CITY_VALUE_RE),
    ("城市迁出_", 3, _CITY_NAME_RE, _CITY_VALUE_RE),
    ("省份迁入_", 6, _PROVINCE_NAME_RE, _PROVINCE_VALUE_RE),
    ("省份迁出_", 9, _PROVINCE_NAME_RE, _PROVINCE_VALUE_RE),
)


def _write_section(first_row, first_col, city, names, values):
    """Write one city's records into a three-column group of ``sheet2``.

    Names go to ``first_col``, the owning city to ``first_col + 1`` and the
    values to ``first_col + 2``, starting at ``first_row``.

    Returns:
        The first row below the rows that were just written.
    """
    for offset, name in enumerate(names):
        sheet2.write(first_row + offset, first_col, name, default)
        sheet2.write(first_row + offset, first_col + 1, city, default)
    for offset, value in enumerate(values):
        sheet2.write(first_row + offset, first_col + 2, value, default)
    return first_row + max(len(names), len(values))


def city_range(n, riqi, end=327):
    """Copy the saved migration JSON of one day into the shared sheet.

    For every entry ``lines[i]`` with ``n <= i < end`` the four files
    ``<prefix><city>_<riqi>.json`` (city inflow, city outflow, province
    inflow, province outflow) are read with ``readTXT``, parsed with the
    regular expressions above and written to ``sheet2``.

    Args:
        n: Index into ``lines`` of the first city to process.
        riqi: Date string used in the JSON file names (e.g. ``'20200104'``).
        end: Index one past the last city to process. Defaults to 327, the
            bound that was previously hard-coded.

    Rows are tracked with one running cursor per column group, so cities
    with different numbers of records no longer overwrite each other. The
    first processed city still starts at ``len(names) * (n - 1) + 1``, which
    reproduces the previous layout whenever all cities have the same number
    of records.
    """
    next_rows = [None] * len(_SECTIONS)
    for i in range(n, end):
        print(i)
        # Each entry is comma separated; field 1 is the city name used in
        # the JSON file names.
        obj = lines[i].split(',')
        city = obj[1]
        print(city)
        for k, (prefix, first_col, name_re, value_re) in enumerate(_SECTIONS):
            fileline = readTXT(prefix + city + "_" + riqi + ".json")
            # Only the first element returned by readTXT is parsed;
            # backslashes are stripped before matching.
            text = fileline[0].replace('\\', '')
            names = name_re.findall(text)
            values = value_re.findall(text)
            if next_rows[k] is None:
                next_rows[k] = len(names) * (i - 1) + 1
            next_rows[k] = _write_section(
                next_rows[k], first_col, city, names, values)


def date_change(date, last_date=20200315):
    """Export one ``.xls`` workbook per calendar day.

    For every day from ``date`` to ``last_date`` (both inclusive) the sheet
    is filled through ``city_range(1, riqi)`` and the module-level workbook
    ``f`` is saved as ``<riqi>.xls`` in the output directory.

    Args:
        date: First day to export, as an integer or string in ``YYYYMMDD``
            form (e.g. ``20200104``).
        last_date: Last day to export, same format. Defaults to 20200315,
            the last day covered by the previously hard-coded ranges.

    Raises:
        ValueError: If ``date`` or ``last_date`` is not a valid calendar
            date in ``YYYYMMDD`` form.

    Days are generated with ``datetime`` rather than integer ranges, so
    month ends are handled correctly: 31 January is no longer skipped and a
    start date after January no longer drags in earlier days.
    """
    import datetime  # local import: the file does not import it at top level

    day = datetime.datetime.strptime(str(date), '%Y%m%d').date()
    last_day = datetime.datetime.strptime(str(last_date), '%Y%m%d').date()
    one_day = datetime.timedelta(days=1)

    # Raw string so the backslashes of the Windows path are not parsed as
    # (invalid) escape sequences; the resulting path is unchanged.
    output_dir = r'D:\data\人口数据\百度迁徙大数据\最新城市省份流入流出数据\riqi'

    date_list = []
    while day <= last_day:
        date_list.append(day.strftime('%Y%m%d'))
        day += one_day

    for riqi in date_list:
        print(riqi)
        city_range(1, riqi)
        print("大吉大利,今晚吃鸡啊!")
        # NOTE(review): the workbook ``f`` is created once at module level
        # and reused for every date, so cells written for an earlier date
        # stay in the sheet unless this date overwrites them -- confirm
        # whether a fresh workbook per date is wanted.
        filename = output_dir + '\\' + riqi + '.xls'
        f.save(filename)


if __name__ == '__main__':
    # The input JSON directory is the one passed to os.chdir near the top of
    # the file (D:\data\百度迁徙大数据\最新城市省份流入流出数据\json).
    # Export the workbooks starting from 2020-01-04.
    date_change(20200104)

其中的参考文件请移步到我的下载
我的下载

如需帮忙处理数据或爬取数据,请私信我。

猜你喜欢

转载自blog.csdn.net/qq_30803353/article/details/106455748