python_pandas_计算地铁站点的进出站量和客流量

import os
import pandas as pd
os.chdir(r'D:\data\地铁数据\地铁\161718od')

'''
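This file computes per-station entry/exit counts and origin-destination (OD) flows for metro stations. Header row and a sample record of the input data: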
0,1,2,3,4,5,6,7,8
7,21,2016-02-07 15:57:02,268024,深大站,22,2016-02-07 16:30:07,260028,香梅北站
'''
# Compute the trip production and trip attraction of each individual station.
def on_out(df, out_dir=None):
    """Append per-date, per-hour, per-station trip counts to ``on_out_hour.csv``.

    Production is the number of rows grouped by
    (``date_origin``, ``hour_origin``, ``'4'``); attraction is the number of
    rows grouped by (``date_destination``, ``hour_destination``, ``'8'``).
    The two tables are combined with an outer join on (date, hour, station),
    so a station/hour that appears on only one side is still written with
    its full key and a count of 0 on the other side.

    Args:
        df: DataFrame holding the six columns named above.
        out_dir: Directory prefix (including the trailing separator) that the
            file name is appended to. When ``None``, the module-level
            ``on_out_dir`` is used.

    The CSV is opened in append mode and written without header or index;
    columns are date, hour, station, traffic_production, traffic_attraction.
    """
    keys = ['date', 'hour', 'station']
    counts = ['traffic_production', 'traffic_attraction']

    production = (
        df.groupby(['date_origin', 'hour_origin', '4'])
        .size()
        .rename('traffic_production')
        .reset_index()
    )
    attraction = (
        df.groupby(['date_destination', 'hour_destination', '8'])
        .size()
        .rename('traffic_attraction')
        .reset_index()
    )

    # Give both sides the same key names so the outer join keeps the key of
    # rows that exist on only one side (joining on differently named columns
    # and then selecting the origin keys loses attraction-only rows' keys).
    production.columns = keys + ['traffic_production']
    attraction.columns = keys + ['traffic_attraction']
    merged = pd.merge(production, attraction, on=keys, how='outer')

    # A missing count means no trips were observed; store it as integer 0 so
    # the column type does not flip between int and float across files.
    merged[counts] = merged[counts].fillna(0).astype(int)

    if out_dir is None:
        out_dir = on_out_dir
    merged[keys + counts].to_csv(
        out_dir + 'on_out_hour.csv', header=False, index=False, mode='a'
    )
# Compute the passenger flow between each pair of stations.
def od(df, out_dir=None):
    """Append per-date, per-hour origin-destination trip counts to ``od_hour.csv``.

    Rows are counted per (``date_origin``, ``hour_origin``, ``'4'``, ``'8'``)
    group and written as the columns date_origin, hour_origin, 4, 8, od_count.

    Args:
        df: DataFrame holding the four grouping columns named above.
        out_dir: Directory prefix (including the trailing separator) that the
            file name is appended to. When ``None``, the module-level
            ``od_dir`` is used.

    The CSV is opened in append mode. The header row is written only when
    the file does not exist yet or is empty, so repeated calls do not
    scatter header lines through the data.
    """
    flows = (
        df.groupby(['date_origin', 'hour_origin', '4', '8'])
        .size()
        .rename('od_count')
        .reset_index()
    )

    if out_dir is None:
        out_dir = od_dir
    target = out_dir + 'od_hour.csv'

    # Appending with a header on every call would embed header rows in the
    # middle of the file; emit it once, for a fresh file only.
    write_header = not os.path.exists(target) or os.path.getsize(target) == 0
    flows.to_csv(target, index=False, header=write_header, mode='a')


if __name__ == '__main__':
    # Output directory prefixes, read as module globals by on_out() and od().
    # Raw strings avoid the invalid escape sequences (\d, \o, ...) that the
    # plain literals contained; the trailing separator is appended explicitly
    # because a raw string cannot end in a single backslash.
    on_out_dir = r'D:\data\地铁数据\地铁\on_out_161718' '\\'
    od_dir = r'D:\data\地铁数据\地铁\od_161718' '\\'
    src_dir = r'D:\data\地铁数据\地铁\161718od'

    # os.listdir() returns entries in arbitrary order, so sort before slicing
    # to make the selection reproducible.
    # NOTE(review): the first 13 entries are skipped, presumably because they
    # were processed in an earlier run -- confirm before re-running.
    files = sorted(os.listdir(src_dir))[13:]
    for name in files:
        # Build the full path instead of relying on the working directory.
        # Columns at positions 2/6 are the origin/destination timestamps and
        # 4/8 the origin/destination station names (see the sample record at
        # the top of the file).
        df = pd.read_csv(os.path.join(src_dir, name), usecols=[2, 4, 6, 8])

        # Vectorised slicing; unlike a lambda it passes missing timestamps
        # through as NaN instead of raising, and groupby then leaves those
        # rows out of the counts.
        # NOTE(review): for 'YYYY-MM-DD HH:MM:SS' the [10:13] slice yields the
        # hour with a leading space (e.g. ' 15'); kept as-is so new output
        # matches rows already appended to the CSV files.
        df['date_origin'] = df['2'].str[:10]
        df['hour_origin'] = df['2'].str[10:13]
        df['date_destination'] = df['6'].str[:10]
        df['hour_destination'] = df['6'].str[10:13]

        on_out(df)
        od(df)

如需数据示例或帮助，请私聊我。

猜你喜欢

转载自blog.csdn.net/qq_30803353/article/details/106130072
今日推荐