[Python] Reading a SQLite table with pandas and filling in missing values

def fix_data(engine, tb_name: str, stnm, stcd) -> None:
    """Fill gaps in an hourly time-series table and write the result back.

    The table is read into a DataFrame, its ``tm`` column is parsed and
    localized to ``Asia/Shanghai``, and the rows are re-indexed onto a
    complete 60-minute grid spanning the earliest to the latest ``tm``.
    Grid slots with no source row are filled with ``val = 0`` and the given
    station name/code. ``stamp`` (Unix seconds) and ``ID`` (1-based hour
    counter) are then recomputed for every row and the table is rewritten.

    Notes:
        * Rows whose ``tm`` does not fall exactly on the hourly grid (anchored
          at the earliest ``tm``) are not carried over by the reindex.
        * The table ``tb_name`` is REPLACED with the repaired data.

    Args:
        engine: Database connectable accepted by ``pd.read_sql`` with a table
            name and by ``DataFrame.to_sql`` (presumably a SQLAlchemy engine
            bound to a SQLite file -- confirm against the caller).
        tb_name: Name of the table to read and overwrite. It must contain at
            least the columns ``tm``, ``val``, ``stnm`` and ``stcd``.
        stnm: Value written to ``stnm`` for rows created to fill a gap.
        stcd: Value written to ``stcd`` for rows created to fill a gap.
    """
    df = pd.read_sql(tb_name, engine)
    if df.empty:
        # No rows means no start/end to build a time grid from; nothing to fix.
        return

    df['tm'] = pd.to_datetime(df['tm']).dt.tz_localize('Asia/Shanghai')
    print(df['tm'].shape[0])  # row count before gap filling

    # Start/end of the series. min()/max() instead of first/last row so the
    # grid is correct even when the table is not stored in time order.
    _start = df['tm'].min()
    _end = df['tm'].max()
    full_range = pd.date_range(start=_start, end=_end, freq='60min', tz='Asia/Shanghai')
    print(full_range.shape[0])  # row count after gap filling

    # Index the original data by time; duplicated timestamps keep the last row.
    df = df.drop_duplicates(subset=['tm'], keep='last')
    df = df.set_index('tm', drop=True)

    # Re-index onto the full grid: missing hours appear as all-NaN rows,
    # which are then filled with defaults.
    df = df.reindex(full_range)
    df = df.fillna({'val': 0, 'stnm': stnm, 'stcd': stcd})
    df = df.reset_index(names='tm')  # keep the time column, restore a plain index

    scale = 60 * 60  # source data has hourly resolution
    # 1447344000 == 2015-11-13 00:00:00 Asia/Shanghai; the hour starting at
    # this instant gets ID 1 (presumably the start of the record -- confirm).
    id_origin = 1447344000
    unix_epoch = pd.Timestamp('1970-01-01', tz='UTC')

    # Whole seconds since the Unix epoch, computed column-wise.
    df['stamp'] = (df['tm'] - unix_epoch) // pd.Timedelta(seconds=1)
    # Integer floor division avoids the float round-trip of int(a / b).
    df['ID'] = (df['stamp'] - id_origin) // scale + 1
    df = df.set_index('ID', drop=True)
    print(df)

    # The table was just read from this engine, so it necessarily exists:
    # if_exists='fail' would always raise ValueError. Replace it instead.
    df.to_sql(tb_name, engine, if_exists='replace', index=True)

You may also like

Origin blog.csdn.net/qq_25262697/article/details/131470850