pandas处理excel表格

1.需求

今天有同学问我一个小需求,如下:

在这里插入图片描述

要把以上excel表格中8点、14点、20点的数据,提取出来,然后转换成如下excel数据:

这里表格中sheet_name太多,复制、粘贴太过于枯燥! 哭哭哭!

2.使用pandas处理

每个excel表格中sheet_name都是从 0629 到 0903,而且每个sheet_name数据都是一样的,我们读取一个sheet_name之后就可以循环读取,手动复制、粘贴太枯燥了…

2.1 观察数据

我们先读取这个表格:

import numpy as np
import pandas as pd

path = r'C:\Users\JackYang\Desktop\数据处理\1仓.xlsx'
data = pd.read_excel(path, sheet_name='0629')
print(data.iloc[1, 6])
data

2.2 先读取表格中的一个sheet页

8点、14点、20点每个数据之间相差的间隔都是 4。这里我们就可以求出8点的数据后,使用循环,代码如下:

def read_sheet(path, begin_sheet_name: str):  
  
    print(begin_sheet_name)  
    data = pd.read_excel(path, sheet_name=begin_sheet_name)  
  
    # 1.日期  
    my_str = data.iloc[0, 0]  
    data_time = my_str[my_str.index('时') + 3: my_str.index('日') + 1]  
    data_time = pd.Series(data_time)  
  
    # 提取一个sheet页的数据  
    res = pd.DataFrame()  
    for i in [0, 4, 8]:  
        # 2.时间  
        time = data.iloc[1, 2 + i]  
        time = pd.Series(time)  
        time  
  
        # 3.外温  
        out_temp = data.iloc[2, 2 + i]  
        out_temp = pd.Series(out_temp)  
  
        # 4.外湿  
        out_wet = data.iloc[3, 2 + i]  
        out_wet = pd.Series(out_wet)  
  
        # 5.仓湿  
        w_wet = data.iloc[4, 2 + i]  
        w_wet = pd.Series(w_wet)  
  
        # 6. 仓1 , 2, 3, 4,5  
        w1 = pd.Series(data.iloc[5, 3 + i])  
        w2 = pd.Series(data.iloc[6, 3 + i])  
        w3 = pd.Series(data.iloc[7, 3 + i])  
        w4 = pd.Series(data.iloc[8, 3 + i])  
        w5 = pd.Series(data.iloc[9, 3 + i])  
  
        # 7.粮温高 下、中、上、表  
        h1 = pd.Series(data.iloc[11, 2 + i])  
        h2 = pd.Series(data.iloc[11, 3 + i])  
        h3 = pd.Series(data.iloc[11, 4 + i])  
        h4 = pd.Series(data.iloc[11, 5 + i])  
  
        # 8 低  
        L1 = pd.Series(data.iloc[12, 2 + i])  
        L2 = pd.Series(data.iloc[12, 3 + i])  
        L3 = pd.Series(data.iloc[12, 4 + i])  
        L4 = pd.Series(data.iloc[12, 5 + i])  
  
        # 9 平均  
        a1 = pd.Series(data.iloc[13, 2 + i])  
        a2 = pd.Series(data.iloc[13, 3 + i])  
        a3 = pd.Series(data.iloc[13, 4 + i])  
        a4 = pd.Series(data.iloc[13, 5 + i])  
  
        # 10.  
        # 连接  
        mid = pd.concat([data_time, time, out_temp, out_wet, w_wet  
                            , w1, w2, w3, w4, w5,  
                         h1, h2, h3, h4,  
                         L1, L2, L3, L4,  
                         a1, a2, a3, a4], axis=1)  
        res = pd.concat([res, mid], axis=0)  
        pass  
    return res

2.3 实现

import numpy as np  
import pandas as pd  
from datetime import datetime  
from datetime import timedelta  
  
  
# 得到下一个 sheet_namedef get_next_sheet(date_time: str):  
    month = int(date_time[:2])  
    day = int(date_time[2:])  
  
    time = datetime(2022, month, day)  
    d = time + timedelta(weeks=0, days=1, hours=0, minutes=0, seconds=0, milliseconds=0, microseconds=0, )  
    # 将日期转换成字符串  
    date = d.strftime('%Y%m%d')  
    return date[4:]  
  
  
path = r'C:\Users\JackYang\Desktop\数据处理\12仓.xlsx'  
  
  
def read_sheet(path, begin_sheet_name: str):  
  
    print(begin_sheet_name)  
    data = pd.read_excel(path, sheet_name=begin_sheet_name)  
  
    # 1.日期  
    my_str = data.iloc[0, 0]  
    data_time = my_str[my_str.index('时') + 3: my_str.index('日') + 1]  
    data_time = pd.Series(data_time)  
  
    # 提取一个sheet页的数据  
    res = pd.DataFrame()  
    for i in [0, 4, 8]:  
        # 2.时间  
        time = data.iloc[1, 2 + i]  
        time = pd.Series(time)  
        time  
  
        # 3.外温  
        out_temp = data.iloc[2, 2 + i]  
        out_temp = pd.Series(out_temp)  
  
        # 4.外湿  
        out_wet = data.iloc[3, 2 + i]  
        out_wet = pd.Series(out_wet)  
  
        # 5.仓湿  
        w_wet = data.iloc[4, 2 + i]  
        w_wet = pd.Series(w_wet)  
  
        # 6. 仓1 , 2, 3, 4,5  
        w1 = pd.Series(data.iloc[5, 3 + i])  
        w2 = pd.Series(data.iloc[6, 3 + i])  
        w3 = pd.Series(data.iloc[7, 3 + i])  
        w4 = pd.Series(data.iloc[8, 3 + i])  
        w5 = pd.Series(data.iloc[9, 3 + i])  
  
        # 7.粮温高 下、中、上、表  
        h1 = pd.Series(data.iloc[11, 2 + i])  
        h2 = pd.Series(data.iloc[11, 3 + i])  
        h3 = pd.Series(data.iloc[11, 4 + i])  
        h4 = pd.Series(data.iloc[11, 5 + i])  
  
        # 8 低  
        L1 = pd.Series(data.iloc[12, 2 + i])  
        L2 = pd.Series(data.iloc[12, 3 + i])  
        L3 = pd.Series(data.iloc[12, 4 + i])  
        L4 = pd.Series(data.iloc[12, 5 + i])  
  
        # 9 平均  
        a1 = pd.Series(data.iloc[13, 2 + i])  
        a2 = pd.Series(data.iloc[13, 3 + i])  
        a3 = pd.Series(data.iloc[13, 4 + i])  
        a4 = pd.Series(data.iloc[13, 5 + i])  
  
        # 10.  
        # 连接  
        mid = pd.concat([data_time, time, out_temp, out_wet, w_wet  
                            , w1, w2, w3, w4, w5,  
                         h1, h2, h3, h4,  
                         L1, L2, L3, L4,  
                         a1, a2, a3, a4], axis=1)  
        res = pd.concat([res, mid], axis=0)  
        pass  
    return res  
  
  
if __name__ == '__main__':  
    save_path = r'C:\Users\JackYang\Desktop\数据处理\12仓_res.xlsx'  
    # res = read_sheet(path, "0829")  
    # res.to_excel(save_path, index=None)  
    final_res = pd.DataFrame()  
    begin_date = '0629'                         # 0629  
    while begin_date <= "0903":  
        print(begin_date)  
        res = read_sheet(path, begin_date)  
        final_res = pd.concat([final_res, res], axis=0)  
        begin_date = get_next_sheet(begin_date)  
        pass  
    final_res.columns = ["日期", '时间', '外温', '外湿', '仓湿', '仓1', '仓2', '仓3', '仓4', '仓5',  
                            '下高', '中高', '上高', '表高', '下低', '中低', '上底', '表低',  
                            '下平', '中平', '上平', '表平']  
    final_res.to_excel(save_path, index=None)  
    # print(final_res)

猜你喜欢

转载自blog.csdn.net/y_h_k_666/article/details/124497929