# 考勤清洗 (attendance data cleaning)

import pandas as pd
import numpy as np
# Hard-coded locations of the raw workbook and of the cleaned output.
INPUT_PATH = "C:/Users/mgxx/Desktop/工作簿1.xlsx"
OUTPUT_PATH = "C:/Users/mgxx/Desktop/考勤清洗.xlsx"

# Column label of the cell that carries the person's name on each name row.
NAME_COLUMN = 11
# Column labels of the per-day cells; they become the "日期" values 1..31
# (presumably days of the month -- confirm against the workbook layout).
DAY_COLUMNS = range(1, 32)


def clean_attendance(data):
    """Reshape a paired-row attendance sheet into one row per person per day.

    The sheet is consumed as consecutive row pairs: the first row of a pair
    holds the name (in column ``NAME_COLUMN``), the second row holds one cell
    per day (columns ``1..31``) whose text lists the punch times separated by
    newlines.

    Args:
        data: DataFrame as returned by ``pd.read_excel`` for the raw workbook.
            It must contain the integer column labels 1..31.

    Returns:
        A new DataFrame indexed by "日期" (the day column label) with columns:
        "姓名" (name), "时间" (the raw cell text, ``"nan"`` for empty cells),
        "上班" (first punch, empty string when missing), "下班" (second punch,
        ``None`` when missing) and "总时间" (hours between the two punches,
        NaN when either is missing).

    Raises:
        ValueError: if the sheet has an odd number of rows, i.e. a name row
            without a matching time row.
        KeyError: if one of the expected column labels is absent.
    """
    if len(data) % 2:
        raise ValueError(
            "expected alternating name/time rows, got an odd number of rows: "
            f"{len(data)}"
        )

    records = []
    for name_pos in range(0, len(data), 2):
        # The name is read straight from its cell; there is no need to smear
        # it across the whole row first (that write fails on numeric columns
        # in recent pandas versions).
        name = str(data[NAME_COLUMN].iat[name_pos])
        for day in DAY_COLUMNS:
            raw = str(data[day].iat[name_pos + 1])
            punches = raw.split("\n")
            records.append(
                {
                    "日期": day,
                    "姓名": name,
                    "时间": raw,
                    # Empty cells stringify to "nan"; blank them so they
                    # parse as a missing time instead of raising.
                    "上班": punches[0].replace("nan", ""),
                    # Cells with a single punch have no end time.
                    "下班": punches[1] if len(punches) > 1 else None,
                }
            )

    result = pd.DataFrame(
        records, columns=["日期", "姓名", "时间", "上班", "下班"]
    )
    worked = pd.to_datetime(result["下班"]) - pd.to_datetime(result["上班"])
    # Convert the time difference to a plain number of hours.
    result["总时间"] = worked / np.timedelta64(1, "h")
    return result.set_index("日期")


def main():
    """Read the raw workbook, clean it and write the cleaned workbook."""
    data = pd.read_excel(INPUT_PATH)  # input path
    # NOTE: rows must NOT be dropped on missing values here -- the name rows
    # are mostly empty and the pairing of name/time rows relies on them.
    clean_attendance(data).to_excel(OUTPUT_PATH)  # output path


if __name__ == "__main__":
    main()

# 猜你喜欢

# 转载自 www.cnblogs.com/snackpython/p/11994700.html