Selecting rows and fuzzy-filtering a pandas DataFrame by specified conditions

import pandas as pd
import os
import numpy as np
from datetime import date,timedelta
# Date stamp appended to every exported filename.
# To stamp the files with yesterday's date instead, use:
#     date.today() - timedelta(1)
today = date.today()
print(today)

downpath = '/Users/kangyongqing/Downloads/'
file1 = '20230724_172022.csv'

# Read the teacher id as text at parse time. Converting afterwards with
# astype(np.int64) raises if any id cell is empty, and a column that pandas
# infers as float can lose digits of a long id before the cast happens.
# With dtype=str the digits are kept exactly as written in the CSV and
# missing ids stay missing instead of aborting the script.
dt = pd.read_csv(os.path.join(downpath, file1), dtype={'教师id': str})

# Export the full teacher table, stamped with the date computed above.
dt.to_excel(os.path.join(downpath, f'教师档案{today}.xlsx'), index=False)

print(dt.columns)

# Columns kept for the analysis below; order here is the order in dt1.
selected_columns = [
    '教师id', '教师姓名', '教管', '开放状态', '司龄月', '季度级别',
    '确认跟随学生数', '学历', '本科标签', '英语证书', '英语听说',
    '月课时', '月课时类型', '剩余可用小时', '发布时间利用率',
    '试听课量', '试听课转化率',
]
dt1 = dt[selected_columns]
print(dt1.head())

# Number of teacher ids per distinct value of each descriptive column.
xueli, benke, zhengshu, tingshuo = (
    dt1.groupby(column)['教师id'].count()
    for column in ('学历', '本科标签', '英语证书', '英语听说')
)
for counts in (xueli, benke, zhengshu, tingshuo):
    print(counts)

# Build the shortlist from named boolean masks:
#   (degree is '硕士' or '博士', OR undergraduate tag is '双一流')
#   AND the spoken-English value is exactly '熟练使用,可以授课'
#   AND the certificate field is a non-empty string.
degree_mask = dt1['学历'].isin(['硕士', '博士'])
top_school_mask = dt1['本科标签'] == '双一流'
can_teach_mask = dt1['英语听说'] == '熟练使用,可以授课'
has_certificate_mask = dt1['英语证书'].str.len() > 0

dt2 = dt1[
    (degree_mask | top_school_mask) & can_teach_mask & has_certificate_mask
]

# Show how the shortlist splits by degree, then export it.
degree_counts = dt2.groupby('学历')['教师id'].count()
print(degree_counts)
dt2.to_excel(f'{downpath}硕博双一流英文授课教师名单{today}.xlsx', index=False)

# Fuzzy filter: keep rows whose '英语听说' text contains ANY of the keywords.
# The keywords are joined into one regex alternation so the column is scanned
# once; they are interpreted as regular expressions, so escape any keyword
# that contains regex metacharacters before adding it here.
words = ['熟练', '可以授课']
# na=False makes rows with a missing value an explicit non-match instead of
# relying on how NaN propagates through arithmetic on boolean Series.
fuzzy_mask = dt1['英语听说'].str.contains('|'.join(words), na=False)
dt3 = dt1.loc[fuzzy_mask]
dt3.to_excel(os.path.join(downpath, f'熟练授课{today}.xlsx'), index=False)

Common knowledge points:

  1. Converting a long numeric id to a string so its digits are preserved
  2. Adding today's date to the output filename
  3. Viewing per-group counts with groupby
  4. Combining conditions with OR (`|`) and AND (`&`)
  5. Fuzzy (substring) matching with `str.contains`

Guess you like

Origin blog.csdn.net/Darin2017/article/details/131902668