Basic idea: there are two data tables that are joined on their shared key columns; the merged data is then filtered.
.
├── combine_data.py
├── frack
│ ├── 1_206004.csv
│ ├── 9_206004.csv
│ ├── 0_206004.csv
│ └── 2_206004.csv
├── result
│ ├── 111_206004.csv
│ ├── 999_206004.csv
│ ├── 000_206004.csv
│ └── 112_206004.csv
import pandas as pd
import os
import re
# One list per group. Element 0 is the group's label (used later in the output
# file name); the tables read from disk are appended after it.
list_0 = ["2019016"]
list_1 = ["2019666"]
list_2 = ["2019667"]
list_3 = ["2019761"]

# Collects the group lists above before they are handed to make_data().
list_t = []
def make_data(data, output_dir="."):
    """Merge each group's two tables, filter the rows and write one CSV.

    For every group the second table is left-joined onto the first on the
    columns ``v`` and ``t``. Rows with a missing value in any selected column
    are dropped, all values are converted to strings, and only rows whose
    ``v`` text contains ``24``, ``25`` or ``26`` are kept. The result is
    printed and written as ``combine.<label>.csv`` (GBK encoded, no index).

    Args:
        data: Iterable of groups shaped ``[label, left_table, right_table]``
            where ``label`` is a string and both tables are DataFrames.
        output_dir: Directory that receives the output files. Defaults to
            the current working directory.

    Groups that do not contain both tables are reported and skipped.
    """
    wanted_columns = ['v', 't', 'f', 'c_x0', 'c_y0', 'c_x1', 'c_y1', 'a']
    for item in data:
        if len(item) < 3:
            # A label without both tables cannot be merged.
            print("skip incomplete group:", item[0] if item else item)
            continue

        # Left join: every row of the first table is kept.
        merged = pd.merge(item[1], item[2], on=['v', 't'], how='left')
        merged = merged[wanted_columns]
        # Drop the rows that have NaN in any of the selected columns.
        merged = merged.dropna(axis=0, how='any')
        merged = merged.astype(str)
        # Keep only the rows whose 'v' value contains 24, 25 or 26.
        merged = merged[merged['v'].str.contains('24|25|26')]
        print(merged)

        name = ".".join(["combine", item[0], "csv"])
        # Write to the same path that is printed, so the log matches the file.
        result_path = os.path.join(output_dir, name)
        print(result_path)
        merged.to_csv(result_path, index=False, encoding='gbk')
# Scan the current directory tree for the input CSV files and append each
# table to the list of the group it belongs to.
#
# The longer, more specific names are tested before the shorter ones:
# "111_206004" also contains "1_206004", so testing the short pattern first
# would make the long-name entry unreachable. The first match wins.
_TABLE_PATTERNS = (
    ("111_206004", list_0),
    ("1_206004", list_0),
    ("999_206004", list_1),
    ("9_206004", list_1),
    ("000_206004", list_2),
    ("0_206004", list_2),
    ("112_206004", list_3),
    ("2_206004", list_3),
)

for root, dirs, files in os.walk("./"):
    # Sorting in place fixes the traversal order, so the order in which the
    # tables are appended (and therefore which one make_data() uses as the
    # left side of the join) no longer depends on the directory listing order.
    dirs.sort()
    files.sort()
    for file in files:
        path_file = os.path.join(root, file)
        # Only CSV files whose path contains "old" (any letter case) are read.
        if not (re.search("old", path_file, re.I) and file.endswith(".csv")):
            continue
        for pattern, group in _TABLE_PATTERNS:
            if re.search(pattern, path_file, re.I):
                group.append(pd.read_csv(path_file, index_col=False))
                break
# Gather the four groups, in order, and merge/filter/export each of them.
list_t.extend([list_0, list_1, list_2, list_3])
make_data(list_t)