21, Python merge data table

Basic idea: two data tables are joined on their shared key columns, and the merged rows are then filtered.

.
├── combine_data.py
├── frack
│   ├── 1_206004.csv
│   ├── 9_206004.csv
│   ├── 0_206004.csv
│   └── 2_206004.csv
└── result
    ├── 111_206004.csv
    ├── 999_206004.csv
    ├── 000_206004.csv
    └── 112_206004.csv


import pandas as pd
import os
import re



# One list per group. Each starts with the group's identifier string; the
# directory walk further down appends the DataFrames read for that group.
list_0 = ["2019016"]
list_1 = ["2019666"]
list_2 = ["2019667"]
list_3 = ["2019761"]

# Collects the four group lists before they are handed to make_data().
list_t = []

def make_data(data, output_dir="."):
    """Merge each group's two tables, filter the rows and write one CSV per group.

    Args:
        data: Iterable of groups. Each group is a sequence shaped
            ``[group_id, left_table, right_table]`` where ``group_id`` is a
            string and the tables are DataFrames sharing the key columns
            ``v`` and ``t``.
        output_dir: Directory that receives ``combine.<group_id>.csv``.
            Defaults to the current working directory, which is where the
            files were written before this parameter existed.

    Groups that do not carry two tables are reported and skipped instead of
    raising ``IndexError``.
    """
    columns = ['v', 't', 'f', 'c_x0', 'c_y0', 'c_x1', 'c_y1', 'a']
    for item in data:
        if len(item) < 3:
            print("skip {}: expected 2 tables, got {}".format(
                item[0] if item else "<empty group>", max(len(item) - 1, 0)))
            continue
        group_id, left, right = item[0], item[1], item[2]

        # Left join of the two CSV tables on the shared key columns.
        merged = pd.merge(left, right, on=['v', 't'], how='left')
        # Keep the columns of interest and drop every row that has a NaN in
        # any of them.  dropna() returns a new frame here, so the
        # column-subset is never mutated in place.
        merged = merged[columns].dropna(axis=0, how='any')
        merged = merged.astype(str)
        # Keep only rows whose 'v' text contains 24, 25 or 26.
        merged = merged[merged['v'].str.contains('24|25|26')]
        print(merged)

        name = ".".join(["combine", group_id, "csv"])
        # Print the path that is actually written.
        result_path = os.path.join(output_dir, name)
        print(result_path)
        merged.to_csv(result_path, index=False, encoding='gbk')

# Groups in the order they are handed to make_data().
groups = [list_0, list_1, list_2, list_3]

# (pattern, group index, slot).  Slot 0 is appended to the group first and
# becomes the left table of the merge; slot 1 is appended second.
# The (?<!\d) look-behind keeps e.g. "1_206004" from also matching inside
# "111_206004"; with plain substring patterns the longer names could never
# reach their own branch, and which table ended up on the left depended on
# the order in which os.walk happened to visit the files.
# NOTE(review): treating the short-named file as the left table follows the
# branch order of the original chain - confirm this is the intended side.
table_rules = [
    (re.compile(r"(?<!\d)1_206004", re.I), 0, 0),
    (re.compile(r"(?<!\d)111_206004", re.I), 0, 1),
    (re.compile(r"(?<!\d)9_206004", re.I), 1, 0),
    (re.compile(r"(?<!\d)999_206004", re.I), 1, 1),
    (re.compile(r"(?<!\d)0_206004", re.I), 2, 0),
    (re.compile(r"(?<!\d)000_206004", re.I), 2, 1),
    (re.compile(r"(?<!\d)2_206004", re.I), 3, 0),
    (re.compile(r"(?<!\d)112_206004", re.I), 3, 1),
]

# found[group index][slot] -> DataFrame, or None while no file matched yet.
found = [[None, None] for _ in groups]

for root, dirs, files in os.walk("./"):
    for file in files:
        path_file = os.path.join(root, file)
        # Only CSV files whose path contains "old" (case-insensitive).
        if not (re.search("old", path_file, re.I) and file.endswith(".csv")):
            continue
        for pattern, group_index, slot in table_rules:
            if pattern.search(path_file):
                # The first file found for a slot wins; later duplicates
                # are ignored so the group keeps exactly one table per slot.
                if found[group_index][slot] is None:
                    found[group_index][slot] = pd.read_csv(path_file, index_col=False)
                break

for group, tables in zip(groups, found):
    # Append in slot order so the merge sides no longer depend on walk order.
    group.extend(table for table in tables if table is not None)
    list_t.append(group)

make_data(list_t)

 

Guess you like

Origin blog.csdn.net/sxj731533730/article/details/105767168