# Add a new column to the CSV files in both folders.
"""
Column-name conversion.
Add the year.
"""
import pandas as pd
import os
# Folder whose files are processed. Only one folder is handled per run; swap
# the two assignments below to switch to the other one.
# dirpath = 'Springer'
dirpath = 'Elsevier'
def getCsvFiles(dirpath):
    """Collect the names of the CSV files found under ``dirpath``.

    The directory tree is walked recursively with ``os.walk``. Only the base
    name of each file is returned, so a file located in a sub-directory is
    reported without its relative path.

    Args:
        dirpath: Root directory to search.

    Returns:
        A list with the base names of all files whose extension is ``.csv``
        (matched case-insensitively). The list is empty when nothing matches
        or when ``dirpath`` cannot be listed, since ``os.walk`` ignores
        listing errors by default.
    """
    csvfile = []
    for _root, _dirs, files in os.walk(dirpath):
        # Keep CSV files only: anything else in the folder (hidden files,
        # notes, ...) must not be handed to a CSV reader by the caller.
        csvfile.extend(
            name for name in files if name.lower().endswith('.csv')
        )
    return csvfile
# File names found under dirpath, collected once before any output file is
# written into that folder.
files = getCsvFiles(dirpath)
def getNewCsvFiles(filepath, year):
    """Write a copy of a CSV file with an added ``year`` column.

    The file at ``filepath`` is read, every column whose name starts with
    ``Unnamed`` is dropped (pandas gives such names to columns that have no
    header, e.g. a previously saved index), and a ``year`` column holding
    ``year`` in every row is added. The resulting frame is printed and then
    written, without the index, to ``new<year>.csv`` in the same directory as
    ``filepath``.

    Args:
        filepath: Path of the CSV file to read.
        year: Value stored in the ``year`` column. It is also used, converted
            with ``str()``, to build the output file name.

    Returns:
        None. The result is written to disk.
    """
    data = pd.read_csv(filepath)
    keep = ~data.columns.str.contains("^Unnamed")
    # assign() builds a new frame instead of writing into a slice of `data`,
    # which avoids pandas' SettingWithCopyWarning.
    df = data.loc[:, keep].assign(year=year)
    print(df)
    # Write next to the input file rather than relying on a module-level
    # folder name; str() keeps the name valid for non-string years.
    out_dir = os.path.dirname(filepath)
    outputfile = os.path.join(out_dir, 'new' + str(year) + '.csv')
    df.to_csv(outputfile, index=False)
# Add the year column to every input file. The year is the part of the file
# name before the first dot (e.g. "2019.csv" -> "2019").
for file in files:
    # Results are written into the same folder as "new<year>.csv"; skip them
    # so that running the script again does not reprocess its own output.
    if file.startswith('new'):
        continue
    year = file.split('.')[0]
    filepath = os.path.join(dirpath, file)
    getNewCsvFiles(filepath, year)
print("Transform Successfully")