問題の説明
深層学習モデルのトレーニング中に、多くのcsvテーブルが生成されますが、これは手作業で処理するには面倒です。
解決
やはり、コードで処理する方が便利です。
import os
import re

import pandas as pd
from pandas import DataFrame
def flatten(a):
    """Yield the leaf items of an arbitrarily nested list, depth-first.

    Only ``list`` instances are descended into; every other object
    (including tuples and strings) is yielded unchanged.

    Args:
        a: An iterable whose items may themselves be lists, nested to any
            depth.

    Yields:
        Each non-list item, in left-to-right order.
    """
    for each in a:
        if isinstance(each, list):
            # Recurse so that inner lists contribute their own leaves.
            yield from flatten(each)
        else:
            yield each
def convert_to_int(lists):
    """Return a copy of a nested list with every leaf converted via ``int``.

    The nesting structure is preserved: inner lists are converted
    recursively, every other element is passed to ``int``.

    Args:
        lists: A list whose elements are either values accepted by ``int``
            (for example numeric strings) or further lists of the same kind.

    Returns:
        A new list with the same shape as ``lists`` and integer leaves.

    Raises:
        ValueError: If a leaf cannot be interpreted as an integer.
    """
    return [
        convert_to_int(el) if isinstance(el, list) else int(el)
        for el in lists
    ]
# File names are expected to embed a layer tag ("CNN<k>") and a data-size tag
# ("td<n>").  The digit run is captured as a whole so that "CNN1" does not
# match inside "CNN10" and "td510" does not match inside "td5100".
_LAYER_TAG = re.compile(r"CNN(\d+)")
_SIZE_TAG = re.compile(r"td(\d+)")

# Zero-based CSV rows (file read without a header) holding, in this order:
# train accuracy, test accuracy, train loss, test loss.
_METRIC_ROWS = (4, 5, 6, 7)


def _read_metric(table, row, pathname):
    """Return the first number following the ``label:`` prefix in column 0 of *row*."""
    cell = table.iloc[row, 0]
    _label, *fields = str(cell).split(":")
    if not fields:
        raise ValueError(
            "{}: row {} has no 'label:value' entry: {!r}".format(
                pathname, row, cell
            )
        )
    # Convert every field (not only the first) so a malformed trailing field
    # still fails loudly instead of being silently ignored.
    return [float(field) for field in fields][0]


def walk_dir(dir, topdown=True):
    """Collect accuracy/loss values from training-log CSV files and print them.

    Every directory below ``dir`` is visited.  A ``.csv`` file is processed
    when its name contains a layer tag ``CNN1`` .. ``CNN10`` and a data-size
    tag ``td510``, ``td1020`` .. ``td5100`` (multiples of 510).  For such a
    file the first column of rows 4-7 is expected to look like
    ``label:value``; the value is recorded as ``[layer, size_step, value]``
    where ``size_step`` is the multiplier (1-10), not the raw size.

    After each directory has been scanned, four lists are printed:
    ``ACC_train``, ``ACC_test``, ``LOSS_train`` and ``LOSS_test``.

    NOTE(review): the original indentation of this function was lost, so
    printing once per visited directory is a reconstruction (the result
    lists are reset per directory) - confirm against the intended output.

    Args:
        dir: Root directory to scan.  The name shadows the builtin but is
            kept so existing keyword callers continue to work.
        topdown: Forwarded to ``os.walk``.

    Raises:
        ValueError: If a metric cell has no ``:``-separated value or the
            value is not a valid float.
        IndexError: If a matching file has fewer than eight rows.
    """
    for root, _subdirs, files in os.walk(dir, topdown):
        acc_train, acc_test, loss_train, loss_test = [], [], [], []
        for name in files:
            if not name.endswith(".csv"):
                continue

            layer_tags = {m.group(1) for m in _LAYER_TAG.finditer(name)}
            size_tags = {m.group(1) for m in _SIZE_TAG.finditer(name)}
            matches = [
                (layer_no, size_step)
                for layer_no in range(1, 11)
                if str(layer_no) in layer_tags
                for size_step in range(1, 11)
                if str(size_step * 510) in size_tags
            ]
            if not matches:
                continue

            # Normalise Windows separators so the path reads the same everywhere.
            pathname = os.path.join(root, name).replace("\\", "/")
            # Read once per file; only the first column is needed, so no
            # fixed column count is imposed on the table.
            table = pd.read_csv(pathname, header=None)
            trainacc, testacc, trainloss, testloss = (
                _read_metric(table, row, pathname) for row in _METRIC_ROWS
            )

            for layer_no, size_step in matches:
                acc_train.append([layer_no, size_step, trainacc])
                acc_test.append([layer_no, size_step, testacc])
                loss_train.append([layer_no, size_step, trainloss])
                loss_test.append([layer_no, size_step, testloss])

        print("ACC_train:", acc_train)
        print("ACC_test:", acc_test)
        print("LOSS_train:", loss_train)
        print("LOSS_test:", loss_test)
# Root folder that holds the training-log CSV files; adjust to the local setup.
dirname = "P:/Research/originaldata"
# Scan the whole tree (parents before children) and print the collected metrics.
walk_dir(dir=dirname, topdown=True)
私のコードはまだ比較的粗削りですが、手作業で処理するよりは高速です。
結果は次のとおりです。