def new_data(train_data_path):
    """Split a labelled text file into three datasets for two-stage training.

    Every non-blank line of the input is expected to look like
    ``text<TAB>label`` where ``label`` is an integer. The lines are shuffled
    and then appended to three files under ``data/data9045/``:

    * ``Newtraindata1.txt`` - every sample, relabelled ``14`` when its
      original label is one of 3, 4, 5, 6, 7, 12 and ``15`` otherwise.
    * ``Newtraindata2.txt`` - only the samples relabelled ``14``, keeping
      their original label.
    * ``Newtraindata3.txt`` - only the samples relabelled ``15``, keeping
      their original label.

    The output files are opened in append mode, so calling this function
    twice duplicates the records; remove the files first for a fresh split.

    Args:
        train_data_path: Path of the UTF-8 encoded source file.

    Raises:
        ValueError: If a non-blank line has no tab separator or its label
            is not an integer.
    """
    # NOTE(review): the original indentation was lost; writing to file 3 only
    # in the "else" branch is the reconstruction that matches the three-file
    # layout - confirm against the original script.
    grouped_labels = {3, 4, 5, 6, 7, 12}

    # Read with the same explicit encoding used for the outputs instead of
    # the platform default.
    with open(train_data_path, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    # Shuffle the samples in place.
    np.random.shuffle(lines)

    with open("data/data9045/Newtraindata1.txt", 'a', encoding='utf-8') as ans1, \
            open("data/data9045/Newtraindata2.txt", 'a', encoding='utf-8') as ans2, \
            open("data/data9045/Newtraindata3.txt", 'a', encoding='utf-8') as ans3:
        for line in lines:
            record = line.rstrip('\r\n')
            if not record.strip():
                # Blank lines carry no sample.
                continue

            # Split on the LAST tab so that text containing tabs stays intact.
            data, sep, label = record.rpartition('\t')
            if not sep:
                raise ValueError(
                    "expected 'text<TAB>label', got: {!r}".format(line)
                )

            # The label is compared as an int: the raw field may carry
            # surrounding whitespace, which int() tolerates but a string
            # comparison would not.
            label = label.strip()
            # The newline is written explicitly so a final input line without
            # a trailing newline cannot glue two records together.
            if int(label) in grouped_labels:
                ans1.write(data + "\t" + str(14) + "\n")
                ans2.write(data + "\t" + label + "\n")
            else:
                ans1.write(data + "\t" + str(15) + "\n")
                ans3.write(data + "\t" + label + "\n")
train_data_path中的数据格式:每行一条样本,形如"文本\t标签",即文本与整数标签之间用制表符分隔,行尾带换行符。
其中值得注意的是:if 条件判断语句中的 label 必须先转换为 int 型。之前尝试过把后面的 [3,4,5,6,7,12] 改成字符串列表 ['3','4','5','6','7','12'],但得到的结果一直不是想要的。最后意识到,split('\t') 切分之后的 label 并不只是数字字符本身,而是"数字字符串 + '\n'"的形式(例如 '3\n'),因此与 '3' 比较永远不相等;而 int(label) 会自动忽略首尾的空白字符和换行符,所以转换为 int 后再判断才能得到正确结果。