A step-by-step ("nanny-level") version has been prepared for you; you only need to supply the path to your own training txt file.
The code is shown below:
import pandas as pd
import itertools
# These are the two libraries used; if you need a tutorial on installing them, see the installation guide on my profile page.
# Load the training table (pd.read_table defaults to tab-separated text).
# Change this path to point at your own data file; every column is expected
# to be numeric.
# NOTE: the path uses forward slashes only. Written with backslashes in a
# normal (non-raw) string, the "\t" in "\train01.txt" is parsed as a TAB
# character and the file cannot be found.
data_full_arrangement_train = pd.read_table('D:/1wangyong/pytorchtrains/train01.txt')
def full_arrangement_train(data_full_arrangement_train, number=4):
    """Append sum and product features for every combination of columns.

    For each combination of ``number`` distinct feature columns (every column
    except ``"target"``), two new columns are created: the element-wise sum
    and the element-wise product of the columns in that combination.

    Args:
        data_full_arrangement_train: DataFrame holding the feature columns
            plus a ``"target"`` column. It is not modified.
        number: How many columns go into each combination. ``2`` yields
            pairwise features such as ``v0+v1`` and ``v0*v1``. Defaults to 4.

    Returns:
        A new DataFrame whose columns are, in order: the original feature
        columns, all sum columns, all product columns, and ``"target"``.

    Raises:
        KeyError: If the input has no ``"target"`` column.
        ValueError: If ``number`` is smaller than 1.

    NOTE(review): the original comments said ``number == 4`` would generate
    combinations of two, three AND four columns, but the code has always
    generated combinations of exactly ``number`` columns. That behaviour is
    kept here; call the function once per size if all sizes are wanted.
    """
    if number < 1:
        raise ValueError("number must be at least 1")

    # Split the target off so it does not take part in the combinations.
    target = data_full_arrangement_train["target"]
    features = data_full_arrangement_train.drop(columns=["target"])
    feature_names = list(features.columns)

    sum_columns = {}
    product_columns = {}
    for combo in itertools.combinations(feature_names, number):
        running_sum = 0
        running_product = 1
        for column in combo:
            running_sum = running_sum + features[column]
            running_product = running_product * features[column]
        # Column names keep the trailing operator (e.g. "v0+v1+") so the
        # output header stays identical to what earlier versions produced.
        sum_columns["".join(f"{column}+" for column in combo)] = running_sum
        product_columns["".join(f"{column}*" for column in combo)] = running_product

    # Merge: original features, then the sums, then the products.
    data_test = pd.concat(
        [
            features,
            pd.DataFrame(sum_columns, index=features.index),
            pd.DataFrame(product_columns, index=features.index),
        ],
        axis=1,
    )
    # Put the target back as the last column.
    data_test["target"] = target
    return data_test
# Build the augmented training table from the data loaded above.
data01 = full_arrangement_train(data_full_arrangement_train)
# Path where the augmented data is saved; change it to suit your machine.
# Forward slashes are used because "\p" and "\/" in a normal string literal
# are invalid escape sequences, which newer Python versions warn about.
outputpath = 'D:/1wangyong/pytorchtrains/train_full_4.txt'
# Write a tab-separated file with a header row and no index column.
data01.to_csv(outputpath, sep='\t', index=False, header=True)
If you found this helpful, please give it a thumbs up!
Thank you very much!