用 Python 将 CSV 数据切分为训练集和测试集

使用 sklearn 库切分训练集和测试集

import csv

from numpy import genfromtxt
from sklearn import neighbors

# k-nearest-neighbours classifier built with the library's default settings;
# pass n_neighbors=10 to the constructor to change the neighbour count.
knn = neighbors.KNeighborsClassifier()

# Read only the header (title) row of the CSV file. The file is opened
# read-only inside a `with` block so the handle is always closed, and with
# newline='' as the csv module documentation recommends.
with open('list.csv', 'r', newline='') as a:
    reader = csv.reader(a)  # iterates over the file row by row
    headers = next(reader)  # first row: the column titles

在这里插入图片描述
原数据
在这里插入图片描述

# Path of the CSV file; its first line is skipped as a header row.
dataPath = r"list.csv"

# Columns 1-7 are loaded as the feature matrix, column 0 as the labels.
featureList = genfromtxt(
    dataPath,
    delimiter=',',
    skip_header=1,
    usecols=tuple(range(1, 8)),
)
labelList = genfromtxt(
    dataPath,
    delimiter=',',
    skip_header=1,
    usecols=0,
)

# Working names used by the later train/test split.
x = featureList[:]
y = labelList[:]

# Show the number of entries along the first axis, then the features and labels.
print(len(x))
print(x)
print(y)

在这里插入图片描述
from sklearn.model_selection import train_test_split  # splits a dataset

# Hold out 25% of the samples as the test set; the rest is the training set.
# No random_state is given, so the split differs from run to run.
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.25)
print(X_train)
在这里插入图片描述

猜你喜欢

转载自blog.csdn.net/weixin_40123108/article/details/84572852