【七天入门ML与DL-01】加载sklearn自带的数据集 数据集划分

加载鸢尾花数据集，并将其划分为训练集和测试集

from sklearn.datasets import load_iris
from sklearn.model_selection import  train_test_split
# Load the iris dataset that ships with scikit-learn.
li = load_iris()
# The loaded object exposes li.data (feature values), li.target (target
# values) and li.DESCR (a description); print them to inspect the dataset.

# train_test_split returns the two halves of each input array in turn, so for
# (data, target) the order is: x_train, x_test, y_train, y_test.
# Unpacking in any other order silently mixes features with labels.
#   train --> x_train, y_train
#   test  --> x_test,  y_test
x_train, x_test, y_train, y_test = train_test_split(
    li.data, li.target, test_size=0.25
)
print("训练集特征值和目标值:", x_train, y_train)
print("测试机特征值和目标值:", x_test, y_test)
print(len(x_train))
print(x_train[0])
print(y_test[0])
print(len(x_test))

加载新闻数据集

from sklearn.datasets import fetch_20newsgroups
from sklearn.model_selection import  train_test_split
# Fetch the 20 newsgroups corpus, requesting the "all" subset.
news = fetch_20newsgroups(subset="all")
# Count the documents via news.data: len(news) would count the attributes of
# the returned container rather than the number of samples.
print(len(news.data))
# Show the first document and its target value.
print(news.data[0])
print(news.target[0])

加载波士顿房价数据集

from sklearn.datasets import load_boston
from sklearn.model_selection import  train_test_split
# Load the Boston house-price dataset.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2 -- confirm the installed version still provides it.
lb = load_boston()

# First report how many entries the feature and target arrays hold ...
for values in (lb.data, lb.target):
    print(len(values))

# ... then dump the features, the targets and the dataset description.
for item in (lb.data, lb.target, lb.DESCR):
    print(item)

猜你喜欢

转载自blog.csdn.net/kz_java/article/details/120847682