# The code is as follows:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
import pandas as pd
import numpy as np
import mglearn
import matplotlib.pyplot as plt
# --- Load the iris dataset and inspect what it contains -------------------
iris_dataset = load_iris()
print('keys of iris_dataset:\n{}'.format(iris_dataset.keys()))
print('target names:\n{}'.format(iris_dataset['target_names']))
print('feature names:\n{}'.format(iris_dataset['feature_names']))
print('type of data:\n{}'.format(type(iris_dataset['data'])))
print('shape of data:\n{}'.format(iris_dataset['data'].shape))
print('first five rows of data:\n{}'.format(iris_dataset['data'][:5]))
print('type of target:\n{}'.format(type(iris_dataset['target'])))
# Print the shape itself; wrapping it in type() would only print the tuple class.
print('shape of target:\n{}'.format(iris_dataset['target'].shape))
print('target:\n{}'.format(iris_dataset['target']))

# --- Split into training and test sets -------------------------------------
# random_state=0 seeds the shuffle so the split is the same on every run.
x_train, x_test, y_train, y_test = train_test_split(
    iris_dataset['data'], iris_dataset['target'], random_state=0)
print('x_train shape:{}'.format(x_train.shape))
print('y_train shape:{}'.format(y_train.shape))
print('x_test shape:{}'.format(x_test.shape))
print('y_test shape:{}'.format(y_test.shape))

# --- Visualise the training data -------------------------------------------
# Build a DataFrame so the scatter matrix axes are labelled with the feature
# names; points are coloured by their training label (c=y_train).
iris_dataframe = pd.DataFrame(x_train, columns=iris_dataset.feature_names)
grr = pd.plotting.scatter_matrix(
    iris_dataframe,
    c=y_train,
    figsize=(15, 15),
    marker='o',
    hist_kwds={'bins': 20},
    s=60,
    alpha=.8,
    cmap=mglearn.cm3,
)

# --- Fit a 1-nearest-neighbour classifier ----------------------------------
knn = KNeighborsClassifier(n_neighbors=1)
knn.fit(x_train, y_train)

# --- Predict a single new sample -------------------------------------------
# The sample is a 2-D array (one row, four features) because predict() is
# called with the same row-per-sample layout used for x_train / x_test.
x_new = np.array([[5, 2.9, 1, 0.2]])
print('x_new.shape:\n{}'.format(x_new.shape))
prediction = knn.predict(x_new)
print('prediction:{}'.format(prediction))
# Index target_names with the predicted label array to get the class name(s).
print('predicted target name:{}'.format(iris_dataset['target_names'][prediction]))

# --- Evaluate on the held-out test set --------------------------------------
y_pred = knn.predict(x_test)
print('test set predictions:\n{}'.format(y_pred))
# Accuracy = fraction of test samples whose prediction equals the true label.
print('test set score:{:.2f}'.format(np.mean(y_pred == y_test)))

# Display the scatter-matrix figure created above.
plt.show()