鸢尾花分类问题

本文内容来自《Python机器学习基础教程》([德] Andreas C. Müller、[美] Sarah Guido 著,张亮(hysic)译)的第一章。

from sklearn.datasets import load_iris

# Load the bundled iris dataset; the returned object is indexed by key below
# ('data', 'target', 'target_names', 'feature_names', ...).
iris_dataset = load_iris()


In [9]:
# Show which fields the dataset object exposes.
print(iris_dataset.keys())
dict_keys(['data', 'target', 'target_names', 'DESCR', 'feature_names'])
In [17]:
# Integer-encoded class label for every sample.
print(iris_dataset['target'])
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2]
In [26]:
# Human-readable name of each feature column.
print(iris_dataset['feature_names'])
['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']
In [23]:
# (number of samples, number of features)
print(iris_dataset['data'].shape)
(150, 4)
In [33]:
from sklearn.model_selection import train_test_split

# Hold out part of the data for evaluation. random_state=0 pins the shuffle
# so the split (and every number printed later) is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    iris_dataset['data'],
    iris_dataset['target'],
    random_state=0,
)
In [46]:
import pandas as pd
import mglearn

# Wrap the training features in a DataFrame so that every column is labelled
# with its feature name in the plot.
iris_dataframe = pd.DataFrame(X_train, columns=iris_dataset.feature_names)

# Pairwise scatter plots of all features, coloured by class label.
# NOTE: the top-level alias `pd.scatter_matrix` was deprecated in pandas 0.20
# and later removed; the function lives in `pandas.plotting`.
grr = pd.plotting.scatter_matrix(
    iris_dataframe,
    c=y_train,
    figsize=(15, 15),
    marker='o',
    hist_kwds={'bins': 20},
    s=60,
    alpha=.8,
    cmap=mglearn.cm3,
)

In [47]:
from sklearn.neighbors import KNeighborsClassifier

# k-nearest-neighbours classifier that looks at only the single closest
# training sample when predicting.
knn = KNeighborsClassifier(
    n_neighbors=1,
)
In [48]:
# Fit on the training split. Left as a bare expression so the notebook echoes
# the fitted estimator.
knn.fit(X_train, y_train)
Out[48]:
KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=1, p=2,
           weights='uniform')
In [49]:
# `np` is used below but is not imported in any cell shown here.
import numpy as np

# One new measurement, written as a 2-D array (one row, four feature columns)
# to match the layout of the training data.
xnew = np.array([[5, 2.9, 1, 0.2]])
pred = knn.predict(xnew)
print(pred)
# Map the predicted integer label back to its species name.
print(iris_dataset["target_names"][pred])
[0]
['setosa']
In [53]:
# `np` is used below but is not imported in any cell shown here.
import numpy as np

# Predict every held-out sample; accuracy is the fraction of predictions that
# equal the true label.
y_pred = knn.predict(X_test)
print(np.mean(y_pred == y_test), ' is the successful rate')
0.9736842105263158  is the successful rate


猜你喜欢

转载自blog.csdn.net/brooknew/article/details/80915424