sklearn Machine Learning Algorithms -- K-Nearest Neighbors

K-Nearest Neighbors

Building the model only requires storing the training dataset. To make a prediction for a new data point, the algorithm finds the closest points in the training set, that is, its "nearest neighbors", and bases the prediction on their labels.
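
To make this concrete, here is a minimal sketch of a 1-nearest-neighbor prediction written with plain NumPy (the toy arrays X_toy and y_toy are made up for illustration and are not part of the original post):

import numpy as np

#Made-up toy training set: two features per point, binary labels
X_toy = np.array([[0.0, 0.0], [1.0, 1.0], [2.0, 2.0]])
y_toy = np.array([0, 0, 1])

def predict_1nn(x_new):
    #Euclidean distance from the new point to every stored training point
    distances = np.sqrt(((X_toy - x_new) ** 2).sum(axis=1))
    #The prediction is simply the label of the closest training point
    return y_toy[np.argmin(distances)]

print(predict_1nn(np.array([1.8, 1.9])))  #-> 1, since [2.0, 2.0] is the closest point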

1. K-Nearest Neighbors Classification

#Imports used throughout this post (matplotlib, mglearn, numpy and train_test_split
#are needed below but were not imported in the original snippet)
import numpy as np
import matplotlib.pyplot as plt
import mglearn
from sklearn.model_selection import train_test_split
#Steps 1-2 (not shown in the original post, assumed here): load the two-feature forge dataset and split it
X, y = mglearn.datasets.make_forge()
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
#Step 3: import the K-nearest-neighbors model and instantiate a KNN object
from sklearn.neighbors import KNeighborsClassifier
#n_neighbors is the number of neighbors to consider
clf = KNeighborsClassifier(n_neighbors=3)
#Step 4: fit the model on the training set
clf.fit(X_train, y_train)
#Check the accuracy on the training set and the test set
clf.score(X_train, y_train)
clf.score(X_test, y_test)
#Create a figure of size 10x4 with one row and three columns of subplots
fig, axes = plt.subplots(1, 3, figsize=(10, 3))
for n_neighbors, ax in zip([1, 3, 9], axes):
    #Instantiate a model with this neighbor count and fit it on the full dataset
    clf = KNeighborsClassifier(n_neighbors=n_neighbors).fit(X, y)
    mglearn.plots.plot_2d_separator(clf, X, fill=True, eps=0.5, ax=ax, alpha=.4)
    mglearn.discrete_scatter(X[:, 0], X[:, 1], y, ax=ax)
    ax.set_title("{} neighbor(s)".format(n_neighbors))
    ax.set_xlabel("feature 0")
    ax.set_ylabel("feature 1")

Accuracy analysis with different numbers of neighbors on the breast cancer dataset

#Load the breast cancer dataset
from sklearn.datasets import load_breast_cancer
#Extract the data
cancer = load_breast_cancer()
#Step 1: split the data into a training set and a test set
X_train, X_test, y_train, y_test = train_test_split(cancer.data, cancer.target, random_state=0)
#Evaluate KNN classifiers with different numbers of neighbors
neighbors_settings = range(1, 11)
training_accuracy = []
test_accuracy = []
for n_neighbors in neighbors_settings:
    clf = KNeighborsClassifier(n_neighbors=n_neighbors).fit(X_train, y_train)
    training_accuracy.append(clf.score(X_train, y_train))
    test_accuracy.append(clf.score(X_test, y_test))
#Plot training and test accuracy as a function of n_neighbors
plt.plot(neighbors_settings, training_accuracy, label='training accuracy')
plt.plot(neighbors_settings, test_accuracy, label='test accuracy')
plt.legend()
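
As a small follow-up that is not in the original post, the same accuracy lists can be read off programmatically to find the neighbor count that performs best on this particular test split (a sketch only; model selection would normally use cross-validation):

#Pick the neighbor count with the highest test-set accuracy on this split (illustrative only)
import numpy as np
best_k = neighbors_settings[int(np.argmax(test_accuracy))]
print("Best n_neighbors on this split:", best_k)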

2. K-Nearest Neighbors Regression

A demonstration of K-nearest-neighbors regression on the wave dataset

#Generate the wave dataset
X, y = mglearn.datasets.make_wave()
#Split the data into a training set and a test set
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
#Import the KNN regression model
from sklearn.neighbors import KNeighborsRegressor
#Instantiate the KNN regressor
reg = KNeighborsRegressor(n_neighbors=3)
#Fit the model on the training set
reg.fit(X_train, y_train)
#Check the model's score (R^2) on the test set
reg.score(X_test, y_test)
#Create a figure of size 15x4 with one row and three columns of subplots
fig, axes = plt.subplots(1, 3, figsize=(15, 4))
#Create 1000 evenly spaced query points between -3 and 3
lines = np.linspace(-3, 3, 1000).reshape(-1, 1)
for n_neighbors, ax in zip([1, 3, 9], axes):
    reg = KNeighborsRegressor(n_neighbors=n_neighbors).fit(X_train, y_train)
    ax.plot(lines, reg.predict(lines))
    ax.plot(X_train, y_train, '^', c=mglearn.cm2(0), markersize=8)
    ax.plot(X_test, y_test, 'o', c=mglearn.cm2(1), markersize=8)
    ax.set_title('{} neighbor(s)\n train score:{:.2f} test score:{:.2f}'.format(
        n_neighbors, reg.score(X_train, y_train), reg.score(X_test, y_test)))
axes[0].legend(['model predictions', 'training data/target', 'test data/target'])
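
For intuition, here is a short check added here (not part of the original post): with the default weights='uniform', KNeighborsRegressor predicts the mean target value of the k nearest training points, which can be verified with the kneighbors() method. The query value 1.5 is an arbitrary point on the wave feature axis:

#Verify by hand that the 3-NN prediction equals the mean target of the 3 nearest training points
import numpy as np
reg3 = KNeighborsRegressor(n_neighbors=3).fit(X_train, y_train)
x_new = np.array([[1.5]])           #a single query point (arbitrary choice)
_, idx = reg3.kneighbors(x_new)     #indices of the 3 nearest training samples
print(reg3.predict(x_new)[0])       #model prediction
print(y_train[idx[0]].mean())       #mean of the neighbors' targets, the same value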


Reposted from www.cnblogs.com/xuexiaoxi/p/12012378.html