用Python手写KNN算法,对鸢尾花数据集进行分类预测(不使用现成的KNN模块)。
KNN算法的全称是K-Nearest Neighbors(K近邻算法)。
#########################★ KNN 算法在iris数据集的实现 ★##########################
import numpy as np
from sklearn.datasets import load_iris
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
################★ Load the iris dataset ★################
iris = load_iris()
print("查看dict_keys:\n", iris.keys())
print("查看特征名字:", iris.feature_names)
# Feature matrix and label vector used by the rest of the script.
X, y = iris.data, iris.target
label_set = set(y)  # distinct class labels found in y
print("标签种类:", label_set)
ns, nf = X.shape  # ns: number of samples, nf: number of features
nc = len(label_set)  # nc: number of classes
print("样本的数量:{},特征的数量:{}, 类别的数量:{}。".format(ns, nf, nc))
nk = 15  # hand-picked number of nearest neighbours (n_neighbors in KNN)
################★ Split into training and test sets ★################
# Shuffle with a fixed seed, then split with test_size=0.6 (also seeded) so
# that runs are reproducible.
X, y = shuffle(X, y, random_state=78)
split = train_test_split(X, y, test_size=0.6, random_state=28)
X_train, X_test, y_train, y_test = split
num_train, num_test = len(X_train), len(X_test)  # training / test set sizes
################★ Find the nearest neighbours of every test sample ★################
# For each test sample: measure its Euclidean distance to every training
# sample, take the labels of the nk closest ones, and predict the class that
# collects the most votes. Results are written into y_pred, one per test row.
y_pred = np.zeros([num_test])
for i in range(num_test):
    # D[k] is the L2 distance between test sample i and training sample k;
    # broadcasting subtracts the single test row from every training row.
    D = np.linalg.norm(X_test[i] - X_train, ord=2, axis=1)
    # Training-sample indices ordered from closest to farthest.
    sorted_index = np.argsort(D)
    # Labels of the nk closest training samples (slicing tolerates
    # nk larger than the training set).
    yt = y_train[sorted_index[:nk]]
    # cc[j] is the number of those neighbours labelled j; argmax returns the
    # first maximum, so ties go to the smallest class index.
    cc = [np.sum(yt == j) for j in range(nc)]
    y_pred[i] = np.argmax(cc)
################★ Prediction accuracy ★################
# Fraction of test samples whose predicted label equals the true label.
hits = y_pred == y_test
acc = np.mean(hits)
print("预测精度:", acc)
运行结果: