Machine Learning Basics-KNN (K-Nearest Neighbor)-11

Insert picture description here
Insert picture description here
Insert picture description here
Insert picture description here
Insert picture description here
Insert picture description here
Insert picture description here

KNN algorithm implementation

import matplotlib.pyplot as plt
import numpy as np
import operator

# 已知分类的数据
x1 = np.array([3,2,1])
y1 = np.array([104,100,81])
x2 = np.array([101,99,98])
y2 = np.array([10,5,2])
scatter1 = plt.scatter(x1,y1,c='r')
scatter2 = plt.scatter(x2,y2,c='b')

# 未知数据
x = np.array([18])
y = np.array([90])
scatter3 = plt.scatter(x,y,c='k')


#画图例
plt.legend(handles=[scatter1,scatter2,scatter3],labels=['labelA','labelB','X'],loc='best')

plt.show()

Insert picture description here

# 已知分类的数据
x_data = np.array([[3,104],
                   [2,100],
                   [1,81],
                   [101,10],
                   [99,5],
                   [81,2]])
y_data = np.array(['A','A','A','B','B','B'])
x_test = np.array([18,90])
# 计算样本数量
x_data_size = x_data.shape[0]
x_data_size
# 复制x_test
np.tile(x_test, (x_data_size,1))

Insert picture description here

# 计算x_test与每一个样本的差值
diffMat = np.tile(x_test, (x_data_size,1)) - x_data
diffMat

Insert picture description here

# 计算差值的平方
sqDiffMat = diffMat**2
sqDiffMat

Insert picture description here

# 求和
sqDistances = sqDiffMat.sum(axis=1)
sqDistances

Insert picture description here

# 开方
distances = sqDistances**0.5
distances

Insert picture description here

# 从小到大排序
sortedDistances = distances.argsort()
sortedDistances

Insert picture description here

classCount = {
    
    }
# 设置k
k = 5
for i in range(k):
    # 获取标签
    votelabel = y_data[sortedDistances[i]]
    # 统计标签数量
    classCount[votelabel] = classCount.get(votelabel,0) + 1
classCount

Insert picture description here

# 根据operator.itemgetter(1)-第1个值对classCount排序,然后再取倒序
sortedClassCount = sorted(classCount.items(),key=operator.itemgetter(1), reverse=True)
sortedClassCount

Insert picture description here

# 获取数量最多的标签
knnclass = sortedClassCount[0][0]
knnclass

Insert picture description here
Insert picture description here

KNN-iris

# 导入算法包以及数据集
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report,confusion_matrix
import operator
import random
def knn(x_test, x_data, y_data, k):
    # 计算样本数量
    x_data_size = x_data.shape[0]
    # 复制x_test
    np.tile(x_test, (x_data_size,1))
    # 计算x_test与每一个样本的差值
    diffMat = np.tile(x_test, (x_data_size,1)) - x_data
    # 计算差值的平方
    sqDiffMat = diffMat**2
    # 求和
    sqDistances = sqDiffMat.sum(axis=1)
    # 开方
    distances = sqDistances**0.5
    # 从小到大排序
    sortedDistances = distances.argsort()
    classCount = {
    
    }
    for i in range(k):
        # 获取标签
        votelabel = y_data[sortedDistances[i]]
        # 统计标签数量
        classCount[votelabel] = classCount.get(votelabel,0) + 1
    # 根据operator.itemgetter(1)-第1个值对classCount排序,然后再取倒序
    sortedClassCount = sorted(classCount.items(),key=operator.itemgetter(1), reverse=True)
    # 获取数量最多的标签
    return sortedClassCount[0][0]
# 载入数据
iris = datasets.load_iris()
# x_train,x_test,y_train,y_test = train_test_split(iris.data, iris.target, test_size=0.2) #分割数据0.2为测试数据,0.8为训练数据

#打乱数据
data_size = iris.data.shape[0]
index = [i for i in range(data_size)] 
random.shuffle(index)  
iris.data = iris.data[index]
iris.target = iris.target[index]

#切分数据集
test_size = 40
x_train = iris.data[test_size:]
x_test =  iris.data[:test_size]
y_train = iris.target[test_size:]
y_test = iris.target[:test_size]

predictions = []
for i in range(x_test.shape[0]):
    predictions.append(knn(x_test[i], x_train, y_train, 5))

print(classification_report(y_test, predictions))

Insert picture description here

print(confusion_matrix(y_test,predictions))

Insert picture description here

sklearn-KNN-iris

# 导入算法包以及数据集
from sklearn import neighbors
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import random
# 载入数据
iris = datasets.load_iris()
print(iris)

Insert picture description here

# 打乱数据切分数据集
# x_train,x_test,y_train,y_test = train_test_split(iris.data, iris.target, test_size=0.2) #分割数据0.2为测试数据,0.8为训练数据

#打乱数据
data_size = iris.data.shape[0]
index = [i for i in range(data_size)] 
random.shuffle(index)  
iris.data = iris.data[index]
iris.target = iris.target[index]

#切分数据集
test_size = 40
x_train = iris.data[test_size:]
x_test =  iris.data[:test_size]
y_train = iris.target[test_size:]
y_test = iris.target[:test_size]

# 构建模型
model = neighbors.KNeighborsClassifier(n_neighbors=3)
model.fit(x_train, y_train)
prediction = model.predict(x_test)

print(classification_report(y_test, prediction))

Insert picture description here

Guess you like

Origin blog.csdn.net/qq_37978800/article/details/113842791