KNN machine learning classification algorithm (Part 1)

K nearest neighbor (k-NearestNeighbor)

The k-nearest-neighbor algorithm is one of the simplest machine learning methods. It proceeds as follows:
(1) compute the distance from the test object to every object in the training set
(2) sort the training objects by that distance
(3) select the k training objects closest to the test object as its neighbors
(4) count how often each category occurs among those k neighbors
(5) the category with the highest frequency among the k neighbors is the predicted category of the test object

Python code implementation

1. Implementing the KNN algorithm from scratch

import numpy as np
from math import sqrt
from collections import Counter

# Define the classifier
class kNNClassifier:
    """Minimal k-nearest-neighbours classifier using Euclidean distance.

    The classifier memorises the training set in ``fit`` and, for each query
    point, predicts the most frequent label among the ``k`` closest training
    samples.
    """

    def __init__(self, k):
        """Create a classifier that votes among the ``k`` nearest samples.

        Raises:
            AssertionError: if ``k`` is smaller than 1.
        """
        assert k >= 1, "k must be valid"
        self.k = k
        self._X_train = None
        self._y_train = None

    def fit(self, X_train, y_train):
        """Store the training samples ``X_train`` and their labels ``y_train``.

        Both arguments must expose ``.shape`` (e.g. NumPy arrays) and contain
        the same number of rows, which must be at least ``k``.

        Returns:
            The classifier itself, so calls can be chained.
        """
        assert X_train.shape[0] == y_train.shape[0], \
            "the size of X_train must be equal to the size of y_train"
        assert self.k <= X_train.shape[0], \
            "the size of X_train must be at least k"
        self._X_train = X_train
        self._y_train = y_train
        return self

    def predict(self, X_predict):
        """Predict a label for every row of the 2-D array ``X_predict``.

        Returns:
            A NumPy array holding one predicted label per input row.

        Raises:
            AssertionError: if called before ``fit`` or if the number of
                feature columns differs from the training data.
        """
        assert self._X_train is not None and self._y_train is not None, \
            "must fit before predict!"
        assert X_predict.shape[1] == self._X_train.shape[1], \
            "the feature number of X_predict must be equal to X_train"
        y_predict = [self._predict(x) for x in X_predict]
        return np.array(y_predict)

    def _predict(self, x):
        """Return the majority label among the ``k`` samples closest to ``x``."""
        # Euclidean distance from x to every training row in one vectorised step.
        distances = np.sqrt(np.sum((self._X_train - x) ** 2, axis=1))
        # Only the k closest samples may vote; without this slice every
        # training sample would vote and k would have no effect.
        nearest = np.argsort(distances)[:self.k]
        votes = Counter(self._y_train[i] for i in nearest)
        return votes.most_common(1)[0][0]

    def score(self, X_test, y_test):
        """Predict labels for ``X_test`` and return the accuracy against ``y_test``."""
        y_predict = self.predict(X_test)
        return self.accuracy_score(y_test, y_predict)

    @staticmethod
    def accuracy_score(y_true, y_predict):
        """Return the fraction of positions where ``y_true`` equals ``y_predict``.

        Declared as a static method so it can be called both on the class and
        through an instance (as ``score`` does).

        Raises:
            AssertionError: if the two arrays differ in length.
        """
        assert y_true.shape[0] == y_predict.shape[0], \
            "the size of y_true must be equal to the size of y_predict"
        return np.sum(y_true == y_predict) / len(y_true)

    def __repr__(self):
        return "kNN(k=%d)" % self.k

# Ten two-dimensional training samples, one per row.
raw_data_X = [
    [3.393533211, 2.331273381],
    [3.110073483, 1.781539638],
    [1.343853454, 3.368312451],
    [3.582294121, 4.679917921],
    [2.280362211, 2.866990212],
    [7.423436752, 4.685324231],
    [5.745231231, 3.532131321],
    [9.172112222, 2.511113104],
    [7.927841231, 3.421455345],
    [7.939831414, 0.791631213],
]
# Class label of each training row: the first five are 0, the last five are 1.
raw_data_y = [0] * 5 + [1] * 5

# Training set as NumPy arrays
trainX = np.asarray(raw_data_X)
trainY = np.asarray(raw_data_y)

# The single sample to classify
x1 = np.array([8.093607318, 3.365731514])

# fit() returns the classifier itself, so construction and fitting can be chained.
knn_clf = kNNClassifier(k=6).fit(trainX, trainY)

# predict() expects a 2-D array, so turn the 1-D sample into a one-row matrix.
predict_X = x1[np.newaxis, :]
predict_Y = knn_clf.predict(predict_X)
print(predict_Y)

2. Calling the scikit-learn library

import numpy as np
from sklearn.neighbors import KNeighborsClassifier

# Ten two-dimensional training samples, one per row.
raw_data_X = [
    [3.393533211, 2.331273381],
    [3.110073483, 1.781539638],
    [1.343853454, 3.368312451],
    [3.582294121, 4.679917921],
    [2.280362211, 2.866990212],
    [7.423436752, 4.685324231],
    [5.745231231, 3.532131321],
    [9.172112222, 2.511113104],
    [7.927841231, 3.421455345],
    [7.939831414, 0.791631213],
]
# Class label of each training row: the first five are 0, the last five are 1.
raw_data_y = [0] * 5 + [1] * 5

# Training set as NumPy arrays
X_train = np.asarray(raw_data_X)
y_train = np.asarray(raw_data_y)

# The single sample to classify
x = np.array([8.093607318, 3.365731514])

# Create the scikit-learn kNN classifier that votes among 6 neighbours.
kNN_classifier = KNeighborsClassifier(n_neighbors=6)
# Fit on the training data; the fitted model is kept inside kNN_classifier.
kNN_classifier.fit(X_train, y_train)
# predict() needs a 2-D matrix rather than a 1-D array, hence the one-row view.
y_predict = kNN_classifier.predict(x[np.newaxis, :])
print(y_predict)

The relevant code has been uploaded to GitHub and is provided for learning and reference purposes only.

Questions encountered along the way:
1. Euclidean distance
2. The effect of random_state = 20
3. The meaning of reshape(1, -1)
4. `**` is the power operator (`** 2` means squaring)

Published 118 original articles · won praise 25 · Views 150,000 +

Guess you like

Origin blog.csdn.net/lhxsir/article/details/102999579