# 有用的代码整理
#
# 一、KNN

from collections import Counter

import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split


def knn(inputX, data, lable, tempK):
    """Classify one sample by majority vote among its k nearest neighbours.

    Distances are Euclidean and are computed against every row of ``data``.

    Args:
        inputX: Feature vector of the sample to classify. It must broadcast
            against the rows of ``data`` (normally one value per column).
        data: Training samples, one row per sample (2-D array-like).
        lable: Class labels, where ``lable[i]`` is the label of ``data[i]``.
        tempK: Number of nearest neighbours that take part in the vote.

    Returns:
        The label that occurs most often among the ``tempK`` nearest
        training samples. When several labels share the top count, the one
        whose first vote came from the closest neighbour wins.

    Raises:
        ValueError: If ``data`` and ``lable`` differ in length, or if
            ``tempK`` is not between 1 and the number of training samples.
    """
    samples = np.asarray(data)
    total = len(lable)
    if len(samples) != total:
        raise ValueError(
            f"data has {len(samples)} rows but lable has {total} entries"
        )
    if not 1 <= tempK <= total:
        raise ValueError(
            f"tempK must be between 1 and {total} (number of samples), got {tempK}"
        )

    # Broadcasting subtracts the query from every row at once, so there is no
    # need to materialise one copy of the query per training sample.
    offsets = samples - np.asarray(inputX)
    distances = (offsets ** 2).sum(axis=1) ** 0.5

    # Indices of the training samples, closest first, cut down to k entries.
    nearest = distances.argsort()[:tempK]

    # Counter keeps insertion order and max() returns the first maximal key,
    # which gives the nearest-first tie rule described in the docstring.
    votes = Counter(lable[i] for i in nearest)
    return max(votes, key=votes.get)


# 1. Load the iris dataset
iris = load_iris()

# 2. Split the samples into a training part and a test part; test_size=0.25
#    keeps a quarter for testing (3:1). No random_state is passed, so the
#    split is not pinned to a seed.
x_train, x_test, y_train, y_test = train_test_split(
    iris.data,
    iris.target,
    test_size=0.25,
)

# 3. Hold the training and test partitions as NumPy arrays
train_data, train_lable = np.array(x_train), np.array(y_train)
test_data, test_lable = np.array(x_test), np.array(y_test)

# 4. Group the training samples by class label: A holds label 0, B holds
#    label 1, and C holds every remaining label
A = np.array([row for row, tag in zip(train_data, train_lable) if tag == 0])
B = np.array([row for row, tag in zip(train_data, train_lable) if tag == 1])
C = np.array(
    [row for row, tag in zip(train_data, train_lable) if tag != 0 and tag != 1]
)

# 6. Classify every test sample with the KNN classifier (k = 3) and print
#    whether the prediction matches the true label
for sample, expected in zip(test_data, test_lable):
    predicted = knn(sample, train_data, train_lable, 3)
    print('预测正确' if predicted == expected else '预测错误')

# 猜你喜欢
#
# 转载自blog.csdn.net/bugu_hhh/article/details/131357568