Machine Learning (4) -- Applying the KNN Algorithm

Calling the KNN model

The sklearn package ships a ready-made KNN model: you simply pass it a dataset and call it.
This post uses the iris dataset and runs both sklearn's knn module and a hand-written knn module, then compares the results. The code is given below.

sklearn knn

def iris_knn():
    from sklearn import neighbors
    from sklearn import datasets

    knn = neighbors.KNeighborsClassifier()
    iris = datasets.load_iris()   # load the iris dataset bundled with sklearn
    with open('iris.data.csv', 'w') as f:   # save the dataset to a local CSV file
        data = iris.data
        label = iris.target
        for i in range(len(label)):
            for j in range(len(data[i])):
                f.write(str(data[i][j]) + ',')
            f.write(str(label[i]) + "\n")
    print(iris)
    print(iris.target_names)
    print(iris.feature_names)
    knn.fit(iris.data, iris.target)      # train the classifier

    predict = knn.predict([[5.9, 3, 5.1, 1.8]])  # predict the class of one sample
    print(predict)
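
Beyond predicting a single hand-picked sample, it is natural to ask how well the sklearn classifier does on data it has not seen. The sketch below is an optional addition, not part of the original code: it assumes sklearn's train_test_split and the classifier's built-in score method, and the function name iris_knn_holdout as well as the test_size and random_state values are illustrative choices.

def iris_knn_holdout():
    # minimal sketch: evaluate KNeighborsClassifier on a random hold-out split
    from sklearn import neighbors
    from sklearn import datasets
    from sklearn.model_selection import train_test_split

    iris = datasets.load_iris()
    x_train, x_test, y_train, y_test = train_test_split(
        iris.data, iris.target, test_size=0.33, random_state=0)  # assumed split settings

    knn = neighbors.KNeighborsClassifier(n_neighbors=3)  # k=3, matching my knn below
    knn.fit(x_train, y_train)                            # train on the training portion
    print(knn.score(x_test, y_test))                     # mean accuracy on the hold-out portion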

my knn

import csv
import random
import math
import operator

def load_data(filename, split):
    # read filename and randomly split its rows into a training set and a
    # test set, with roughly the fraction split going to the training set
    train_set = []
    test_set = []

    with open(filename, 'r') as f:
        lines = csv.reader(f)
        data_set = list(lines)
        for x in range(len(data_set)):
            if not data_set[x]:         # skip any blank lines
                continue
            for y in range(4):
                data_set[x][y] = float(data_set[x][y])
            if random.random() < split:
                train_set.append(data_set[x])
            else:
                test_set.append(data_set[x])
    return train_set, test_set


def get_distance(vec1, vec2):  # Euclidean distance between two feature vectors
    distance = 0
    for x in range(len(vec1)):
        distance += pow(vec1[x]-vec2[x],2)
    return math.sqrt(distance)


def get_neighbors(train_set, testcase, k):   # find the k nearest neighbors of testcase in train_set
    neighbors = []
    distances = []
    for case in train_set:
        distances.append((case, get_distance(case[0:-1], testcase[0:-1])))  # use all 4 features; the last element is the label
    distances.sort(key=operator.itemgetter(1))

    for i in range(k):
        neighbors.append(distances[i][0])

    return neighbors


def get_response(neighbors):   # majority vote among the k neighbors
    class_votes = {}
    for case in neighbors:
        response = case[-1]
        if response in class_votes:
            class_votes[response] += 1
        else:
            class_votes[response] = 1
    class_votes = sorted(class_votes.items(), key=operator.itemgetter(1), reverse=True)

    return class_votes[0][0]


def get_accuracy(test_set, predictions):
    correct = 0
    for i in range(len(test_set)):
        if test_set[i][-1] == predictions[i]:
            correct += 1
    return correct/(float(len(predictions)))


def my_knn():           
    train_set, test_set = load_data('./iris.data.csv',0.66)
    print(train_set)
    print(test_set)

    predictions = []
    k = 3
    for case in test_set:
        neighbors = get_neighbors(train_set, case, k)
        result = get_response(neighbors)
        predictions.append(result)
        print('>>> predict = ' + str(result) + ', actual = ' + str(case[-1]))

    accuracy = get_accuracy(test_set, predictions)
    print('Accuracy : ' + str(accuracy))


if __name__ == '__main__':
    # iris_knn()   # run this once first so that iris.data.csv exists for my_knn()
    my_knn()
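
Because load_data splits the rows at random, the accuracy printed by my_knn() varies from run to run. One way to sanity-check the hand-written classifier is to score sklearn's KNeighborsClassifier on a split produced by the same load_data routine. The sketch below is an added assumption-laden example, not part of the original post: the function name compare_with_sklearn is made up, and it requires iris.data.csv to have been written by iris_knn() first.

def compare_with_sklearn(k=3):
    # sketch: score sklearn's KNN on a split from load_data so the result can
    # be compared with my_knn(); assumes iris.data.csv already exists
    from sklearn import neighbors

    train_set, test_set = load_data('./iris.data.csv', 0.66)
    train_x = [row[:-1] for row in train_set]   # 4 numeric features
    train_y = [row[-1] for row in train_set]    # class label (kept as a string)
    test_x = [row[:-1] for row in test_set]
    test_y = [row[-1] for row in test_set]

    knn = neighbors.KNeighborsClassifier(n_neighbors=k)
    knn.fit(train_x, train_y)
    print('sklearn accuracy on the same kind of split: ' + str(knn.score(test_x, test_y)))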

Reposted from blog.csdn.net/qq_38876114/article/details/93735749