K近邻算法实现-Python

from sklearn import datasets
import random
from collections import Counter
import numpy as np

# Load the iris dataset that ships with scikit-learn.
irs=datasets.load_iris()
# Walk-through of the k-nearest-neighbours algorithm.
# Only feature columns 1 and 2 are kept, so every sample is a 2-D point.
dataset=irs.data[:,1:3]
datalabel=irs.target
idx_data=np.arange(0,len(datalabel))

# Test set: 20 distinct sample indices drawn at random, kept in ascending order.
idx_test=random.sample(idx_data.tolist(),20)
idx_test=np.sort(idx_test)
testset=dataset[idx_test,:]

# Training set: every sample index that was not drawn for the test set.
# A set gives constant-time membership checks and avoids using a bare
# ``except`` around ``list.index`` as control flow, which would also hide
# unrelated errors.
test_lookup=set(idx_test.tolist())
idx_train=[i for i in idx_data.tolist() if i not in test_lookup]

trainset=dataset[idx_train]

def fun_kss(sample,trainset,topnum):
    """Return the row indices of the ``topnum`` training rows closest to ``sample``.

    sample   -- one feature vector
    trainset -- 2-D array with one training sample per row
    topnum   -- how many neighbours to return

    Closeness is measured by squared Euclidean distance; the square root is
    skipped because it does not change the ordering. The returned indices
    refer to rows of ``trainset`` and are ordered from nearest to farthest.
    """
    offsets=trainset-sample
    # Sum the squared per-feature offsets for every training row.
    sq_dist=np.sum(offsets*offsets,axis=1)
    nearest_first=np.argsort(sq_dist)
    return nearest_first[:topnum]
def fun_label(labelset,labelidx):
    """Return the most frequent label among the selected samples.

    labelset -- array of labels that can be indexed with an index array
    labelidx -- indices into ``labelset`` of the samples taking part in the vote

    When several labels share the highest count, the one that comes first
    when iterating over the set of candidate labels is returned.
    """
    votes_cast=labelset[labelidx]
    votes_list=votes_cast.tolist()
    # One entry per distinct label, mapping it to its number of votes.
    tally={candidate:votes_list.count(candidate) for candidate in set(votes_cast)}
    return max(tally,key=tally.get)
    

# k-nearest-neighbours evaluation: classify every test sample from its
# 5 nearest training samples and count how many predictions are correct.
prers=0
for test_idx,test_sample in zip(idx_test,testset):
    neighbour_rows=fun_kss(test_sample,trainset,topnum=5)
    # fun_kss returns row positions inside trainset; map them back to
    # indices into the full dataset before looking up the labels.
    predicted=fun_label(datalabel,np.array(idx_train)[neighbour_rows])
    if predicted==datalabel[test_idx]:
        prers+=1
# Number of correct predictions, followed by the accuracy in percent.
print(prers)
print((prers/len(idx_test)*100))
发布了40 篇原创文章 · 获赞 1 · 访问量 4498

猜你喜欢

转载自blog.csdn.net/hyt182380/article/details/89135726