from sklearn import datasets
import random
from collections import Counter
import numpy as np
# Load the iris data set that ships with scikit-learn; it is the data used
# for this walk-through of the k-nearest-neighbour algorithm.
irs = datasets.load_iris()
# Keep only feature columns 1 and 2, so every sample is a 2-D point
# (presumably sepal width and petal length in sklearn's column order --
# TODO confirm against the load_iris documentation).
dataset = irs.data[:, 1:3]
datalabel = irs.target
idx_data = np.arange(0, len(datalabel))

# Test set: 20 distinct row indices drawn at random and sorted ascending.
# The random module is not seeded here, so the split differs between runs.
idx_test = random.sample(idx_data.tolist(), 20)
idx_test = np.sort(idx_test)
testset = dataset[idx_test, :]

# Training set: every row index that was not drawn for the test set.
# A set gives constant-time membership checks, so the split is a single
# pass over the row indices and no exception handling is involved.
_test_lookup = set(idx_test.tolist())
idx_train = [i for i in idx_data if i not in _test_lookup]
trainset = dataset[idx_train]
def fun_kss(sample, trainset, topnum):
    """Return the positions of the ``topnum`` training rows closest to ``sample``.

    Closeness is the squared Euclidean distance between ``sample`` and each
    row of ``trainset``; the square root is never taken because it would not
    change the ordering.

    Args:
        sample: Feature vector to compare against the training rows.
        trainset: Array whose rows are the training feature vectors.
        topnum: How many nearest rows to report.

    Returns:
        Array of row positions into ``trainset``, nearest first, holding at
        most ``topnum`` entries.
    """
    offsets = trainset - sample
    squared_dist = np.square(offsets).sum(axis=1)
    # argsort yields row positions ordered from smallest to largest distance.
    nearest_first = np.argsort(squared_dist)
    return nearest_first[:topnum]
def fun_label(labelset, labelidx):
    """Return the majority label among the entries of ``labelset`` at ``labelidx``.

    When several labels share the highest count, the one that appears first
    in ``labelidx`` order wins. If the caller passes neighbour indices sorted
    nearest-first, a tie is therefore resolved in favour of the nearest
    neighbour's label rather than by an arbitrary hash order.

    Args:
        labelset: Array of labels, indexable by ``labelidx``.
        labelidx: Array of positions into ``labelset`` whose labels vote.

    Returns:
        The label with the most votes.

    Raises:
        ValueError: If ``labelidx`` selects no labels.
    """
    neighbour_labels = labelset[labelidx]
    if len(neighbour_labels) == 0:
        raise ValueError("fun_label needs at least one label index to vote on")
    # Counter tallies all votes in one pass; most_common orders equal counts
    # by first appearance, which gives the deterministic tie-break above.
    votes = Counter(neighbour_labels)
    return votes.most_common(1)[0][0]
# Evaluate the k-nearest-neighbour classifier (k = 5) on the held-out test set.
prers = 0  # number of correctly classified test samples
# fun_kss reports positions within trainset; this array maps them back to row
# indices of the full data set so the labels can be read from datalabel.
# It does not change between samples, so it is built once, before the loop.
train_rows = np.array(idx_train)
for sample, true_row in zip(testset, idx_test):
    idxsort = fun_kss(sample, trainset, topnum=5)
    labelrs = fun_label(datalabel, train_rows[idxsort])
    if labelrs == datalabel[true_row]:
        prers += 1
# Report the number of correct predictions, then the accuracy in percent.
print(prers)
print((prers / len(idx_test) * 100))
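# Source article title: K近邻算法实现-Python (K-nearest-neighbour algorithm implementation in Python)
# 猜你喜欢 (You may also like)
# 转载自 (Reposted from) blog.csdn.net/hyt182380/article/details/89135726
# 今日推荐 (Today's recommendations)
# 周排行 (Weekly ranking)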