import numpy as np
from os import listdir
import operator
def img2vector(filename):
    """Load a 32x32 text-encoded digit image into a flat row vector.

    The file is read as 32 lines; the first 32 characters of each line are
    converted with ``int`` and stored row by row.

    Args:
        filename: Path of the text file to read.

    Returns:
        A NumPy array of shape ``(1, 1024)`` where element ``32 * i + j``
        holds the value of character ``j`` on line ``i``.

    Raises:
        IndexError: If a line has fewer than 32 characters (including the
            case where the file has fewer than 32 lines).
        ValueError: If a character cannot be converted by ``int``.
    """
    returnVect = np.zeros((1, 1024))
    # The context manager guarantees the handle is closed even when a
    # malformed line raises part-way through.
    with open(filename) as fr:
        for i in range(32):
            lineStr = fr.readline()
            for j in range(32):
                returnVect[0, 32 * i + j] = int(lineStr[j])
    return returnVect
def classify0(inX, dataSet, labels, k):
    """Classify ``inX`` by majority vote among its ``k`` nearest samples.

    Distances are Euclidean, computed between ``inX`` and every row of
    ``dataSet``.

    Args:
        inX: The sample to classify; a single vector with as many values as
            ``dataSet`` has columns (a ``(1, n)`` row works as well).
        dataSet: 2-D array with one known sample per row.
        labels: Sequence where ``labels[i]`` is the class of ``dataSet[i]``.
        k: Number of nearest neighbours that take part in the vote.

    Returns:
        The label with the most votes. When several labels tie, the one
        that was encountered first among the nearest neighbours is
        returned (the sort below is stable).

    Raises:
        IndexError: If ``k`` exceeds the number of rows in ``dataSet`` or
            if ``k`` is not positive (no votes are cast).
    """
    # Broadcasting subtracts inX from every row without materialising a
    # tiled copy of inX first.
    diffMat = np.asarray(inX) - dataSet
    sqDistances = (diffMat ** 2).sum(axis=1)
    distances = sqDistances ** 0.5
    sortedDistIndicies = distances.argsort()

    # Tally the labels of the k closest samples.
    classCount = {}
    for i in range(k):
        voteIlabel = labels[sortedDistIndicies[i]]
        classCount[voteIlabel] = classCount.get(voteIlabel, 0) + 1

    # Most frequent label first.
    sortedClassCount = sorted(
        classCount.items(), key=operator.itemgetter(1), reverse=True
    )
    return sortedClassCount[0][0]
# Build the training set: one 1024-element row per file, with the class
# label taken from the part of the file name before the first underscore.
file = 'D:/anicode/spyderworkspace/examtest/trainingDigits'
file_name = listdir(file)
num_files = len(file_name)
clsLabel = []
trainingMat = np.zeros((num_files, 1024))
for i in range(num_files):
    cls = file_name[i].split('.')[0]
    clsNum = int(cls.split('_')[0])
    clsLabel.append(clsNum)
    file_name_one = file + '/' + file_name[i]
    trainingMat[i, :] = img2vector(file_name_one)

# Evaluate on the test set: classify every file with k=3 and count the
# predictions that differ from the label encoded in the file name.
test_file = 'D:/anicode/spyderworkspace/examtest/testDigits'
test_file_name = listdir(test_file)
test_num_files = len(test_file_name)
errorCount = 0.0
for i in range(test_num_files):
    test_cls = test_file_name[i].split('.')[0]
    test_clsNum = int(test_cls.split('_')[0])
    test_file_name_one = test_file + '/' + test_file_name[i]
    testvector = img2vector(test_file_name_one)
    clsResult = classify0(testvector, trainingMat, clsLabel, 3)
    print("the predict answer is %d,the real anser is %d"%(clsResult,test_clsNum))
    if clsResult != test_clsNum:
        errorCount += 1
# Report the total number of errors and the error rate as a percentage.
print("总错误个数为:%d,错误率为:%f%%"%(errorCount,errorCount/float(test_num_files)*100))
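# Notes on "Machine Learning in Action" -- KNN classification of MNIST-style digits
# (page navigation residue: "You may also like")
# Reposted from blog.csdn.net/sinat_38998284/article/details/81607671
# (page navigation residue: "Today's recommendations")
# (page navigation residue: "Weekly ranking")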