1. Import the required packages
import operator
from os import listdir

import numpy as np
from sklearn.neighbors import KNeighborsClassifier as KNN

# IPython/Jupyter magic; only valid inside a notebook cell.
%config ZMQInteractiveShell.ast_node_interactivity='all'
2. Define a function that converts an image into a vector
"" " Function Description: Converts a binary image 32x32 into 1x1024 vector of the Parameters: filename - the file name Returns: returnVect - 1x1024 vector return binary image ", "" DEF img2vector (filename): # create 1x1024 zero vector returnVect = np. zeros ((. 1, 1024 )) # open files fr = open (filename) # rows read for I in Range (32 ): # read data line lineStr = fr.readline () # the first 32 elements in each row a added to the returnVect for J in Range (32 ): returnVect [0,* I + J 32] = int (lineStr [J]) # the return 1x1024 vector conversion return returnVect
3. Define the handwritten digit recognition function
"" " Function Description: Handwriting digital Category Test Parameters: None Returns: None ." "" DEF handwritingClassTest (): # training set Labels hwLabels = [] # Returns the file name under trainingDigits directory trainingFileList = listdir ( ' trainingDigits ' ) # returns the file folder number m = len (trainingFileList) # initialization Mat training matrix, the training set trainingMat np.zeros = ((m, 1024 )) # from a centralized file parsed category training set for I in Range (m): # get file name = fileNameStr trainingFileList [I] # obtained classification numbers classNumber = int (fileNameStr.split ( ' _ ' ) [0]) # Add category obtained in the hwLabels hwLabels.append (classNumber) # The data of each file of 1x1024 stored trainingMat matrix trainingMat [I,:] = img2vector ( ' trainingDigits / S% ' % (fileNameStr)) # Construction KNN classifier neigh = KNN (N_NEIGHBORS =. 3, algorithm = ' Auto ' ) # fit model, trainingMat for the training matrix, hwLabels the corresponding label neigh.fit (trainingMat, hwLabels) #Returns the file in testDigits directory listing testFileList = the listdir ( ' testDigits ' ) # error check count errorCount = 0.0 # number of test data MTEST = len (testFileList) # parses category test set and classify test from a file for I in Range (MTEST): # obtain a file name fileNameStr = testFileList [I] # obtained classification numbers classNumber = int (fileNameStr.split ( ' _ ' ) [0]) # obtained test set 1x1024 vectors used for training vectorUnderTest = img2vector ( ' testDigits / S% ' % (FileNameStr)) # get predictions classifierResult = neigh.predict (vectorUnderTest) # print Print ( ' classification returns a value of% d \ t real results d% ' % (classifierResult, classNumber)) IF (classifierResult! = ClassNumber ): errorCount + = 1.0 Print ( ' total wrong data% d \ n error rate %% F% ' % (errorCount, errorCount / MTEST * 100))
4. Run the program
# Run the handwritten digit classification test only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    handwritingClassTest()
Dataset address:
Link: https://pan.baidu.com/s/1-F2LyVh63i4yjIwweTYjNg
Extraction code: 3gsa
References:
1. "Machine Learning in Action" (book)
2. https://github.com/apachecn/AiLearning
3. https://cuijiahua.com/blog/2017/11/ml_1_knn.html
4. Homework from the Deepshare machine learning hands-on training camp ( http://www.deepshare.net/ )