Handwriting recognition in C++ based on KNN

1. System structure

insert image description here
insert image description here
insert image description here

2. Data acquisition and preprocessing

insert image description here
insert image description here

3. KNN algorithm and K-fold cross-validation

insert image description here
insert image description here
Source code:

#include "pch.h"
#include <iostream>
#include <fstream>
#include <string>
#include <math.h>
using namespace std;
//#define k 10   // key KNN parameter (disabled; the global `int k` below is used instead)
// NOTE(review): mn is not referenced anywhere in the code visible in this file.
#define mn 10
#define K_flod 15  // number of folds used by the K-fold cross-validation
// Number of nearest neighbours that vote in K_select; main() assigns it before each run.
int k;
double train_weigh[8] = {
    
    1,1,1,1,1,1,1,1};// weight of each Chinese character class in the training set
// One handwriting sample plus the bookkeeping fields the KNN code attaches to it.
// Every member has a default initialiser, so a default-constructed node is fully
// defined (the original constructor left `diss` uninitialised).
struct node {
 // The 0/1 matrix produced from one txt file, stored as a single string.
 std::string nn = "000";
 // Class name of the sample (training-set category).
 std::string name = "000";
 // Class name finally decided for a test sample.
 std::string shuxing = "000";
 // Recognition result for a test character (written by K_select).
 std::string sbname = "000";
 // Euclidean distance between a test character and a training character.
 double dis = 0.0;
 // Share of the k Euclidean (Manhattan) distances recognised as the current test character.
 double x1 = 0.0;
 // Share of the k Euclidean (Manhattan) distances recognised as other character classes.
 double x2 = 0.0;
 // Manhattan distance between a test character and a training character.
 double diss = 0.0;
 // Set to true by k_K_flod while the sample belongs to the current test fold.
 bool s = false;
};
node dis_o[720];// Euclidean (or Manhattan) distances to the training samples
node ceshi[720];// test set
double tru[720];// accuracy of each round of the K-fold cross-validation
double err[720];// error rate of each round of the K-fold cross-validation
double K_tru[8];// final KNN accuracy of each Chinese character
//double K_err[8];// final KNN error rate of each Chinese character
string typp[8] = {
    
     "bei","jing","xin","xi","ke","ji","da","xue" };// the character classes that can be recognised
node a[720];// the complete data set
//将全部数据(720个128*128的二值化矩阵读入存储在a这个结构体数组中)
void read() {
    
    
 int i, j;
 string nn;
 string mi[90] = {
    
     "01","02","03","04","05","06","07","08","09","10","11","12","13","14","15","16","17","18","19","20","21","22","23","24","25","26" ,"27","28","29","30",
 "31","32","33","34","35","36","37","38","39","40","41","42","43","44","45","46","47","48","49","50","51","52","53","54","55","56" ,"57","58","59","60" ,
 "61","62","63","64","65","66","67","68","69","70","71","72","73","74","75","76","77","78","79","80","81","82","83","84","85","86" ,"87","88","89","90" };
 for (i = 0, j = 0; i < 90; i++, j++) {
    
    
  string ader = "F:\\计算机综合实践\\手写体识别数据集\\训练集二值化数据集\\bei_" + mi[i] + ".txt";
  ifstream rf(ader);
  while (rf >> nn) {
    
    
   a[j].nn = a[j].nn + nn;
  }
  a[j].name = "bei";
  rf.close();
 }
 for (i = 0; i < 90; i++, j++) {
    
    
  string ader = "F:\\计算机综合实践\\手写体识别数据集\\训练集二值化数据集\\jing_" + mi[i] + ".txt";
  ifstream rf(ader);
  while (rf >> nn) {
    
    
   a[j].nn = a[j].nn + nn;
  }
  a[j].name = "jing";
  rf.close();
 }
 for (i = 0; i < 90; i++, j++) {
    
    
  string ader = "F:\\计算机综合实践\\手写体识别数据集\\训练集二值化数据集\\xin_" + mi[i] + ".txt";
  ifstream rf(ader);
  while (rf >> nn) {
    
    
   a[j].nn = a[j].nn + nn;
  }
  a[j].name = "xin";
  rf.close();
 }
 for (i = 0; i < 90; i++, j++) {
    
    
  string ader = "F:\\计算机综合实践\\手写体识别数据集\\训练集二值化数据集\\xi_" + mi[i] + ".txt";
  ifstream rf(ader);
  while (rf >> nn) {
    
    
   a[j].nn = a[j].nn + nn;
  }
  a[j].name = "xi";
  rf.close();
 }
 for (i = 0; i < 90; i++, j++) {
    
    
  string ader = "F:\\计算机综合实践\\手写体识别数据集\\训练集二值化数据集\\ke_" + mi[i] + ".txt";
  ifstream rf(ader);
  while (rf >> nn) {
    
    
   a[j].nn = a[j].nn + nn;
  }
  a[j].name = "ke";
  rf.close();
 }
 for (i = 0; i < 90; i++, j++) {
    
    
  string ader = "F:\\计算机综合实践\\手写体识别数据集\\训练集二值化数据集\\ji_" + mi[i] + ".txt";
  ifstream rf(ader);
  while (rf >> nn) {
    
    
   a[j].nn = a[j].nn + nn;
  }
  a[j].name = "ji";
  rf.close();
 }
 for (i = 0; i < 90; i++, j++) {
    
    
  string ader = "F:\\计算机综合实践\\手写体识别数据集\\训练集二值化数据集\\da_" + mi[i] + ".txt";
  ifstream rf(ader);
  while (rf >> nn) {
    
    
   a[j].nn = a[j].nn + nn;
  }
  a[j].name = "da";
  rf.close();
 }
 for (i = 0; i < 90; i++, j++) {
    
    
  string ader = "F:\\计算机综合实践\\手写体识别数据集\\训练集二值化数据集\\xue_" + mi[i] + ".txt";
  ifstream rf(ader);
  while (rf >> nn) {
    
    
   a[j].nn = a[j].nn + nn;
  }
  a[j].name = "xue";
  rf.close();
 }
}
// Prompts for and reads the weights of the 8 character classes into train_weigh,
// in index order 0..7.
void InitTrWei() {
 cout << "请分别输入8个汉字的权重值: ";
 for (double& weight : train_weigh) {
  cin >> weight;
 }
}
//计算欧式距离
double dis (node x1,node x2) {
    
    
 const char *ar1,*ar2;
 double sum = 0;
 int j;
 ar1 = x1.nn.c_str();
 ar2 = x2.nn.c_str();
 for (j = 0; j < 1024; j++) {
    
    
  sum += (ar1[j] - ar2[j])*(ar1[j] - ar2[j]);
 }
 return sqrt(sum);
}
//计算曼哈顿距离
double diss(node x1, node x2) {
    
    
 const char *ar1, *ar2;
 double sum = 0;
 int j;
 ar1 = x1.nn.c_str();
 ar2 = x2.nn.c_str();
 for (j = 0; j < 1024; j++) {
    
    
  sum += fabs(ar1[j] - ar2[j]);
 }
 return sum;
}
// Quicksort partition step, ordering by the dis field.
//
// Uses a[low] as the pivot and rearranges a[low..high] so that entries with a
// smaller dis end up left of the pivot and entries with a larger dis right of
// it. Only the (dis, name) pair of each element is moved; the remaining fields
// of the array elements stay where they are. Returns the pivot's final index.
int Par(node *a,int low,int high) {
 // Remember the pivot key; its slot is overwritten during the scan.
 const double pivotDis = a[low].dis;
 const string pivotName = a[low].name;

 // Copies the sort key (name and dis) from slot `from` into slot `to`.
 auto moveKey = [a](int to, int from) {
  a[to].name = a[from].name;
  a[to].dis = a[from].dis;
 };

 while (low < high) {
  // Walk down from the right until an entry smaller than the pivot is found.
  while (low < high && a[high].dis >= pivotDis) {
   --high;
  }
  moveKey(low, high);

  // Walk up from the left until an entry larger than the pivot is found.
  while (low < high && a[low].dis <= pivotDis) {
   ++low;
  }
  moveKey(high, low);
 }

 // low == high: this is the hole where the pivot belongs.
 a[low].dis = pivotDis;
 a[low].name = pivotName;
 return low;
}
// Recursive quicksort of a[low..high] (inclusive bounds) in ascending order of
// dis, using Par for partitioning.
void Qsort(node *a,int low ,int high) {
 // Ranges with fewer than two elements are already sorted.
 if (low >= high) {
  return;
 }
 const int pivotPos = Par(a, low, high);
 Qsort(a, low, pivotPos - 1);
 Qsort(a, pivotPos + 1, high);
}
// Decides the k-nearest-neighbour result for one tested character.
//
// Sorts the first (90 - 90 / K_flod) * 8 entries of dis_o by distance, counts
// the class names of the k closest entries, multiplies each count by the
// matching train_weigh entry and writes the name of the class with the largest
// weighted count into p.sbname. On ties the class with the lower index wins;
// if no weighted count is positive the result is the first class ("bei").
void K_select(node& p) {
 // Class names in the same index order as train_weigh.
 static const char* const kClassNames[8] = {
  "bei", "jing", "xin", "xi", "ke", "ji", "da", "xue" };

 Qsort(dis_o, 0, (90 - 90 / K_flod)*8-1);

 // Tally the class labels of the k nearest neighbours.
 double votes[8] = { 0 };
 for (int n = 0; n < k; n++) {
  for (int c = 0; c < 8; c++) {
   if (dis_o[n].name == kClassNames[c]) {
    votes[c]++;
   }
  }
 }

 // Apply the class weights and pick the strongest class.
 double best = 0;
 int winner = 0;
 for (int c = 0; c < 8; c++) {
  votes[c] *= train_weigh[c];
  if (votes[c] > best) {
   best = votes[c];
   winner = c;
  }
 }

 p.sbname = kClassNames[winner];
}
// Evaluates the test-set result: returns the fraction of the first
// (90 / K_flod) * 8 entries of ceshi whose recognised name (sbname) equals
// their true name.
double ceshiFenxi() {
 const int sampleCount = (90/K_flod)*8;
 double correct = 0;
 double wrong = 0;
 for (int n = 0; n < sampleCount; ++n) {
  const bool hit = (ceshi[n].name == ceshi[n].sbname);
  (hit ? correct : wrong) += 1;
 }
 return correct / (correct + wrong);
}
//K折交叉验证
void k_K_flod(){
    
    
 int x1 = 0;//记录交叉验证第几折
 int i,j,t,jj,e;
 double b[8] = {
    
     0 };
 for (i = 0; i < 720; i++) {
    
    
  a[i].s = false;
 }
 for (x1 = 0; x1 < K_flod;x1++) {
    
    
  t = 0;
  //选出第x1折交叉验证的测试集
  for (e = 0; e < 8; e++) {
    
    
   for (j = 90 - (K_flod - x1)*(90 / K_flod)+e*90; j < 90 - (K_flod - x1)*(90 / K_flod) + 90 / K_flod+e*90; j++) {
    
    
    a[j].s = true;
   }
  }
  for (e = 0; e < 8; e++) {
    
    
   double tru = 0, err = 0;
   //计算第x1折交叉验证的每个字的测试集准确率
   for (i = 90 - (K_flod - x1)*(90 / K_flod) + e * 90; i < 90 - (K_flod - x1)*(90 / K_flod) + 90 / K_flod + e * 90; i++) {
    
    
    if (a[i].name == a[i].sbname)
     tru++;
    else
     err++;
   }
   b[e] += tru / (tru + err);
   }
  for (i = 0; i < 720; i++) {
    
    
   a[i].s = false;
  }
 }
 for (i = 0; i < 8; i++) {
    
    
  K_tru[i] = b[i] / K_flod;
 }
}
int main() {
    
    
 read();//将全部数据集读入
 string ader = "F:\\计算机综合实践\\15折交交叉验证_曼哈顿距离.txt";
 ofstream rf(ader);
 for (int j = 5; j <= 40; j++) {
    
    
  k = j;
  k_K_flod();//进行K折交叉验证得到每个汉字识别的正确率并保存到  K_tru   数组中
  for (int i = 0; i < 8; i++) {
    
    
   cout << K_tru[i] <<endl;
   rf << K_tru[i] << endl;
  }
  rf << endl << endl;
  cout << endl<<endl;
  }
  return 0;
  }

Guess you like

Origin blog.csdn.net/weixin_42529594/article/details/113120418