数据准备:
采用 CK+ 数据集作为训练集和验证集。
深度学习 流程
1.CNN 提取特征
2.FC 基于特征进行分类。
3.SoftmaxWithLoss 进行 loss 定义
建立网络 如下图所示:
1.引入了inception 模块来增加网络的宽度。
2.pool3 使用 average pooling(平均池化)来减少 FC 层的使用,可以有效减少参数量。
3.训练迭代50000次后,准确率 在 97% 左右。
传统实现流程
1.使用HOG提取图片特征。
2.使用 SVM 对特征进行分类。
3.训练后查看验证集准确率(准确率约 99%;验证集准确率偏高,可能是因为数据量较少【训练集:1600 张,验证集:130 张】)。
代码中包含三种特征方案:【1. 使用 HOG 特征;2. 使用 LBP 特征 + PCA 降维;3. 使用具有旋转不变性的 LBP 特征】
代码实现:
1.util.h #pragma once #include <iostream> #include <opencv2/opencv.hpp> namespace glasssix { struct Label { std::string fileName; int label; }; const int HOG_FEATURE = 0; const int SIFT_FEATURE = 1; const int LBP_FEATURE = 2; class DataPrepare { public: DataPrepare() {}; ~DataPrepare() {}; int getImageHogFeature(cv::Mat &img, std::vector<float> & descriptors, cv::Size & size); int getImageSiftFeature(cv::Mat &img, std::vector<float> & descriptors, cv::Size & size); int getImageLBPFeature(cv::Mat &img, std::vector<float> & descriptors, cv::Size & size, int radias = 1); void PCA_Reduce(cv::Mat & input_train, cv::Mat & input_test, cv::Mat & output_train, cv::Mat & output_test); void readDirFile(std::string dir, std::vector<Label> & data); void getData(std::string dir, cv::Mat & data, cv::Mat & label, int flage = HOG_FEATURE); void SVM_Train(cv::Mat & data, cv::Mat &label, std::string save_path = "svm.xml"); void SVM_Predict(std::string mode, cv::Mat & data, cv::Mat &label); void libSVM_Train(cv::Mat & data, cv::Mat &label, std::string save_path = "libsvm.xml"); void libSVM_Predict(std::string mode, cv::Mat & data, cv::Mat &label); }; } 2.util.cpp #include "util.h" #include <opencv2/opencv.hpp> #include <opencv2/xfeatures2d.hpp> #include <opencv2/ml/ml.hpp> #include <iostream> #include <fstream> #include "../../../../SoftWare/libsvm-3.22/svm.h" using namespace std; using namespace cv; using namespace glasssix; using namespace ml; const int SIFT_NUM = 30; string str_label[] = { "中性", "愤怒", "蔑视", "厌恶", "恐惧", "高兴", "悲伤", "惊讶" }; template <class Type> Type stringToNum(const string& str) { istringstream iss(str); Type num; iss >> num; return num; } void splitString(const string& s, vector<string>& v, const string& c) { string::size_type pos1, pos2; pos2 = s.find(c); pos1 = 0; while (string::npos != pos2) { v.push_back(s.substr(pos1, pos2 - pos1)); pos1 = pos2 + c.size(); pos2 = s.find(c, pos1); } if (pos1 != s.length()) { v.push_back(s.substr(pos1)); } } int 
DataPrepare::getImageHogFeature(Mat &img, vector<float> & descriptors, Size & size) { if (img.data == NULL) { cout << "No exist" << endl; return -1; } resize(img, img, size); HOGDescriptor *hog = new HOGDescriptor(size, Size(16, 16), Size(4, 4), Size(8, 8), 9); hog->compute(img, descriptors, Size(1, 1), Size(0, 0)); cout << "descriptors size is :" << descriptors.size() << endl; return 0; } void lbp_circle(Mat& src, Mat &dst, int radius, int neighbors) { for (int n = 0; n < neighbors; n++) { // 采样点的计算 float x = static_cast<float>(-radius * sin(2.0*CV_PI*n / static_cast<float>(neighbors))); float y = static_cast<float>(radius * cos(2.0*CV_PI*n / static_cast<float>(neighbors))); // 上取整和下取整的值 int fx = static_cast<int>(floor(x)); int fy = static_cast<int>(floor(y)); int cx = static_cast<int>(ceil(x)); int cy = static_cast<int>(ceil(y)); // 小数部分 float ty = y - fy; float tx = x - fx; // 设置插值权重 float w1 = (1 - tx) * (1 - ty); float w2 = tx * (1 - ty); float w3 = (1 - tx) * ty; float w4 = tx * ty; // 循环处理图像数据 for (int i = radius; i < src.rows - radius; i++) { for (int j = radius; j < src.cols - radius; j++) { // 计算插值 float t = static_cast<float>(w1*src.at<uchar>(i + fy, j + fx) + w2*src.at<uchar>(i + fy, j + cx) + w3*src.at<uchar>(i + cy, j + fx) + w4*src.at<uchar>(i + cy, j + cx)); // 进行编码 当t>=src(i,j)的时候取1,并进行相应的移位 避免 精度 损失:std::abs(t - src.at<uchar>(i, j)) < std::numeric_limits<float>::epsilon()) dst.at<uchar>(i - radius, j - radius) += ((t > src.at<uchar>(i, j)) || (std::abs(t - src.at<uchar>(i, j)) < std::numeric_limits<float>::epsilon())) << n; } } } } //旋转不变 Mapping range = 2^neighbors void rotation_invariant_mapping(Mat & src, int range, int neighbors, int *Mapping) { int newMax, rm, r; int *tmpMap; newMax = 0; tmpMap = (int *)malloc(sizeof(int)*range); memset(tmpMap, -1, sizeof(int)*range); for (int i = 0; i < range; i++) { rm = i; r = i; for (int j = 0; j < neighbors - 1; j++) { //将r向左循环移动一位,当r超过num_sp位时,舍弃 r = r << 1; if (r > range - 1) { r = r - (range - 1); } 
if (r < rm) { rm = r; } } if (tmpMap[rm] < 0) { tmpMap[rm] = newMax; newMax++; } Mapping[i] = tmpMap[rm]; } for (int i = 0; i < src.cols; i++) { for (int j = 0; j < src.rows; j++) { src.at<uchar>(i, j) = Mapping[src.at<uchar>(i, j)]; } } free(tmpMap); } int calc_sum(int r) { int res_sum; res_sum = 0; while (r) { res_sum = res_sum + r % 2; r /= 2; } return res_sum; } //计算旋转不变 + 等价LBP特征 void rotation_uniform_invariant_mapping(Mat & src, int range, int num_sp, int *Mapping) { int numt, i, j, tem_xor; numt = 0; tem_xor = 0; for (i = 0; i < range; i++) { j = i << 1; if (j > range - 1) { j = j - (range - 1); } tem_xor = i ^ j; // 异或 numt = calc_sum(tem_xor);//计算异或结果中1的个数,即跳变个数 if (numt <= 2) { Mapping[i] = calc_sum(i); } else { Mapping[i] = num_sp + 1; } } for (int i = 0; i < src.cols; i++) { for (int j = 0; j < src.rows; j++) { src.at<uchar>(i, j) = Mapping[src.at<uchar>(i, j)]; } } } void lbp_normal(Mat& src, Mat &dst) { // 循环处理图像数据 for (int i = 1; i < src.rows - 1; i++) { for (int j = 1; j < src.cols - 1; j++) { uchar tt = 0; int tt1 = 0; uchar u = src.at<uchar>(i, j); if (src.at<uchar>(i - 1, j - 1) > u) { tt += 1 << tt1; } tt1++; if (src.at<uchar>(i - 1, j) > u) { tt += 1 << tt1; } tt1++; if (src.at<uchar>(i - 1, j + 1) > u) { tt += 1 << tt1; } tt1++; if (src.at<uchar>(i, j + 1) > u) { tt += 1 << tt1; } tt1++; if (src.at<uchar>(i + 1, j + 1) > u) { tt += 1 << tt1; } tt1++; if (src.at<uchar>(i + 1, j) > u) { tt += 1 << tt1; } tt1++; if (src.at<uchar>(i + 1, j - 1) > u) { tt += 1 << tt1; } tt1++; if (src.at<uchar>(i - 1, j) > u) { tt += 1 << tt1; } tt1++; dst.at<uchar>(i - 1, j - 1) = tt; } } } void lbp_to_feature(Mat & src, vector<float> &descriptors, int num) { int rows = src.rows; int cols = src.cols; Rect block = Rect(0, 0, rows / 3, cols / 3); Mat roi; int fea[256] = { 0 }; for (int k = 0; k < 3; k++) { for (int n = 0; n < 3; n++) { block.x = k * rows / 3; block.y = n * cols / 3; roi = src(block); fea[256] = { 0 }; for (int i = 0; i < roi.rows; i++) { for (int j 
= 0; j < roi.cols; j++) { fea[roi.at<uchar>(i, j)]++; } } for (int i = 0; i < num; i++) { descriptors.push_back(fea[i] / (16 * 16.0)); } } } } int DataPrepare::getImageLBPFeature(Mat &img, vector<float> & descriptors, Size & size, int radias) { if (img.data == NULL) { cout << "No exist" << endl; return -1; } resize(img, img, Size(size.width + radias * 2, size.height + radias * 2)); Mat dst = Mat(img.rows - 2 * radias, img.cols - 2 * radias, CV_8UC1, Scalar(0));; //lbp_normal(img, dst); int neighbors = 8; int range = pow(2, neighbors); lbp_circle(img, dst, radias, neighbors); //int * map = (int *)malloc(sizeof(int)* range); ////rotation_invariant_mapping(dst, range, neighbors, map); //rotation_uniform_invariant_mapping(dst, range, neighbors, map); lbp_to_feature(dst, descriptors, 256); //cout << "descriptors is :" << descriptors.size() << " "<< descriptors[0] << endl; return 0; } int DataPrepare::getImageSiftFeature(cv::Mat &img, std::vector<float> & descriptors, cv::Size & size) { if (img.data == NULL) { cout << "No exist" << endl; return -1; } resize(img, img, size); Ptr<Feature2D> sift = xfeatures2d::SIFT::create(SIFT_NUM, 3, 0.01, 80); std::vector<KeyPoint> keypointsa; keypointsa.clear(); Mat a;//特征点描述 //sift->detectAndCompute(src, mask, keypointsa, a);//得到特征点和特征点描述 sift->detectAndCompute(img, Mat(), keypointsa, a); cout << "a length is :" << a.rows << " " << a.cols << " " << a.channels() << endl; a.reshape(1, SIFT_NUM * 128); //cout << "a length is :" << a.rows << " " << a.cols << " " << a.channels() << endl; cout << "descriptors shape is :" << descriptors.size() << endl; } void DataPrepare::readDirFile(std::string filePath, std::vector<Label> & data) { vector<Label> result; ifstream fileA(filePath); if (!fileA) { cout << "没有找到需要读取的 " << filePath << " 请将文件放到指定位置再次运行本程序。" << endl << " 按任意键以退出"; } for (int i = 0; !fileA.eof(); i++) { Label lb; string buf; getline(fileA, buf, '\n'); if (buf == "") { cout << "buf is empty." 
<< endl; continue; } vector<string> result; splitString(buf, result, " "); lb.fileName = result[0]; lb.label = stringToNum<int>(result[1]); data.push_back(lb); } fileA.close(); } void DataPrepare::getData(std::string dir, cv::Mat & data, cv::Mat & label, int flage) { std::vector<Label> vec_data; readDirFile(dir, vec_data); int length = vec_data.size(); cout << "train image is :" << length << endl; int n = 0; int featureNum = 0; for (int i = 0; i < length; i++) { Mat img = imread(vec_data[i].fileName, CV_LOAD_IMAGE_GRAYSCALE); std::vector<float> descriptors; if (flage == HOG_FEATURE) { getImageHogFeature(img, descriptors, Size(48, 48)); } else if (flage == SIFT_FEATURE) { getImageSiftFeature(img, descriptors, Size(48, 48)); } else if (flage == LBP_FEATURE) { getImageLBPFeature(img, descriptors, Size(48, 48), 1); } if (i == 0) { featureNum = descriptors.size(); label = Mat::zeros(length, 1, CV_32SC1); //注意其中训练和自动训练的接口,还有labelMat一定要用CV_32SC1的类型 data = Mat::zeros(length, descriptors.size(), CV_32FC1); } label.at<int>(i, 0) = vec_data[i].label; n = 0; for (std::vector<float>::iterator iter = descriptors.begin(); iter != descriptors.end(); iter++) { data.at<float>(i, n) = *iter; n++; } } } void DataPrepare::SVM_Train(cv::Mat & data, cv::Mat &label, string save_path) { Ptr<SVM> svm = SVM::create(); svm->setType(SVM::C_SVC); svm->setKernel(SVM::RBF); TermCriteria ctr = TermCriteria(CV_TERMCRIT_ITER + CV_TERMCRIT_EPS, 1000, FLT_EPSILON); svm->setTermCriteria(ctr); Ptr<TrainData> traindata = ml::TrainData::create(data, ROW_SAMPLE, label); double t0 = (double)cvGetTickCount(); cout << "start train .." << endl; svm->trainAuto(traindata, 10); double t1 = (double)cvGetTickCount(); cout << "train end . 
cost time is :" << ((t1 - t0) / ((double)cvGetTickFrequency() * 1000 * 1000)) / 60.0 << " minute" << endl; svm->save(save_path); //float response = svm->predict(sampleMat); //cout <<" response is :" <<response << endl; } void DataPrepare::SVM_Predict(string mode, cv::Mat & data, cv::Mat &label) { Ptr<SVM> model = Algorithm::load<SVM>(mode); int rows = data.rows; int cols = data.cols; Rect r; r.height = 1; r.width = cols; r.x = 0; Mat feature; int error = 0; double t0 = (double)cvGetTickCount(); for (int i = 0; i < rows; i++) { //float * feature = data.ptr<float>(i); r.y = i; feature = data(r); int result = model->predict(feature); if (result != label.at<int>(i, 0)) { cout << "has a error ..predict label is :" << result << " really label is:" << label.at<int>(i, 0) << endl; cout << "predict is :" << str_label[result] << " really label is:" << str_label[label.at<int>(i, 0)] << endl; error++; } } double t1 = (double)cvGetTickCount(); cout << "average cost time is: " << ((t1 - t0) / ((double)cvGetTickFrequency() * 1000 * 1000)) * 1000.0 / rows << "ms" << endl; cout << "accuracy is : " << (float)(rows - error) / rows << endl; } void DataPrepare::PCA_Reduce(cv::Mat & input_train, cv::Mat & input_test, cv::Mat & output_train, cv::Mat & output_test) { cout << "start pca" << endl; double t0 = (double)cvGetTickCount(); PCA pca(input_train, Mat(), PCA::DATA_AS_ROW, 0.97); cout << "end pca" << endl; double t1 = (double)cvGetTickCount(); cout << "cost time is: " << ((t1 - t0) / ((double)cvGetTickFrequency() * 1000 * 1000)) << "s" << endl; //cout << pca.eigenvalues << endl; //cout << pca.eigenvectors << endl; output_train = pca.project(input_train); output_test = pca.project(input_test); cout << " point size :" << output_train.rows << " " << output_train.cols << endl; } svm_parameter param; void init_param() { param.svm_type = C_SVC; param.kernel_type = RBF; param.degree = 3; param.gamma = 0.1; param.coef0 = 0; param.nu = 0.5; param.cache_size = 1000; param.C = 20; param.eps = 
1e-6; param.shrinking = 1; param.probability = 0; param.nr_weight = 0; param.weight_label = NULL; param.weight = NULL; } void DataPrepare::libSVM_Train(cv::Mat & data, cv::Mat &label, string save_path) { init_param(); int rows = data.rows; int cols = data.cols; svm_problem prob; prob.l = rows; svm_node *x_space = new svm_node[(cols + 1)*prob.l];//样本特征存储空间 prob.x = new svm_node *[prob.l]; //每一个X指向一个样本 cout << "size :" << sizeof(x_space) << endl; prob.y = new double[prob.l]; //libsvm train data prepare. for (int i = 0; i < rows; i++) { for (int j = 0; j < cols + 1; j++) { if (j == cols) { x_space[i*(cols + 1) + j].index = -1; prob.x[i] = &x_space[i * (cols + 1)]; prob.y[i] = label.at<uchar>(i, 0); break; } x_space[i*(cols + 1) + j].index = j + 1; x_space[i*(cols + 1) + j].value = data.at<float>(i, j); } } cout << "start train svm." << endl; svm_model *model = svm_train(&prob, ¶m); cout << "save model" << endl; svm_save_model(save_path.c_str(), model); cout << "done!" << endl; delete[] x_space; delete[] prob.x; delete[] prob.y; } void DataPrepare::libSVM_Predict(std::string mode, cv::Mat & data, cv::Mat &label) { svm_model* model = svm_load_model(mode.c_str()); int test_cols = data.cols; int test_rows = data.rows; svm_node *test_space = new svm_node[test_cols + 1]; //svm_problem prob_test; //libsvm test data prepare. 
int error = 0; double t0 = (double)cvGetTickCount(); for (int i = 0; i < test_rows; i++) { for (int j = 0; j < test_cols + 1; j++) { if (j == test_cols) { test_space[j].index = -1; break; } test_space[j].index = j + 1; test_space[j].value = data.at<float>(i, j); } int d = svm_predict(model, test_space); if (d != label.at<int>(i, 0)) { cout << "predict is :" << d << " really is :" << label.at<int>(i, 0) << endl; error++; } } double t1 = (double)cvGetTickCount(); cout << "average time is: " << ((t1 - t0) / ((double)cvGetTickFrequency() * 1000 * 1000))*1000.0 / test_rows << "ms" << endl; cout << "acurcy is :" << (float)(test_rows - error) / test_rows << endl; delete[] test_space; } 3.main.cpp #include <iostream> #include <opencv2/core/core.hpp> #include <opencv2/xfeatures2d.hpp> #include <opencv2/opencv.hpp> #include "util/util.h" #include "../../../SoftWare/libsvm-3.22/svm.h" using namespace cv; using namespace std; using namespace glasssix; string base = "C:\\WorkSpace\\Word\\emotion_reconginzed\\data_ck\\"; string train_text = base + "really_path_train.txt"; string test_text = base + "really_path_test.txt"; string mode = "svm.xml"; string svm_mode = "modelFileName.xml"; int main(int argc, char** argv) { DataPrepare dp; /* //1. HOG + SVM std::vector<Label> data; Mat train_data, train_label; Mat test_data, test_label; dp.getData(train_text, train_data, train_label); dp.getData(test_text, test_data, test_label); dp.SVM_Train(train_data, train_label, mode); dp.SVM_Predict(mode, test_data, test_label); */ //2. HOG + libsvm std::vector<Label> data; Mat train_data, train_label; Mat test_data, test_label; dp.getData(train_text, train_data, train_label); dp.getData(test_text, test_data, test_label); dp.libSVM_Train(train_data, train_label, svm_mode); dp.libSVM_Predict(svm_mode, test_data, test_label); /* //3. 
LBP + PCA + SVM std::vector<Label> data; Mat train_data, train_label; Mat test_data, test_label; dp.getData(train_text, train_data, train_label, LBP_FEATURE); dp.getData(test_text, test_data, test_label, LBP_FEATURE); Mat output_train, output_test; dp.PCA_Reduce(train_data, test_data, output_train, output_test); dp.SVM_Train(output_train, train_label, mode); dp.SVM_Predict(mode, output_test, test_label); */ system("PAUSE"); return 0; }