HOG+SVM实现行人检测

一、概述

  1. 行人检测过去流行采用的方法是DPM方法,其主要采用hog特征+SVM分类实现行人检测;
  2. 其中梯度方向直方图(Histogram of Oriented Gradients,HOG)的概念由
    Dalal 和 Triggs 在 2005 年提出,并被他们用于行人检测;
  3. 该方法在 MIT行人数据库上获得近乎 100% 的检测成功率;在包含视角、 光照和背景等变化的 INRIA 行人数据库上,也取得了大约 90% 的检测成功率。HOG是目前使用最为广泛的行人特征描述子。

参考博客:

二、实现代码

先上完整的代码,再进行解析说明

#include <sys/time.h>

#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>

#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/ml/ml.hpp>
#include <opencv2/objdetect/objdetect.hpp>

using namespace std;
using namespace cv;
using namespace cv::ml;


// ---- Build-time configuration -------------------------------------------
// Sample counts. They bound how many lines are read from each image list and
// determine how many rows the training matrices are allocated with in main().
#define PosSamNO 2416  // maximum number of positive images read from sample_pos.txt
#define NegSamNO 6070 // maximum number of negative images read from sample_new_neg.txt (1214 * 5 crops)
#define cropNegNum 1214  // maximum number of source negative images to crop in crop_negsample_random()

// Only added to the training-matrix row count in this file; no such samples are loaded here.
#define HardExampleNO 0 // number of hard-negative examples
#define AugPosSamNO 0 // number of augmented positive examples

#define TRAIN true // true: train the SVM and save it to ../data/SVM_HOG.xml; false: load that file instead
#define CENTRAL_CROP true  // true: resize every positive image to 64x128 so it matches the HOG window size
#define crop_negsample false // true: run crop_negsample_random() first to generate random negative crops

/*********************************    随机剪裁负样本   *******************************************/
void crop_negsample_random()
{
	string imgName;
	char saveName[200];
	ifstream fileNeg("../img_dir/sample_neg.txt");
	
	int num=0;
    //如果文件存在,则先删除该文件
    ofstream fout("../img_dir/sample_new_neg.txt",ios::trunc);

	//读取负样本
	for (int i = 0;i < cropNegNum && getline(fileNeg, imgName); i++)
	{
		imgName = "../normalized_images/train/neg/" + imgName;  //加路径
		Mat img = imread(imgName, IMREAD_UNCHANGED);
		struct timeval tv;
		if (img.empty())
		{
			cout << "can not load the image:" << imgName << endl;
			continue;
		}
		if (img.cols >= 64 && img.rows >= 128)
		{
			num = 0;
			//从每张图片中随机剪裁5张64*128的负样本
			for (int j = 0;j < 5;j++)
			{
				
				gettimeofday(&tv,NULL);
				srand(tv.tv_usec);//利用系统时间(微妙),设置随机数种子
				int x = rand() % (img.cols - 64); //左上角x
				int y = rand() % (img.rows - 128); //左上角y
				cout << "x:" << x << "y:" << y <<endl;
				Mat src = img(Rect(x, y, 64, 128));
				sprintf(saveName, "../normalized_images/train/new_neg/neg%dCropped%d.png",i, num);
				imwrite(saveName,src);

                //保存裁剪得到的图片名称到txt文件,换行分隔
                if(i<(cropNegNum-1)){
                    fout <<"neg" << i << "Cropped"<< num++ << ".png"<< endl;
                }
                else if(i==(cropNegNum-1) && j<4){
                    fout <<"neg" << i << "Cropped"<< num++ << ".png"<< endl;
                }
                else{
                    fout <<"neg" << i << "Cropped"<< num++ << ".png";
                }        
			}
		}
	}
    fout.close();
	cout << "crop ok!" << endl;
}

int main()
{
    if(crop_negsample){
        crop_negsample_random(); //裁剪负样本
    }
    //winsize(64,128),blocksize(16,16),blockstep(8,8),cellsize(8,8),bins9
    HOGDescriptor hog(Size(64,128),Size(16,16),Size(8,8),Size(8,8),9);
    int DescriptorDim;
    Ptr<SVM> svm = SVM::create();
    if(TRAIN)
    {
        string ImgName;
        ifstream finPos("../img_dir/sample_pos.txt");
        // ifstream finNeg("../sample_neg.txt");
        ifstream finNeg("../img_dir/sample_new_neg.txt");

        if (!finPos || !finNeg)
            {
                cout << "Pos/Neg imglist reading failed..." << endl;
                return 1;
            }

        Mat sampleFeatureMat;
        Mat sampleLabelMat;

        //loading original positive examples...
        for(int num=0; num < PosSamNO && getline(finPos,ImgName); num++)
        {
            //cout <<"Now processing original positive image: " << ImgName << endl;
            ImgName = "../normalized_images/train/pos/" + ImgName;
            Mat src = imread(ImgName);

            if(CENTRAL_CROP)
                resize(src,src,Size(64,128));// src = src(Rect(16,16,64,128));

            vector<float> descriptors;
            hog.compute(src, descriptors, Size(8,8));//计算HOG描述子,检测窗口移动步长(8,8)


            if( 0 == num )
            {
                DescriptorDim = descriptors.size();
                sampleFeatureMat = Mat::zeros(PosSamNO +AugPosSamNO +NegSamNO +HardExampleNO, DescriptorDim, CV_32FC1);
                sampleLabelMat = Mat::zeros(PosSamNO +AugPosSamNO +NegSamNO +HardExampleNO, 1, CV_32SC1);//sampleLabelMat的数据类型必须为有符号整数型
            }

            //将计算好的HOG描述子复制到样本特征矩阵sampleFeatureMat
            for(int i=0; i<DescriptorDim; i++)
                sampleFeatureMat.at<float>(num,i) = descriptors[i];
            sampleLabelMat.at<int>(num,0) = 1;
        }
        finPos.close();


        //loading original negative examples...
        for(int num = 0; num < NegSamNO && getline(finNeg,ImgName); num++)
        {
            //cout<<"Now processing original negative image: "<<ImgName<<endl;
            // ImgName = "../normalized_images/train/neg/" + ImgName;
            ImgName = "../normalized_images/train/new_neg/" + ImgName;
            Mat src = imread(ImgName);

            vector<float> descriptors;
            hog.compute(src,descriptors,Size(8,8));

            for(int i=0; i<DescriptorDim; i++)
                sampleFeatureMat.at<float>(num+PosSamNO,i) = descriptors[i];
            sampleLabelMat.at<int>(num +PosSamNO +AugPosSamNO, 0) = -1;

        }
        finNeg.close();


        svm ->setType(SVM::C_SVC);
        svm ->setC(0.01);
        svm ->setKernel(SVM::LINEAR);
        // svm ->setTermCriteria(TermCriteria(TermCriteria::MAX_ITER, 3000, 1e-6));
        svm ->setTermCriteria(TermCriteria(TermCriteria::MAX_ITER, 100, 1e-3));

        cout<<"Starting training..."<<endl;
        svm ->train(sampleFeatureMat, ROW_SAMPLE, sampleLabelMat);
        cout<<"Finishing training..."<<endl;

        svm ->save("../data/SVM_HOG.xml");

    }
    else {
        svm = SVM::load( "../data/SVM_HOG.xml" );   
    }
    cout << "loaded SVM_HOG.xml file"  << endl;

    Mat svecsmat = svm ->getSupportVectors();//svecsmat元素的数据类型为float
    int svdim = svm ->getVarCount();//特征向量位数
    int numofsv = svecsmat.rows;

    // Mat alphamat = Mat::zeros(numofsv, svdim, CV_32F);//alphamat和svindex必须初始化,否则getDecisionFunction()函数会报错
    Mat alphamat = Mat::zeros(numofsv, svdim, CV_32F);
    Mat svindex = Mat::zeros(1, numofsv,CV_64F);
    cout << "after initialize the value of alphamat is  " << alphamat.size()  << endl;  

    Mat Result;
    double rho = svm ->getDecisionFunction(0, alphamat, svindex);

    cout << "the value of rho is  " << rho << endl;    
    alphamat.convertTo(alphamat, CV_32F);//将alphamat元素的数据类型重新转成CV_32F
    cout << "the value of alphamat is  " << alphamat << endl; 
    cout << "the size of alphamat is  " << alphamat.size() << endl;
    cout << "the size of svecsmat is  " << svecsmat.size() << endl;
    Result = -1 * alphamat * svecsmat;//float

    cout << "the value of svdim is  " << svdim << endl;

    vector<float> vec;
    for (int i = 0; i < svdim; ++i)
    {
        vec.push_back(Result.at<float>(0, i));
    }
    vec.push_back(rho);
    cout << "going to write the HOGDetectorForOpenCV.txt file"  << endl;
    //saving HOGDetectorForOpenCV.txt
    ofstream fout("HOGDetectorForOpenCV.txt");
    for (int i = 0; i < vec.size(); ++i)
    {
        fout << vec[i] << endl;
    }


    /*********************************Testing**************************************************/
    HOGDescriptor hog_test;
    hog_test.setSVMDetector(vec);

    // Mat src = imread("../person_and_bike_177b.png");
    Mat src = imread("../3.jpg");
    vector<Rect> found, found_filtered;
    hog_test.detectMultiScale(src, found, 0, Size(8,8), Size(32,32), 1.05, 2);

    cout<<"found.size : "<<found.size()<<endl;

    //找出所有没有嵌套的矩形框r,并放入found_filtered中,如果有嵌套的话,则取外面最大的那个矩形框放入found_filtered中
    for(int i=0; i < found.size(); i++)
    {
        Rect r = found[i];
        int j=0;
        for(; j < found.size(); j++)
            if(j != i && (r & found[j]) == r)
                break;
        if( j == found.size())
            found_filtered.push_back(r);
    }


    //画矩形框,因为hog检测出的矩形框比实际人体框要稍微大些,所以这里需要做一些调整
    for(int i=0; i<found_filtered.size(); i++)
    {
        Rect r = found_filtered[i];
        r.x += cvRound(r.width*0.1);
        r.width = cvRound(r.width*0.8);
        r.y += cvRound(r.height*0.07);
        r.height = cvRound(r.height*0.8);
        rectangle(src, r.tl(), r.br(), Scalar(0,255,0), 3);
    }

    imwrite("ImgProcessed.jpg",src);
    namedWindow("src",0);
    imshow("src",src);
    waitKey(0);

    return 0;
 }

代码说明

  • 程序在最开始的地方定义了负样本裁剪函数:crop_negsample_random;用于从少量的负样本中裁剪得到足够的负样本
  • 行人数据集:INRIADATA,使用整理过的图片,即 normalized_images 这个文件夹中的图片即可
  • 当报错的时候,首先检查:正负样本的数量和对应的txt文件对应的行数是否一致
  • 代码运行环境为:opencv3.0以上;已在Ubuntu14.04+opencv3.2上采用cmake编译并运行验证,目录组织如下图所示:
    在这里插入图片描述
    其中normalized_images为训练所需的图片,img_dir包含正负样本名称的txt文件,data包含一些测试的图片,完整代码可参考:https://download.csdn.net/download/yph001/10721973
  • 其中在Ubuntu下将目录中的所有文件名输出到txt文件的方法为:
//例如将neg这个文件夹下面的所有文件名称,输出到与neg文件夹同级的sample_neg.txt文件
cd neg
ls -R * > ../sample_neg.txt
  • 注意要将CENTRAL_CROP 设置为true,使正样本图片被缩放(resize)成64*128大小,与hog参数中设置的窗口大小(64,128)相吻合,否则很容易运行特别长时间,并最终报错

猜你喜欢

转载自blog.csdn.net/yph001/article/details/83053475