最近在做目标跟踪相关程序的加速，正好借这个机会整理一下对TLD源码的理解。由于本人C++功底有限，难免会出现表述和理解错误，请大家批评指正。
本帖子长期更新，未完部分请耐心等待。

TLD源码解读

主函数部分

主要涉及两个函数：

RTLTrackerInit

// Initialise the tracker from the grayscale first frame and the user-selected box.
tracker.RTLTrackerInit(last_gray, box);

其中 last_gray 是视频第一帧（frame）经过灰度变换后的图像，代码如下：

// Grab the next frame from the video source.
capture >> frame;
// Frames decoded by cv::VideoCapture are stored in BGR channel order, so the
// BGR->gray conversion is the one that applies the red/blue weights correctly
// (assumes `capture` is a cv::VideoCapture).
cvtColor(frame, last_gray, CV_BGR2GRAY);

Box 是我们标定的目标位置，TLD是单目标跟踪所以只能实现标记一个框

// Initial target rectangle, given as (x, y, width, height).
Rect box(248,198,27,14);

RTLTrackerTracking

// Track from last_gray to current_gray; pbox and status are read afterwards to
// draw the result. NOTE(review): the meaning of the trailing `true` argument is
// not visible in this excerpt — confirm against the function declaration.
tracker.RTLTrackerTracking(last_gray, current_gray, pbox, status, true);

其中 pbox 是 box 调用 BoundingBox 的构造函数得到的

/*
 * A cv::Rect carrying the two extra per-window values used by the tracker.
 * Both constructors zero the extra fields so that a freshly created box never
 * exposes indeterminate values.
 */
class BoundingBox : public cv::Rect
{
public:
    BoundingBox( ) : overlap( 0.0f ), scaleId( 0 ) {}
    BoundingBox( cv::Rect r ) : cv::Rect( r ), overlap( 0.0f ), scaleId( 0 ) {}
    /* Overlap ratio between this box and a reference box */
    float overlap;
    /* Index of the scale this window was generated at */
    int scaleId;
};

status 是一个标志变量，表示是否跟踪成功；如果跟踪成功则画框：

 // Draw the tracked box and count the frame only when tracking succeeded.
 if (status) {
            // Green rectangle: thickness 2, line type 8, no fractional shift.
            rectangle(frame, pbox, Scalar(0, 255, 0), 2, 8, 0);
            detections++;
        }

TrackerInit函数解读

RTLTrackerInit 函数（RtLtTracker 类的成员函数）

void RtLtTracker::RTLTrackerInit( const Mat &frame, const Rect userBox )
{

    /* 获得所有的扫描窗口 */
    boxOperator.getAllScanWindowOfFrame( frame, userBox );
    /* 将这些窗口分类为好坏窗口，并得到好窗口的边界 */
    boxOperator.classifyScanningWindows( userBox, numClosestInit );

    /* 分配各种空间 */
    /*
    struct TempStruct {
    std::vector<std::vector<int> > patt;
    std::vector<float> conf;
    };
    */
    tmp.conf = vector<float>( boxOperator.allScanningWindows.size( ) );
    tmp.patt = vector<vector<int> >( boxOperator.allScanningWindows.size( ), vector<int>( 10, 0 ) );
    classifier.positiveNNSample.create( classifier.patchSize, classifier.patchSize, CV_64F );

    /***********************************************************\
    * 源代码有保留上一帧信息的操作，应该是用来做BF跟踪器跟丢自检，这里先不加  *
    \***********************************************************/
    /****
    ????这里有个疑问是为什么不用初始rect作为bestWindows，而还是要用和Rect重合度最好的windows作为             bestWindows   因为这本来就是初始化阶段啊
    ****/
    lastbox=boxOperator.bestWindow;
    lastconf=1;
    lastvalid=true;

    classifier.prepare( boxOperator.allScaledSizes );

    /* 生成数据 */
    patchGenerator = PatchGenerator(
        0, 0, noiseInit, true, 1 - scaleInit, 1 + scaleInit,
        -angleInit*CV_PI / 180, angleInit*CV_PI / 180,
        -angleInit*CV_PI / 180, angleInit*CV_PI / 180 );

    boxOperator.iisum.create( frame.rows + 1, frame.cols + 1, CV_32F );
    boxOperator.iisqsum.create( frame.rows + 1, frame.cols + 1, CV_64F );
    integral( frame, boxOperator.iisum, boxOperator.iisqsum );

    Scalar stdev, mean;
    meanStdDev( frame( boxOperator.bestWindow ), mean, stdev );

    classifier.varClassifierTh = powf( stdev.val[0], 2 ) * 0.5f;

    classifier.generatePositiveData(
        frame, classifier.numWrapsInit, boxOperator, patchGenerator );
    classifier.generateNegativeData(
        frame, boxOperator);

    /* 处理数据并训练分类器 */
    classifier.makeTrainAndTestDataThenTrain( );
}

对上述为啥不用userBox 而是用 BestWindow 的疑问的解答：

这里写图片描述

初始化阶段的 userBox 和 bestWindow 本质上差不多：bestWindow 是所有扫描窗口中与 userBox 重叠度最大的那个窗口；后续的正负样本都取自扫描窗口网格，所以这里用网格上的 bestWindow 来代替 userBox。

下面我们逐句分析Tracker的代码:

getAllScanWindowOfFrame

输入1：frame 代表第一帧图像
输入2：userBox 代表初始的矩形框（Rect）


void BoxOperator::getAllScanWindowOfFrame( const Mat &frame, const Rect &userBox )
{
    // 所有的缩放尺度，由尺度缩放系数构造，为[1.2^-10, 1.2^10]，一共21种缩放尺度
    const float SCALES[] ={
        0.16151f, 0.19381f, 0.23257f, 0.27908f, 0.33490f,
        0.40188f, 0.48225f, 0.57870f, 0.69444f, 0.83333f,
        1.00000f, 1.20000f, 1.44000f, 1.72800f, 2.07360f,
        2.48832f, 2.98598f, 3.58318f, 4.29982f, 5.15978f,
        6.19174f };

    Size scaledSize;
    BoundingBox bbox;
    int cnt = 0;

    // 对于每一种缩放尺度
    for ( int s = 0; s < 21; s++ )
    {
        /*********************代表缩放后的宽度************************/
        int scaledWidth  = (int)roundf( userBox.width * SCALES[s] );
         /*********************代表缩放后的高度************************/
        int scaledHeight = (int)roundf( userBox.height * SCALES[s] );
         /****************变换权值：宽度和高度较小值*********************/
        int shiftWeight  = min( scaledWidth, scaledHeight );

        /*    
        ***********筛选出不合适的padding size 跳过****************
              1) 不满足最小窗尺寸：minWindowSize = 15
              2) 缩放后的宽度比原图像帧的宽度还大
              3) 缩放后的高度比原图像帧的长度还大
        *********************************************************
        */
        if ( shiftWeight < minWindowSize ||
            scaledWidth > frame.cols ||
            scaledHeight > frame.rows )
        {
            continue;
        }
        /************* 保留合适的scaledSize **********/
        scaledSize.width  = scaledWidth;
        scaledSize.height = scaledHeight;
        /***********保存所有的ScaledSize*************/
        allScaledSizes.push_back( scaledSize );
        /***************设置扫描步长******************/
        int step = (int)roundf( shiftWeight * scanningShift );
        for ( int y=1; y < frame.rows - scaledHeight; y += step )
        {
            for ( int x=1; x < frame.cols - scaledWidth; x += step )
            {
                bbox.x = x;
                bbox.y = y;
                bbox.width   = scaledWidth;
                bbox.height  = scaledHeight;
                // 保存与初始输入窗口box的重叠度
                bbox.overlap = bbOverlap( bbox, BoundingBox( userBox ) );
                // 记录扫描窗口的id
                bbox.scaleId = cnt;
                // 把所有的窗口集中到一个容器内
                allScanningWindows.push_back( bbox );
            }
        }
        cnt++;
    }
}

classifyScanningWindows

输入1：

/* Number of good windows to keep when initialising the classifier */
    int numClosestInit          = 10;

 输入2：
  userBox 代表初始的矩形框 Rect（注意：在函数签名中 userBox 是第一个参数，需要保留的好窗口数量 closestNum 是第二个参数）

void BoxOperator::classifyScanningWindows( const Rect &userBox, int closestNum )
{
    float maxOverlap = 0;
    for ( int i = 0; i < allScanningWindows.size( ); i++ )
    {
        if ( allScanningWindows[i].overlap > maxOverlap )
        {
            bestWindow = allScanningWindows[i];
            maxOverlap = bestWindow.overlap;
        }
        /**********goodOverlapth代表好窗口重叠度  = 0.6f ************/
        if ( allScanningWindows[i].overlap > goodOverlapTh )
        {
            goodBoxIndexes.push_back( i );
        }
        /**********badOverlapth代表坏窗口重叠度  = 0.2f ************/
        else if ( allScanningWindows[i].overlap < badOverlapTh )
        {
            badBoxIndexes.push_back( i );
        }
    }

    /*  如果筛选出来的好窗口数量大于给定数量，那么选取重合度大的留下来 
        在不要求排序的前提下选取给定条件下前N大的数 */
    //  ！！！！这段代码可以用作欣赏！！！！
    if ( goodBoxIndexes.size( ) > closestNum )
    {
        nth_element(
            goodBoxIndexes.begin( ),
            goodBoxIndexes.begin( ) + closestNum,
            goodBoxIndexes.end( ),
          /*struct OComparator
           {
             OComparator( const std::vector<BoundingBox>& _grid )
                 :grid( _grid ){}

             std::vector<BoundingBox> grid;
             bool operator()( int idx1, int idx2 )
            {
              return grid[idx1].overlap > grid[idx2].overlap;
             }
           };
           */
            OComparator( allScanningWindows ) );
        goodBoxIndexes.resize( closestNum );
    }

    getGoodBoxHull( );
}

上面代码段的最后调用了 getGoodBoxHull()，它计算所有好窗口的外接矩形：

void BoxOperator::getGoodBoxHull( )
{
    int x1=INT_MAX, x2=0;
    int y1=INT_MAX, y2=0;
    int idx;
    /***
        这段代码的目的是：
        保证 x1 和y1 在 int 可以表示的范围内
        保证 x2 和 y2 是大于0的数
        ***/
    for ( int i=0; i < goodBoxIndexes.size( ); i++ )
    {
        idx= goodBoxIndexes[i];
        x1 = min( allScanningWindows[idx].x, x1 );
        y1 = min( allScanningWindows[idx].y, y1 );
        x2 = max( allScanningWindows[idx].x + allScanningWindows[idx].width, x2 );
        y2 = max( allScanningWindows[idx].y + allScanningWindows[idx].height, y2 );
    }

    goodBoxHull.x      = x1;
    goodBoxHull.y      = y1;
    goodBoxHull.width  = x2 - x1;
    goodBoxHull.height = y2 - y1;
}

tmp结构分配空间

// Pre-size the scratch buffers: one confidence value and one 10-entry pattern
// (all zeros) per scanning window.
tmp.conf = vector<float>( boxOperator.allScanningWindows.size( ) );
tmp.patt = vector<vector<int> >( boxOperator.allScanningWindows.size( ), vector<int>( 10, 0 ) );

其中tmp的TempStruct为

/* Scratch buffers; the tracker initialisation sizes both to one entry per scanning window. */
struct TempStruct {
    /* Per-window pattern (sized to 10 ints per window by the tracker initialisation). */
    std::vector<std::vector<int> > patt;
    /* Per-window confidence value. */
    std::vector<float> conf;
};

最近邻分类器正样本的 patchSize 为 15*15

// Allocate the patchSize x patchSize CV_64F matrix that holds the positive
// sample of the nearest-neighbour classifier.
classifier.positiveNNSample.create( classifier.patchSize, classifier.patchSize, CV_64F );

classifier.prepare(boxOperator.allScaledSizes)
关于随机蕨（random fern）的详细用法可以参考：http://www.cnblogs.com/nsnow/p/4670640.html
输入：
boxOperator.allScaledSizes是在getAllScanWindowOfFrame函数中体现的

/* Remember every accepted scaled size */
        allScaledSizes.push_back( scaledSize );

allScaledSizes 保存的是 21 种尺度中通过筛选（短边不小于最小窗口尺寸、且宽高都不超过图像大小）的窗口尺寸，因此元素个数可能少于 21 个

/*
 * Prepares the fern classifier for the given window sizes.
 *
 * - Draws totFerns * featureNumPerFern random point pairs in [0, 1) and
 *   scales each pair to every entry of `scales`, filling fernsFeatures.
 * - Sets the negative threshold to half the number of ferns.
 * - Rebuilds the posterior / positive-counter / negative-counter tables: one
 *   table per fern, each with 2^featureNumPerFern zeroed entries. The tables
 *   are reset rather than appended, so calling prepare() again does not leave
 *   stale tables from an earlier call.
 *
 * scales : accepted scanning-window sizes, one feature set is built per size
 */
void FernNNClassifier::prepare( const vector<Size> &scales )
{
    acum = 0;

    /* The original notes give 10 ferns with 13 features each. */
    const int totalFeatures = totFerns * featureNumPerFern;
    const int scaleCount    = (int)scales.size( );
    fernsFeatures = vector<vector<Feature> >( scales.size( ), vector<Feature>( totalFeatures ) );

    RNG& rng = theRNG( );
    for ( int i = 0; i < totalFeatures; i++ )
    {
        /* Relative positions of the two compared pixels. The draw order
         * (x1, y1, x2, y2) is kept so the random sequence is unchanged. */
        const float x1f = (float)rng;
        const float y1f = (float)rng;
        const float x2f = (float)rng;
        const float y2f = (float)rng;

        /* The same relative pair is mapped onto every window size. */
        for ( int s = 0; s < scaleCount; s++ )
        {
            const int x1 = (int)( x1f * scales[s].width );
            const int y1 = (int)( y1f * scales[s].height );
            const int x2 = (int)( x2f * scales[s].width );
            const int y2 = (int)( y2f * scales[s].height );
            fernsFeatures[s][i] = Feature( x1, y1, x2, y2 );
        }
    }

    fernNegativeTh = 0.5f * totFerns;

    /* Each fern produces a featureNumPerFern-bit code, so each table needs
     * 2^featureNumPerFern slots; an integer shift gives that count exactly. */
    const size_t leafCount = (size_t)1 << featureNumPerFern;
    posteriors.assign( totFerns, vector<float>( leafCount, 0.0f ) );
    positiveCounter.assign( totFerns, vector<int>( leafCount, 0 ) );
    negativeCounter.assign( totFerns, vector<int>( leafCount, 0 ) );
}

在这里继续查看Feature类的定义

/*
 * One pixel-comparison feature: two points inside a patch whose intensities
 * are compared. Coordinates are stored as int so that positions beyond 255
 * (windows wider or taller than 255 pixels) are kept as given instead of
 * wrapping around as they would in an 8-bit field.
 */
struct Feature
    {
        /* Column / row of the first and the second point, relative to the patch. */
        int x1, y1, x2, y2;
        Feature( ) : x1( 0 ), y1( 0 ), x2( 0 ), y2( 0 ) {}
        Feature( int _x1, int _y1, int _x2, int _y2 )
            : x1( _x1 ), y1( _y1 ), x2( _x2 ), y2( _y2 )
        {}
        /* True when the first point is brighter than the second one;
         * the patch is read as 8-bit pixels. */
        bool operator ()( const cv::Mat& patch ) const
        {
            return patch.at<uchar>( y1, x1 ) > patch.at<uchar>( y2, x2 );
        }
    };

关于RNG的用法可以参考博客：http://blog.csdn.net/yang_xian521/article/details/6931385
重点是：就是要写成 rng.uniform(0.f, 1.f); 而不能写成rng.uniform( 0 , 1)，因为输入为int型参数，会调用uniform（int，int），只能产生0。请大家注意使用

疑点是：为什么初始化成2的13次幂：

/* Initialise the posterior tables */
// Each fern yields a featureNumPerFern-bit code, so every per-fern table needs
// 2^featureNumPerFern entries (codes 0 .. 2^featureNumPerFern - 1); one table of
// each kind is created per fern, totFerns in total.
// Author's note: the figure in http://www.cnblogs.com/nsnow/p/4670640.html shows
// a wrong coordinate range, but the principle it illustrates is correct.
    for ( int i = 0; i < totFerns; i++ )
    {
        posteriors.push_back( vector<float>( powf( 2.0f, featureNumPerFern ), 0 ) );
        positiveCounter.push_back( vector<int>( pow( 2.0, featureNumPerFern ), 0 ) );
        negativeCounter.push_back( vector<int>( pow( 2.0, featureNumPerFern ), 0 ) );
    }

patchGenerator
现在我也不懂为什么参数要这么设置，应该和接下来的仿射变换生成样本有关，先放在这，以后补上

    // Configure the generator of warped training patches. The scale range is
    // [1 - scaleInit, 1 + scaleInit] and both angle ranges are +/- angleInit
    // degrees converted to radians.
    // NOTE(review): the argument order looks like OpenCV's legacy
    // cv::PatchGenerator (backgroundMin, backgroundMax, noiseRange, randomBlur,
    // lambdaMin, lambdaMax, thetaMin, thetaMax, phiMin, phiMax) — confirm that
    // this is the class in use.
    patchGenerator = PatchGenerator(
        0, 0, noiseInit, true, 1 - scaleInit, 1 + scaleInit,
        -angleInit*CV_PI / 180, angleInit*CV_PI / 180,
        -angleInit*CV_PI / 180, angleInit*CV_PI / 180 );

integral
这个部分是用于计算图像帧的积分图
具体细节参考博客：http://blog.csdn.net/iracer/article/details/49029239
其中参数的含义和积分图的意义如下图所示

// Integral image and squared-integral image of the frame, each one row and one
// column larger than the frame itself.
// NOTE(review): cv::integral allocates its outputs itself; with the default
// sdepth an 8-bit input yields a CV_32S sum, so the CV_32F pre-allocation is
// probably replaced — confirm against how iisum is read later.
boxOperator.iisum.create( frame.rows + 1, frame.cols + 1, CV_32F );
boxOperator.iisqsum.create( frame.rows + 1, frame.cols + 1, CV_64F );
integral( frame, boxOperator.iisum, boxOperator.iisqsum );

meanStdDev

// Mean and standard deviation of the frame pixels inside the best window.
meanStdDev( frame( boxOperator.bestWindow ), mean, stdev );

这里写图片描述

varClassifierTh
方差分类器的方差阈值

// Variance threshold = half of the variance (stdev squared) of the best window.
classifier.varClassifierTh = powf( stdev.val[0], 2 ) * 0.5f;

generatePositiveData

void FernNNClassifier::generatePositiveData( const Mat& frame, int numWarps, const BoxOperator &bop, PatchGenerator patchGenerator )
{
    Scalar mean, stdev;

    /*
     * 将frame图像bestBox区域的图像片归一化为均值为0的15*15大小的patch，
     * 存于positiveNNSample（用于最近邻分类器的正样本）中（最近邻的box的Pattern），
     * 该正样本只有一个。
     */
    NormalOperation::getPattern( frame( bop.bestWindow ), positiveNNSample, patchSize, mean, stdev );


    /************************\
    * 做仿射变换，并提取Fern特征 *
    \************************/

    Mat img, wraped;

    /* 利用高斯滤波平滑图像 */
    GaussianBlur( frame, img, Size( 9, 9 ), 1.5 );
    wraped = img( bop.goodBoxHull );
    RNG& rng = theRNG( );

    /* 获取好窗口边界框的中心点 */
    Point2f hullCenter(
        bop.goodBoxHull.x + ( bop.goodBoxHull.width - 1 ) * 0.5f,
        bop.goodBoxHull.y + ( bop.goodBoxHull.height - 1 )* 0.5f );

    vector<int> fern( totFerns );
    positiveFernSamples.clear( );

    Mat patch;
    int idx;

    for ( int i=0; i < numWarps; i++ )
    {
        if ( i > 0 )
        {
            /* 对图像进行仿射变换 */
            patchGenerator( frame, hullCenter, wraped, bop.goodBoxHull.size( ), rng );
        }

        for ( int b = 0; b < bop.goodBoxIndexes.size( ); b++ )
        {
            idx   = bop.goodBoxIndexes[b];
            patch = img( bop.allScanningWindows[idx] );
            /* 获得输入patch的13位二进制码特征 */
            getFernsFeatures(
                patch, bop.allScanningWindows[idx].scaleId,
                fern );
            /* 标记为正样本 */
            positiveFernSamples.push_back( make_pair( fern, 1 ) );
        }
    }

}

仿射变换介绍：具体详细解释见 http://blog.csdn.net/carson2005/article/details/7540936
这里写图片描述

获取13bit特征详见博客：http://www.cnblogs.com/nsnow/p/4670640.html

generateNegativeData

/*
 * Builds the negative training data from the first frame.
 *
 * - Shuffles bop.badBoxIndexes in place.
 * - For every bad window whose variance is at least varClassifierTh / 2, the
 *   fern features are extracted and appended to negativeFernSamples with
 *   label 0.
 * - The patterns of the first badPatchs shuffled bad windows become the
 *   negative samples of the nearest-neighbour classifier. If fewer bad
 *   windows exist, only that many samples are produced instead of indexing
 *   past the end of the list.
 */
void FernNNClassifier::generateNegativeData( const Mat &frame, BoxOperator &bop )
{
    /* NOTE(review): std::random_shuffle was removed in C++17; building with a
     * newer standard requires migrating to std::shuffle. */
    random_shuffle( bop.badBoxIndexes.begin( ), bop.badBoxIndexes.end( ) );

    int idx;
    vector<int> fern( totFerns );
    Mat patch;

    /* Fern negatives: use the integral images to get each window's variance
     * and keep only windows with enough variance. */
    for ( size_t j = 0; j < bop.badBoxIndexes.size( ); j++ )
    {
        idx = bop.badBoxIndexes[j];

        if ( bop.getVar( bop.allScanningWindows[idx], bop.iisum, bop.iisqsum ) < varClassifierTh * 0.5f )
        {
            continue;
        }

        patch = frame( bop.allScanningWindows[idx] );
        getFernsFeatures( patch, bop.allScanningWindows[idx].scaleId, fern );
        negativeFernSamples.push_back( make_pair( fern, 0 ) );
    }

    /* Nearest-neighbour negatives: never take more windows than exist. */
    int nnCount = badPatchs;
    const int available = (int)bop.badBoxIndexes.size( );
    if ( nnCount > available )
    {
        nnCount = available;
    }
    if ( nnCount < 0 )
    {
        nnCount = 0;
    }

    Scalar tmp1, tmp2;
    negativeNNSamples = vector<Mat>( nnCount );

    for ( int i = 0; i < nnCount; i++ )
    {
        idx = bop.badBoxIndexes[i];
        patch = frame( bop.allScanningWindows[idx] );
        NormalOperation::getPattern( patch, negativeNNSamples[i], patchSize, tmp1, tmp2 );
    }
}

TLD 目标跟踪源码理解

TLD源码解读

主函数部分

TrackerInit函数解读

猜你喜欢