人脸检测部分解析

参考文章：https://blog.csdn.net/chiukeung/article/details/79923558

主要用到的函数：

以第一次优化后的代码进行分析，行号与原版有一些差异。

初始化

执行 frontal_face_detector hogFaceDetector = get_frontal_face_detector();调用 frontal_face_detection.h 18:

namespace dlib
{
    typedef object_detector<scan_fhog_pyramid<pyramid_down<6> > > frontal_face_detector;
    inline const std::string get_serialized_frontal_faces();

    /// Creates the frontal face detector from the serialized model string.
    ///
    /// The text returned by get_serialized_frontal_faces() is wrapped in an
    /// in-memory stream and deserialized into a fresh detector. The number of
    /// loaded sub-detectors is printed to stdout before the detector is returned.
    ///
    /// @return the deserialized detector, by value.
    inline frontal_face_detector get_frontal_face_detector()
    {
        frontal_face_detector face_detector;

        std::istringstream model_stream(get_serialized_frontal_faces());
        deserialize(face_detector, model_stream);

        // Debug trace: report how many sub-detectors the model contains.
        std::cout << __FUNCTION__ << " num of detectors = " << face_detector.num_detectors() << std::endl;

        return face_detector;
    }

执行 deserialize(detector, sin); 调用 object_detector.h 246:

    /// Restores an object_detector from a stream.
    ///
    /// Two on-disk layouts are understood:
    ///  - version 1: scanner, a single weight vector, then the overlap tester;
    ///  - version 2: scanner, overlap tester, a detector count, then that many
    ///    weight vectors.
    /// After each weight vector is read, its init() is called with the scanner.
    ///
    /// @param item  detector to fill in
    /// @param in    stream to read from
    /// @throws serialization_error for any other version number
    template <typename T>
    void deserialize (
        object_detector<T>& item,
        std::istream& in 
    )
    {
        int version = 0;
        deserialize(version, in);

        switch (version)
        {
            case 1:
            {
                // Legacy layout: exactly one weight vector, stored before the overlap tester.
                deserialize(item.scanner, in);
                item.w.resize(1);
                deserialize(item.w[0].w, in);
                item.w[0].init(item.scanner);
                deserialize(item.boxes_overlap, in);
                break;
            }

            case 2:
            {
                deserialize(item.scanner, in);
                deserialize(item.boxes_overlap, in);

                unsigned long detector_count = 0;
                deserialize(detector_count, in);
                // Debug trace: report the number of weight vectors stored in the stream.
                std::cout << __FUNCTION__ << " num_detectors = " << detector_count << std::endl;

                item.w.resize(detector_count);
                for (auto& weights : item.w)
                {
                    deserialize(weights.w, in);
                    weights.init(item.scanner);
                }
                break;
            }

            default:
                throw serialization_error("Unexpected version encountered while deserializing a dlib::object_detector object.");
        }
    }

执行object_detector.h 263 deserialize(item.scanner, in); 调用 scan_fhog_pyramid.h 492:

    /// Reads a scan_fhog_pyramid from a stream.
    ///
    /// The stream must begin with format version 1. The scanner's members are
    /// then read in the fixed order below, followed by a stored dimension count
    /// that is compared against the value the scanner computes for itself.
    ///
    /// @param item  scanner whose members are overwritten
    /// @param in    stream to read from
    /// @throws serialization_error if the version is not 1, or if the stored
    ///         dimension count differs from item.get_num_dimensions()
    template <typename T, typename U>
    void deserialize (
        scan_fhog_pyramid<T,U>& item,
        std::istream& in 
    )
    {
        int version = 0;
        deserialize(version, in);
        if (version != 1)
            throw serialization_error("Unsupported version found when deserializing a scan_fhog_pyramid object.");

        // The read order below must mirror the order used when the object was written.
        deserialize(item.fe, in);
        deserialize(item.feats, in);
        deserialize(item.cell_size, in);
        deserialize(item.padding, in);
        deserialize(item.window_width, in);
        deserialize(item.window_height, in);
        deserialize(item.max_pyramid_levels, in);
        deserialize(item.min_pyramid_layer_width, in);
        deserialize(item.min_pyramid_layer_height, in);
        deserialize(item.nuclear_norm_regularization_strength, in);

        // When developing some feature extractor, it's easy to accidentally change its
        // number of dimensions and then try to deserialize data from an older version of
        // your extractor into the current code.  This check is here to catch that kind of
        // user error.
        long dims;
        deserialize(dims, in);
        if (item.get_num_dimensions() != dims)
            throw serialization_error("Number of dimensions in serialized scan_fhog_pyramid doesn't match the expected number.");
    }

基本参数，fe, feats,cell_size,padding, window_width, window_height等

执行object_detector.h 264 deserialize(item.boxes_overlap, in); 调用 box_overlap_testing.h 132：

    /// Reads a test_box_overlap from a stream.
    ///
    /// The stream holds two doubles: the IoU threshold first, then the
    /// percent-covered threshold. item is replaced by a tester built from them.
    ///
    /// @param item  overlap tester to overwrite
    /// @param in    stream to read from
    inline void deserialize (
        test_box_overlap& item,
        std::istream& in 
    )
    {
        // Read in stream order: IoU threshold, then percent-covered threshold.
        double iou_limit;
        deserialize(iou_limit, in);

        double covered_limit;
        deserialize(covered_limit, in);

        item = test_box_overlap(iou_limit, covered_limit);
    }

执行object_detector.h 266 deserialize(num_detectors, in); 读取检测器的个数：

num_detectors = 5 //front, left,right, front left rotated, front right rotated人脸的五面

            item.w.resize(num_detectors);
            for (unsigned long i = 0; i < item.w.size(); ++i)
            {
                deserialize(item.w[i].w, in);
                item.w[i].init(item.scanner);
            }

对w进行赋值， w的size = 5

加载图像

    // Convert OpenCV image format to Dlib's image format
    cv_image<bgr_pixel> dlibIm(frameDlibHogSmall);

调用cv_image.h 26，将mat转化为dlib_image：

        /// Constructs a dlib image wrapper from an OpenCV matrix.
        ///
        /// DLIB_CASSERT verifies that the Mat's depth and channel count match
        /// pixel_type; on mismatch the message reports both actual and expected
        /// values. The Mat is then converted to an IplImage and passed to init().
        ///
        /// NOTE(review): `temp` is a local, so init() presumably copies what it
        /// needs from the IplImage header rather than keeping the pointer -- confirm.
        ///
        /// @param img  OpenCV image whose pixel layout must match pixel_type
        cv_image (const cv::Mat img) 
        {
            DLIB_CASSERT(img.depth() == cv::DataType<typename pixel_traits<pixel_type>::basic_pixel_type>::depth &&
                         img.channels() == pixel_traits<pixel_type>::num, 
                         "The pixel type you gave doesn't match pixel used by the open cv Mat object."
                         << "\n\t img.depth():    " << img.depth() 
                         << "\n\t img.cv::DataType<typename pixel_traits<pixel_type>::basic_pixel_type>::depth: " 
                            << cv::DataType<typename pixel_traits<pixel_type>::basic_pixel_type>::depth 
                         << "\n\t img.channels(): " << img.channels() 
                         << "\n\t img.pixel_traits<pixel_type>::num: " << pixel_traits<pixel_type>::num 
                         );
            // Convert the Mat to the legacy IplImage form expected by init().
            IplImage temp = img;
            init(&temp);
        }

人脸检测

    // Detect faces in the image
    std::vector<dlib::rectangle> faceRects = hogFaceDetector(dlibIm);

调用object_detector.h 528：

    /// Runs the detector on img and returns only the bounding boxes.
    ///
    /// Delegates to the overload that produces rect_detection records and then
    /// drops everything except each record's rectangle, preserving order.
    ///
    /// @param img               image to scan
    /// @param adjust_threshold  forwarded unchanged to the rect_detection overload
    /// @return one rectangle per detection, in the order produced by that overload
    template <
        typename image_scanner_type
        >
    template <
        typename image_type
        >
    std::vector<rectangle> object_detector<image_scanner_type>::
    operator() (
        const image_type& img,
        double adjust_threshold
    ) 
    {
        std::vector<rect_detection> scored;
        (*this)(img, scored, adjust_threshold);

        // Keep only the geometry of each detection.
        std::vector<rectangle> boxes;
        boxes.reserve(scored.size());
        for (const rect_detection& detection : scored)
            boxes.push_back(detection.rect);

        return boxes;
    }

执行object_detector.h 535 (*this)(img,dets,adjust_threshold); 调用 object_detector.h 438：（人脸检测核心函数，入口）

    /// Core detection entry point: scans img with every weight vector and
    /// returns the surviving detections after non-max suppression.
    ///
    /// Steps:
    ///  1. scanner.load(img) prepares the scanner for this image.
    ///  2. Each weight vector w[i] is run through scanner.detect() in parallel.
    ///     The threshold for w[i] is the element of w[i].w at index
    ///     scanner.get_num_dimensions(), shifted by adjust_threshold.
    ///  3. All detections are gathered, sorted by descending confidence when
    ///     more than one weight vector is in use, and greedily filtered with
    ///     overlaps_any_box().
    ///
    /// Each parallel task uses its own scratch list and writes its results
    /// into a slot reserved for its index. The previous version let every task
    /// write to one shared `dets` and push into one shared `dets_accum`, which
    /// is a data race.
    ///
    /// NOTE(review): this still assumes scanner.detect() may be called
    /// concurrently on the same scanner; it is declared const, but confirm it
    /// touches no shared mutable state.
    ///
    /// @param img               image to scan
    /// @param final_dets        output; cleared and filled with the kept detections
    /// @param adjust_threshold  added to every detector's own threshold
    template <
        typename image_scanner_type
        >
    template <
        typename image_type
        >
    void object_detector<image_scanner_type>::
    operator() (
        const image_type& img,
        std::vector<rect_detection>& final_dets,
        double adjust_threshold
    ) 
    {
        scanner.load(img);    // after this call the image pyramid has been built

        // One private result list per weight vector, so tasks never share a container.
        std::vector<std::vector<rect_detection> > per_detector(w.size());

//for_arm_top
        dlib::parallel_for(0,w.size(),[&](long i){
            // Task-local scratch list; must not be shared between tasks.
            std::vector<std::pair<double, rectangle> > dets;
            const double thresh = w[i].w(scanner.get_num_dimensions());
            scanner.detect(w[i].get_detect_argument(), dets, thresh + adjust_threshold);

            std::vector<rect_detection>& found = per_detector[i];
            found.reserve(dets.size());
            for (unsigned long j = 0; j < dets.size(); ++j)
            {
                rect_detection temp;
                temp.detection_confidence = dets[j].first-thresh;
                temp.weight_index = i;
                temp.rect = dets[j].second;
                found.push_back(temp);
            }
        });

        // Merge in detector order, i.e. the order a sequential loop over w would produce.
        std::vector<rect_detection> dets_accum;
        for (unsigned long i = 0; i < per_detector.size(); ++i)
            dets_accum.insert(dets_accum.end(), per_detector[i].begin(), per_detector[i].end());

        // Do non-max suppression
        final_dets.clear();
        if (w.size() > 1)
            std::sort(dets_accum.rbegin(), dets_accum.rend());
        for (unsigned long i = 0; i < dets_accum.size(); ++i)
        {
            if (overlaps_any_box(final_dets, dets_accum[i].rect))
                continue;

            final_dets.push_back(dets_accum[i]);
        }
    }

执行object_detector.h 445 scanner.load(img); 调用 scan_fhog_pyramid.h 730:

    /// Loads img into the scanner by building its FHOG feature pyramid.
    ///
    /// The detection window size is obtained from compute_fhog_window_size()
    /// and passed to impl::create_fhog_pyramid() as the filter padding, together
    /// with the scanner's pyramid limits. The result is stored in `feats`.
    ///
    /// @param img  image to build the pyramid from
    template <
        typename Pyramid_type,
        typename feature_extractor_type
        >
    template <
        typename image_type
        >
    void scan_fhog_pyramid<Pyramid_type,feature_extractor_type>::
    load (
        const image_type& img
    )
    {
        // Detection window size; the author observed 10*10 for the face model.
        unsigned long window_cols, window_rows;
        compute_fhog_window_size(window_cols, window_rows);

        // Argument order matters: rows (height) come before columns (width).
        impl::create_fhog_pyramid<Pyramid_type>(
            img, fe, feats, cell_size,
            window_rows, window_cols,
            min_pyramid_layer_width, min_pyramid_layer_height,
            max_pyramid_levels);
    }

执行scan_fhog_pyramid.h 735 compute_fhog_window_size(width,height); 调用scan_fhog_pyramid.h 356

    private:
        inline void compute_fhog_window_size(
            unsigned long& width,
            unsigned long& height
        ) const
        {
            const rectangle rect = centered_rect(point(0,0),window_width,window_height); //得到中心点为point的矩形
            const rectangle temp = grow_rect(fe.image_to_feats(rect, cell_size, 1, 1), padding);
            width = temp.width();
            height = temp.height();
        }

执行scan_fhog_pyramid.h 362 const rectangle temp = grow_rect(...); 其中作为参数的 fe.image_to_feats(...) 先被求值，调用scan_fhog_pyramid.h 22

    public:
        /// Maps a rectangle from image coordinates to feature coordinates.
        ///
        /// Pure forwarder: passes all arguments unchanged to image_to_fhog()
        /// and returns its result.
        ///
        /// @param rect                 rectangle in image coordinates
        /// @param cell_size            forwarded to image_to_fhog()
        /// @param filter_rows_padding  forwarded to image_to_fhog()
        /// @param filter_cols_padding  forwarded to image_to_fhog()
        /// @return the rectangle returned by image_to_fhog()
        inline rectangle image_to_feats (
            const rectangle& rect,
            int cell_size,
            int filter_rows_padding,
            int filter_cols_padding
        ) const
        {
            return image_to_fhog(rect, cell_size, filter_rows_padding, filter_cols_padding);
        }

执行scan_fhog_pyramid.h 29 return image_to_fhog(...); 调用fhog.h 1187:

    /// Maps a rectangle from image coordinates to FHOG coordinates by mapping
    /// its top-left and bottom-right corners with the point overload.
    ///
    /// @param rect                 rectangle in image coordinates
    /// @param cell_size            must be > 0
    /// @param filter_rows_padding  must be > 0
    /// @param filter_cols_padding  must be > 0
    /// @return rectangle spanned by the two mapped corners
    inline rectangle image_to_fhog (
        const rectangle& rect,
        int cell_size = 8,
        int filter_rows_padding = 1,
        int filter_cols_padding = 1
    ) 
    {
        // make sure requires clause is not broken
        DLIB_ASSERT( cell_size > 0 &&
            filter_rows_padding > 0 &&
            filter_cols_padding > 0 ,
            "\t rectangle image_to_fhog()"
            << "\n\t Invalid inputs were given to this function. "
            << "\n\t cell_size: " << cell_size 
            << "\n\t filter_rows_padding: " << filter_rows_padding 
            << "\n\t filter_cols_padding: " << filter_cols_padding 
        );

        // Map each corner on its own, then rebuild the rectangle from them.
        const point mapped_top_left =
            image_to_fhog(rect.tl_corner(),cell_size,filter_rows_padding,filter_cols_padding);
        const point mapped_bottom_right =
            image_to_fhog(rect.br_corner(),cell_size,filter_rows_padding,filter_cols_padding);

        return rectangle(mapped_top_left, mapped_bottom_right);
    }

执行fhog.h 1205 return rectangle(image_to_fhog); 调用fhog.h 1161:

    /// Maps a point from image coordinates to FHOG coordinates.
    ///
    /// The point is shifted by the one pixel image border, divided by the cell
    /// size, shifted by the one cell HOG border, and finally offset by half of
    /// (filter padding - 1) in each direction.
    ///
    /// @param p                    point in image coordinates
    /// @param cell_size            must be > 0
    /// @param filter_rows_padding  must be > 0
    /// @param filter_cols_padding  must be > 0
    /// @return the corresponding point in FHOG coordinates
    inline point image_to_fhog (
        point p,
        int cell_size = 8,
        int filter_rows_padding = 1,
        int filter_cols_padding = 1
    )
    {
        // make sure requires clause is not broken
        DLIB_ASSERT( cell_size > 0 &&
            filter_rows_padding > 0 &&
            filter_cols_padding > 0 ,
            "\t point image_to_fhog()"
            << "\n\t Invalid inputs were given to this function. "
            << "\n\t cell_size: " << cell_size 
            << "\n\t filter_rows_padding: " << filter_rows_padding 
            << "\n\t filter_cols_padding: " << filter_cols_padding 
        );

        const point one_unit_border(1,1);
        const point filter_offset((filter_cols_padding-1)/2,(filter_rows_padding-1)/2);

        // There is a one pixel border around the image.
        p -= one_unit_border;
        // There is also a 1 "cell" border around the HOG image formation.
        return p/cell_size - one_unit_border + filter_offset;
    }

// scan_fhog_pyramid.h 362 fe.image_to_feats(...)结束

执行scan_fhog_pyramid.h 362 const rectangle temp = grow_rect(...);

            const rectangle temp = grow_rect(fe.image_to_feats(rect, cell_size, 1, 1), padding);

调用 rectangle.h 614:

    /// Grows a rectangle by num on every side.
    ///
    /// Implemented as shrink_rect() with the amount negated.
    ///
    /// @param rect  rectangle to grow
    /// @param num   amount passed, negated, to shrink_rect()
    /// @return the result of shrink_rect(rect, -num)
    inline const rectangle grow_rect (
        const rectangle& rect,
        long num 
    )
    {
        return shrink_rect(rect, -num);
    }

调用 rectangle.h 604:

    /// Shrinks a rectangle by num on every side.
    ///
    /// left and top are increased by num, right and bottom are decreased by
    /// num. A negative num therefore grows the rectangle.
    ///
    /// @param rect  rectangle to shrink
    /// @param num   amount to move each edge inwards
    /// @return the adjusted rectangle
    inline const rectangle shrink_rect (
        const rectangle& rect,
        long num 
    )
    {
        const long new_left   = rect.left()+num;
        const long new_top    = rect.top()+num;
        const long new_right  = rect.right()-num;
        const long new_bottom = rect.bottom()-num;
        return rectangle(new_left, new_top, new_right, new_bottom);
    }

// scan_fhog_pyramid.h 735 compute_fhog_window_size(width,height)结束

执行scan_fhog_pyramid.h 736 impl::create_fhog_pyramid<Pyramid_type>(...); 调用 scan_fhog_pyramid.h 620:（核心函数）

	/// Builds the FHOG feature pyramid for img and stores it in feats.
	///
	/// The number of levels is found by repeatedly applying pyr.rect_down() to
	/// the image rectangle until it falls below the minimum layer size or
	/// max_pyramid_levels is reached. feats is then resized to that count.
	///
	/// With the "#if 1" branch active, all down-sampled images are produced
	/// first and feature extraction is delegated to fill_in_feats() worker
	/// threads that share one parameter block. The "#else" branch is the
	/// single-threaded implementation, kept for reference.
	///
	/// Changes from the previous version of the threaded branch:
	///  - the plane-count assertion now runs after the workers finish; it used
	///    to inspect feats[0] before anything had written to it;
	///  - the worker count is a compile-time constant, so the pthread_t array
	///    is no longer a variable-length array;
	///  - pthread_create() results are checked and only started threads are
	///    joined;
	///  - the pyramid loop uses an unsigned index.
	///
	/// @param img                       input image
	/// @param fe                        feature extractor
	/// @param feats                     output: one feature-plane array per level
	/// @param cell_size                 forwarded to the feature extractor
	/// @param filter_rows_padding       forwarded to the feature extractor
	/// @param filter_cols_padding       forwarded to the feature extractor
	/// @param min_pyramid_layer_width   stop once a level would be narrower
	/// @param min_pyramid_layer_height  stop once a level would be shorter
	/// @param max_pyramid_levels        upper bound on the number of levels
	template <
            typename pyramid_type,
            typename image_type,
            typename feature_extractor_type
            >
        void create_fhog_pyramid (
            const image_type& img,
            const feature_extractor_type& fe,
            array<array<array2d<float> > >& feats,
            int cell_size,
            int filter_rows_padding,
            int filter_cols_padding,
            unsigned long min_pyramid_layer_width,
            unsigned long min_pyramid_layer_height,
            unsigned long max_pyramid_levels
        )
        {
            unsigned long levels = 0;
            rectangle rect = get_rect(img);
            // Count the pyramid levels.  (Author's note: the smallest layer is 64*64.)
            // figure out how many pyramid levels we should be using based on the image size
            pyramid_type pyr;
            do
            {
                rect = pyr.rect_down(rect);
                ++levels;
            } while (rect.width() >= min_pyramid_layer_width && rect.height() >= min_pyramid_layer_height &&
                levels < max_pyramid_levels);

            if (feats.max_size() < levels)
                feats.set_max_size(levels);
            feats.set_size(levels);   // author's note: a 1024*1024 input gave feats.size() == 20
//for_arm_top
#if 1
            typedef typename image_traits<image_type>::pixel_type pixel_type;

            // Materialise every down-sampled image up front so the workers can
            // extract features from the levels independently.
            array<array2d<pixel_type>> image_pyr;
            image_pyr.set_max_size(levels);
            image_pyr.set_size(levels);
            assign_image(image_pyr[0],img);

            for (unsigned long i = 0; i + 1 < image_pyr.size(); ++i)
            {
                pyr(image_pyr[i], image_pyr[i+1]);
            }

            // Parameter block shared by all workers.
            // NOTE(review): feats_idx together with the mutex looks like a shared
            // work counter from which each worker claims the next level -- confirm
            // against fill_in_feats().
            param_for_fill_in_feats<pixel_type, feature_extractor_type,image_type> param;
            param.cell_size = cell_size;
            param.fe = const_cast<feature_extractor_type*>(&fe);
            param.feats = &feats;
            param.filter_cols_padding = filter_cols_padding;
            param.filter_rows_padding = filter_rows_padding;
            param.image_pyr = &image_pyr;
            param.mutex = PTHREAD_MUTEX_INITIALIZER;
            param.feats_idx = 0;

            const int num_of_thread = 3;
            pthread_t fill_in_fe_thread[num_of_thread];
            int started = 0;
            for (int i = 0; i < num_of_thread; i++)
            {
                // Only remember threads that really started; joining a handle
                // that was never initialised is undefined behaviour.
                if (pthread_create(&fill_in_fe_thread[started], NULL,
                        &fill_in_feats<pixel_type, feature_extractor_type, image_type>, (void*)&param) == 0)
                {
                    ++started;
                }
            }
            if (started == 0)
            {
                // No worker could be created: do the work on the calling thread
                // so feats is still filled in.
                fill_in_feats<pixel_type, feature_extractor_type, image_type>((void*)&param);
            }
            for (int i = 0; i < started; i++)
            {
                pthread_join(fill_in_fe_thread[i], NULL);
            }

            // Checked only now, once the workers have written the feature planes.
            DLIB_ASSERT(feats[0].size() == fe.get_num_planes(), 
                "Invalid feature extractor used with dlib::scan_fhog_pyramid.  The output does not have the \n"
                "indicated number of planes.");
#else
//for_arm_below
            // build our feature pyramid
            fe(img, feats[0], cell_size,filter_rows_padding,filter_cols_padding);
            DLIB_ASSERT(feats[0].size() == fe.get_num_planes(), 
                "Invalid feature extractor used with dlib::scan_fhog_pyramid.  The output does not have the \n"
                "indicated number of planes.");

            if (feats.size() > 1)
            {
                typedef typename image_traits<image_type>::pixel_type pixel_type;
                array2d<pixel_type> temp1, temp2;
                pyr(img, temp1);
                fe(temp1, feats[1], cell_size,filter_rows_padding,filter_cols_padding);
                swap(temp1,temp2);

                for (unsigned long i = 2; i < feats.size(); ++i)
                {
                    pyr(temp2, temp1);
                    fe(temp1, feats[i], cell_size,filter_rows_padding,filter_cols_padding);
                    swap(temp1,temp2);
                }
            }
#endif
        }

执行 scan_fhog_pyramid.h 696 fe(img, feats[0], cell_size,filter_rows_padding,filter_cols_padding);调用scan_fhog_pyramid.h 45:（优化后，未执行该语句，转而执行 673 param.fe = (feature_extractor_type*)&fe;）

        /// Extracts FHOG features from img into hog.
        ///
        /// Pure forwarder: passes every argument unchanged to
        /// extract_fhog_features().
        ///
        /// @param img                  image to extract features from
        /// @param hog                  output feature planes
        /// @param cell_size            forwarded to extract_fhog_features()
        /// @param filter_rows_padding  forwarded to extract_fhog_features()
        /// @param filter_cols_padding  forwarded to extract_fhog_features()
        template <
            typename image_type
            >
        void operator()(
            const image_type& img, 
            dlib::array<array2d<float> >& hog, 
            int cell_size,
            int filter_rows_padding,
            int filter_cols_padding
        ) const
        {
            extract_fhog_features(img,hog,cell_size,filter_rows_padding,filter_cols_padding);
        }

执行 scan_fhog_pyramid.h 53 extract_fhog_features(...); 调用fhog.h 1085 :

(优化前后，都是执行default_fhog_feature_extractor、Feature_extractor_type)

    /// Extracts FHOG features from img into an array of planes.
    ///
    /// The work is done by impl_fhog::impl_extract_fhog_features(). If that
    /// leaves hog empty, hog is resized to 31 (empty) planes so callers always
    /// see the same number of planes.
    ///
    /// @param img                  input image
    /// @param hog                  output array of feature planes
    /// @param cell_size            forwarded to the implementation
    /// @param filter_rows_padding  forwarded to the implementation
    /// @param filter_cols_padding  forwarded to the implementation
    template <
        typename image_type, 
        typename T, 
        typename mm1, 
        typename mm2
        >
    void extract_fhog_features(
        const image_type& img, 
        dlib::array<array2d<T,mm1>,mm2>& hog, 
        int cell_size = 8,
        int filter_rows_padding = 1,
        int filter_cols_padding = 1
    ) 
    {
        impl_fhog::impl_extract_fhog_features(img, hog, cell_size, filter_rows_padding, filter_cols_padding);

        // If the image is too small then the above function outputs an empty feature map.
        // But to make things very uniform in usage we require the output to still have the
        // 31 planes (but they are just empty).
        const bool nothing_extracted = (hog.size() == 0);
        if (nothing_extracted)
        {
            hog.resize(31);
        }
    }

执行fhog.h 1093 impl_fhog::impl_extract_fhog_features(); 调用fhog.h 704：（很长，核心HOG计算函数）

        /// Core FHOG computation (body elided in these notes).
        ///
        /// Only the prologue is shown: the input is wrapped in a read-only image
        /// view and the three size arguments are asserted to be positive.
        ///
        /// @param img_                 input image
        /// @param hog                  output feature container
        /// @param cell_size            must be > 0
        /// @param filter_rows_padding  must be > 0
        /// @param filter_cols_padding  must be > 0
        template <
            typename image_type, 
            typename out_type
            >
        void impl_extract_fhog_features(
            const image_type& img_, 
            out_type& hog, 
            int cell_size,
            int filter_rows_padding,
            int filter_cols_padding
        ) 
        {
            // Read-only view over the input image.
            const_image_view<image_type> img(img_);
            // make sure requires clause is not broken
            DLIB_ASSERT( cell_size > 0 &&
                         filter_rows_padding > 0 &&
                         filter_cols_padding > 0 ,
                "\t void extract_fhog_features()"
                << "\n\t Invalid inputs were given to this function. "
                << "\n\t cell_size: " << cell_size 
                << "\n\t filter_rows_padding: " << filter_rows_padding 
                << "\n\t filter_cols_padding: " << filter_cols_padding 
                );
            
            ...
        }

hog提取特征向量。在计算cell里每个像素的合梯度属于9个bin中的哪个bin时，将x方向梯度、y方向梯度分别与9个方向单位向量的x、y分量相乘并求和（即点积），点积（或其相反数，对应相反的方向）最大的那个方向就是该像素所属的bin。这种方法比计算方向角度(反正切值)速度快，而且可以定点化。

另外在把梯度累加到bin时，采用了插值的思想（回头补上）。hog特征为31维：18个有符号方向（0°~360°）bin + 9个无符号方向（0°~180°）bin + 4个纹理特征（分别由4个邻域块的归一化结果对各方向求和得到）。归一化后的截断系数为0.2。

// scan_fhog_pyramid.h 696 fe(img, feats[0], cell_size,filter_rows_padding,filter_cols_padding) 结束

执行 scan_fhog_pyramid.h 711 pyr(temp2, temp1); 其中 pyr 由 pyramid_type pyr; 定义，pyramid_type 是模板参数（typename pyramid_type），

即初始时定义的类型 pyramid_down<6>

// scan_fhog_pyramid.h 736 impl::create_fhog_pyramid<Pyramid_type>(...) 结束

// object_detector.h 445 scanner.load(img) 结束

执行 object_detector.h 466 const double thresh = w[i].w(scanner.get_num_dimensions()); 调用scan_fhog_pyramid.h 808：

    /// Returns the dimensionality of the detector's weight vector.
    ///
    /// This is the product of the detection window's width and height, as
    /// reported by compute_fhog_window_size(), and fe.get_num_planes().
    ///
    /// @return width * height * number of feature planes
    template <
        typename Pyramid_type,
        typename feature_extractor_type
        >
    long scan_fhog_pyramid<Pyramid_type,feature_extractor_type>::
    get_num_dimensions (
    ) const
    {
        unsigned long window_cols, window_rows;
        compute_fhog_window_size(window_cols, window_rows);

        // Author's note: 10 * 10 * 31 = 3100 for the frontal face model.
        return window_cols*window_rows*fe.get_num_planes();
    }

执行 object_detector.h 466 const double thresh = w[i].w(scanner.get_num_dimensions()); //scanner.get_num_dimensions()=3100=10*10*31

执行 object_detector.h 467 scanner.detect(w[i].get_detect_argument(), dets, thresh + adjust_threshold); 调用scan_fhog_pyramid.h 977:

    /// Scans the loaded feature pyramid with filter bank w and returns every
    /// window whose score is at least thresh.
    ///
    /// The work is split across num_of_threads workers. Each worker gets its
    /// own feature-pyramid object (configured per worker through
    /// set_private_member() / config_by_tid()) and its own output list, then
    /// runs impl::detect_from_fhog_pyramid_wrapper().
    ///
    /// NOTE(review): the exact partitioning done by config_by_tid() is not
    /// visible here; it is presumably a row split by thread id -- confirm.
    ///
    /// Changes from the previous version:
    ///  - dets is cleared on entry; it used to be appended to, so a caller
    ///    reusing one vector across calls received stale detections;
    ///  - the merged result is sorted by descending score, as the
    ///    single-threaded implementation produces and as object_detector relies
    ///    on when it has only one weight vector;
    ///  - pthread_create() results are checked: a worker that cannot be started
    ///    is run on the calling thread and is not joined;
    ///  - loop indices over container sizes are unsigned.
    ///
    /// @param w       filter bank to apply
    /// @param dets    output: (score, rectangle) pairs sorted by descending score
    /// @param thresh  minimum score for a window to be reported
    template <
        typename Pyramid_type,
        typename feature_extractor_type
        >
    void scan_fhog_pyramid<Pyramid_type,feature_extractor_type>::
    detect (
        const fhog_filterbank& w,
        std::vector<std::pair<double, rectangle> >& dets,
        const double thresh
    ) const
    {
        // make sure requires clause is not broken
        DLIB_ASSERT(is_loaded_with_image() &&
                    w.get_num_dimensions() == get_num_dimensions(), 
            "\t void scan_fhog_pyramid::detect()"
            << "\n\t Invalid inputs were given to this function "
            << "\n\t is_loaded_with_image(): " << is_loaded_with_image()
            << "\n\t w.get_num_dimensions(): " << w.get_num_dimensions()
            << "\n\t get_num_dimensions():   " << get_num_dimensions()
            << "\n\t this: " << this
            );

        // The output is overwritten, never appended to.
        dets.clear();

        unsigned long width, height;
        compute_fhog_window_size(width,height);
//for_arm_top
        const int num_of_threads = 3;

        // Per-worker feature pyramids and result lists.
        array<array<array2d<float> >> feats_dp[num_of_threads];
        std::vector<std::pair<double, rectangle> > dets_dp[num_of_threads];
        for (int h = 0; h < num_of_threads; h++)
        {
            feats_dp[h].set_max_size(feats.size());
            feats_dp[h].set_size(feats.size());
            for (unsigned long i = 0; i < feats_dp[h].size(); i++)
            {
                feats_dp[h][i].set_max_size(feats[i].size());
                feats_dp[h][i].set_size(feats[i].size());
                for (unsigned long j = 0; j < feats_dp[h][i].size(); j++)
                {
                    feats_dp[h][i][j].set_private_member(feats[i][j]);
                    feats_dp[h][i][j].config_by_tid(h,num_of_threads,height);
                }
            }
        }

        pthread_t detect_thread[num_of_threads];
        bool thread_started[num_of_threads];
        impl::param_for_detect<fhog_filterbank, feature_extractor_type> param[num_of_threads];
        for (int i = 0; i < num_of_threads; i++)
        {
            param[i].feats = &feats_dp[i];
            param[i].fe = &fe;
            param[i].w = &w;
            param[i].thresh = thresh;
            param[i].det_box_height = height-2*padding;
            param[i].det_box_width = width-2*padding;
            param[i].cell_size = cell_size;
            param[i].filter_rows_padding = height;
            param[i].filter_cols_padding = width;
            param[i].dets = &dets_dp[i];

            thread_started[i] = (pthread_create(&detect_thread[i], NULL,
                &impl::detect_from_fhog_pyramid_wrapper<fhog_filterbank, feature_extractor_type,pyramid_type>,
                (void*)&param[i]) == 0);
            if (!thread_started[i])
            {
                // Could not start a worker: process this slice on the calling
                // thread so no detections are lost.
                impl::detect_from_fhog_pyramid_wrapper<fhog_filterbank, feature_extractor_type,pyramid_type>((void*)&param[i]);
            }
        }

        for (int i = 0; i < num_of_threads; i++)
        {
            // Joining a thread that was never created is undefined behaviour.
            if (thread_started[i])
                pthread_join(detect_thread[i], NULL);
        }

        // Merge the per-worker results and restore the descending score order.
        for (int h = 0; h < num_of_threads; h++)
        {
            dets.insert(dets.end(), dets_dp[h].begin(), dets_dp[h].end());
        }
        std::sort(dets.rbegin(), dets.rend(),
            [](const std::pair<double, rectangle>& a, const std::pair<double, rectangle>& b)
            {
                return a.first < b.first;
            });
//for_arm_below
        // Original single-threaded call, kept for reference:
        //impl::detect_from_fhog_pyramid<pyramid_type>(feats, fe, w, thresh,
        //    height-2*padding, width-2*padding, cell_size, height, width, dets);
    }

其中 object_detector.h 467 std::vector<processed_weight_vector<image_scanner_type> > w;定义了参数w，抽象实现调用scan_fhog_pyramid.h 1407:

    /// Specialisation of processed_weight_vector for scan_fhog_pyramid.
    ///
    /// Holds a raw weight vector `w` together with the filter bank `fb` built
    /// from it. init() must be called after `w` is filled in so that `fb`
    /// matches; get_detect_argument() then returns `fb`.
    template <
        typename Pyramid_type,
        typename feature_extractor_type
        >
    struct processed_weight_vector<scan_fhog_pyramid<Pyramid_type,feature_extractor_type> >
    {
        using feature_vector_type = matrix<double,0,1>;
        using fhog_filterbank = typename scan_fhog_pyramid<Pyramid_type,feature_extractor_type>::fhog_filterbank;

        processed_weight_vector(){}

        /// Returns the filter bank produced by the last call to init().
        const fhog_filterbank& get_detect_argument() const
        {
            return fb;
        }

        /// Rebuilds `fb` from the current contents of `w` using scanner.
        void init (
            const scan_fhog_pyramid<Pyramid_type,feature_extractor_type>& scanner
        ) 
        {
            fb = scanner.build_fhog_filterbank(w);
        }

        feature_vector_type w;   // raw weights
        fhog_filterbank fb;      // filter bank derived from w by init()

    };

执行 scan_fhog_pyramid.h 1038 &impl::detect_from_fhog_pyramid_wrapper<...>; 调用 scan_fhog_pyramid.h 955：

	template <
            typename fhog_filterbank,
            typename feature_extractor_type,
            typename pyramid_type
            >
	void* detect_from_fhog_pyramid_wrapper (
	    void* ptr
	)
	{
	    //long long t0 = currentTimeInMilliseconds();
	    param_for_detect<fhog_filterbank, feature_extractor_type>* p = (param_for_detect<fhog_filterbank, feature_extractor_type>*)ptr;
	    impl::detect_from_fhog_pyramid<pyramid_type>(*(p->feats), *(p->fe), *(p->w), p->thresh,
	        p->det_box_height, p->det_box_width, p->cell_size, p->filter_rows_padding, p->filter_cols_padding, *(p->dets));
	    //long long t1 = currentTimeInMilliseconds();
	    //std::cout << "single thread finished time = " << t1-t0 << std::endl;				
	    return NULL;			
	}
//for_arm_below

执行 scan_fhog_pyramid.h 961 impl::detect_from_fhog_pyramid<pyramid_type>(...) ; 调用scan_fhog_pyramid.h 778:

        /// Applies filter bank w to every pyramid level and collects all
        /// windows whose saliency is at least thresh.
        ///
        /// For each hit, a det_box_width x det_box_height box centred on the
        /// hit (row shifted by the level's first plane offset()) is mapped back
        /// to image coordinates via fe.feats_to_image() and then to the
        /// original image scale via pyr.rect_up(). dets is finally sorted by
        /// descending score using compare_pair_rect.
        ///
        /// @param feats                feature pyramid, one plane array per level
        /// @param fe                   feature extractor used for coordinate mapping
        /// @param w                    filter bank
        /// @param thresh               minimum saliency for a detection
        /// @param det_box_height       detection box height in feature space
        /// @param det_box_width        detection box width in feature space
        /// @param cell_size            forwarded to fe.feats_to_image()
        /// @param filter_rows_padding  forwarded to fe.feats_to_image()
        /// @param filter_cols_padding  forwarded to fe.feats_to_image()
        /// @param dets                 output list of (score, rectangle)
        /// @param clear                when true (default) dets is emptied first
        template <
            typename pyramid_type,
            typename feature_extractor_type,
            typename fhog_filterbank
            >
        void detect_from_fhog_pyramid (
            const array<array<array2d<float> > >& feats,
            const feature_extractor_type& fe,
            const fhog_filterbank& w,
            const double thresh,
            const unsigned long det_box_height,
            const unsigned long det_box_width,
            const int cell_size,
            const int filter_rows_padding,
            const int filter_cols_padding,
            std::vector<std::pair<double, rectangle> >& dets,
            bool clear = true   //for_arm
        ) 
        {
            // Start from an empty result list unless the caller opted out.  (for_arm)
            if (clear)
            {
                dets.clear();
            }

            array2d<float> saliency_image;    // per-level saliency (score) map
            pyramid_type pyr;

            // Visit every level of the pyramid.
            for (unsigned long level = 0; level < feats.size(); ++level)
            {
                // Filter this level; `area` is the region of saliency_image to search.
                const rectangle area = apply_filters_to_fhog(w, feats[level], saliency_image);

                // now search the saliency image for any detections
                for (long row = area.top(); row <= area.bottom(); ++row)
                {
                    for (long col = area.left(); col <= area.right(); ++col)
                    {
                        const float score = saliency_image[row][col];
                        if (!(score >= thresh))
                            continue;

                        // Location of the detection in this level's image coordinates.
                        const rectangle feature_box =
                            centered_rect(point(col,row+feats[level][0].offset()),det_box_width,det_box_height);
                        rectangle rect = fe.feats_to_image(feature_box,
                            cell_size, filter_rows_padding, filter_cols_padding);
                        // Scale the box back up to the original image.
                        rect = pyr.rect_up(rect, level);
                        dets.push_back(std::make_pair(score, rect));
                    }
                }
            }

            // Order the detections by score.
            std::sort(dets.rbegin(), dets.rend(), compare_pair_rect);
        }

执行 scan_fhog_pyramid.h 892 const rectangle area = apply_filters_to_fhog(w, feats[l], saliency_image); 调用 scan_fhog_pyramid.h 417：

    namespace impl
    {
        /// Applies filter bank w to one pyramid level and writes the summed
        /// response into saliency_image.
        ///
        /// Two strategies are available. The regular 2-D filters are used when
        /// the number of separable filters exceeds
        /// filters.size() * min(rows, cols) / 3; otherwise the separable
        /// row/column filters are used. If no separable filter exists at all,
        /// saliency_image is sized like feats[0] and zero-filled.
        ///
        /// @param w               filter bank
        /// @param feats           feature planes of one pyramid level
        /// @param saliency_image  output response map
        /// @return the rectangle reported by the last filtering call (a
        ///         default-constructed rectangle if none was made)
        template <typename fhog_filterbank>
        rectangle apply_filters_to_fhog (
            const fhog_filterbank& w,
            const array<array2d<float> >& feats,
            array2d<float>& saliency_image
        )
        {
            rectangle area;
            const unsigned long num_separable_filters = w.num_separable_filters(); // author's note: 61

            // use the separable filters if they would be faster than running the regular filters.
            const bool regular_filters_are_cheaper =
                num_separable_filters > w.filters.size()*std::min(w.filters[0].nr(),w.filters[0].nc())/3.0;

            if (regular_filters_are_cheaper)
            {
                area = spatially_filter_image(feats[0], saliency_image, w.filters[0]);
                for (unsigned long plane = 1; plane < w.filters.size(); ++plane)
                {
                    // now we filter but the output adds to saliency_image rather than
                    // overwriting it.
                    spatially_filter_image(feats[plane], saliency_image, w.filters[plane], 1, false, true);
                }
                return area;
            }

            saliency_image.clear();
            array2d<float> scratch;

            // find the first filter to apply
            unsigned long plane = 0;
            while (plane < w.row_filters.size() && w.row_filters[plane].size() == 0) 
                ++plane;

            for (; plane < w.row_filters.size(); ++plane)
            {
                for (unsigned long k = 0; k < w.row_filters[plane].size(); ++k)
                {
                    // The first call overwrites saliency_image; later calls add to it.
                    // A row pass followed by a column pass is faster than a 2-D convolution.
                    const bool accumulate = (saliency_image.size() != 0);
                    area = float_spatially_filter_image_separable(feats[plane], saliency_image,
                        w.row_filters[plane][k], w.col_filters[plane][k], scratch, accumulate);
                }
            }

            // No separable filter ran: return an all-zero response map.
            if (saliency_image.size() == 0)
            {
                saliency_image.set_size(feats[0].nr(), feats[0].nc());
                assign_all_pixels(saliency_image, 0);
            }
            return area;
        }
    }

注：FILT(r,c) == col_filter(r)*row_filter(c) //二维卷积和行、列卷积间的关系

执行 scan_fhog_pyramid.h 423 const unsigned long num_separable_filters = w.num_separable_filters(); 调用 scan_fhog_pyramid.h 223:

            /// Returns the total number of separable filters, i.e. the sum of
            /// row_filters[i].size() over all i.
            unsigned long num_separable_filters() const 
            {
                unsigned long total = 0;
                for (const auto& filters_for_plane : row_filters)
                    total += filters_for_plane.size();
                return total;
            }

执行 scan_fhog_pyramid.h 451 area = float_spatially_filter_image_separable(...); 调用 spatial_filtering.h 562:

    // This overload is optimized to use SIMD instructions when filtering float images with
    // float filters.
    //
    // Filters in_img_ with a separable filter in two passes: the row filter is applied
    // along each row and the result is written to scratch_, then the column filter is
    // applied down each column of scratch_ and the result goes to out_img_.
    //
    // Parameters:
    //   in_img_     - input image; if it is empty, out_img_ is cleared and an empty
    //                 rectangle is returned.
    //   out_img_    - output image, resized to the dimensions of in_img_.  Must not be
    //                 the same object as in_img_ (checked by DLIB_ASSERT).
    //   _row_filter - non-empty vector of filter taps applied along the column index.
    //   _col_filter - non-empty vector of filter taps applied along the row index.
    //   scratch_    - working buffer, resized to the dimensions of in_img_; receives the
    //                 row-filtered intermediate result.
    //   add_to      - if false, the border of out_img_ is zeroed and every computed pixel
    //                 overwrites out_img_; if true, the border is left untouched and every
    //                 computed pixel is added to the value already stored in out_img_.
    //
    // Returns non_border, the rectangle of pixels for which the full filter footprint lies
    // inside the image and for which a response was therefore computed.
    template <
        typename in_image_type,
        typename out_image_type,
        typename EXP1,
        typename EXP2
        >
    rectangle float_spatially_filter_image_separable (
        const in_image_type& in_img_,
        out_image_type& out_img_,
        const matrix_exp<EXP1>& _row_filter,
        const matrix_exp<EXP2>& _col_filter,
        out_image_type& scratch_,
        bool add_to = false
    )
    {
        // You can only use this function with images and filters containing float
        // variables.
        COMPILE_TIME_ASSERT((is_float_filtering<in_image_type,out_image_type,EXP1,EXP2>::value == true));


        const_temp_matrix<EXP1> row_filter(_row_filter);
        const_temp_matrix<EXP2> col_filter(_col_filter);
        DLIB_ASSERT(row_filter.size() != 0 && col_filter.size() != 0 &&
            is_vector(row_filter) &&
            is_vector(col_filter),
            "\trectangle float_spatially_filter_image_separable()"
            << "\n\t Invalid inputs were given to this function."
            << "\n\t row_filter.size(): "<< row_filter.size()
            << "\n\t col_filter.size(): "<< col_filter.size()
            << "\n\t is_vector(row_filter): "<< is_vector(row_filter)
            << "\n\t is_vector(col_filter): "<< is_vector(col_filter)
        );
        DLIB_ASSERT(is_same_object(in_img_, out_img_) == false,
            "\trectangle float_spatially_filter_image_separable()"
            << "\n\tYou must give two different image objects"
        );


        const_image_view<in_image_type> in_img(in_img_);
        image_view<out_image_type> out_img(out_img_);

        // if there isn't any input image then don't do anything
        if (in_img.size() == 0)
        {
            out_img.clear();
            return rectangle();
        }
        // out_img holds the result
        // NOTE(review): out_img is resized even when add_to is true; accumulating into it
        // presumably relies on set_size() keeping the existing contents when the
        // dimensions do not change -- confirm against the image type in use.
        out_img.set_size(in_img.nr(),in_img.nc());
        // convolution region
        // figure out the range that we should apply the filter to
        // [first_row,last_row) x [first_col,last_col) is the set of output pixels whose
        // whole filter footprint stays inside the image.
        const long first_row = col_filter.size()/2;
        const long first_col = row_filter.size()/2;
        const long last_row = in_img.nr() - ((col_filter.size()-1)/2);
        const long last_col = in_img.nc() - ((row_filter.size()-1)/2);

        // rectangle() takes inclusive corners, hence the -1 on the half-open upper bounds.
        const rectangle non_border = rectangle(first_col, first_row, last_col-1, last_row-1);
        if (!add_to)
            zero_border_pixels(out_img, non_border); 

        image_view<out_image_type> scratch(scratch_);
        scratch.set_size(in_img.nr(), in_img.nc());

        // apply the row filter (row convolution)
        // Every row is processed here, including rows outside [first_row,last_row),
        // because the column pass below reads scratch rows r-first_row+m around each
        // output row.  Only columns in [first_col,last_col) of scratch are written.
        for (long r = 0; r < in_img.nr(); ++r)
        {
            long c = first_col;
            // Vectorized path: each iteration handles 8 adjacent output columns
            // (c advances by 8; simd8f presumably packs 8 floats -- confirm).
            for (; c < last_col-7; c+=8)
            {
                // Three independent accumulators so the filter taps can be consumed
                // three at a time; they are summed once all taps have been applied.
                simd8f p,p2,p3, temp = 0, temp2=0, temp3=0;
                long n = 0;
                for (; n < row_filter.size()-2; n+=3)
                {
                    // pull out the current pixel and put it into p
                    p.load(&in_img[r][c-first_col+n]);
                    p2.load(&in_img[r][c-first_col+n+1]);
                    p3.load(&in_img[r][c-first_col+n+2]);
                    temp += p*row_filter(n);
                    temp2 += p2*row_filter(n+1);
                    temp3 += p3*row_filter(n+2);
                }
                // Remaining taps when the filter length is not a multiple of 3.
                for (; n < row_filter.size(); ++n)
                {
                    // pull out the current pixel and put it into p
                    p.load(&in_img[r][c-first_col+n]);
                    temp += p*row_filter(n);
                }
                temp += temp2 + temp3;
                temp.store(&scratch[r][c]);
            }
            // Scalar path for the columns left over after the 8-wide loop.
            for (; c < last_col; ++c)
            {
                float p;
                float temp = 0;
                for (long n = 0; n < row_filter.size(); ++n)
                {
                    // pull out the current pixel and put it into p
                    p = in_img[r][c-first_col+n];
                    temp += p*row_filter(n);
                }
                scratch[r][c] = temp;
            }
        }

        // apply the column filter (column convolution)
        // Same structure as the row pass, but the taps walk down the rows of scratch
        // and the result is written (or accumulated) into out_img.
        for (long r = first_row; r < last_row; ++r)
        {
            long c = first_col;
            // Vectorized path: 8 adjacent output columns per iteration.
            for (; c < last_col-7; c+=8)
            {
                simd8f p, p2, p3, temp = 0, temp2 = 0, temp3 = 0;
                long m = 0;
                for (; m < col_filter.size()-2; m+=3)
                {
                    p.load(&scratch[r-first_row+m][c]);
                    p2.load(&scratch[r-first_row+m+1][c]);
                    p3.load(&scratch[r-first_row+m+2][c]);
                    temp += p*col_filter(m);
                    temp2 += p2*col_filter(m+1);
                    temp3 += p3*col_filter(m+2);
                }
                // Remaining taps when the filter length is not a multiple of 3.
                for (; m < col_filter.size(); ++m)
                {
                    p.load(&scratch[r-first_row+m][c]);
                    temp += p*col_filter(m);
                }
                temp += temp2+temp3;

                // save this pixel to the output image
                if (add_to == false)
                {
                    temp.store(&out_img[r][c]);
                }
                else
                {
                    // Accumulate: read the current output values, add the new
                    // response and write the sum back.
                    p.load(&out_img[r][c]);
                    temp += p;
                    temp.store(&out_img[r][c]);
                }
            }
            // Scalar path for the columns left over after the 8-wide loop.
            for (; c < last_col; ++c)
            {
                float temp = 0;
                for (long m = 0; m < col_filter.size(); ++m)
                {
                    temp += scratch[r-first_row+m][c]*col_filter(m);
                }

                // save this pixel to the output image
                if (add_to == false)
                {
                    out_img[r][c] = temp;
                }
                else
                {
                    out_img[r][c] += temp;
                }
            }
        }
        return non_border;
    }

// scan_fhog_pyramid.h 892 const rectangle area = apply_filters_to_fhog(w, feats[l], saliency_image); 结束

// scan_fhog_pyramid.h 961 impl::detect_from_fhog_pyramid<pyramid_type>(...) 结束

// object_detector.h 467 scanner.detect(w[i].get_detect_argument(), dets, thresh + adjust_threshold); 结束，多线程detect完成

// object_detector.h 478 // Do non-max suppression 完成，人脸检测完成

dlib解析（一）

人脸检测部分解析

初始化

加载图像

人脸检测

猜你喜欢