ncnn Part 7: The ncnn Post-Training Quantization Trilogy - ncnn2table

Quantization Steps

Input: FP32 histogram H with 2048 bins: bin[0], ..., bin[2047]
For i in range(128, 2048):
    reference_distribution_P = [bin[0], ..., bin[i-1]]      // take first 'i' bins from H
    outliers_count = sum(bin[i], bin[i+1], ..., bin[2047])
    reference_distribution_P[i-1] += outliers_count
    P /= sum(P)                                             // normalize distribution P
    candidate_distribution_Q = quantize [bin[0], ..., bin[i-1]] into 128 levels   // explained later
    expand candidate_distribution_Q to 'i' bins             // explained later
    Q /= sum(Q)                                             // normalize distribution Q
    divergence[i] = KL_divergence(reference_distribution_P, candidate_distribution_Q)
End For
Find index 'm' for which divergence[m] is minimal
threshold = (m + 0.5) * (width of a bin)
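
The divergence minimized by this search is the Kullback-Leibler divergence between the reference distribution P (the clipped FP32 histogram) and the candidate distribution Q (the same range collapsed to 128 levels and expanded back):

D_{KL}(P \| Q) = \sum_i P(i) \log \frac{P(i)}{Q(i)}

In other words, the chosen threshold is the clipping point at which int8 quantization loses the least information about the FP32 activation distribution.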

Quantization Code

QuantNet
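
QuantNet subclasses ncnn::Net so it can walk the loaded layers and blobs directly. Its four helpers collect the names of the quantizable layers (Convolution, ConvolutionDepthWise, InnerProduct), the name of each such layer's input blob, the per-output-channel weight scales, and the network's input blob names.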

class QuantNet : public ncnn::Net
{
public:
    int get_conv_names();
    int get_conv_bottom_blob_names();
    int get_conv_weight_blob_scales();
    int get_input_names();

public:
    std::vector<std::string> conv_names;
    std::map<std::string,std::string> conv_bottom_blob_names;
    std::map<std::string,std::vector<float> > weight_scales;
    std::vector<std::string> input_names;
};

int QuantNet::get_input_names()
{
    for (size_t i=0; i<layers.size(); i++)
    {
        ncnn::Layer* layer = layers[i];
        if (layer->type == "Input")
        {
            for (size_t  j=0; j<layer->tops.size(); j++)
            {
                int blob_index = layer->tops[j];
                std::string name = blobs[blob_index].name;
                input_names.push_back(name);
            }
        }
    }

    return 0;
}

int QuantNet::get_conv_names()
{
    for (size_t i=0; i<layers.size(); i++)
    {
        ncnn::Layer* layer = layers[i];
        
        if (layer->type == "Convolution" || layer->type == "ConvolutionDepthWise" || layer->type == "InnerProduct")
        {
            std::string name = layer->name;
            conv_names.push_back(name);
        }
    }        

    return 0;
}

int QuantNet::get_conv_bottom_blob_names()
{
    // find conv bottom name or index
    for (size_t i=0; i<layers.size(); i++)
    {
        ncnn::Layer* layer = layers[i];
        
        if (layer->type == "Convolution" || layer->type == "ConvolutionDepthWise" || layer->type == "InnerProduct")
        {
            std::string name = layer->name;
            std::string bottom_blob_name = blobs[layer->bottoms[0]].name;
            conv_bottom_blob_names[name] = bottom_blob_name;
        }
    }

    return 0;
}

int QuantNet::get_conv_weight_blob_scales()
{
    for (size_t i=0; i<layers.size(); i++)
    {
        ncnn::Layer* layer = layers[i];
        
        if (layer->type == "Convolution")
        {
            std::string name = layer->name;
            const int weight_data_size_output = ((ncnn::Convolution*)layer)->weight_data_size / ((ncnn::Convolution*)layer)->num_output;
            std::vector<float> scales;

            // int8 winograd F43 needs weight data to use 6bit quantization
            bool quant_6bit = false;
            int kernel_w = ((ncnn::Convolution*)layer)->kernel_w;
            int kernel_h = ((ncnn::Convolution*)layer)->kernel_h;
            int dilation_w = ((ncnn::Convolution*)layer)->dilation_w;
            int dilation_h = ((ncnn::Convolution*)layer)->dilation_h;
            int stride_w = ((ncnn::Convolution*)layer)->stride_w;
            int stride_h = ((ncnn::Convolution*)layer)->stride_h;

            if (kernel_w == 3 && kernel_h == 3 && dilation_w == 1 && dilation_h == 1 && stride_w == 1 && stride_h == 1)
                quant_6bit = true;

            for (int n=0; n<((ncnn::Convolution*)layer)->num_output; n++)
            {
                const ncnn::Mat weight_data_n = ((ncnn::Convolution*)layer)->weight_data.range(weight_data_size_output * n, weight_data_size_output);
                const float *data_n = weight_data_n;
                float max_value = std::numeric_limits<float>::min();

                for (int i = 0; i < weight_data_size_output; i++)
                    max_value = std::max(max_value, std::fabs(data_n[i]));

                if (quant_6bit)
                    scales.push_back(31 / max_value);
                else
                    scales.push_back(127 / max_value);
            }

            weight_scales[name] = scales;
        }
        
        if (layer->type == "ConvolutionDepthWise")
        {
            std::string name = layer->name;
            const int weight_data_size_output = ((ncnn::ConvolutionDepthWise*)layer)->weight_data_size / ((ncnn::ConvolutionDepthWise*)layer)->group;
            std::vector<float> scales;

            for (int n=0; n<((ncnn::ConvolutionDepthWise*)layer)->group; n++)
            {
                const ncnn::Mat weight_data_n = ((ncnn::ConvolutionDepthWise*)layer)->weight_data.range(weight_data_size_output * n, weight_data_size_output);
                const float *data_n = weight_data_n;
                float max_value = std::numeric_limits<float>::min();

                for (int i = 0; i < weight_data_size_output; i++)
                    max_value = std::max(max_value, std::fabs(data_n[i]));

                scales.push_back(127 / max_value); 
            }

            weight_scales[name] = scales;                
        }

        if (layer->type == "InnerProduct")
        {
            std::string name = layer->name;
            const int weight_data_size_output = ((ncnn::InnerProduct*)layer)->weight_data_size / ((ncnn::InnerProduct*)layer)->num_output;
            std::vector<float> scales;

            for (int n=0; n<((ncnn::InnerProduct*)layer)->num_output; n++)
            {
                const ncnn::Mat weight_data_n = ((ncnn::InnerProduct*)layer)->weight_data.range(weight_data_size_output * n, weight_data_size_output);
                const float *data_n = weight_data_n;
                float max_value = std::numeric_limits<float>::min();

                for (int i = 0; i < weight_data_size_output; i++)
                    max_value = std::max(max_value, std::fabs(data_n[i]));

                scales.push_back(127 / max_value);
            }

            weight_scales[name] = scales;            
        }
    }              

    return 0;
}
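
To make the numbers concrete, below is a minimal sketch (not ncnn's actual quantization kernel; quantize_channel is a hypothetical helper) of the arithmetic such a per-channel scale implies when mapping FP32 weights to int8:

#include <cmath>
#include <cstdint>
#include <vector>

// Hypothetical helper: symmetric linear quantization of one output channel.
// With scale = 127 / max|w| (or 31 / max|w| in the 6-bit winograd case),
// round(w * scale) is guaranteed to land in [-127, 127] (or [-31, 31]).
static std::vector<int8_t> quantize_channel(const float* w, int n, float scale)
{
    std::vector<int8_t> q(n);
    for (int i = 0; i < n; i++)
        q[i] = static_cast<int8_t>(std::round(w[i] * scale));
    return q;
}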

QuantizeData
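
QuantizeData carries the calibration state for one activation blob: the running maximum of its absolute values, a histogram of those values (2048 bins here), and the threshold and scale produced by the KLD search.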

class QuantizeData
{
public:
    QuantizeData(std::string layer_name, int num);

    int initial_blob_max(ncnn::Mat data);
    int initial_histogram_interval();
    int initial_histogram_value();

    int normalize_histogram(); 
    int update_histogram(ncnn::Mat data);

    float compute_kl_divergence(const std::vector<float> &dist_a, const std::vector<float> &dist_b);
    int threshold_distribution(const std::vector<float> &distribution, const int target_bin=128);
    float get_data_blob_scale();

public:
    std::string name;

    float max_value;
    int num_bins;
    float histogram_interval;
    std::vector<float> histogram;
    
    float threshold;
    int threshold_bin;
    float scale;
};

QuantizeData::QuantizeData(std::string layer_name, int num)
{
    name = layer_name;
    max_value = 0.0;
    num_bins = num;
    histogram_interval = 0.0;
    histogram.resize(num_bins);
    initial_histogram_value();
}

int QuantizeData::initial_blob_max(ncnn::Mat data)
{
    int channel_num = data.c;
    int size = data.w * data.h;

    for (int q=0; q<channel_num; q++)
    {
        const float *data_n = data.channel(q);
        for(int i=0; i<size; i++)
        {
            max_value = std::max(max_value, std::fabs(data_n[i]));
        }
    }

    return 0;
}

int QuantizeData::initial_histogram_interval()
{
    histogram_interval = max_value / num_bins;

    return 0;
}

int QuantizeData::initial_histogram_value()
{
    for (size_t i=0; i<histogram.size(); i++)
    {
        histogram[i] = 0.00001;
    }

    return 0;
}

int QuantizeData::normalize_histogram() 
{
    const int length = histogram.size();
    float sum = 0;
    
    for (int i=0; i<length; i++)
        sum += histogram[i];

    for (int i=0; i<length; i++) 
        histogram[i] /= sum;

    return 0;
}

int QuantizeData::update_histogram(ncnn::Mat data)
{
    int channel_num = data.c;
    int size = data.w * data.h;

    for (int q=0; q<channel_num; q++)
    {
        const float *data_n = data.channel(q);
        for(int i=0; i<size; i++)
        {
            if (data_n[i] == 0)
                continue;

            // clamp to the last bin so values above this blob's max_value are not lost
            int index = std::min(static_cast<int>(std::abs(data_n[i]) / histogram_interval), num_bins - 1);

            histogram[index]++;
        }
    }        

    return 0;
}

float QuantizeData::compute_kl_divergence(const std::vector<float> &dist_a, const std::vector<float> &dist_b) 
{
    const size_t length = dist_a.size();
    assert(dist_b.size() == length);  // requires <cassert>
    float result = 0;

    for (size_t i=0; i<length; i++) 
    {
        if (dist_a[i] != 0) 
        {
            if (dist_b[i] == 0) 
            {
                result += 1;
            } 
            else 
            {
                result += dist_a[i] * log(dist_a[i] / dist_b[i]);
            }
        }
    }

    return result;
}

int QuantizeData::threshold_distribution(const std::vector<float> &distribution, const int target_bin) 
{
    int target_threshold = target_bin;
    float min_kl_divergence = std::numeric_limits<float>::max();
    const int length = distribution.size();

    std::vector<float> quantize_distribution(target_bin);

    float threshold_sum = 0;
    for (int threshold=target_bin; threshold<length; threshold++) 
    {
        threshold_sum += distribution[threshold];
    }

    for (int threshold=target_bin; threshold<length; threshold++) 
    {

        std::vector<float> t_distribution(distribution.begin(), distribution.begin()+threshold);
        
        t_distribution[threshold-1] += threshold_sum; 
        threshold_sum -= distribution[threshold];

        // quantize the first 'threshold' bins into target_bin (128) levels -> candidate Q
        fill(quantize_distribution.begin(), quantize_distribution.end(), 0);
        
        const float num_per_bin = static_cast<float>(threshold) / target_bin;

        for (int i=0; i<target_bin; i++) 
        {
            const float start = i * num_per_bin;
            const float end = start + num_per_bin;

            const int left_upper = ceil(start);
            if (left_upper > start) 
            {
                const float left_scale = left_upper - start;
                quantize_distribution[i] += left_scale * distribution[left_upper - 1];
            }

            const int right_lower = floor(end);

            if (right_lower < end) 
            {

                const float right_scale = end - right_lower;
                quantize_distribution[i] += right_scale * distribution[right_lower];
            }

            for (int j=left_upper; j<right_lower; j++) 
            {
                quantize_distribution[i] += distribution[j];
            }
        }

        // expand candidate Q back to 'threshold' bins so it can be compared with P
        std::vector<float> expand_distribution(threshold, 0);

        for (int i=0; i<target_bin; i++) 
        {
            const float start = i * num_per_bin;
            const float end = start + num_per_bin;

            float count = 0;

            const int left_upper = ceil(start);
            float left_scale = 0;
            if (left_upper > start) 
            {
                left_scale = left_upper - start;
                if (distribution[left_upper - 1] != 0) 
                {
                    count += left_scale;
                }
            }

            const int right_lower = floor(end);
            float right_scale = 0;
            if (right_lower < end) 
            {
                right_scale = end - right_lower;
                if (distribution[right_lower] != 0) 
                {
                    count += right_scale;
                }
            }

            for (int j=left_upper; j<right_lower; j++) 
            {
                if (distribution[j] != 0) 
                {
                    count++;
                }
            }

            if (count == 0)
                continue;  // no nonzero fp32 bins under this quantized bin

            const float expand_value = quantize_distribution[i] / count;

            if (left_upper > start) 
            {
                if (distribution[left_upper - 1] != 0) 
                {
                    expand_distribution[left_upper - 1] += expand_value * left_scale;
                }
            }
            if (right_lower < end) 
            {
                if (distribution[right_lower] != 0) 
                {
                    expand_distribution[right_lower] += expand_value * right_scale;
                }
            }
            for (int j=left_upper; j<right_lower; j++) 
            {
                if (distribution[j] != 0) 
                {
                    expand_distribution[j] += expand_value;
                }
            }
        }

        // kl
        float kl_divergence = compute_kl_divergence(t_distribution, expand_distribution);

        // keep the threshold with the smallest KL divergence
        if (kl_divergence < min_kl_divergence) 
        {
            min_kl_divergence = kl_divergence;
            target_threshold = threshold;
        }
    }

    return target_threshold;
}

float QuantizeData::get_data_blob_scale()
{   
    normalize_histogram();
    threshold_bin = threshold_distribution(histogram);
    threshold = (threshold_bin + 0.5) * histogram_interval;
    scale = 127 / threshold;
    return scale;
}
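
As a worked example with made-up numbers: if the largest activation seen during calibration is max_value = 8.0, then histogram_interval = 8.0 / 2048 ≈ 0.0039. If the KLD search returns threshold_bin = 1536, the saturation threshold is 1536.5 × 0.0039 ≈ 6.0 and the stored activation scale is 127 / 6.0 ≈ 21.2; activations above 6.0 saturate to 127.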

post_training_quantize
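
post_training_quantize is the driver. It loads the model, writes the weight scales to the table, then makes two passes over the calibration images: one to find each blob's maximum absolute value (which fixes the histogram interval), one to fill the histograms. Finally it runs the KLD search per blob and appends the activation scales to the table.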

static int post_training_quantize(const std::vector<std::string>& filenames, const char* param_path, const char* bin_path, const char* table_path, struct PreParam pre_param)
{
    int size = filenames.size();

    QuantNet net;
    net.opt = g_default_option;

    net.load_param(param_path);
    net.load_model(bin_path);

    float mean_vals[3], norm_vals[3];
    int width = pre_param.width;
    int height = pre_param.height;
    bool swapRB = pre_param.swapRB;

    mean_vals[0] = pre_param.mean[0];
    mean_vals[1] = pre_param.mean[1];
    mean_vals[2] = pre_param.mean[2];

    norm_vals[0] = pre_param.norm[0];
    norm_vals[1] = pre_param.norm[1];
    norm_vals[2] = pre_param.norm[2];

    g_blob_pool_allocator.clear();
    g_workspace_pool_allocator.clear();

    net.get_input_names();
    net.get_conv_names();
    net.get_conv_bottom_blob_names();
    net.get_conv_weight_blob_scales();

    if (net.input_names.empty())
    {
        fprintf(stderr, "no [Input] layer found, check your ncnn.param\n");
        return -1;
    }
    
    FILE *fp = fopen(table_path, "w");
    if (!fp)
    {
        fprintf(stderr, "fopen %s failed\n", table_path);
        return -1;
    }

    // save quantization scale of weight 
    printf("====> Quantize the parameters.\n");    
    for (size_t i=0; i<net.conv_names.size(); i++)
    {
        std::string layer_name = net.conv_names[i];
        std::string blob_name = net.conv_bottom_blob_names[layer_name];
        std::vector<float> weight_scale_n = net.weight_scales[layer_name];

        fprintf(fp, "%s_param_0 ", layer_name.c_str());
        for (size_t j=0; j<weight_scale_n.size(); j++)
            fprintf(fp, "%f ", weight_scale_n[j]);
        fprintf(fp, "\n");        
    }

    // initial quantization data
    std::vector<QuantizeData> quantize_datas;
    
    for (size_t i=0; i<net.conv_names.size(); i++)
    {
        std::string layer_name = net.conv_names[i];

        QuantizeData quantize_data(layer_name, 2048);
        quantize_datas.push_back(quantize_data);
    }    

    // step 1 count the max value
    printf("====> Quantize the activation.\n"); 
    printf("    ====> step 1 : find the max value.\n");

    for (size_t i=0; i<filenames.size(); i++)
    {
        std::string img_name = filenames[i];

        if ((i+1)%100 == 0)
            fprintf(stderr, "          %d/%d\n", (int)(i+1), (int)size);

#if OpenCV_VERSION_MAJOR > 2
        cv::Mat bgr = cv::imread(img_name, cv::IMREAD_COLOR);
#else
        cv::Mat bgr = cv::imread(img_name, CV_LOAD_IMAGE_COLOR);
#endif
        if (bgr.empty())
        {
            fprintf(stderr, "cv::imread %s failed\n", img_name.c_str());
            return -1;
        }

        ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, swapRB ? ncnn::Mat::PIXEL_BGR2RGB : ncnn::Mat::PIXEL_BGR, bgr.cols, bgr.rows, width, height);
        in.substract_mean_normalize(mean_vals, norm_vals);

        ncnn::Extractor ex = net.create_extractor();
        ex.input(net.input_names[0].c_str(), in);

        for (size_t i=0; i<net.conv_names.size(); i++)
        {
            std::string layer_name = net.conv_names[i];
            std::string blob_name = net.conv_bottom_blob_names[layer_name];

            ncnn::Mat out;
            ex.extract(blob_name.c_str(), out);  

            for (size_t j=0; j<quantize_datas.size(); j++)
            {
                if (quantize_datas[j].name == layer_name)
                {
                    quantize_datas[j].initial_blob_max(out);
                    break;
                }
            }
        }         
    }

    // step 2 histogram_interval
    printf("    ====> step 2 : generate the histogram_interval.\n");
    for (size_t i=0; i<net.conv_names.size(); i++)
    {
        std::string layer_name = net.conv_names[i];

        for (size_t j=0; j<quantize_datas.size(); j++)
        {
            if (quantize_datas[j].name == layer_name)
            {
                quantize_datas[j].initial_histogram_interval();

                fprintf(stderr, "%-20s : max = %-15f interval = %-10f\n", quantize_datas[j].name.c_str(), quantize_datas[j].max_value, quantize_datas[j].histogram_interval);
                break;
            }
        }
    }    

    // step 3 histogram
    printf("    ====> step 3 : generate the histogram.\n");
    for (size_t i=0; i<filenames.size(); i++)
    {
        std::string img_name = filenames[i];

        if ((i+1)%100 == 0)
            fprintf(stderr, "          %d/%d\n", (int)(i+1), (int)size);
#if OpenCV_VERSION_MAJOR > 2
        cv::Mat bgr = cv::imread(img_name, cv::IMREAD_COLOR);
#else
        cv::Mat bgr = cv::imread(img_name, CV_LOAD_IMAGE_COLOR);
#endif
        if (bgr.empty())
        {
            fprintf(stderr, "cv::imread %s failed\n", img_name.c_str());
            return -1;
        }  

        ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, swapRB ? ncnn::Mat::PIXEL_BGR2RGB : ncnn::Mat::PIXEL_BGR, bgr.cols, bgr.rows, width, height);
        in.substract_mean_normalize(mean_vals, norm_vals);
      
        ncnn::Extractor ex = net.create_extractor();
        ex.input(net.input_names[0].c_str(), in);

        for (size_t i=0; i<net.conv_names.size(); i++)
        {
            std::string layer_name = net.conv_names[i];
            std::string blob_name = net.conv_bottom_blob_names[layer_name];

            ncnn::Mat out;
            ex.extract(blob_name.c_str(), out);  

            for (size_t j=0; j<quantize_datas.size(); j++)
            {
                if (quantize_datas[j].name == layer_name)
                {
                    quantize_datas[j].update_histogram(out);
                    break;
                }
            }
        }     
    }

    // step 4 kld
    printf("    ====> step 4 : using kld to find the best threshold value.\n");
    for (size_t i=0; i<net.conv_names.size(); i++)
    {
        std::string layer_name = net.conv_names[i];
        std::string blob_name = net.conv_bottom_blob_names[layer_name];
        fprintf(stderr, "%-20s ", layer_name.c_str());

        for (size_t j=0; j<quantize_datas.size(); j++)
        {
            if (quantize_datas[j].name == layer_name)
            {
                quantize_datas[j].get_data_blob_scale();
                fprintf(stderr, "bin : %-8d threshold : %-15f interval : %-10f scale : %-10f\n", \
                                                                quantize_datas[j].threshold_bin, \
                                                                quantize_datas[j].threshold, \
                                                                quantize_datas[j].histogram_interval, \
                                                                quantize_datas[j].scale);

                fprintf(fp, "%s %f\n", layer_name.c_str(), quantize_datas[j].scale);

                break;
            }
        }
    }

    fclose(fp);
    printf("====> Save the calibration table done.\n");

    return 0;
}

PreParam
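
PreParam bundles the preprocessing applied to every calibration image. It must match the preprocessing used at inference time, otherwise the collected activation statistics will not reflect real inputs.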

struct PreParam
{
    float mean[3];
    float norm[3];
    int width;
    int height;
    bool swapRB;
};

parse_images_dir

// Get the filenames from direct path
int parse_images_dir(const char *base_path, std::vector<std::string>& file_path)
{
    DIR *dir;
    struct dirent *ptr;

    if ((dir=opendir(base_path)) == NULL)
    {
        perror("Open dir error...");
        exit(1);
    }

    while ((ptr=readdir(dir)) != NULL)
    {
        if (strcmp(ptr->d_name, ".") == 0 || strcmp(ptr->d_name, "..") == 0)  // skip current dir and parent dir
        {
            continue;
        } 

        std::string path = base_path;
        if (!path.empty() && path[path.size()-1] != '/')
            path += '/';  // make sure the directory separator is present
        file_path.push_back(path + ptr->d_name);
    }
    closedir(dir);

    return 0;
}
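
For completeness, here is a minimal sketch of how these pieces fit together. The paths and preprocessing values are placeholders; the real ncnn2table main() reads them from the command line instead:

// Hypothetical driver; all paths and preprocessing values are placeholders.
int main()
{
    std::vector<std::string> filenames;
    parse_images_dir("images/", filenames);

    struct PreParam pre_param;
    pre_param.mean[0] = 104.f; pre_param.mean[1] = 117.f; pre_param.mean[2] = 123.f;
    pre_param.norm[0] = 1.f;   pre_param.norm[1] = 1.f;   pre_param.norm[2] = 1.f;
    pre_param.width = 224;
    pre_param.height = 224;
    pre_param.swapRB = false;

    return post_training_quantize(filenames, "model.param", "model.bin",
                                  "model.table", pre_param);
}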

Modify the CMakeLists.txt file under the ncnn/tools directory

Before (quantize disabled):

add_subdirectory(caffe)
add_subdirectory(mxnet)
add_subdirectory(onnx)
# add_subdirectory(quantize)

After (quantize enabled):

add_subdirectory(caffe)
add_subdirectory(mxnet)
add_subdirectory(onnx)
add_subdirectory(quantize)
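
After this change, re-run cmake and rebuild; the ncnn2table tool is then built under tools/quantize.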

References
1. ncnn, https://github.com/Tencent/ncnn
2. NCNN Conv Quantization Explained (Part 1), https://zhuanlan.zhihu.com/p/71881443
3. NCNN Quantization Explained (Part 2), https://zhuanlan.zhihu.com/p/72375164
4. Deep Learning Model Quantization: Theory and Practice, https://www.toutiao.com/i6776432142281867788/
