ncnn之六:ncnn量化(post-training quantization)三部曲 - ncnnoptimize

1 NetOptimize

定义类 NetOptimize 用于优化网络结构

// Network optimizer built on top of ncnn::Net.
// It declares graph-rewriting passes (fuse_*, eliminate_*, replace_*) and
// helpers for writing the resulting param / weight files.
class NetOptimize : public ncnn::Net
{
public:
    // storage type selector for saved weights
    // 0=fp32 1=fp16
    int storage_type;

public:
    // Fusion passes: each one folds a following layer into the layer that
    // produces its input and returns an int status.
    int fuse_batchnorm_scale();
    int fuse_convolution_batchnorm();
    int fuse_convolutiondepthwise_batchnorm();
    int fuse_deconvolution_batchnorm();
    int fuse_deconvolutiondepthwise_batchnorm();
    int fuse_innerproduct_batchnorm();
    int fuse_innerproduct_dropout();
    int fuse_convolution_activation();
    int fuse_convolutiondepthwise_activation();
    int fuse_deconvolution_activation();
    int fuse_deconvolutiondepthwise_activation();
    int fuse_innerproduct_activation();

    // Elimination passes: each one drops a layer that is redundant in its
    // context.
    int eliminate_dropout();
    int eliminate_pooling1x1();
    int eliminate_noop();
    int eliminate_orphaned_memorydata();
    int eliminate_flatten_after_global_pooling();
    int eliminate_reshape_after_global_pooling();
    int eliminate_flatten_after_innerproduct();
    int eliminate_reshape_before_binaryop();

    // Replacement passes (definitions not shown here; judging by the names
    // they swap a Convolution for an InnerProduct in the named context).
    int replace_convolution_with_innerproduct_after_global_pooling();
    int replace_convolution_with_innerproduct_after_innerproduct();

public:
    // Helpers that print an array-valued layer parameter with the given id
    // to the param file stream pp.
    int fprintf_param_int_array(int id, const ncnn::Mat& m, FILE* pp);
    int fprintf_param_float_array(int id, const ncnn::Mat& m, FILE* pp);

    // Helpers that write weight data (optionally preceded by a tag) to the
    // binary weight file stream bp.
    int fwrite_weight_tag_data(int tag, const ncnn::Mat& data, FILE* bp);
    int fwrite_weight_data(const ncnn::Mat& data, FILE* bp);

    // Write the optimized network to a param file and a weight file.
    int save(const char* parampath, const char* binpath);

// The following helpers are only declared when both __aarch64__ and LINUX
// are defined.
#if defined(__aarch64__) && defined(LINUX)
    void gauss_random(ncnn::Mat &m);
    void find_fastest_fp32_conv(const char* name, int w, int h, int c);
    int support_fp32_conv_type(const ncnn::Convolution* op, const ncnn::Mat& mat, const int type);
#endif
};

1-1 fuse

1-1-1 fuse_batchnorm_scale (融合 bn和 scale)

v = ((v - mean) / sqrt(var + eps) * slope + bias) * s + b = (v - mean) / sqrt(var + eps) * (slope * s) + (bias * s + b)

int NetOptimize::fuse_batchnorm_scale()
{
    const size_t layer_count = layers.size();
    for (int i=0; i<layer_count; i++)
    {
        if (layers[i]->type != "BatchNorm")
            continue;

        // BatchNorm - Scale
        int top_blob_index = layers[i]->tops[0];

        int j = i + 1;
        for (; j<layer_count; j++)
        {
            if (layers[j]->type != "Scale")
                continue;

            if (layers[j]->bottoms.size() != 1)
                continue;

            if (layers[j]->bottoms[0] == top_blob_index)
                break;
        }

        if (j == layer_count)
            continue;

        // fuse BatchNorm - Scale to BatchNorm
        ncnn::BatchNorm* batchnorm = (ncnn::BatchNorm*)layers[i];
        ncnn::Scale* scale = (ncnn::Scale*)layers[j];

        fprintf(stderr, "fuse_batchnorm_scale %s %s\n", batchnorm->name.c_str(), scale->name.c_str());

        {
//             v = ((v - mean) / sqrt(var + eps) * slope + bias) * s + b
//               =  (v - mean) / sqrt(var + eps) * (slope * s) + (bias * s + b)

            int channels = batchnorm->channels;

            float* slope = batchnorm->slope_data;
            float* bias = batchnorm->bias_data;

            for (int q=0; q<channels; q++)
            {
                slope[q] = slope[q] * scale->scale_data[q];
                if (scale->bias_term)
                    bias[q] = bias[q] * scale->scale_data[q] + scale->bias_data[q];
                else
                    bias[q] = bias[q] * scale->scale_data[q];
            }
        }

        int top_blob_index_final = scale->tops[0];
        batchnorm->tops[0] = top_blob_index_final;
        blobs[top_blob_index_final].producer = i;
        scale->type = "ncnnfused";
    }

    return 0;
}

1-1-2 fuse_convolution_batchnorm (融合 bn和 convolution)

// a = bias - slope * mean / sqrt(var + eps)
// b = slope / sqrt(var + eps)
// value = value * b + a
// 因此融合后, 对每个输出通道: weight' = weight * b, bias' = bias * b + a

fuse_convolution_batchnorm

int NetOptimize::fuse_convolution_batchnorm()
{
    const size_t layer_count = layers.size();
    for (int i=0; i<layer_count; i++)
    {
        if (layers[i]->type != "Convolution")
            continue;

        // Convolution - BatchNorm
        int top_blob_index = layers[i]->tops[0];

        int j = i + 1;
        for (; j<layer_count; j++)
        {
            if (layers[j]->type != "BatchNorm")
                continue;

            if (layers[j]->bottoms.size() != 1)
                continue;

            if (layers[j]->bottoms[0] == top_blob_index)
                break;
        }

        if (j == layer_count)
            continue;

        // fuse Convolution - BatchNorm to Convolution
        ncnn::Convolution* convolution = (ncnn::Convolution*)layers[i];
        ncnn::BatchNorm* batchnorm = (ncnn::BatchNorm*)layers[j];

        fprintf(stderr, "fuse_convolution_batchnorm %s %s\n", convolution->name.c_str(), batchnorm->name.c_str());

        {
            int channels = batchnorm->channels;
            float eps = batchnorm->eps;

            // a = bias - slope * mean / sqrt(var + eps)
            // b = slope / sqrt(var + eps)
            // value = value * b + a

            std::vector<float> a(channels);
            std::vector<float> b(channels);
            for (int i=0; i<channels; i++)
            {
                float sqrt_var = static_cast<float>(sqrt(batchnorm->var_data[i] + eps));
                a[i] = batchnorm->bias_data[i] - batchnorm->slope_data[i] * batchnorm->mean_data[i] / sqrt_var;
                b[i] = batchnorm->slope_data[i] / sqrt_var;
            }

            if (convolution->bias_term == 0)
            {
                // init bias as zero
                convolution->bias_term = 1;
                convolution->bias_data = ncnn::Mat(channels);
                convolution->bias_data.fill(0.f);
            }

            const int weight_per_outch = convolution->weight_data_size / channels;

            float* weight = convolution->weight_data;
            float* bias = convolution->bias_data;
            for (int i=0; i<channels; i++)
            {
                float* conv_weight_outch = weight + weight_per_outch * i;
                for (int j=0; j<weight_per_outch; j++)
                {
                    conv_weight_outch[j] *= b[i];
                }

                bias[i] = bias[i] * b[i] + a[i];
            }
        }

        int top_blob_index_final = batchnorm->tops[0];
        convolution->tops[0] = top_blob_index_final;
        blobs[top_blob_index_final].producer = i;
        batchnorm->type = "ncnnfused";
    }

    return 0;
}

fuse_convolutiondepthwise_batchnorm

int NetOptimize::fuse_convolutiondepthwise_batchnorm()
{
    const size_t layer_count = layers.size();
    for (int i=0; i<layer_count; i++)
    {
        if (layers[i]->type != "ConvolutionDepthWise")
            continue;

        // ConvolutionDepthWise - BatchNorm
        int top_blob_index = layers[i]->tops[0];

        int j = i + 1;
        for (; j<layer_count; j++)
        {
            if (layers[j]->type != "BatchNorm")
                continue;

            if (layers[j]->bottoms.size() != 1)
                continue;

            if (layers[j]->bottoms[0] == top_blob_index)
                break;
        }

        if (j == layer_count)
            continue;

        // fuse ConvolutionDepthWise - BatchNorm to ConvolutionDepthWise
        ncnn::ConvolutionDepthWise* convolutiondepthwise = (ncnn::ConvolutionDepthWise*)layers[i];
        ncnn::BatchNorm* batchnorm = (ncnn::BatchNorm*)layers[j];

        fprintf(stderr, "fuse_convolutiondepthwise_batchnorm %s %s\n", convolutiondepthwise->name.c_str(), batchnorm->name.c_str());

        {
            int channels = batchnorm->channels;
            float eps = batchnorm->eps;

            // a = bias - slope * mean / sqrt(var + eps)
            // b = slope / sqrt(var + eps)
            // value = value * b + a

            std::vector<float> a(channels);
            std::vector<float> b(channels);
            for (int i=0; i<channels; i++)
            {
                float sqrt_var = static_cast<float>(sqrt(batchnorm->var_data[i] + eps));
                a[i] = batchnorm->bias_data[i] - batchnorm->slope_data[i] * batchnorm->mean_data[i] / sqrt_var;
                b[i] = batchnorm->slope_data[i] / sqrt_var;
            }

            if (convolutiondepthwise->bias_term == 0)
            {
                // init bias as zero
                convolutiondepthwise->bias_term = 1;
                convolutiondepthwise->bias_data = ncnn::Mat(channels);
                convolutiondepthwise->bias_data.fill(0.f);
            }

            const int weight_per_outch = convolutiondepthwise->weight_data_size / channels;

            float* weight = convolutiondepthwise->weight_data;
            float* bias = convolutiondepthwise->bias_data;
            for (int i=0; i<channels; i++)
            {
                float* conv_weight_outch = weight + weight_per_outch * i;
                for (int j=0; j<weight_per_outch; j++)
                {
                    conv_weight_outch[j] *= b[i];
                }

                bias[i] = bias[i] * b[i] + a[i];
            }
        }

        int top_blob_index_final = batchnorm->tops[0];
        convolutiondepthwise->tops[0] = top_blob_index_final;
        blobs[top_blob_index_final].producer = i;
        batchnorm->type = "ncnnfused";
    }

    return 0;
}

fuse_deconvolution_batchnorm

int NetOptimize::fuse_deconvolution_batchnorm()
{
    const size_t layer_count = layers.size();
    for (int i=0; i<layer_count; i++)
    {
        if (layers[i]->type != "Deconvolution")
            continue;

        // Deconvolution - BatchNorm
        int top_blob_index = layers[i]->tops[0];

        int j = i + 1;
        for (; j<layer_count; j++)
        {
            if (layers[j]->type != "BatchNorm")
                continue;

            if (layers[j]->bottoms.size() != 1)
                continue;

            if (layers[j]->bottoms[0] == top_blob_index)
                break;
        }

        if (j == layer_count)
            continue;

        // fuse Deconvolution - BatchNorm to Deconvolution
        ncnn::Deconvolution* deconvolution = (ncnn::Deconvolution*)layers[i];
        ncnn::BatchNorm* batchnorm = (ncnn::BatchNorm*)layers[j];

        fprintf(stderr, "fuse_deconvolution_batchnorm %s %s\n", deconvolution->name.c_str(), batchnorm->name.c_str());

        {
            int channels = batchnorm->channels;
            float eps = batchnorm->eps;

            // a = bias - slope * mean / sqrt(var + eps)
            // b = slope / sqrt(var + eps)
            // value = value * b + a

            std::vector<float> a(channels);
            std::vector<float> b(channels);
            for (int i=0; i<channels; i++)
            {
                float sqrt_var = static_cast<float>(sqrt(batchnorm->var_data[i] + eps));
                a[i] = batchnorm->bias_data[i] - batchnorm->slope_data[i] * batchnorm->mean_data[i] / sqrt_var;
                b[i] = batchnorm->slope_data[i] / sqrt_var;
            }

            if (deconvolution->bias_term == 0)
            {
                // init bias as zero
                deconvolution->bias_term = 1;
                deconvolution->bias_data = ncnn::Mat(channels);
                deconvolution->bias_data.fill(0.f);
            }

            const int weight_per_outch = deconvolution->weight_data_size / channels;

            float* weight = deconvolution->weight_data;
            float* bias = deconvolution->bias_data;
            for (int i=0; i<channels; i++)
            {
                float* conv_weight_outch = weight + weight_per_outch * i;
                for (int j=0; j<weight_per_outch; j++)
                {
                    conv_weight_outch[j] *= b[i];
                }

                bias[i] = bias[i] * b[i] + a[i];
            }
        }

        int top_blob_index_final = batchnorm->tops[0];
        deconvolution->tops[0] = top_blob_index_final;
        blobs[top_blob_index_final].producer = i;
        batchnorm->type = "ncnnfused";
    }

    return 0;
}

fuse_deconvolutiondepthwise_batchnorm

int NetOptimize::fuse_deconvolutiondepthwise_batchnorm()
{
    const size_t layer_count = layers.size();
    for (int i=0; i<layer_count; i++)
    {
        if (layers[i]->type != "DeconvolutionDepthWise")
            continue;

        // DeconvolutionDepthWise - BatchNorm
        int top_blob_index = layers[i]->tops[0];

        int j = i + 1;
        for (; j<layer_count; j++)
        {
            if (layers[j]->type != "BatchNorm")
                continue;

            if (layers[j]->bottoms.size() != 1)
                continue;

            if (layers[j]->bottoms[0] == top_blob_index)
                break;
        }

        if (j == layer_count)
            continue;

        // fuse DeconvolutionDepthWise - BatchNorm to DeconvolutionDepthWise
        ncnn::DeconvolutionDepthWise* deconvolutiondepthwise = (ncnn::DeconvolutionDepthWise*)layers[i];
        ncnn::BatchNorm* batchnorm = (ncnn::BatchNorm*)layers[j];

        fprintf(stderr, "fuse_deconvolutiondepthwise_batchnorm %s %s\n", deconvolutiondepthwise->name.c_str(), batchnorm->name.c_str());

        {
            int channels = batchnorm->channels;
            float eps = batchnorm->eps;

            // a = bias - slope * mean / sqrt(var + eps)
            // b = slope / sqrt(var + eps)
            // value = value * b + a

            std::vector<float> a(channels);
            std::vector<float> b(channels);
            for (int i=0; i<channels; i++)
            {
                float sqrt_var = static_cast<float>(sqrt(batchnorm->var_data[i] + eps));
                a[i] = batchnorm->bias_data[i] - batchnorm->slope_data[i] * batchnorm->mean_data[i] / sqrt_var;
                b[i] = batchnorm->slope_data[i] / sqrt_var;
            }

            if (deconvolutiondepthwise->bias_term == 0)
            {
                // init bias as zero
                deconvolutiondepthwise->bias_term = 1;
                deconvolutiondepthwise->bias_data = ncnn::Mat(channels);
                deconvolutiondepthwise->bias_data.fill(0.f);
            }

            const int weight_per_outch = deconvolutiondepthwise->weight_data_size / channels;

            float* weight = deconvolutiondepthwise->weight_data;
            float* bias = deconvolutiondepthwise->bias_data;
            for (int i=0; i<channels; i++)
            {
                float* conv_weight_outch = weight + weight_per_outch * i;
                for (int j=0; j<weight_per_outch; j++)
                {
                    conv_weight_outch[j] *= b[i];
                }

                bias[i] = bias[i] * b[i] + a[i];
            }
        }

        int top_blob_index_final = batchnorm->tops[0];
        deconvolutiondepthwise->tops[0] = top_blob_index_final;
        blobs[top_blob_index_final].producer = i;
        batchnorm->type = "ncnnfused";
    }

    return 0;
}

fuse_innerproduct_batchnorm

int NetOptimize::fuse_innerproduct_batchnorm()
{
    const size_t layer_count = layers.size();
    for (int i=0; i<layer_count; i++)
    {
        if (layers[i]->type != "InnerProduct")
            continue;

        // InnerProduct - BatchNorm
        int top_blob_index = layers[i]->tops[0];

        int j = i + 1;
        for (; j<layer_count; j++)
        {
            if (layers[j]->type != "BatchNorm")
                continue;

            if (layers[j]->bottoms.size() != 1)
                continue;

            if (layers[j]->bottoms[0] == top_blob_index)
                break;
        }

        if (j == layer_count)
            continue;

        // fuse InnerProduct - BatchNorm to InnerProduct
        ncnn::InnerProduct* innerproduct = (ncnn::InnerProduct*)layers[i];
        ncnn::BatchNorm* batchnorm = (ncnn::BatchNorm*)layers[j];

        fprintf(stderr, "fuse_innerproduct_batchnorm %s %s\n", innerproduct->name.c_str(), batchnorm->name.c_str());

        {
            int channels = batchnorm->channels;
            float eps = batchnorm->eps;

            // a = bias - slope * mean / sqrt(var + eps)
            // b = slope / sqrt(var + eps)
            // value = value * b + a

            std::vector<float> a(channels);
            std::vector<float> b(channels);
            for (int i=0; i<channels; i++)
            {
                float sqrt_var = static_cast<float>(sqrt(batchnorm->var_data[i] + eps));
                a[i] = batchnorm->bias_data[i] - batchnorm->slope_data[i] * batchnorm->mean_data[i] / sqrt_var;
                b[i] = batchnorm->slope_data[i] / sqrt_var;
            }

            if (innerproduct->bias_term == 0)
            {
                // init bias as zero
                innerproduct->bias_term = 1;
                innerproduct->bias_data = ncnn::Mat(channels);
                innerproduct->bias_data.fill(0.f);
            }

            const int weight_per_outch = innerproduct->weight_data_size / channels;

            float* weight = innerproduct->weight_data;
            float* bias = innerproduct->bias_data;
            for (int i=0; i<channels; i++)
            {
                float* conv_weight_outch = weight + weight_per_outch * i;
                for (int j=0; j<weight_per_outch; j++)
                {
                    conv_weight_outch[j] *= b[i];
                }

                bias[i] = bias[i] * b[i] + a[i];
            }
        }

        int top_blob_index_final = batchnorm->tops[0];
        innerproduct->tops[0] = top_blob_index_final;
        blobs[top_blob_index_final].producer = i;
        batchnorm->type = "ncnnfused";
    }

    return 0;
}

1-1-3 fuse_innerproduct_dropout

int NetOptimize::fuse_innerproduct_dropout()
{
    const size_t layer_count = layers.size();
    for (int i=0; i<layer_count; i++)
    {
        if (layers[i]->type != "InnerProduct")
            continue;

        // InnerProduct - Dropout
        int top_blob_index = layers[i]->tops[0];

        int j = i + 1;
        for (; j<layer_count; j++)
        {
            if (layers[j]->type != "Dropout")
                continue;

            if (layers[j]->bottoms.size() != 1)
                continue;

            if (layers[j]->bottoms[0] == top_blob_index)
                break;
        }

        if (j == layer_count)
            continue;

        // fuse InnerProduct - Dropout to InnerProduct
        ncnn::InnerProduct* innerproduct = (ncnn::InnerProduct*)layers[i];
        ncnn::Dropout* dropout = (ncnn::Dropout*)layers[j];

        fprintf(stderr, "fuse_innerproduct_dropout %s %s\n", innerproduct->name.c_str(), dropout->name.c_str());

        float scale = dropout->scale;
        if (scale != 1.f)
        {
            const int num_output = innerproduct->num_output;
            const int weight_per_outch = innerproduct->weight_data_size / num_output;

            float* weight = innerproduct->weight_data;
            for (int i=0; i<num_output; i++)
            {
                float* conv_weight_outch = weight + weight_per_outch * i;
                for (int j=0; j<weight_per_outch; j++)
                {
                    conv_weight_outch[j] *= scale;
                }
            }

            if (innerproduct->bias_term)
            {
                float* bias = innerproduct->bias_data;
                for (int i=0; i<num_output; i++)
                {
                    bias[i] *= scale;
                }
            }
        }

        int top_blob_index_final = dropout->tops[0];
        innerproduct->tops[0] = top_blob_index_final;
        blobs[top_blob_index_final].producer = i;
        dropout->type = "ncnnfused";
    }

    return 0;
}

1-1-4 fuse_convolution_activation

fuse_convolution_activation

int NetOptimize::fuse_convolution_activation()
{
    const size_t layer_count = layers.size();
    for (int i=0; i<layer_count; i++)
    {
        if (layers[i]->type != "Convolution")
            continue;

        // Convolution - Activation
        int top_blob_index = layers[i]->tops[0];

        int j = i + 1;
        for (; j<layer_count; j++)
        {
            if (layers[j]->type != "ReLU" && layers[j]->type != "Clip" && layers[j]->type != "Sigmoid")
                continue;

            if (layers[j]->bottoms.size() != 1)
                continue;

            if (layers[j]->bottoms[0] == top_blob_index)
                break;
        }

        if (j == layer_count)
            continue;

        // fuse Convolution - Activation to Convolution
        ncnn::Convolution* convolution = (ncnn::Convolution*)layers[i];
        ncnn::Layer* activation = layers[j];

        fprintf(stderr, "fuse_convolution_activation %s %s\n", convolution->name.c_str(), activation->name.c_str());

        if (activation->type == "ReLU")
        {
            ncnn::ReLU* relu = (ncnn::ReLU*)activation;

            if (relu->slope == 0.f)
            {
                convolution->activation_type = 1;
            }
            else
            {
                convolution->activation_type = 2;
                convolution->activation_params = ncnn::Mat(1);
                convolution->activation_params[0] = relu->slope;
            }
        }
        else if (activation->type == "Clip")
        {
            ncnn::Clip* clip = (ncnn::Clip*)activation;

            convolution->activation_type = 3;
            convolution->activation_params = ncnn::Mat(2);
            convolution->activation_params[0] = clip->min;
            convolution->activation_params[1] = clip->max;
        }
        else if (activation->type == "Sigmoid")
        {
            convolution->activation_type = 4;
        }

        int top_blob_index_final = activation->tops[0];
        convolution->tops[0] = top_blob_index_final;
        blobs[top_blob_index_final].producer = i;
        activation->type = "ncnnfused";
    }

    return 0;
}

fuse_convolutiondepthwise_activation

int NetOptimize::fuse_convolutiondepthwise_activation()
{
    const size_t layer_count = layers.size();
    for (int i=0; i<layer_count; i++)
    {
        if (layers[i]->type != "ConvolutionDepthWise")
            continue;

        // ConvolutionDepthWise - Activation
        int top_blob_index = layers[i]->tops[0];

        int j = i + 1;
        for (; j<layer_count; j++)
        {
            if (layers[j]->type != "ReLU" && layers[j]->type != "Clip" && layers[j]->type != "Sigmoid")
                continue;

            if (layers[j]->bottoms.size() != 1)
                continue;

            if (layers[j]->bottoms[0] == top_blob_index)
                break;
        }

        if (j == layer_count)
            continue;

        // fuse ConvolutionDepthWise - Activation to ConvolutionDepthWise
        ncnn::ConvolutionDepthWise* convolutiondepthwise = (ncnn::ConvolutionDepthWise*)layers[i];
        ncnn::Layer* activation = layers[j];

        fprintf(stderr, "fuse_convolutiondepthwise_activation %s %s\n", convolutiondepthwise->name.c_str(), activation->name.c_str());

        if (activation->type == "ReLU")
        {
            ncnn::ReLU* relu = (ncnn::ReLU*)activation;

            if (relu->slope == 0.f)
            {
                convolutiondepthwise->activation_type = 1;
            }
            else
            {
                convolutiondepthwise->activation_type = 2;
                convolutiondepthwise->activation_params = ncnn::Mat(1);
                convolutiondepthwise->activation_params[0] = relu->slope;
            }
        }
        else if (activation->type == "Clip")
        {
            ncnn::Clip* clip = (ncnn::Clip*)activation;

            convolutiondepthwise->activation_type = 3;
            convolutiondepthwise->activation_params = ncnn::Mat(2);
            convolutiondepthwise->activation_params[0] = clip->min;
            convolutiondepthwise->activation_params[1] = clip->max;
        }
        else if (activation->type == "Sigmoid")
        {
            convolutiondepthwise->activation_type = 4;
        }

        int top_blob_index_final = activation->tops[0];
        convolutiondepthwise->tops[0] = top_blob_index_final;
        blobs[top_blob_index_final].producer = i;
        activation->type = "ncnnfused";
    }

    return 0;
}

fuse_deconvolution_activation

int NetOptimize::fuse_deconvolution_activation()
{
    const size_t layer_count = layers.size();
    for (int i=0; i<layer_count; i++)
    {
        if (layers[i]->type != "Deconvolution")
            continue;

        // Deconvolution - Activation
        int top_blob_index = layers[i]->tops[0];

        int j = i + 1;
        for (; j<layer_count; j++)
        {
            if (layers[j]->type != "ReLU" && layers[j]->type != "Clip" && layers[j]->type != "Sigmoid")
                continue;

            if (layers[j]->bottoms.size() != 1)
                continue;

            if (layers[j]->bottoms[0] == top_blob_index)
                break;
        }

        if (j == layer_count)
            continue;

        // fuse Deconvolution - Activation to Deconvolution
        ncnn::Deconvolution* deconvolution = (ncnn::Deconvolution*)layers[i];
        ncnn::Layer* activation = layers[j];

        fprintf(stderr, "fuse_deconvolution_activation %s %s\n", deconvolution->name.c_str(), activation->name.c_str());

        if (activation->type == "ReLU")
        {
            ncnn::ReLU* relu = (ncnn::ReLU*)activation;

            if (relu->slope == 0.f)
            {
                deconvolution->activation_type = 1;
            }
            else
            {
                deconvolution->activation_type = 2;
                deconvolution->activation_params = ncnn::Mat(1);
                deconvolution->activation_params[0] = relu->slope;
            }
        }
        else if (activation->type == "Clip")
        {
            ncnn::Clip* clip = (ncnn::Clip*)activation;

            deconvolution->activation_type = 3;
            deconvolution->activation_params = ncnn::Mat(2);
            deconvolution->activation_params[0] = clip->min;
            deconvolution->activation_params[1] = clip->max;
        }
        else if (activation->type == "Sigmoid")
        {
            deconvolution->activation_type = 4;
        }

        int top_blob_index_final = activation->tops[0];
        deconvolution->tops[0] = top_blob_index_final;
        blobs[top_blob_index_final].producer = i;
        activation->type = "ncnnfused";
    }

    return 0;
}

fuse_deconvolutiondepthwise_activation

int NetOptimize::fuse_deconvolutiondepthwise_activation()
{
    const size_t layer_count = layers.size();
    for (int i=0; i<layer_count; i++)
    {
        if (layers[i]->type != "DeconvolutionDepthWise")
            continue;

        // DeconvolutionDepthWise - Activation
        int top_blob_index = layers[i]->tops[0];

        int j = i + 1;
        for (; j<layer_count; j++)
        {
            if (layers[j]->type != "ReLU" && layers[j]->type != "Clip" && layers[j]->type != "Sigmoid")
                continue;

            if (layers[j]->bottoms.size() != 1)
                continue;

            if (layers[j]->bottoms[0] == top_blob_index)
                break;
        }

        if (j == layer_count)
            continue;

        // fuse DeconvolutionDepthWise - Activation to DeconvolutionDepthWise
        ncnn::DeconvolutionDepthWise* deconvolutiondepthwise = (ncnn::DeconvolutionDepthWise*)layers[i];
        ncnn::Layer* activation = layers[j];

        fprintf(stderr, "fuse_deconvolutiondepthwise_activation %s %s\n", deconvolutiondepthwise->name.c_str(), activation->name.c_str());

        if (activation->type == "ReLU")
        {
            ncnn::ReLU* relu = (ncnn::ReLU*)activation;

            if (relu->slope == 0.f)
            {
                deconvolutiondepthwise->activation_type = 1;
            }
            else
            {
                deconvolutiondepthwise->activation_type = 2;
                deconvolutiondepthwise->activation_params = ncnn::Mat(1);
                deconvolutiondepthwise->activation_params[0] = relu->slope;
            }
        }
        else if (activation->type == "Clip")
        {
            ncnn::Clip* clip = (ncnn::Clip*)activation;

            deconvolutiondepthwise->activation_type = 3;
            deconvolutiondepthwise->activation_params = ncnn::Mat(2);
            deconvolutiondepthwise->activation_params[0] = clip->min;
            deconvolutiondepthwise->activation_params[1] = clip->max;
        }
        else if (activation->type == "Sigmoid")
        {
            deconvolutiondepthwise->activation_type = 4;
        }

        int top_blob_index_final = activation->tops[0];
        deconvolutiondepthwise->tops[0] = top_blob_index_final;
        blobs[top_blob_index_final].producer = i;
        activation->type = "ncnnfused";
    }

    return 0;
}

fuse_innerproduct_activation

int NetOptimize::fuse_innerproduct_activation()
{
    const size_t layer_count = layers.size();
    for (int i=0; i<layer_count; i++)
    {
        if (layers[i]->type != "InnerProduct")
            continue;

        // InnerProduct - Activation
        int top_blob_index = layers[i]->tops[0];

        int j = i + 1;
        for (; j<layer_count; j++)
        {
            if (layers[j]->type != "ReLU" && layers[j]->type != "Clip" && layers[j]->type != "Sigmoid")
                continue;

            if (layers[j]->bottoms.size() != 1)
                continue;

            if (layers[j]->bottoms[0] == top_blob_index)
                break;
        }

        if (j == layer_count)
            continue;

        // fuse InnerProduct - Activation to InnerProduct
        ncnn::InnerProduct* innerproduct = (ncnn::InnerProduct*)layers[i];
        ncnn::Layer* activation = layers[j];

        fprintf(stderr, "fuse_innerproduct_activation %s %s\n", innerproduct->name.c_str(), activation->name.c_str());

        if (activation->type == "ReLU")
        {
            ncnn::ReLU* relu = (ncnn::ReLU*)activation;

            if (relu->slope == 0.f)
            {
                innerproduct->activation_type = 1;
            }
            else
            {
                innerproduct->activation_type = 2;
                innerproduct->activation_params = ncnn::Mat(1);
                innerproduct->activation_params[0] = relu->slope;
            }
        }
        else if (activation->type == "Clip")
        {
            ncnn::Clip* clip = (ncnn::Clip*)activation;

            innerproduct->activation_type = 3;
            innerproduct->activation_params = ncnn::Mat(2);
            innerproduct->activation_params[0] = clip->min;
            innerproduct->activation_params[1] = clip->max;
        }
        else if (activation->type == "Sigmoid")
        {
            innerproduct->activation_type = 4;
        }

        int top_blob_index_final = activation->tops[0];
        innerproduct->tops[0] = top_blob_index_final;
        blobs[top_blob_index_final].producer = i;
        activation->type = "ncnnfused";
    }

    return 0;
}

1-2 eliminate

1-2-1 eliminate_dropout

int NetOptimize::eliminate_dropout()
{
    const size_t layer_count = layers.size();
    for (int i=0; i<layer_count; i++)
    {
        if (layers[i]->type != "Dropout")
            continue;

        ncnn::Dropout* dropout = (ncnn::Dropout*)layers[i];
        if (dropout->scale != 1.f)
            continue;

        // Any - Dropout
        int bottom_blob_index = layers[i]->bottoms[0];

        int j = i - 1;
        for (; j>=0; j--)
        {
            if (layers[j]->type == "ncnnfused")
                continue;

            if (layers[j]->tops.size() != 1)
                continue;

            if (layers[j]->tops[0] == bottom_blob_index)
                break;
        }

        if (j == -1)
            continue;

        ncnn::Layer* any = layers[j];

        fprintf(stderr, "eliminate_dropout %s %s\n", any->name.c_str(), dropout->name.c_str());

        int top_blob_index_final = dropout->tops[0];
        any->tops[0] = top_blob_index_final;
        blobs[top_blob_index_final].producer = j;
        dropout->type = "ncnnfused";
    }

    return 0;
}

1-2-2 eliminate_pooling1x1

int NetOptimize::eliminate_pooling1x1()
{
    const size_t layer_count = layers.size();
    for (int i=0; i<layer_count; i++)
    {
        if (layers[i]->type != "Pooling")
            continue;

        ncnn::Pooling* pooling = (ncnn::Pooling*)layers[i];
        if (pooling->pad_left != 0 || pooling->pad_right != 0 || pooling->pad_top != 0 || pooling->pad_bottom != 0)
            continue;

        if (pooling->kernel_w != 1 || pooling->kernel_h != 1 || pooling->stride_w != 1 || pooling->stride_h != 1)
            continue;

        if (pooling->global_pooling != 0)
            continue;

        // Any - Pooling
        int bottom_blob_index = layers[i]->bottoms[0];

        int top_i = -1;
        int j = i - 1;
        for (; j>=0; j--)
        {
            if (layers[j]->type == "ncnnfused")
                continue;

            for (int k=0; k<layers[j]->tops.size(); k++)
            {
                if (layers[j]->tops[k] == bottom_blob_index)
                {
                    top_i = k;
                    break;
                }
            }

            if (top_i != -1)
                break;
        }

        if (j == -1)
            continue;

        ncnn::Layer* any = layers[j];

        fprintf(stderr, "eliminate_pooling1x1 %s %s\n", any->name.c_str(), pooling->name.c_str());

        int top_blob_index_final = pooling->tops[0];
        any->tops[top_i] = top_blob_index_final;
        blobs[top_blob_index_final].producer = j;
        pooling->type = "ncnnfused";
    }

    return 0;
}

1-2-3 eliminate_noop

int NetOptimize::eliminate_noop()
{
    const size_t layer_count = layers.size();
    for (int i=0; i<layer_count; i++)
    {
        if (layers[i]->type != "Noop")
            continue;

        ncnn::Layer* noop = layers[i];

        if (noop->bottoms.empty())
        {
            // Noop
            fprintf(stderr, "eliminate_noop %s\n", noop->name.c_str());

            size_t top_blob_count = noop->tops.size();
            for (int k=0; k<top_blob_count; k++)
            {
                int top_blob_index_final = noop->tops[k];
                blobs[top_blob_index_final].producer = -1;
            }
            noop->type = "ncnnfused";

            continue;
        }

        // Any - Noop
        int bottom_blob_index = layers[i]->bottoms[0];

        int j = i - 1;
        for (; j>=0; j--)
        {
            if (layers[j]->type == "ncnnfused")
                continue;

            if (layers[j]->tops.size() != 1)
                continue;

            if (layers[j]->tops[0] == bottom_blob_index)
                break;
        }

        if (j == -1)
            continue;

        ncnn::Layer* any = layers[j];

        fprintf(stderr, "eliminate_noop %s %s\n", any->name.c_str(), noop->name.c_str());

        size_t top_blob_count = std::min(noop->tops.size(), any->tops.size());
        for (int k=0; k<top_blob_count; k++)
        {
            int top_blob_index_final = noop->tops[k];
            any->tops[k] = top_blob_index_final;
            blobs[top_blob_index_final].producer = j;
        }
        noop->type = "ncnnfused";
    }

    return 0;
}

1-2-4 eliminate_orphaned_memorydata

int NetOptimize::eliminate_orphaned_memorydata()
{
    const size_t layer_count = layers.size();
    for (int i=0; i<layer_count; i++)
    {
        if (layers[i]->type != "MemoryData")
            continue;

        // MemoryData - X
        int top_blob_index = layers[i]->tops[0];

        int j = i + 1;
        for (; j<layer_count; j++)
        {
            if (layers[j]->type == "ncnnfused")
                continue;

            bool orphaned = true;
            for (int k=0; k<layers[j]->bottoms.size(); k++)
            {
                if (layers[j]->bottoms[k] == top_blob_index)
                {
                    orphaned = false;
                    break;
                }
            }

            if (!orphaned)
                break;
        }

        if (j < layer_count)
            continue;

        // assert orphaned == true
        fprintf(stderr, "eliminate_orphaned_memorydata %s\n", layers[i]->name.c_str());

        layers[i]->type = "ncnnfused";
    }

    return 0;
}

1-2-5 eliminate_reshape_after_global_pooling

int NetOptimize::eliminate_reshape_after_global_pooling()
{
    const size_t layer_count = layers.size();
    for (int i=0; i<layer_count; i++)
    {
        if (layers[i]->type != "Pooling")
            continue;

        ncnn::Pooling* pooling = (ncnn::Pooling*)layers[i];
        if (pooling->global_pooling == 0)
            continue;

        // Pooling - Reshape
        int top_blob_index = layers[i]->tops[0];

        int j = i + 1;
        for (; j<layer_count; j++)
        {
            if (layers[j]->type != "Reshape")
                continue;

            if (layers[j]->bottoms.size() != 1)
                continue;

            if (layers[j]->bottoms[0] == top_blob_index)
                break;
        }

        if (j == layer_count)
            continue;

        ncnn::Reshape* reshape = (ncnn::Reshape*)layers[j];
        if (reshape->h != -233 || reshape->c != -233 || reshape->permute != 0)
            continue;

        fprintf(stderr, "eliminate_reshape_after_global_pooling %s %s\n", pooling->name.c_str(), reshape->name.c_str());

        int top_blob_index_final = reshape->tops[0];
        pooling->tops[0] = top_blob_index_final;
        blobs[top_blob_index_final].producer = i;
        reshape->type = "ncnnfused";
    }

    return 0;
}

1-2-6 eliminate_flatten_after_global_pooling

int NetOptimize::eliminate_flatten_after_global_pooling()
{
    const size_t layer_count = layers.size();
    for (int i=0; i<layer_count; i++)
    {
        if (layers[i]->type != "Pooling")
            continue;

        ncnn::Pooling* pooling = (ncnn::Pooling*)layers[i];
        if (pooling->global_pooling == 0)
            continue;

        // Pooling - Flatten
        int top_blob_index = layers[i]->tops[0];

        int j = i + 1;
        for (; j<layer_count; j++)
        {
            if (layers[j]->type != "Flatten")
                continue;

            if (layers[j]->bottoms.size() != 1)
                continue;

            if (layers[j]->bottoms[0] == top_blob_index)
                break;
        }

        if (j == layer_count)
            continue;

        ncnn::Flatten* flatten = (ncnn::Flatten*)layers[j];

        fprintf(stderr, "eliminate_flatten_after_global_pooling %s %s\n", pooling->name.c_str(), flatten->name.c_str());

        int top_blob_index_final = flatten->tops[0];
        pooling->tops[0] = top_blob_index_final;
        blobs[top_blob_index_final].producer = i;
        flatten->type = "ncnnfused";
    }

    return 0;
}

1-2-7 eliminate_flatten_after_innerproduct

int NetOptimize::eliminate_flatten_after_innerproduct()
{
    const size_t layer_count = layers.size();
    for (int i=0; i<layer_count; i++)
    {
        if (layers[i]->type != "InnerProduct")
            continue;

        // InnerProduct - Flatten
        int top_blob_index = layers[i]->tops[0];

        int j = i + 1;
        for (; j<layer_count; j++)
        {
            if (layers[j]->type != "Flatten")
                continue;

            if (layers[j]->bottoms.size() != 1)
                continue;

            if (layers[j]->bottoms[0] == top_blob_index)
                break;
        }

        if (j == layer_count)
            continue;

        ncnn::InnerProduct* innerproduct = (ncnn::InnerProduct*)layers[i];
        ncnn::Flatten* flatten = (ncnn::Flatten*)layers[j];

        fprintf(stderr, "eliminate_flatten_after_innerproduct %s %s\n", innerproduct->name.c_str(), flatten->name.c_str());

        int top_blob_index_final = flatten->tops[0];
        innerproduct->tops[0] = top_blob_index_final;
        blobs[top_blob_index_final].producer = i;
        flatten->type = "ncnnfused";
    }

    return 0;
}

1-2-8 eliminate_reshape_before_binaryop

int NetOptimize::eliminate_reshape_before_binaryop()
{
    const size_t layer_count = layers.size();
    for (int i=0; i<layer_count; i++)
    {
        if (layers[i]->type != "Reshape")
            continue;

        ncnn::Reshape* reshape = (ncnn::Reshape*)layers[i];
        if (reshape->w != 1 || reshape->h != 1 || reshape->permute != 0)
            continue;

        // Reshape - BinaryOp
        int top_blob_index = layers[i]->tops[0];

        int j = i + 1;
        for (; j<layer_count; j++)
        {
            if (layers[j]->type != "BinaryOp")
                continue;

            if (layers[j]->bottoms.size() != 2)
                continue;

            if (layers[j]->bottoms[0] == top_blob_index || layers[j]->bottoms[1] == top_blob_index)
                break;
        }

        if (j == layer_count)
            continue;

        ncnn::BinaryOp* binaryop = (ncnn::BinaryOp*)layers[j];

        fprintf(stderr, "eliminate_reshape_before_binaryop %s %s\n", reshape->name.c_str(), binaryop->name.c_str());

        int bottom_blob_index_final = reshape->bottoms[0];
        if (layers[j]->bottoms[0] == top_blob_index)
            binaryop->bottoms[0] = bottom_blob_index_final;
        if (layers[j]->bottoms[1] == top_blob_index)
            binaryop->bottoms[1] = bottom_blob_index_final;
        blobs[bottom_blob_index_final].consumers.erase(std::find(blobs[bottom_blob_index_final].consumers.begin(), blobs[bottom_blob_index_final].consumers.end(), i));
        blobs[bottom_blob_index_final].consumers.push_back(j);
        reshape->type = "ncnnfused";
    }

    return 0;
}

1-3 replace

1-3-1 replace_convolution_with_innerproduct_after_global_pooling

int NetOptimize::replace_convolution_with_innerproduct_after_global_pooling()
{
    const size_t layer_count = layers.size();
    for (int i=0; i<layer_count; i++)
    {
        if (layers[i]->type != "Pooling")
            continue;

        ncnn::Pooling* pooling = (ncnn::Pooling*)layers[i];
        if (pooling->global_pooling == 0)
            continue;

        // Pooling - Convolution
        int top_blob_index = layers[i]->tops[0];

        int j = i + 1;
        for (; j<layer_count; j++)
        {
            if (layers[j]->type != "Convolution")
                continue;

            if (layers[j]->bottoms.size() != 1)
                continue;

            if (layers[j]->bottoms[0] == top_blob_index)
                break;
        }

        if (j == layer_count)
            continue;

        ncnn::Convolution* convolution = (ncnn::Convolution*)layers[j];

        fprintf(stderr, "replace_convolution_with_innerproduct_after_global_pooling %s %s\n", pooling->name.c_str(), convolution->name.c_str());

        ncnn::InnerProduct* innerproduct = (ncnn::InnerProduct*)ncnn::create_layer("InnerProduct");

        innerproduct->type = "InnerProduct";
        innerproduct->name = convolution->name;
        innerproduct->bottoms = convolution->bottoms;
        innerproduct->tops = convolution->tops;

        ncnn::ParamDict pd;
        innerproduct->load_param(pd);

        innerproduct->num_output = convolution->num_output;
        innerproduct->bias_term = convolution->bias_term;
        innerproduct->weight_data_size = convolution->weight_data_size;

        innerproduct->weight_data = convolution->weight_data;
        innerproduct->bias_data = convolution->bias_data;

        innerproduct->activation_type = convolution->activation_type;
        innerproduct->activation_params = convolution->activation_params;

        layers[j] = innerproduct;
        delete convolution;
    }

    return 0;
}

1-3-2 replace_convolution_with_innerproduct_after_innerproduct

int NetOptimize::replace_convolution_with_innerproduct_after_innerproduct()
{
    const size_t layer_count = layers.size();
    for (;;)
    {
    bool replaced = false;

    for (int i=0; i<layer_count; i++)
    {
        if (layers[i]->type != "InnerProduct")
            continue;

        // InnerProduct - Convolution
        int top_blob_index = layers[i]->tops[0];

        int j = i + 1;
        for (; j<layer_count; j++)
        {
            if (layers[j]->type != "Convolution")
                continue;

            if (layers[j]->bottoms.size() != 1)
                continue;

            if (layers[j]->bottoms[0] == top_blob_index)
                break;
        }

        if (j == layer_count)
            continue;

        ncnn::InnerProduct* innerproduct = (ncnn::InnerProduct*)layers[i];
        ncnn::Convolution* convolution = (ncnn::Convolution*)layers[j];

        fprintf(stderr, "replace_convolution_with_innerproduct_after_innerproduct %s %s\n", innerproduct->name.c_str(), convolution->name.c_str());

        ncnn::InnerProduct* innerproduct2 = (ncnn::InnerProduct*)ncnn::create_layer("InnerProduct");

        innerproduct2->type = "InnerProduct";
        innerproduct2->name = convolution->name;
        innerproduct2->bottoms = convolution->bottoms;
        innerproduct2->tops = convolution->tops;

        ncnn::ParamDict pd;
        innerproduct2->load_param(pd);

        innerproduct2->num_output = convolution->num_output;
        innerproduct2->bias_term = convolution->bias_term;
        innerproduct2->weight_data_size = convolution->weight_data_size;

        innerproduct2->weight_data = convolution->weight_data;
        innerproduct2->bias_data = convolution->bias_data;

        innerproduct2->activation_type = convolution->activation_type;
        innerproduct2->activation_params = convolution->activation_params;

        layers[j] = innerproduct2;
        delete convolution;

        replaced = true;
    }

    if (!replaced)
        break;
    }

    return 0;
}


参考资料
1 ncnn https://github.com/Tencent/ncnn
2 NCNN Conv量化详解(一) https://zhuanlan.zhihu.com/p/71881443
3 NCNN量化详解(二) https://zhuanlan.zhihu.com/p/72375164

发布了258 篇原创文章 · 获赞 335 · 访问量 64万+

猜你喜欢

转载自blog.csdn.net/shanglianlm/article/details/103746080