NCNN加速框架介绍

NCNN（Nihui’s CNN）是一个轻量级、高性能的深度学习推理框架，由腾讯优图实验室的大神Nihui开发。该框架在移动端应用和嵌入式设备上实现了高效的深度学习模型推理，具有较低的内存占用和高度优化的计算性能。

特点

轻量级：NCNN的核心代码非常精简，采用纯C++实现，没有任何第三方依赖，非常适合移植到各种平台上使用。

高性能：NCNN通过优化计算流程，充分发挥硬件平台的计算能力，实现了高效的模型推理。在多核CPU和GPU上都能够获得很好的加速效果。

多平台支持：NCNN支持多种操作系统和硬件平台，包括Android、iOS、Linux等。同时，它还提供了Caffe和TensorFlow的模型转换工具，可以方便地将其他框架的模型转换为NCNN可用的格式。

低内存占用：NCNN在设计上采用了内存共享和复用的策略，有效减小了内存占用。这对于移动设备和嵌入式设备来说非常重要，可以降低能耗和提高系统性能。

易于使用：NCNN提供了简洁的API接口，方便用户进行模型的载入和推理。同时，它还提供了丰富的示例代码和文档，帮助用户快速上手。

NCNN对各种硬件平台的指令集进行了充分的利用，例如SIMD指令集（如ARM的NEON指令集、x86的SSE指令集等）和GPU的并行计算能力。通过使用这些指令集，NCNN能够并行地执行计算任务，提高计算效率。

示例代码

比较文件夹下两两图片的相似度，模型采用ArcFace。


#ifndef Retinaface_RetinafacePostSelfPlug_H
#define Retinaface_RetinafacePostSelfPlug_H
#include <iostream>
#include <vector>
#include <cmath>
#include <algorithm>
#include <net.h>
#include <mat.h>
#include <opencv2/opencv.hpp>
#include <chrono>
/**
 * @brief Scale a feature vector to unit Euclidean (L2) length.
 *
 * @param feature Input feature vector; may be empty.
 * @return A vector of the same size as @p feature whose components are the
 *         input components divided by the input's L2 norm. If the norm is
 *         exactly zero (empty or all-zero input), an all-zero vector of the
 *         same size is returned instead of dividing by zero.
 */
std::vector<float> calculateUnitVector(const std::vector<float>& feature)
{
    // Accumulate the sum of squares, then take the square root to get the norm.
    float squaredSum = 0.0f;
    for (const float value : feature)
    {
        squaredSum += value * value;
    }
    const float norm = std::sqrt(squaredSum);

    std::vector<float> unitVector(feature.size(), 0.0f);

    // Dividing by a zero norm would fill the result with NaN, which then
    // poisons every similarity score computed from it.
    if (norm == 0.0f)
    {
        return unitVector;
    }

    std::transform(feature.begin(), feature.end(), unitVector.begin(),
                   [norm](float value) { return value / norm; });
    return unitVector;
}
std::vector<float> extractFeatureVector(const std::string& imagePath)
{
    
    
    std::string modelPath = "/home/kylin/ncnnbuild2/ncnnwork/model/iresnetface1s.param";
    std::string weightPath = "/home/kylin/ncnnbuild2/ncnnwork/model/iresnetbin1s.bin";
    cv::Mat image = cv::imread(imagePath);
    
    // 加载模型
    ncnn::Net net;
    ncnn::Option opt;
    net.opt.use_fp16_packed = false;
    net.opt.use_fp16_storage = false;
    net.opt.use_fp16_arithmetic = false;
    opt.num_threads = 4; 
    opt.use_vulkan_compute = false; 
    net.opt=opt;
    net.load_param(modelPath.c_str());
    net.load_model(weightPath.c_str());
    
    // 读取图片
    //ncnn::Mat input = ncnn::Mat::from_pixels(image.data, ncnn::Mat::PIXEL_BGR2RGB, image.cols, image.rows);
    ncnn::Mat input = ncnn::Mat::from_pixels(image.data, ncnn::Mat::PIXEL_BGR, image.cols, image.rows);
    // 减均值除标准差
    // const float mean_vals[3] = { 127.5f, 127.5f, 127.5f };
    // const float norm_vals[3] = { 1.0 / 127.5, 1.0 / 127.5, 1.0 / 127.5 };
    // input.substract_mean_normalize(mean_vals, norm_vals);
    //如果模型是keras，输入为NHWC时需要的额外变换：
    // ncnn::Mat oo;
    // ncnn::convert_packing(input,oo,3);
    // 创建推理对象
    ncnn::Extractor ex = net.create_extractor();
    ex.input("data", input);
    
    ncnn::Mat feature;
    ex.extract("fc1", feature);
    
    // 取出特征向量
    std::vector<float> featureVector((float*)(feature.data), (float*)(feature.data) + feature.w);
    
    // 计算特征向量的单位向量C*H*W
    std::vector<float> unitVector = calculateUnitVector(featureVector);
    
    return unitVector;
}
/**
 * @brief Compute the dot product of two equally sized vectors.
 *
 * @param vec1 First operand.
 * @param vec2 Second operand.
 * @return Sum of element-wise products. If the sizes differ, a message is
 *         written to std::cerr and 0.0f is returned.
 */
float dotProduct(const std::vector<float>& vec1, const std::vector<float>& vec2)
{
    if (vec1.size() == vec2.size())
    {
        float accumulated = 0.0f;
        auto rhs = vec2.begin();
        // Walk both vectors in lockstep, front to back.
        for (auto lhs = vec1.begin(); lhs != vec1.end(); ++lhs, ++rhs)
        {
            accumulated += (*lhs) * (*rhs);
        }
        return accumulated;
    }

    std::cerr << "Vector sizes do not match." << std::endl;
    return 0.0f;
}
/**
 * @brief Print the similarity score of two images.
 *
 * Extracts the feature vector of each image (first path first) and writes
 * "Comparing <path1> and <path2>:<score>" to std::cout, where the score is
 * the dot product of the two feature vectors.
 *
 * @param path1 Path of the first image.
 * @param path2 Path of the second image.
 */
void fun(const std::string& path1, const std::string& path2) {
    const std::vector<float> firstEmbedding = extractFeatureVector(path1);
    const std::vector<float> secondEmbedding = extractFeatureVector(path2);

    // The label is streamed before the score is computed, as in the original chain.
    std::cout << "Comparing " << path1 << " and " << path2 << ":";
    std::cout << dotProduct(firstEmbedding, secondEmbedding) << std::endl;
}
int main() {
    
    
    using namespace std;
    string folder = "/home/kylin/ncnnbuild2/ncnnwork/pic";
    vector<string> imagePaths;
    // 遍历文件夹，获取所有图片的路径
    cv::glob(folder, imagePaths);
    // 两两对比图片路径
    for (int i = 0; i < imagePaths.size(); ++i) {
    
    
        for (int j = i + 1; j < imagePaths.size(); ++j) {
    
    
            fun(imagePaths[i], imagePaths[j]);
        }
    }
    return 0;
}
#endif // Retinaface_RetinafacePostSelfPlug_H

【NCNN】ARM架构CPU的硬件加速代码样例

目录

NCNN加速框架介绍

特点

示例代码

猜你喜欢