Deploying a handwritten digit recognition (MNIST) model on the HiSilicon SS928

General process------------------------------------------------ -------------------------------------------------- -------------------

Model conversion ------------------------------------------------ -------------------------------------------------- --

1: Prepare the onnx model of MNIST ---> https://kdocs.cn/l/ctULnY8mxXuE

github address--> GitHub - warren-wzw/MNIST-pytorch

Set up the ATC environment --> https://kdocs.cn/l/cjeQxGjfojsX

First set the environment variables

source /home/warren/Ascend/ascend-toolkit/latest/x86_64-linux/bin/setenv.bash

Convert the model

atc --model=/home/warren/Ascend/yolov5/model/yolov5s.onnx \

--soc_version=OPTG  --framework=5 \

--output=/home/warren/Ascend/yolov5/model/yolov5s \

--input_shape="input0:1,2,64,64"  

atc --model=/home/warren/ss928/NNN_PC/amct/amct_onnx/sample/MNIST/outputs/calibration/MNIST_deploy_model.onnx \

> --soc_version=OPTG  --framework=5 \

> --output=/home/warren/ss928/NNN_PC/amct/amct_onnx/sample/MNIST/outputs/calibration/MNIST \

> --input_shape="input0:1,1,28,28"

After the model conversion is successful, the following files are obtained:

        

Model quantization ------------------------------------------------ -------------------------------------------------- ----------

Directory Structure

import os
import argparse
import cv2
import numpy as np
import onnxruntime as ort
import time
import torch

import amct_onnx as amct

# Absolute path of the directory the script is launched from; the paths below are built on it.
PATH = os.path.realpath('./')
# Dataset folder under the working directory.
DATA_DIR = os.path.join(PATH, 'data')
# The parser declares no options of its own. parse_args() runs at import time, so the
# script still answers -h/--help and exits on any unrecognised command-line argument.
PARSER = argparse.ArgumentParser(description='amct_onnx MNIST quantization sample.')
ARGS = PARSER.parse_args()
# Folder that receives the quantization results.
OUTPUTS = os.path.join(PATH, 'outputs/calibration')
# Folder for intermediate quantization artefacts (config, record file, modified model).
TMP = os.path.join(OUTPUTS, 'tmp')

def onnx_forward(onnx_model, batch_size=1, iterations=100):
    """Run an ONNX model on the first 100 MNIST training images and print each prediction.

    Images are read from ``./data/train-images-idx3-ubyte``. Each one is fed to
    the model as a float32 array of shape (1, 1, 28, 28) holding the raw pixel
    byte values (0-255), with no further normalisation.

    Args:
        onnx_model: Path of the ONNX model file to load.
        batch_size: Accepted for interface compatibility; currently ignored
            (every inference uses a single image).
        iterations: Accepted for interface compatibility; currently ignored
            (exactly 100 images are always processed).

    Raises:
        ValueError: If the data file holds fewer than 100 complete images.
    """
    header_bytes = 16      # bytes preceding the first image in the data file
    image_bytes = 28 * 28  # one unsigned byte per pixel
    image_count = 100

    ort_session = ort.InferenceSession(onnx_model, amct.AMCT_SO)
    # The input name does not change between runs, so look it up once.
    input_name = ort_session.get_inputs()[0].name

    with open("./data/train-images-idx3-ubyte", "rb") as f:
        raw = f.read()

    for index in range(image_count):
        offset = header_bytes + image_bytes * index
        # Interpret each byte as its numeric value. Re-parsing the decimal text
        # of a byte as hexadecimal would turn e.g. 255 into 0x255 == 597.
        pixels = np.frombuffer(raw, dtype=np.uint8, count=image_bytes, offset=offset)
        input_data = pixels.astype(np.float32).reshape(1, 1, 28, 28)

        start_time = time.perf_counter()
        output = ort_session.run(None, {input_name: input_data})
        elapsed = time.perf_counter() - start_time

        # The index of the largest score in the first output is the predicted digit.
        pred = np.argmax(output[0])
        # Report the duration of this very inference, in milliseconds.
        print("------------------------The num of this pic is ", pred, "use time ", elapsed * 1000, "ms", index)

def main():
    """Quantize ``./model/model.onnx`` with AMCT and test the model at each stage.

    The original model is tested first. A quantization config is then created
    and a modified (calibration) model is generated. A forward pass is run on
    the modified model, the final models are saved under ``OUTPUTS``, and the
    fake-quant model is tested.
    """
    source_model = './model/model.onnx'

    print('[INFO] Do original model test:')
    onnx_forward(source_model, 1, 1)

    # Describe how the model is to be quantized; no layers are skipped.
    quant_config = os.path.join(TMP, 'config.json')
    amct.create_quant_config(
        config_file=quant_config,
        model_file=source_model,
        skip_layers=[],
        batch_num=1,
        activation_offset=True,
        config_defination=None,
    )

    # Phase1: do conv+bn fusion, weights calibration and generate
    #         calibration model
    record_file = os.path.join(TMP, 'record.txt')
    calibration_model = os.path.join(TMP, 'modified_model.onnx')
    amct.quantize_model(
        config_file=quant_config,
        model_file=source_model,
        modified_onnx_file=calibration_model,
        record_file=record_file,
    )

    # Forward pass on the modified model (presumably the activation
    # calibration step that fills the record file -- confirm against AMCT docs).
    onnx_forward(calibration_model, 32, 1)

    # Phase3: save final model, one for onnx do fake quant test, one
    #         deploy model for ATC
    result_path = os.path.join(OUTPUTS, 'MNIST')
    amct.save_model(calibration_model, record_file, result_path)

    # Phase4: run fake_quant model test
    print('[INFO] Do quantized model test:')
    fake_quant_model = '%s_%s' % (result_path, 'fake_quant_model.onnx')
    onnx_forward(fake_quant_model, 1, 1)


if __name__ == '__main__':
    main()

Inference code writing ------------------------------------------------- -------------------------------------------------- --

Copy the official sample and change it to MNIST. The directory structure is as shown below

Modify the CMake file

1: Add environment variables:

export DDK_PATH=$HOME/Ascend/ascend-toolkit/latest

export NPU_HOST_LIB=$DDK_PATH/runtime/lib64/stub

2: Create a build directory

mkdir -p build/intermediates/host

3:cmake ../../../src -DCMAKE_CXX_COMPILER=aarch64-mix210-linux-g++ -DCMAKE_SKIP_RPATH=TRUE

Copy the entire MNIST folder to the board and set the environment variables for the library paths

export ASCEND_GLOBAL_EVENT_ENABLE=0

export ASCEND_AACPU_KERNEL_PATH=/opt/sd/lib

export ASCEND_AICPU_KERNEL_PATH=/opt/sd/lib

export LD_LIBRARY_PATH=/opt/sd/lib

Execute the executable file main

Execution succeeds.

Code explanation------------------------------------------------ --------------------------

Rough logic

main.cpp

#include "main.h"
#include "acl/acl.h"

// Logging helpers: each prints a tagged, newline-terminated printf-style message.
// INFO and WARN go to stdout, ERROR goes to stderr. `##__VA_ARGS__` (a GNU
// extension) drops the trailing comma when no arguments follow the format string.
#define INFO_LOG(fmt, ...)  fprintf(stdout, "[INFO]  " fmt "\n", ##__VA_ARGS__)
#define WARN_LOG(fmt, ...)  fprintf(stdout, "[WARN]  " fmt "\n", ##__VA_ARGS__)
#define ERROR_LOG(fmt, ...) fprintf(stderr, "[ERROR]  " fmt "\n", ##__VA_ARGS__)
// Model input geometry: presumably 1 channel of 28 x 28 pixels (MNIST) -- confirm against the model.
const int MODEL_CHANNEL = 1;
const int MODEL_IN_WIDTH = 28;
const int MODEL_IN_HEIGHT = 28;
// Number of timed inference iterations run by main().
const int loop_count = 1000;

// Status codes returned by this program: SUCCESS is 0, FAILED is 1.
enum Result {
    SUCCESS,
    FAILED
};

/**
 * Return the current wall-clock time in microseconds.
 *
 * The value comes from gettimeofday(), so it follows the system clock and is
 * not guaranteed to be monotonic.
 */
static inline int64_t getCurrentTimeUs()
{
    struct timeval tv;
    gettimeofday(&tv, NULL);
    // Widen before multiplying so the product cannot overflow on platforms
    // where tv_sec is a 32-bit type.
    return static_cast<int64_t>(tv.tv_sec) * 1000000 + static_cast<int64_t>(tv.tv_usec);
}
/**
 * Read one image (784 bytes) from "../data/train-images-idx3-ubyte".
 *
 * @param num          Zero-based index of the image; image data starts after a
 *                     16-byte header and each image occupies 784 bytes.
 * @param input_image  Destination buffer with room for at least 784 bytes.
 *
 * On any failure (invalid argument, file cannot be opened, seek or read error)
 * a message is printed and the function returns; in the first three cases the
 * buffer is left untouched, after a short read it may be partially written.
 */
void Load_data(int num,unsigned char * input_image)
{
    const long header_bytes = 16;
    const size_t image_bytes = 784;

    if (input_image == NULL || num < 0) {
        printf("invalid argument for Load_data!\n");
        return;
    }
    FILE *file = fopen("../data/train-images-idx3-ubyte", "rb");
    if (file == NULL) {
        printf("can't open the file!\n");
        // Nothing to read from: calling fseek/fread/fclose on NULL would crash.
        return;
    }
    const long offset = header_bytes + static_cast<long>(image_bytes) * num;
    if (fseek(file, offset, SEEK_SET) != 0) {
        printf("can't seek to image %d!\n", num);
    } else if (fread(input_image, sizeof(unsigned char), image_bytes, file) != image_bytes) {
        printf("can't read image %d completely!\n", num);
    }
    fclose(file);
}
/**
 * Sort the first `num` floats of `buffer` in place into ascending order
 * using bubble sort. Nothing happens when `num` is 1 or less.
 */
void Bubble_sort(float *buffer,int num)
{
    // After each pass the largest remaining value sits at index `last`.
    for (int last = num - 1; last > 0; --last) {
        for (int k = 0; k < last; ++k) {
            if (buffer[k] > buffer[k + 1]) {
                const float larger = buffer[k];
                buffer[k] = buffer[k + 1];
                buffer[k + 1] = larger;
            }
        }
    }
}
int main()
{
/***************************************************/
/*****************define var************************/
/***************************************************/
    int num=0;
    aclError ret=1;
    const char *aclConfigPath = "../src/acl.json";
    int32_t deviceId_=0;
    aclrtContext context_=nullptr;
    aclrtStream stream_=nullptr;
    aclrtRunMode runMode;
    uint32_t modelId_=0;
    const char* modelPath = "../model/MNIST.om";
    aclmdlDesc *modelDesc_;
    aclmdlDataset *output_;
    aclmdlDataset *input_;
    void * inputDataBuffer = nullptr;
    size_t size = 784;
    void* input_image_original;
    void* time_ori;
    int64_t sum=0;
    int64_t start_time=0;
    int64_t end_time=0;
/***************************************************/
/*****************Init ACL**************************/
/***************************************************/
    ret = aclInit(aclConfigPath);
    if (ret != ACL_SUCCESS) {
        ERROR_LOG("acl init failed, errorCode = %d", static_cast<int32_t>(ret));
        return FAILED;
    }
    INFO_LOG("--------------------acl init success");
/***************************************************/
/*****************apply resource********************/
/***************************************************/
    // set device only one device    
    ret = aclrtSetDevice(deviceId_);
    if (ret != ACL_SUCCESS) {
        ERROR_LOG("acl set device %d failed, errorCode = %d", deviceId_, static_cast<int32_t>(ret));
        return FAILED;
    }
    INFO_LOG("set device %d success", deviceId_);
    // create context (set current)
    ret = aclrtCreateContext(&context_, deviceId_);
    if (ret != ACL_SUCCESS) {
        ERROR_LOG("acl create context failed, deviceId = %d, errorCode = %d",
            deviceId_, static_cast<int32_t>(ret));
        return FAILED;
    }
    INFO_LOG("create context success");
    // create stream
    ret = aclrtCreateStream(&stream_);
    if (ret != ACL_SUCCESS) {
        ERROR_LOG("acl create stream failed, deviceId = %d, errorCode = %d",
            deviceId_, static_cast<int32_t>(ret));
        return FAILED;
    }
    INFO_LOG("create stream success");
    // get run mode
    ret = aclrtGetRunMode(&runMode);
    if (ret != ACL_SUCCESS) {
        ERROR_LOG("acl get run mode failed, errorCode = %d", static_cast<int32_t>(ret));
        return FAILED;
    } 
/***************************************************/
/********load model and get infos of model**********/
/***************************************************/
    ret = aclmdlLoadFromFile(modelPath,&modelId_);
    if (ret != ACL_SUCCESS) {
        ERROR_LOG("load model from file failed, model file is %s, errorCode is %d",
            modelPath, static_cast<int32_t>(ret));
        return FAILED;
    }
    INFO_LOG("load model %s success id is %d\n", modelPath,modelId_);
    
    //get model describe
    modelDesc_ = aclmdlCreateDesc();
    if (modelDesc_ == nullptr) {
        ERROR_LOG("create model description failed");
        return FAILED;
    }
    ret = aclmdlGetDesc(modelDesc_, modelId_);
    if (ret != ACL_SUCCESS) {
        ERROR_LOG("get model description failed, modelId is %u, errorCode is %d",
            modelId_, static_cast<int32_t>(ret));
        return FAILED;
    }
    INFO_LOG("create model description success");
/***************************************************/
/******************print input tensor***************/
/***************************************************/     
/*     aclmdlIODims *dim;
    ret=aclmdlGetInputDims(modelDesc_,0,dim);
    printf("----------------in dims is %d \n",dim->dimCount);
    printf("----------------in dims name is: %s dims: \n",dim->name);
    for(int num=0;num<dim->dimCount;num++){
        printf("%d ",num,dim->dims[num]);
    }
    ret = aclmdlGetOutputDims(modelDesc_,0,dim);
    printf("----------------out dims is %d \n",dim->dimCount);
    printf("----------------out dims name is: %s dims:\n",dim->name);
    for(int num=0;num<dim->dimCount;num++){
        printf("%d \n",num,dim->dims[num]);
    } 
    deviceId_=0;*/
/***************************************************/
/******************prepare output data buffer***************/
/***************************************************/
    output_ = aclmdlCreateDataset();
    if (output_ == nullptr) {
        ERROR_LOG("can't create dataset, create output failed");
        return FAILED;
    }
    size_t outputSize = aclmdlGetNumOutputs(modelDesc_); 
    for (size_t i = 0; i < outputSize; ++i) {
        size_t modelOutputSize = aclmdlGetOutputSizeByIndex(modelDesc_, i);
        void *outputBuffer = nullptr;
        ret = aclrtMalloc(&outputBuffer, modelOutputSize, ACL_MEM_MALLOC_NORMAL_ONLY);
        if (ret != ACL_SUCCESS) {
            ERROR_LOG("can't malloc buffer, size is %zu, create output failed, errorCode is %d",
                modelOutputSize, static_cast<int32_t>(ret));
            return FAILED;
        }
        //apply output buffer
        aclDataBuffer *outputData = aclCreateDataBuffer(outputBuffer, modelOutputSize);
        if (outputData == nullptr) {
            ERROR_LOG("can't create data buffer, create output failed");
            (void)aclrtFree(outputBuffer);
            return FAILED;
        }
        ret = aclmdlAddDatasetBuffer(output_, outputData);
        if (ret != ACL_SUCCESS) {
            ERROR_LOG("can't add data buffer, create output failed, errorCode is %d",
                static_cast<int32_t>(ret));
            (void)aclrtFree(outputBuffer);
            (void)aclDestroyDataBuffer(outputData);
            return FAILED;
        }
    }
    INFO_LOG("create model output success");
/***************************************************/
/******************prepare input data***************/
/***************************************************/    
     if (modelDesc_ == nullptr) {
        ERROR_LOG("no model description, create input failed");
        return FAILED;
    }
    input_ = aclmdlCreateDataset();
    if (input_ == nullptr) {
        ERROR_LOG("can't create dataset, create input failed");
        return FAILED;
    }
    size_t modelInputSize = aclmdlGetInputSizeByIndex(modelDesc_, 0);
    ret = aclrtMalloc(&input_image_original, 784, ACL_MEM_MALLOC_NORMAL_ONLY);
    if (ret != ACL_SUCCESS) {
        ERROR_LOG("malloc device buffer failed. size is %zu, errorCode is %d",
            size, static_cast<int32_t>(ret));
        return FAILED;
    }
    unsigned char * input_image = static_cast<unsigned char*>(input_image_original);
    void* input_image_float_ori;
    ret = aclrtMalloc(&input_image_float_ori, 784*sizeof(float), ACL_MEM_MALLOC_NORMAL_ONLY);
    if (ret != ACL_SUCCESS) {
        ERROR_LOG("malloc device buffer failed. size is %zu, errorCode is %d",
            size, static_cast<int32_t>(ret));
        return FAILED;
    }
    float * input_image_float = static_cast<float*>(input_image_float_ori);;
    Load_data(num,input_image);
    for(int num=0;num<784;num++){
        input_image_float[num]=(float)input_image[num];
    }
/*     aclrtFree(input_image);
    input_image=nullptr; */
    aclDataBuffer *inputData = aclCreateDataBuffer(input_image_float, modelInputSize);
    if (inputData == nullptr) {
        ERROR_LOG("can't create data buffer, create input failed");
        return FAILED;
    }
    ret = aclmdlAddDatasetBuffer(input_, inputData);
    if (ret != ACL_SUCCESS) {
        ERROR_LOG("add input dataset buffer failed, errorCode is %d", static_cast<int32_t>(ret));
        (void)aclDestroyDataBuffer(inputData);
        inputData = nullptr;
        return FAILED;
    }
    INFO_LOG("create model input success");
    ret = aclrtMalloc(&time_ori, loop_count*sizeof(int64_t), ACL_MEM_MALLOC_NORMAL_ONLY);
        if (ret != ACL_SUCCESS) {
            ERROR_LOG("malloc device buffer failed. size is %zu, errorCode is %d",
                loop_count*sizeof(int64_t), static_cast<int32_t>(ret));
            return FAILED;
        }
        int64_t * time = static_cast<int64_t*>(time_ori);
    for(int loop_time=0;loop_time < loop_count;loop_time++){
        num++;
        Load_data(num,input_image);
        for(int loop_num=0;loop_num<784;loop_num++){
            input_image_float[loop_num]=(float)input_image[loop_num];
        }
        void* data = aclGetDataBufferAddr(inputData);
        uint32_t len = aclGetDataBufferSizeV2(inputData);     
        float *indata = NULL;  
        indata = reinterpret_cast<float*>(data);  
    /***************************************************/
    /******************inference************************/
    /***************************************************/
        start_time = getCurrentTimeUs();
        ret = aclmdlExecute(modelId_, input_, output_);
        end_time = getCurrentTimeUs();
        time[loop_time]=end_time-start_time;
        sum=sum+time[loop_time];
        printf("---Elapse Time = %.3f ms \n", (end_time-start_time) / 1000.f);   
/***************************************************/
/******************post process*********************/
/***************************************************/
        // get model output data
        aclDataBuffer* dataBuffer = aclmdlGetDatasetBuffer(output_, 0);
        void* data_1 = aclGetDataBufferAddr(dataBuffer);
        uint32_t len_1 = aclGetDataBufferSizeV2(dataBuffer);     
        float *outData = NULL;  
        outData = reinterpret_cast<float*>(data_1);  
        void* buffer_copy_ori;
        ret = aclrtMalloc(&buffer_copy_ori, len_1*sizeof(float), ACL_MEM_MALLOC_NORMAL_ONLY);
        if (ret != ACL_SUCCESS) {
            ERROR_LOG("malloc device buffer failed. size is %zu, errorCode is %d",
                len_1, static_cast<int32_t>(ret));
            return FAILED;
        }
        float * buffer_copy = static_cast<float*>(buffer_copy_ori);
        for(int i_1 = 0; i_1 < len_1/sizeof(*outData);i_1++){
            buffer_copy[i_1]=outData[i_1];
        }
        Bubble_sort(outData,len_1/sizeof(*outData));
        for(int i_2 =0;i_2<len_1/sizeof(*outData);i_2++){
            if(buffer_copy[i_2]==outData[9]){
                printf("------------------------------------------%d time the pic value is %d \n",loop_time,i_2);
            }
        } 
        aclrtFree(buffer_copy);
        buffer_copy=nullptr;
    }
    printf("--------loop %d times sum is %.4f ms average time is %.3f ms\n", loop_count,sum / 1000.f,(sum / 1000.f)/loop_count);
    aclrtFree(time);
    time=nullptr;
    aclrtFree(input_image);
    input_image=nullptr;
    aclrtFree(input_image_float);
    input_image_float=nullptr;
    
/***************************************************/
/*******************destroy model input*************/
/***************************************************/
    for(size_t i = 0; i < aclmdlGetDatasetNumBuffers(input_); ++i) {
        aclDataBuffer *dataBuffer = aclmdlGetDatasetBuffer(input_, i);
        (void)aclDestroyDataBuffer(dataBuffer);
    }
    (void)aclmdlDestroyDataset(input_);
    input_ = nullptr;
    INFO_LOG("destroy model input success");
/***************************************************/
/*********************destroy model output*********/
/***************************************************/
    for (size_t i = 0; i < aclmdlGetDatasetNumBuffers(output_); ++i) {
        aclDataBuffer* dataBuffer = aclmdlGetDatasetBuffer(output_, i);
        void* data = aclGetDataBufferAddr(dataBuffer);
        (void)aclrtFree(data);
        (void)aclDestroyDataBuffer(dataBuffer);
    }
    (void)aclmdlDestroyDataset(output_);
    output_ = nullptr;
    INFO_LOG("destroy model output success");
/***************************************************/
/******uninstall model and release resource*********/
/***************************************************/
    modelId_=1;
    ret = aclmdlUnload(modelId_);
    // releasemodelDesc_
    if (modelDesc_ != nullptr) {
        aclmdlDestroyDesc(modelDesc_);
        modelDesc_ = nullptr;
    }
    INFO_LOG("unload model success, modelId is %u", modelId_);
    // release resorce
    if (stream_ != nullptr) {
        ret = aclrtDestroyStream(stream_);
        if (ret != ACL_SUCCESS) {
            ERROR_LOG("destroy stream failed, errorCode = %d", static_cast<int32_t>(ret));
        }
        stream_ = nullptr;
    }
    INFO_LOG("end to destroy stream");

    if (context_ != nullptr) {
        ret = aclrtDestroyContext(context_);
        if (ret != ACL_SUCCESS) {
            ERROR_LOG("destroy context failed, errorCode = %d", static_cast<int32_t>(ret));
        }
        context_ = nullptr;
    }
    INFO_LOG("end to destroy context");
    
    ret = aclrtResetDevice(deviceId_);
    if (ret != ACL_SUCCESS) {
        ERROR_LOG("reset device %d failed, errorCode = %d", deviceId_, static_cast<int32_t>(ret));
    }
    INFO_LOG("end to reset device %d", deviceId_);

    ret = aclFinalize();
    if (ret != ACL_SUCCESS) {
        ERROR_LOG("finalize acl failed, errorCode = %d", static_cast<int32_t>(ret));
    }
    INFO_LOG("end to finalize acl");
}

Results:

fp32

u8 (8-bit quantized)

Guess you like

Origin blog.csdn.net/warren103098/article/details/132238481