NVDLA的SDK代码整理一


前言

这里主要借鉴这位大佬开放的代码,当然,NVDLA官方也开放了一些代码。我分析sdk的内容将会多提点有意思的东西!


一、文件情况

如果之前写过sdk,那么接下来将会十分容易。在大佬提供的sdk_sanity代码中,包含以下文件:

Image_q_dog_HW.bin
main.c
nv_small_run.c
nvdla_state.c
opendla.h
platform_config.h
platform.c
platform.h

很显然,platform_config.h、platform.c、platform.h是sdk工程的基本代码,上面的main.c大概率也是由helloworld模板创建得到的!因此我们可以锁定以下几个待分析的文件目标:

main.c
nv_small_run.c
nvdla_state.c
opendla.h

二、main.c代码分析

1. 各种各样的函数

我们还是直接看定义了哪些函数,这边跳过了和PWM相关的函数。
首先是reg_write函数:

/*
 * Write one 32-bit value to an NVDLA register.
 *
 * reg_num is the register offset; it is added to NVDLA_BASE_ADDRESS to
 * form the address handed to Xil_Out32().
 */
void reg_write(unsigned int reg_num, int value)
{
    Xil_Out32(reg_num + NVDLA_BASE_ADDRESS, value);
}

其次是reg_read函数:

/*
 * Read one 32-bit value from an NVDLA register.
 *
 * reg_num is the register offset relative to NVDLA_BASE_ADDRESS.
 * Returns the value obtained from Xil_In32(), converted to int.
 */
int reg_read(unsigned int reg_num)
{
    return Xil_In32(reg_num + NVDLA_BASE_ADDRESS);
}

然后是poll_reg_equal函数:

/*
 * Busy-wait until the whole register at offset reg_num reads back as
 * expect_value.
 *
 * The register is re-read on every iteration; there is no timeout, so the
 * call only returns once the values match.
 */
void poll_reg_equal(unsigned int reg_num, int expect_value)
{
    int sample;

    do {
        sample = Xil_In32(NVDLA_BASE_ADDRESS + reg_num);
    } while ((sample & 0xffffffff) != expect_value);
}

另外2个相似的函数,也就是poll_field_not_equal函数和poll_field_equal函数:

/*
 * Busy-wait until the masked register value differs from expect_value.
 *
 * reg_num      register offset relative to NVDLA_BASE_ADDRESS
 * field        bit mask ANDed with the value read
 * expect_value value the masked result must stop being equal to
 *
 * There is no timeout: the call returns only once the condition holds.
 */
void poll_field_not_equal(unsigned int reg_num, int field, int expect_value)
{
    for (;;) {
        const int raw = Xil_In32(NVDLA_BASE_ADDRESS + reg_num);

        if ((raw & field) != expect_value) {
            return;
        }
    }
}
/*
 * Busy-wait until the masked register value equals expect_value.
 *
 * reg_num      register offset relative to NVDLA_BASE_ADDRESS
 * field        bit mask ANDed with the value read
 * expect_value value the masked result has to reach
 *
 * There is no timeout: the call returns only once the condition holds.
 */
void poll_field_equal(unsigned int reg_num, int field, int expect_value)
{
    int raw = Xil_In32(NVDLA_BASE_ADDRESS + reg_num);

    while ((raw & field) != expect_value) {
        raw = Xil_In32(NVDLA_BASE_ADDRESS + reg_num);
    }
}

很直观看到最后三个函数都是用来检测寄存器值和期望值相等还是不相等!这个操作让我莫名想到线程间同步的软硬协同的原子操作,按下不表。
好了,接下来要回答两个问题:

1、这些函数在哪里被调用?
2、为什么作者能想到这些函数?

我们试图理解清楚大佬写代码的意图,先回答第一个问题:
1、nvdla_state.c中调用了reg_read和poll_field_equal函数。

#include "opendla.h"

/*
 * Report whether any of the polled NVDLA sub-units is busy.
 *
 * The S_STATUS_0 registers are read in a fixed order and masked with
 * 0x30003 (bits 0-1 and 16-17). Reading stops at the first register whose
 * masked value is non-zero.
 * NOTE(review): the masked bits are presumably the per-register-group
 * status fields - confirm against opendla.h.
 *
 * Returns 1 if any masked status is non-zero, 0 otherwise.
 */
int nvdla_is_busy()
{
    const unsigned int status_regs[] = {
        NVDLA_CDMA_S_STATUS_0,
        NVDLA_CMAC_A_S_STATUS_0,
        NVDLA_CMAC_B_S_STATUS_0,
        NVDLA_CACC_S_STATUS_0,
        NVDLA_CSC_S_STATUS_0,
        NVDLA_SDP_RDMA_S_STATUS_0,
        NVDLA_SDP_S_STATUS_0,
        NVDLA_PDP_S_STATUS_0,
    };
    unsigned int idx;

    for (idx = 0; idx < sizeof status_regs / sizeof status_regs[0]; idx++) {
        if (reg_read(status_regs[idx]) & 0x30003) {
            return 1;
        }
    }

    return 0;
}

/*
 * Block until every polled NVDLA sub-unit reports an idle status.
 *
 * Each S_STATUS_0 register is polled in turn with poll_field_equal() until
 * its value masked with 0x30003 reads 0. The polls are sequential and have
 * no timeout.
 */
void nvdla_wait_for_ready()
{
    const unsigned int status_regs[] = {
        NVDLA_CDMA_S_STATUS_0,
        NVDLA_CMAC_A_S_STATUS_0,
        NVDLA_CMAC_B_S_STATUS_0,
        NVDLA_CACC_S_STATUS_0,
        NVDLA_CSC_S_STATUS_0,
        NVDLA_SDP_RDMA_S_STATUS_0,
        NVDLA_SDP_S_STATUS_0,
        NVDLA_PDP_S_STATUS_0,
    };
    unsigned int idx;

    for (idx = 0; idx < sizeof status_regs / sizeof status_regs[0]; idx++) {
        poll_field_equal(status_regs[idx], 0x30003, 0);
    }
}
2、nv_small_run.c中调用了reg_write函数,如下:
//dla_copy
#include "xparameters.h"
#include "xil_io.h"
#include "xstatus.h"
#include "xscugic.h"
#include "xil_exception.h"
#include "opendla.h"

/************************** Constant Definitions *****************************/
/* The following constant maps to the name of the hardware instances that
 * were created in the Vivado system design. */

/* Alias for the generated XPAR_NV_NVDLA_WRAPPER_0_BASEADDR constant. */
#define NVDLA_BASE_ADDRESS XPAR_NV_NVDLA_WRAPPER_0_BASEADDR
/* Alias for the generated XPAR_PS7_DDR_0_S_AXI_BASEADDR constant. */
#define PS_DDR0_BASE_ADDRESS XPAR_PS7_DDR_0_S_AXI_BASEADDR
/* Base that nv_small_run() adds to the offsets it writes into the SDP
 * *_BASE_ADDR_LOW_0 registers. */
#define base_addr 0x20000000

/*
 * Program the SDP and SDP_RDMA register sets for the copy test and start
 * the operation.
 *
 * The writes are issued in a fixed order. The two *_D_OP_ENABLE_0 writes
 * come last, after all other configuration values have been written.
 * Values visible here: data cube width/height 0x1bf, channel 0x7, source
 * at base_addr + 0x0, destination at base_addr + 0x200000.
 * NOTE(review): cube dimensions are presumably encoded as size-1 - confirm
 * against the NVDLA register documentation.
 *
 * The function does not wait for completion; callers poll the status
 * registers afterwards.
 *
 * Returns 0. There is no failure path; the explicit return fixes the
 * original, which was declared int but fell off the end without a value.
 */
int nv_small_run(void)
{
    reg_write(NVDLA_SDP_S_POINTER_0, 0x0);
    reg_write(NVDLA_SDP_RDMA_S_POINTER_0, 0x0);
    reg_write(NVDLA_SDP_S_LUT_LO_START_0, 0x0);
    reg_write(NVDLA_SDP_S_LUT_ACCESS_CFG_0, 0x0);
    reg_write(NVDLA_SDP_S_LUT_LE_SLOPE_SCALE_0, 0x0);
    reg_write(NVDLA_SDP_S_LUT_LO_SLOPE_SCALE_0, 0x0);
    reg_write(NVDLA_SDP_S_LUT_LE_END_0, 0x0);
    reg_write(NVDLA_SDP_S_LUT_ACCESS_DATA_0, 0x0);
    reg_write(NVDLA_SDP_S_LUT_INFO_0, 0x0);
    reg_write(NVDLA_SDP_S_LUT_LO_SLOPE_SHIFT_0, 0x0);
    reg_write(NVDLA_SDP_S_LUT_LE_START_0, 0x0);
    reg_write(NVDLA_SDP_S_LUT_CFG_0, 0x0);
    reg_write(NVDLA_SDP_S_LUT_LE_SLOPE_SHIFT_0, 0x0);
    reg_write(NVDLA_SDP_S_LUT_LO_END_0, 0x0);
    reg_write(NVDLA_SDP_D_CVT_OFFSET_0, 0x0);
    reg_write(NVDLA_SDP_D_DST_DMA_CFG_0, 0x1);
    reg_write(NVDLA_SDP_RDMA_D_SRC_SURFACE_STRIDE_0, 0x188000);
    reg_write(NVDLA_SDP_D_DST_LINE_STRIDE_0, 0xe00);
    reg_write(NVDLA_SDP_RDMA_D_SRC_LINE_STRIDE_0, 0xe00);
    reg_write(NVDLA_SDP_RDMA_D_SRC_BASE_ADDR_HIGH_0, 0x0);
    reg_write(NVDLA_SDP_RDMA_D_BRDMA_CFG_0, 0x1);
    reg_write(NVDLA_SDP_RDMA_D_BS_BATCH_STRIDE_0, 0x0);
    reg_write(NVDLA_SDP_D_DP_EW_ALU_CVT_SCALE_VALUE_0, 0x0);
    reg_write(NVDLA_SDP_RDMA_D_EW_LINE_STRIDE_0, 0x0);
    reg_write(NVDLA_SDP_RDMA_D_BN_BASE_ADDR_HIGH_0, 0x0);
    reg_write(NVDLA_SDP_D_DP_EW_ALU_SRC_VALUE_0, 0x0);
    reg_write(NVDLA_SDP_RDMA_D_NRDMA_CFG_0, 0x1);
    reg_write(NVDLA_SDP_D_DP_EW_MUL_CVT_SCALE_VALUE_0, 0x0);
    reg_write(NVDLA_SDP_RDMA_D_BN_BATCH_STRIDE_0, 0x0);
    reg_write(NVDLA_SDP_D_DP_EW_MUL_CVT_TRUNCATE_VALUE_0, 0x0);
    reg_write(NVDLA_SDP_RDMA_D_EW_BASE_ADDR_HIGH_0, 0x0);
    reg_write(NVDLA_SDP_D_DP_EW_MUL_CFG_0, 0x2);
    reg_write(NVDLA_SDP_D_DATA_CUBE_WIDTH_0, 0x1bf);
    reg_write(NVDLA_SDP_D_DP_BN_ALU_CFG_0, 0x0);
    reg_write(NVDLA_SDP_RDMA_D_EW_BASE_ADDR_LOW_0, base_addr + 0x80000000);
    reg_write(NVDLA_SDP_D_DATA_CUBE_CHANNEL_0, 0x7);
    reg_write(NVDLA_SDP_D_DATA_CUBE_HEIGHT_0, 0x1bf);
    reg_write(NVDLA_SDP_D_DP_BS_MUL_SRC_VALUE_0, 0x0);
    reg_write(NVDLA_SDP_RDMA_D_BN_LINE_STRIDE_0, 0x0);
    reg_write(NVDLA_SDP_RDMA_D_BS_BASE_ADDR_HIGH_0, 0x0);
    reg_write(NVDLA_SDP_D_DP_EW_ALU_CVT_OFFSET_VALUE_0, 0x0);
    reg_write(NVDLA_SDP_RDMA_D_DATA_CUBE_CHANNEL_0, 0x7);
    reg_write(NVDLA_SDP_D_DP_BN_MUL_CFG_0, 0x0);
    reg_write(NVDLA_SDP_D_DP_BN_MUL_SRC_VALUE_0, 0x0);
    reg_write(NVDLA_SDP_RDMA_D_BS_BASE_ADDR_LOW_0, base_addr + 0x80000000);
    reg_write(NVDLA_SDP_RDMA_D_ERDMA_CFG_0, 0x1);
    reg_write(NVDLA_SDP_D_DST_BASE_ADDR_HIGH_0, 0x0);
    reg_write(NVDLA_SDP_RDMA_D_BS_SURFACE_STRIDE_0, 0x0);
    reg_write(NVDLA_SDP_D_DP_EW_MUL_SRC_VALUE_0, 0x0);
    reg_write(NVDLA_SDP_RDMA_D_BS_LINE_STRIDE_0, 0x0);
    reg_write(NVDLA_SDP_D_DP_BS_ALU_SRC_VALUE_0, 0x0);
    reg_write(NVDLA_SDP_D_CVT_SHIFT_0, 0x0);
    reg_write(NVDLA_SDP_D_DP_EW_ALU_CFG_0, 0x2);
    reg_write(NVDLA_SDP_D_DST_SURFACE_STRIDE_0, 0x188000);
    reg_write(NVDLA_SDP_D_FEATURE_MODE_CFG_0, 0x0);
    reg_write(NVDLA_SDP_RDMA_D_FEATURE_MODE_CFG_0, 0x0);
    reg_write(NVDLA_SDP_D_DP_BS_CFG_0, 0x73);
    reg_write(NVDLA_SDP_D_CVT_SCALE_0, 0x1);
    reg_write(NVDLA_SDP_D_DP_BN_CFG_0, 0x53);
    reg_write(NVDLA_SDP_D_DP_BN_ALU_SRC_VALUE_0, 0x0);
    reg_write(NVDLA_SDP_RDMA_D_SRC_DMA_CFG_0, 0x1);
    reg_write(NVDLA_SDP_D_DST_BATCH_STRIDE_0, 0x0);
    reg_write(NVDLA_SDP_D_DP_BS_ALU_CFG_0, 0x0);
    reg_write(NVDLA_SDP_RDMA_D_BN_SURFACE_STRIDE_0, 0x0);
    reg_write(NVDLA_SDP_D_DP_EW_MUL_CVT_OFFSET_VALUE_0, 0x0);
    reg_write(NVDLA_SDP_RDMA_D_EW_SURFACE_STRIDE_0, 0x0);
    reg_write(NVDLA_SDP_D_DST_BASE_ADDR_LOW_0, base_addr + 0x200000);
    reg_write(NVDLA_SDP_RDMA_D_EW_BATCH_STRIDE_0, 0x0);
    reg_write(NVDLA_SDP_RDMA_D_PERF_ENABLE_0, 0x0);
    reg_write(NVDLA_SDP_D_DP_EW_ALU_CVT_TRUNCATE_VALUE_0, 0x0);
    reg_write(NVDLA_SDP_RDMA_D_DATA_CUBE_HEIGHT_0, 0x1bf);
    reg_write(NVDLA_SDP_D_PERF_ENABLE_0, 0x0);
    reg_write(NVDLA_SDP_D_DP_EW_TRUNCATE_VALUE_0, 0x0);
    reg_write(NVDLA_SDP_RDMA_D_SRC_BASE_ADDR_LOW_0, base_addr + 0x0);
    reg_write(NVDLA_SDP_RDMA_D_BN_BASE_ADDR_LOW_0, base_addr + 0x80000000);
    reg_write(NVDLA_SDP_D_DP_BS_MUL_CFG_0, 0x0);
    reg_write(NVDLA_SDP_D_DP_EW_CFG_0, 0x53);
    reg_write(NVDLA_SDP_D_DATA_FORMAT_0, 0x0);
    reg_write(NVDLA_SDP_RDMA_D_DATA_CUBE_WIDTH_0, 0x1bf);

    /* Enable bits are written last, after the configuration above. */
    reg_write(NVDLA_SDP_RDMA_D_OP_ENABLE_0, 0x1);
    reg_write(NVDLA_SDP_D_OP_ENABLE_0, 0x1);

    printf("***********************\n");
    printf("Finish NVDLA NV_SMALL Register Setting\n");
    printf("***********************\n");

    return 0;
}

明眼人也看得出来,大佬定义的poll_reg_equal和poll_field_not_equal这俩函数没用上。那大佬的思路是怎么来的呢?我不是大佬当事人,只能做合理推测!很简单,回到官网找证据!

2. 为什么会想到定义上面眼花缭乱的函数?

/doc/hw/v2/verif_guide.rst中其实透露了细节。官方建议是9类寄存器操作,如下:

| Name | Description | Syntax | Usage |
| --- | --- | --- | --- |
| reg_write | Write data to specific DUT register | reg_write(reg_name, reg_value); | Fundamental operation for register configuration |
| reg_read_expected | Read data from specific DUT register, compare with expected value | reg_read_expected(addr, expected_data); | For some special cases like register accessing tests |
| reg_read | Read data from specific DUT register | reg_read(reg_name, return_value); | For specific cases which may need to do post-processing on read return value. |
| sync_notify | Specified player sequencer will send out synchronization event | sync_notify(target_resource, sync_id); | CC pipeline, OP_EN configuration order, CACC->CMAC->CSC. |
| sync_wait | Specified player sequencer will wait on synchronization event | sync_wait(target_resource, sync_id); | CC pipeline, OP_EN configuration order, CACC->CMAC->CSC. |
| intr_notify | Monitor DUT interrupt, catch and clear interrupt and send synchronization event. There could be multiple intr_notify, all those intr_notify are processed sequentially. The processing order is the same as commands' line order in configuration file. | intr_notify(intr_id, sync_id); // notify when specific interrupt fired | Hardware layer complete notification, informing test bench that test is ended. Multi-layer test which is presumed containing layer 0 ~ N, for n > 1 layers, they shall wait for interrupts. |
| poll | Continuously poll register/field value from DUT, until one of the following conditions is met: 1、Equal, polled value is equal to expected value 2、Greater, polled value is greater than expected value 3、Less, polled value is less than expected value 4、Not equal, polled value is not equal to expected value 5、Not greater, polled value is not greater than expected value 6、Not less, polled value is not less than expected value | poll_field_equal(target_resource, register_name, field_name, expected_value); poll_reg_equal(target_resource, register_name, expected_value); poll_field_greater(target_resource, register_name, field_name, expected_value); poll_reg_less(target_resource, register_name, expected_value); poll_field_not_greater(target_resource, register_name, field_name, expected_value); poll_reg_not_less(target_resource, register_name, expected_value); | Convolution case, wait until CBUF flush has done |
| check | Invoke player result checking method. When test bench works in RTL/CMOD cross checking mode, neither golden CRC nor golden files are necessary in this case. Method check_nothing() shall be added to trace file to indicate test end event. | check_crc(sync_id, memory_type, base_address, size, golden_crc_value); check_file(sync_id, memory_type, base_address, size, "golden_file_name"); check_nothing(sync_id); | CRC check for no CMOD simulation (usually generated by arch/inherit from previous project/eyeball gilded). Golden memory result check for no CMOD simulation (usually generated by arch/inherit from previous project/eyeball gilded) |
| mem | Load memory from file. Initialize memory by pattern. | mem_load(ram_type, base_addr, file_path); // file_path shall be enclosed by “” mem_init(ram_type, base_addr, size, pattern); | |

因此答案破解,那我们需要看看这些函数都在哪些文件中用到?可以在/hw-nv_small/verif/tests/trace_tests/nv_small/cdp_1x1x1_lrn3_int8_0中看到些许痕迹,如下:

mem_init(pri_mem, 0x80000c00, 0x800, ALL_ZERO);
mem_load(pri_mem, 0x80000c00, "cdp_1x1x1_lrn3_int8_0_in.dat");
mem_init(pri_mem, 0x80000020, 0x800, ALL_ZERO);
reg_write(NVDLA_CDP.S_POINTER_0, 0x0);
reg_write(NVDLA_CDP.D_PERF_LUT_HYBRID_0, 0x0);
......
......
reg_write(NVDLA_CDP_RDMA.D_PERF_READ_STALL_0, 0x0);
reg_write(NVDLA_CDP_RDMA.D_OPERATION_MODE_0, 0x0);
reg_write(NVDLA_CDP_RDMA.D_SRC_LINE_STRIDE_0, 0x100);
reg_write(NVDLA_CDP_RDMA.D_SRC_DMA_CFG_0, 0x1);
reg_write(NVDLA_CDP_RDMA.D_SRC_BASE_ADDR_HIGH_0, 0x0);
reg_write(NVDLA_CDP.D_OP_ENABLE_0, 0x1);
reg_write(NVDLA_CDP_RDMA.D_OP_ENABLE_0, 0x1);
intr_notify(CDP_0, sync_id_0);
check_crc(sync_id_0, 1, 0x80000020, 0x800, 0xf1e8ba9e);

所以接下来最重要的就是寄存器到底是什么含义?以及我们是否有比较简单的方法去看懂寄存器?这个不是本篇的内容,留在下一篇!

3. main主体函数部分

接着看main函数主体部分:

int main(void)
{
    
    

    cleanup_platform();
    unsigned int memory_value;
    unsigned int memory_value1;
    unsigned int memory_value2;
    int status = XST_SUCCESS;
    u32 value = 0;
    u32 period = 0;
    brightness = 0;
    unsigned int i;

    //0x40000000, "Image_q_dog_HW.bin", 1605632 bytes
    //0x40200000, "Image_q_dog_HW.bin", 1605632 bytes

    //byte
    int num;

    //byte
    num = 1605632;

    u32 *source, *destination;

    source = (u32 *)0x30000000; //golden output
    destination = (u32 *)0x20200000;
    xil_printf("HW Version is %x \r\n", reg_read(NVDLA_CFGROM_CFGROM_HW_VERSION_0));
    xil_printf("HW Version is %x.%x \r\n", reg_read(NVDLA_GLB_S_NVDLA_HW_VERSION_0)>>NVDLA_GLB_S_NVDLA_HW_VERSION_0_MAJOR_FIELD,reg_read(NVDLA_GLB_S_NVDLA_HW_VERSION_0)>>NVDLA_GLB_S_NVDLA_HW_VERSION_0_MINOR_FIELD);

    printf("***********************\n");
    printf("Begin NVDLA NV_SMALL Register Setting\n");
    printf("***********************\n");

    nv_small_run(); 
    nvdla_wait_for_ready(); 

    //word
    for (i = 0; i < num / 4; i++)
    {
    
    
        if (destination[i] != source[i])
        {
    
    
            xil_printf("Data match failed at = %d, source data = %d, destination data = %d\n\r", i, source[i], destination[i]);
            //print("-- Exiting main() --");
            //return XST_FAILURE;
        }
    }

    printf("DLA Copy Test Success\n ");

    //    }

    return status;
}

中规中矩,思路很好理解!作者写这段测试代码的目的是:使用SDP将内存上的一段数据搬移到另一段去,所以有src和dest两个地址。中间2个xil_printf用来输出版本信息,相关寄存器的定义可以回到opendla_small.h中寻找。注意这个文件怎么来的?其实这个文件在内核态驱动代码中也出现过一次,名称为opendla.h,两个文件除了一部分宏的名称不一样之外,完全行使了同一功能。随后调用nv_small_run完成寄存器配置并启动搬移,再调用nvdla_wait_for_ready等待各模块状态变为空闲;最后的for循环则是在数据拷贝完成后做检验,逐字比较destination与source(golden output)中的数据是否一致。


总结

本节简单回顾了NVDLA的SDK测试代码,将其中函数的设计来源和想实现的功能都一一叙述。不过从上面的代码也可以看到,SDK最重要的不是别的,而是一个又一个的寄存器。那么接下来就该回答几个问题。

1、怎么确定哪些寄存器是需要的?
2、寄存器的取值?
3、开发板如果迁移以后,怎么去迁移寄存器?

以上三个问题放在下一篇。

猜你喜欢

转载自blog.csdn.net/weixin_41029027/article/details/134789375
今日推荐