NVDLA系列之C-model:cvif<101>

NV_NVDLA_cvif.cpp

cdp2cvif_wr_req_b_transport

// b_transport callback for write requests from CDP to the CV interface.
// A write transfer arrives as one command packet (TAG_CMD) carrying the
// destination address/size, followed by one or more data packets that are
// chopped into 32-byte DMA atoms and queued for WriteRequest_cdp2cvif().
//
// ID      - TLM socket id (unused here).
// payload - DMA write request; tag selects the command vs. data view of pd.
// delay   - TLM annotated delay (unused; transport is untimed here).
void NV_NVDLA_cvif::cdp2cvif_wr_req_b_transport(int ID, nvdla_dma_wr_req_t* payload, sc_time& delay) {
    uint32_t packet_id;
    uint8_t  *dma_payload_data_ptr;
    uint8_t  *data_ptr;
    uint32_t rest_size, incoming_size;
    client_cvif_wr_req_t * cdp_wr_req;

    packet_id = payload->tag;
    if (TAG_CMD == packet_id) {
        // Command packet: record a new outstanding write request.
        cdp_wr_req_count_ ++;
#pragma CTC SKIP
        if (true == has_cdp_onging_wr_req_) {
            // Two back-to-back commands are a protocol violation: a command
            // must be followed by its data packets before the next command.
            FAIL(("NV_NVDLA_cvif::cdp2cvif_wr_req_b_transport, got two consective command request, one command request shall be followed by one or more data request."));
        }
#pragma CTC ENDSKIP
	else {
            has_cdp_onging_wr_req_ = true;
        }

        cdp_wr_req = new client_cvif_wr_req_t;
        cdp_wr_req->addr  = payload->pd.dma_write_cmd.addr;
        // The size field is encoded as (number of 32B atoms - 1).
        cdp_wr_req->size  = (payload->pd.dma_write_cmd.size + 1) * DMA_TRANSACTION_ATOM_SIZE;    //In byte
        cdp_wr_req->require_ack = payload->pd.dma_write_cmd.require_ack;
        cslDebug((50, "before write to cdp2cvif_wr_cmd_fifo_\x0A"));
        cdp2cvif_wr_cmd_fifo_->write(cdp_wr_req);
        cslDebug((50, "after write to cdp2cvif_wr_cmd_fifo_\x0A"));
        // Reset progress tracking for the data packets that follow.
        cdp_wr_req_got_size_ = 0;
        cdp_wr_req_size_ = cdp_wr_req->size;

    } else {
        // Data packet: forward the payload to the data FIFO in 32B atoms.
        dma_payload_data_ptr = reinterpret_cast <uint8_t *> (payload->pd.dma_write_data.data);
        rest_size = cdp_wr_req_size_ - cdp_wr_req_got_size_;
        // A data packet carries at most DMA_TRANSACTION_MAX_SIZE (64B) bytes.
        incoming_size = min(rest_size, uint32_t (DMA_TRANSACTION_MAX_SIZE));
        // First 32B atom. Ownership of the buffer passes to the FIFO
        // consumer (delete[]'d in WriteRequest_cdp2cvif).
        data_ptr = new uint8_t[DMA_TRANSACTION_ATOM_SIZE];
        memcpy(data_ptr, dma_payload_data_ptr, DMA_TRANSACTION_ATOM_SIZE);
        cslDebug((50, "before write to cdp2cvif_wr_data_fifo_\x0A"));
        cdp2cvif_wr_data_fifo_->write(data_ptr);   // Write to FIFO in 32Byte atom
        cslDebug((50, "after write to cdp2cvif_wr_data_fifo_\x0A"));
        cdp_wr_req_got_size_ += incoming_size;
        for(int i = 0; i < DMA_TRANSACTION_ATOM_SIZE; i++) {
            cslDebug((50, "%x ", data_ptr[i]));
        }
        cslDebug((50, "\x0A"));
        if (incoming_size==DMA_TRANSACTION_MAX_SIZE) {
            // The payload is 64B: push the second 32B atom as well.
            data_ptr = new uint8_t[DMA_TRANSACTION_ATOM_SIZE];
            memcpy(data_ptr, &dma_payload_data_ptr[DMA_TRANSACTION_ATOM_SIZE], DMA_TRANSACTION_ATOM_SIZE);
            cslDebug((50, "write to cdp2cvif_wr_data_fifo_\x0A"));
            cdp2cvif_wr_data_fifo_->write(data_ptr);
            for(int i = 0; i < DMA_TRANSACTION_ATOM_SIZE; i++) {
                cslDebug((50, "%x ", data_ptr[i]));
            }
            cslDebug((50, "\x0A"));
        }

        if (cdp_wr_req_got_size_ == cdp_wr_req_size_) {
            // All data for the current command has arrived.
            has_cdp_onging_wr_req_ = false;
        }
    }
}

函数cdp2cvif_wr_req_b_transport用于处理来自跨通道数据处理器(CDP)到CVSRAM接口(CVIF)的写请求传输。主要功能如下:

  1. 解析传入参数,包括请求的tag,通常用于标识请求类型。
  2. 如果请求类型为命令请求(TAG_CMD),则执行以下操作:
    • 检查是否存在连续的命令请求,如果存在则抛出错误。
    • 创建新的cdp_wr_req对象,包括地址、大小和是否需要确认等信息。
    • 将cdp_wr_req对象写入cdp2cvif_wr_cmd_fifo_,以保存写请求相关信息。
    • 重置已接收数据大小(cdp_wr_req_got_size_)和请求数据大小(cdp_wr_req_size_)。
  3. 对于数据请求,执行以下操作:
    • 从payload中获取数据指针dma_payload_data_ptr。
    • 计算尚未接收的数据大小(rest_size)和本次实际接收的数据大小(incoming_size)。
    • 创建新的uint8_t类型的data_ptr,并从dma_payload_data_ptr中复制数据到data_ptr中。
    • 将data_ptr写入cdp2cvif_wr_data_fifo_,以32字节为单位。
    • 更新cdp_wr_req_got_size_以跟踪已接收的数据大小。
    • 如果incoming_size等于DMA_TRANSACTION_MAX_SIZE(64字节),则继续接收数据并写入cdp2cvif_wr_data_fifo_。
    • 如果已接收的数据大小等于请求的数据大小(cdp_wr_req_size_),则表示请求完成,将has_cdp_onging_wr_req_设置为false。

这个函数的主要目的是管理来自CDP到卷积接口(CVIF)之间的写请求传输,包括命令和数据请求,并将相关信息存储到相应的FIFO队列中。此外,它还维护了请求的连续性,以确保数据的正确传输。

WriteRequest_cdp2cvif


// SC_THREAD body: consumes DMA write commands queued by
// cdp2cvif_wr_req_b_transport(), splits each command into one or more AXI
// write transactions (bounded by CVIF_MAX_MEM_TRANSACTION_SIZE), and pushes
// the resulting payloads into cdp_wr_req_fifo_ for the write arbiter.
//
// DMA addresses/sizes are 32B (DMA_TRANSACTION_ATOM_SIZE) granular while AXI
// beats are 64B (AXI_TRANSACTION_ATOM_SIZE); an unaligned head or tail is
// widened to a full 64B beat whose padding atom is zero-filled and sent with
// its byte enables disabled.
void NV_NVDLA_cvif::WriteRequest_cdp2cvif() {
    uint64_t base_addr;
    uint64_t first_base_addr;
    uint64_t last_base_addr;
    uint64_t cur_address;
    uint32_t size_in_byte;
    uint32_t total_axi_size;
    uint64_t payload_addr;
    uint32_t payload_size;
    uint8_t* axi_byte_enable_ptr;
    uint32_t byte_iter;
    uint32_t atom_iter;
    uint32_t atom_num;
    bool     is_base_64byte_align;
    bool     is_rear_64byte_align;
    bool     is_read=false;
    uint8_t  *axi_atom_ptr;
    nvdla_dbb_extension *nvdla_dbb_ext = NULL;
    client_cvif_wr_req_t   * cdp_wr_req;
    dla_b_transport_payload *bt_payload;

    while(true) {
        // Block until one write command is available.
        cdp_wr_req = cdp2cvif_wr_cmd_fifo_->read();
        payload_addr = cdp_wr_req->addr;   // Aligned to 32B, not necessarily 64B
        payload_size = cdp_wr_req->size;
        cslDebug((50, "NV_NVDLA_cvif::WriteRequest_cdp2cvif, got one write command from cdp2cvif_wr_cmd_fifo_\x0A"));
        cslDebug((50, "    payload_addr: 0x%lx\x0A", payload_addr));
        cslDebug((50, "    payload_size: 0x%x\x0A", payload_size));

        is_base_64byte_align = payload_addr%AXI_TRANSACTION_ATOM_SIZE == 0;
        first_base_addr = is_base_64byte_align? payload_addr: payload_addr - DMA_TRANSACTION_ATOM_SIZE; // Align down to 64B
        is_rear_64byte_align = (payload_addr + payload_size) % AXI_TRANSACTION_ATOM_SIZE == 0;
        // According to the DBB_PV standard, data_length shall be equal to or
        // greater than DBB_PV m_size * m_length whether or not the transaction
        // is aligned, so pad head/tail up to full 64B beats.
        total_axi_size = payload_size + (is_base_64byte_align? 0: DMA_TRANSACTION_ATOM_SIZE) + (is_rear_64byte_align? 0: DMA_TRANSACTION_ATOM_SIZE);
        last_base_addr = first_base_addr + total_axi_size - AXI_TRANSACTION_ATOM_SIZE;
        cslDebug((50, "NV_NVDLA_cvif::WriteRequest_cdp2cvif:\x0A"));
        cslDebug((50, "    first_base_addr: 0x%lx\x0A", first_base_addr));
        cslDebug((50, "    last_base_addr: 0x%lx\x0A", last_base_addr));
        cslDebug((50, "    total_axi_size: 0x%x\x0A", total_axi_size));

        // Walk the 64B-aligned range, emitting one AXI transaction per pass.
        cur_address = is_base_64byte_align? payload_addr: first_base_addr; // Align to 64B
        while(cur_address <= last_base_addr) {
            base_addr    = cur_address;
            size_in_byte = AXI_TRANSACTION_ATOM_SIZE;
            // Grow the transaction while the next 64B beat still belongs to this
            // command and does not cross a CVIF_MAX_MEM_TRANSACTION_SIZE boundary.
            while (((cur_address + AXI_TRANSACTION_ATOM_SIZE) < (first_base_addr + total_axi_size)) && ((cur_address + AXI_TRANSACTION_ATOM_SIZE) % CVIF_MAX_MEM_TRANSACTION_SIZE != 0)) {
                size_in_byte += AXI_TRANSACTION_ATOM_SIZE;
                cur_address  += AXI_TRANSACTION_ATOM_SIZE;
            }
            // Start address of the next AXI transaction.
            cur_address += AXI_TRANSACTION_ATOM_SIZE;

            atom_num = size_in_byte / DMA_TRANSACTION_ATOM_SIZE;

            bt_payload = new dla_b_transport_payload(size_in_byte, dla_b_transport_payload::DLA_B_TRANSPORT_PAYLOAD_TYPE_MC);
            axi_byte_enable_ptr = bt_payload->gp.get_byte_enable_ptr();
            cslDebug((50, "NV_NVDLA_cvif::WriteRequest_cdp2cvif, base_addr=0x%lx size_in_byte=0x%x atom_num=0x%x\x0A", base_addr, size_in_byte, atom_num));

            for (byte_iter=0; byte_iter < size_in_byte; byte_iter++) {
                if ( (base_addr == first_base_addr) && (false == is_base_64byte_align) && (byte_iter < DMA_TRANSACTION_ATOM_SIZE)) {
                    // Disable the padding atom at the head of an unaligned first beat.
                    axi_byte_enable_ptr[byte_iter] = TLM_BYTE_DISABLED;
                } else if (( (base_addr + size_in_byte) == (last_base_addr+AXI_TRANSACTION_ATOM_SIZE)) && (false == is_rear_64byte_align) && (byte_iter >= size_in_byte - DMA_TRANSACTION_ATOM_SIZE)) {
                    // Disable the padding atom at the tail of an unaligned last beat.
                    axi_byte_enable_ptr[byte_iter] = TLM_BYTE_DISABLED;
                } else {
                    // Payload byte carrying real data.
                    axi_byte_enable_ptr[byte_iter] = TLM_BYTE_ENABLED;
                }
            }
            cslDebug((50, "NV_NVDLA_cvif::WriteRequest_cdp2cvif, TLM_BYTE_ENABLE is done\x0A"));

            for (atom_iter=0; atom_iter < atom_num; atom_iter++) {
                if ( (base_addr == first_base_addr) && (false == is_base_64byte_align) && (0 == atom_iter)) {
                    // Zero-fill the head padding atom of the unaligned first beat
                    // (the unaligned address is kept as required by DBB_PV).
                    memset(&bt_payload->data[atom_iter*DMA_TRANSACTION_ATOM_SIZE], 0, DMA_TRANSACTION_ATOM_SIZE);
                } else if (((base_addr + size_in_byte) == (last_base_addr+AXI_TRANSACTION_ATOM_SIZE)) && (false == is_rear_64byte_align) && ( (atom_iter + 1) == atom_num)) {
                    // Zero-fill the tail padding atom of the unaligned last beat.
                    memset(&bt_payload->data[atom_iter*DMA_TRANSACTION_ATOM_SIZE], 0, DMA_TRANSACTION_ATOM_SIZE);
                } else {
                    cslDebug((50, "NV_NVDLA_cvif::WriteRequest_cdp2cvif, before read an atom from cdp2cvif_wr_data_fifo_, base_addr = 0x%lx, atom_iter=0x%x\x0A", base_addr, atom_iter));
                    // Ownership of the atom buffer transfers from the FIFO to us.
                    axi_atom_ptr = cdp2cvif_wr_data_fifo_->read();
                    for(int i=0; i<DMA_TRANSACTION_ATOM_SIZE; i++) {
                        cslDebug((50, "%02x ", axi_atom_ptr[i]));
                    }
                    cslDebug((50, "\x0A"));
                    cslDebug((50, "NV_NVDLA_cvif::WriteRequest_cdp2cvif, after read an atom from cdp2cvif_wr_data_fifo_\x0A"));
                    memcpy(&bt_payload->data[atom_iter*DMA_TRANSACTION_ATOM_SIZE], axi_atom_ptr, DMA_TRANSACTION_ATOM_SIZE);
                    delete[] axi_atom_ptr;
                }
            }

            if ( (base_addr == first_base_addr) && (false == is_base_64byte_align) ) {
                // Restore the true (unaligned) start address as required by DBB_PV.
                base_addr += DMA_TRANSACTION_ATOM_SIZE;
            }
            cslDebug((50, "NV_NVDLA_cvif::WriteRequest_cdp2cvif, base_address=0x%lx size in byte=0x%x\x0A", base_addr, size_in_byte));
            // Prepare the write payload.
            bt_payload->configure_gp(base_addr, size_in_byte, is_read);
            bt_payload->gp.get_extension(nvdla_dbb_ext);
            cslDebug((50, "NV_NVDLA_cvif::WriteRequest_cdp2cvif, sending write command to cdp_wr_req_fifo_.\x0A"));
            cslDebug((50, "    addr: 0x%016lx\x0A", base_addr));
            cslDebug((50, "    size: %d\x0A", size_in_byte));
            nvdla_dbb_ext->set_id(CDP_AXI_ID);
            nvdla_dbb_ext->set_size(64);    // AXI beat size in bytes
            nvdla_dbb_ext->set_length(size_in_byte/AXI_TRANSACTION_ATOM_SIZE);

            // Hand the payload to the write arbiter FIFO.
            cdp_wr_req_fifo_->write(bt_payload);

            // On the last split request, record whether an ack is required;
            // intermediate splits always record "no ack".
            if (cur_address >= (payload_addr + payload_size)) {
                if(cdp_wr_req->require_ack!=0) {
                    cslDebug((50, "NV_NVDLA_cvif::WriteRequest_cdp2cvif, require ack.\x0A"));
                    cdp_wr_required_ack_fifo_->write(true);
                }
                else {
                    cslDebug((50, "NV_NVDLA_cvif::WriteRequest_cdp2cvif, does not require ack.\x0A"));
                    cdp_wr_required_ack_fifo_->write(false);
                }
            }
            else {
                cslDebug((50, "NV_NVDLA_cvif::WriteRequest_cdp2cvif, does not require ack.\x0A"));
                cdp_wr_required_ack_fifo_->write(false);
            }
        }
        // All splits for this command issued; release the command descriptor.
        delete cdp_wr_req;
        cslDebug((50, "NV_NVDLA_cvif::WriteRequest_cdp2cvif, write command processing done\x0A"));
    }
}

这段代码是NV_NVDLA_cvif类中的WriteRequest_cdp2cvif函数,它处理来自跨通道数据处理器(CDP)到卷积接口(CVIF)的写请求。主要功能如下:

  1. 读取从cdp2cvif_wr_cmd_fifo_中接收到的写请求命令。
  2. 解析请求的地址(payload_addr)和大小(payload_size)。
  3. 确定请求是否以64字节对齐(is_base_64byte_align)以及请求的起始和结束地址(first_base_addr和last_base_addr)。
  4. 根据请求的大小,将请求分割成一个或多个AXI事务。
  5. 为每个AXI事务创建dla_b_transport_payload对象,配置字节启用位(axi_byte_enable_ptr)以处理不对齐的情况。
  6. 从cdp2cvif_wr_data_fifo_中读取数据并复制到AXI事务中。
  7. 将AXI事务的信息写入cdp_wr_req_fifo_,包括地址、大小和ID等。
  8. 如果请求需要确认(require_ack),则向cdp_wr_required_ack_fifo_中写入true,否则写入false。
  9. 删除已处理的写请求对象。
  10. 循环执行,处理下一个写请求。

这个函数的主要目的是将CDP到CVIF之间的写请求拆分成多个AXI事务,并将这些事务发送给卷积接口(CVIF)。它还处理不对齐的情况,并在需要时发送确认。

rbk2cvif_wr_req_b_transport


// b_transport callback for write requests from the RBK client to the CV
// interface. A write transfer arrives as one command packet (TAG_CMD)
// carrying the destination address/size, followed by one or more data
// packets (handled in the else-branch below) that are chopped into 32-byte
// DMA atoms for WriteRequest_rbk2cvif().
//
// ID      - TLM socket id (unused here).
// payload - DMA write request; tag selects the command vs. data view of pd.
// delay   - TLM annotated delay (unused; transport is untimed here).
void NV_NVDLA_cvif::rbk2cvif_wr_req_b_transport(int ID, nvdla_dma_wr_req_t* payload, sc_time& delay) {
    uint32_t packet_id;
    uint8_t  *dma_payload_data_ptr;
    uint8_t  *data_ptr;
    uint32_t rest_size, incoming_size;
    client_cvif_wr_req_t * rbk_wr_req;

    packet_id = payload->tag;
    if (TAG_CMD == packet_id) {
        // Command packet: record a new outstanding write request.
        rbk_wr_req_count_ ++;
#pragma CTC SKIP
        if (true == has_rbk_onging_wr_req_) {
            // Two back-to-back commands are a protocol violation: a command
            // must be followed by its data packets before the next command.
            FAIL(("NV_NVDLA_cvif::rbk2cvif_wr_req_b_transport, got two consective command request, one command request shall be followed by one or more data request."));
        }
#pragma CTC ENDSKIP
	else {
            has_rbk_onging_wr_req_ = true;
        }

        rbk_wr_req = new client_cvif_wr_req_t;
        rbk_wr_req->addr  = payload->pd.dma_write_cmd.addr;
        // The size field is encoded as (number of 32B atoms - 1).
        rbk_wr_req->size  = (payload->pd.dma_write_cmd.size + 1) * DMA_TRANSACTION_ATOM_SIZE;    //In byte
        rbk_wr_req->require_ack = payload->pd.dma_write_cmd.require_ack;
        cslDebug((50, "before write to rbk2cvif_wr_cmd_fifo_\x0A"));
        rbk2cvif_wr_cmd_fifo_->write(rbk_wr_req);
        cslDebug((50, "after write to rbk2cvif_wr_cmd_fifo_\x0A"));
        // Reset progress tracking for the data packets that follow.
        rbk_wr_req_got_size_ = 0;
        rbk_wr_req_size_ = rbk_wr_req->size;
这段代码是一个用于数据传输的函数,用于处理来自RBK(RUBIK引擎)到CVIF(CVSRAM接口)的写请求传输。以下是该函数的主要功能:

  1. 解析输入参数:

    • 通过payload参数获取请求的tag,通常用于标识请求类型。
  2. 对于命令请求(TAG_CMD):

    • 如果已经存在连续的命令请求,抛出错误。
    • 否则,创建一个新的rbk_wr_req对象,并初始化它的地址、大小和是否需要确认。
    • 将rbk_wr_req对象写入rbk2cvif_wr_cmd_fifo_中,该对象包含了写请求的相关信息。
    • 重置rbk_wr_req_got_size_和rbk_wr_req_size_。

这个函数的主要作用是处理RBK到CVIF之间的写请求传输,包括命令请求,以确保数据的正确传输,并维护请求的连续性。这段代码未提供完整的函数实现,但是可以看出它与之前提到的CDP到CVIF写请求处理函数具有相似的结构和目的。


    } else {
        // Data packet: forward the payload to the data FIFO in 32B atoms.
        dma_payload_data_ptr = reinterpret_cast <uint8_t *> (payload->pd.dma_write_data.data);
        rest_size = rbk_wr_req_size_ - rbk_wr_req_got_size_;
        // A data packet carries at most DMA_TRANSACTION_MAX_SIZE (64B) bytes.
        incoming_size = min(rest_size, uint32_t (DMA_TRANSACTION_MAX_SIZE));
        // First 32B atom. Ownership of the buffer passes to the FIFO
        // consumer (delete[]'d in WriteRequest_rbk2cvif).
        data_ptr = new uint8_t[DMA_TRANSACTION_ATOM_SIZE];
        memcpy(data_ptr, dma_payload_data_ptr, DMA_TRANSACTION_ATOM_SIZE);
        cslDebug((50, "before write to rbk2cvif_wr_data_fifo_\x0A"));
        rbk2cvif_wr_data_fifo_->write(data_ptr);   // Write to FIFO in 32Byte atom
        cslDebug((50, "after write to rbk2cvif_wr_data_fifo_\x0A"));
        rbk_wr_req_got_size_ += incoming_size;
        for(int i = 0; i < DMA_TRANSACTION_ATOM_SIZE; i++) {
            cslDebug((50, "%x ", data_ptr[i]));
        }
        cslDebug((50, "\x0A"));
        if (incoming_size==DMA_TRANSACTION_MAX_SIZE) {
            // The payload is 64B: push the second 32B atom as well.
            data_ptr = new uint8_t[DMA_TRANSACTION_ATOM_SIZE];
            memcpy(data_ptr, &dma_payload_data_ptr[DMA_TRANSACTION_ATOM_SIZE], DMA_TRANSACTION_ATOM_SIZE);
            cslDebug((50, "write to rbk2cvif_wr_data_fifo_\x0A"));
            rbk2cvif_wr_data_fifo_->write(data_ptr);
            for(int i = 0; i < DMA_TRANSACTION_ATOM_SIZE; i++) {
                cslDebug((50, "%x ", data_ptr[i]));
            }
            cslDebug((50, "\x0A"));
        }

        if (rbk_wr_req_got_size_ == rbk_wr_req_size_) {
            // All data for the current command has arrived.
            has_rbk_onging_wr_req_ = false;
        }
    }
}

这段代码是一个用于数据传输的函数,用于处理来自RBK(RUBIK引擎)到CVIF(CVSRAM接口)的写请求传输。以下是该函数的主要功能:

  1. 对于数据请求(不是命令请求):
    • 从payload参数中获取数据指针dma_payload_data_ptr,该指针包含要写入CVIF的数据。
    • 计算尚未接收的数据大小(rest_size)和本次实际接收的数据大小(incoming_size),并选择较小的值。
    • 创建新的uint8_t类型的data_ptr,并从dma_payload_data_ptr中复制数据到data_ptr中。
    • 将data_ptr写入rbk2cvif_wr_data_fifo_中,以32字节为单位进行数据传输。
    • 更新已接收数据大小(rbk_wr_req_got_size_)。
    • 如果incoming_size等于DMA_TRANSACTION_MAX_SIZE(64字节),则继续接收数据并写入rbk2cvif_wr_data_fifo_
    • 如果已接收的数据大小等于请求的数据大小(rbk_wr_req_size_),则表示请求完成,将has_rbk_onging_wr_req_设置为false。

这个函数的主要目的是处理RBK到CVIF之间的数据写请求,包括数据的接收和传输,以确保数据的正确传输并维护请求的连续性。它类似于之前提到的CDP到CVIF的写请求处理函数,但用于RBK到CVIF的数据传输。

WriteRequest_rbk2cvif


// SC_THREAD body: consumes DMA write commands queued by
// rbk2cvif_wr_req_b_transport(), splits each command into one or more AXI
// write transactions, and forwards the resulting payloads to the write
// arbiter via rbk_wr_req_fifo_.
//
// DMA addresses/sizes are 32B (DMA_TRANSACTION_ATOM_SIZE) granular while AXI
// beats are 64B (AXI_TRANSACTION_ATOM_SIZE); an unaligned head or tail is
// widened to a full 64B beat whose padding atom is zero-filled and sent with
// its byte enables disabled.
void NV_NVDLA_cvif::WriteRequest_rbk2cvif() {
    uint64_t base_addr;
    uint64_t first_base_addr;
    uint64_t last_base_addr;
    uint64_t cur_address;
    uint32_t size_in_byte;
    uint32_t total_axi_size;
    uint64_t payload_addr;
    uint32_t payload_size;
    uint8_t* axi_byte_enable_ptr;
    uint32_t byte_iter;
    uint32_t atom_iter;
    uint32_t atom_num;
    bool     is_base_64byte_align;
    bool     is_rear_64byte_align;
    bool     is_read=false;
    uint8_t  *axi_atom_ptr;
    nvdla_dbb_extension *nvdla_dbb_ext = NULL;
    client_cvif_wr_req_t   * rbk_wr_req;
    dla_b_transport_payload *bt_payload;

    while(true) {
        // Block until one write command is available.
        rbk_wr_req = rbk2cvif_wr_cmd_fifo_->read();
        payload_addr = rbk_wr_req->addr;   // Aligned to 32B, not necessarily 64B
        payload_size = rbk_wr_req->size;
        cslDebug((50, "NV_NVDLA_cvif::WriteRequest_rbk2cvif, got one write command from rbk2cvif_wr_cmd_fifo_\x0A"));
        cslDebug((50, "    payload_addr: 0x%lx\x0A", payload_addr));
        cslDebug((50, "    payload_size: 0x%x\x0A", payload_size));

        is_base_64byte_align = payload_addr%AXI_TRANSACTION_ATOM_SIZE == 0;
        first_base_addr = is_base_64byte_align? payload_addr: payload_addr - DMA_TRANSACTION_ATOM_SIZE; // Align down to 64B
        is_rear_64byte_align = (payload_addr + payload_size) % AXI_TRANSACTION_ATOM_SIZE == 0;
        // According to the DBB_PV standard, data_length shall be equal to or
        // greater than DBB_PV m_size * m_length whether or not the transaction
        // is aligned, so pad head/tail up to full 64B beats.
        total_axi_size = payload_size + (is_base_64byte_align? 0: DMA_TRANSACTION_ATOM_SIZE) + (is_rear_64byte_align? 0: DMA_TRANSACTION_ATOM_SIZE);
        last_base_addr = first_base_addr + total_axi_size - AXI_TRANSACTION_ATOM_SIZE;
        cslDebug((50, "NV_NVDLA_cvif::WriteRequest_rbk2cvif:\x0A"));
        cslDebug((50, "    first_base_addr: 0x%lx\x0A", first_base_addr));
        cslDebug((50, "    last_base_addr: 0x%lx\x0A", last_base_addr));
        cslDebug((50, "    total_axi_size: 0x%x\x0A", total_axi_size));
这段代码是NV_NVDLA_cvif类中的WriteRequest_rbk2cvif函数,它用于处理来自RBK(Reconfigurable Block)到CVIF(Cache Virtual Interface)的写请求传输。以下是该函数的主要功能:

  1. 读取从rbk2cvif_wr_cmd_fifo_中接收到的写请求命令。
  2. 解析请求的地址(payload_addr)和大小(payload_size)。
  3. 确定请求是否以64字节对齐(is_base_64byte_align)以及请求的起始和结束地址(first_base_addr和last_base_addr)。
  4. 根据请求的大小,将请求分割成一个或多个AXI事务。
  5. 为每个AXI事务创建dla_b_transport_payload对象,配置字节启用位(axi_byte_enable_ptr)以处理不对齐的情况。
  6. 从rbk2cvif_wr_data_fifo_中读取数据并复制到AXI事务中。
  7. 将AXI事务的信息写入rbk_wr_req_fifo_,包括地址、大小和ID等。
  8. 如果请求需要确认(require_ack),则向rbk_wr_required_ack_fifo_中写入true,否则写入false。

这个函数的主要目的是将RBK到CVIF之间的写请求拆分成多个AXI事务,并将这些事务发送给卷积接口(CVIF)。它还处理不对齐的情况,并在需要时发送确认。这段代码的结构与之前提到的CDP到CVIF的写请求处理函数类似,用于RBK到CVIF的数据传输。

        // Walk the 64B-aligned range [first_base_addr, last_base_addr],
        // emitting one AXI transaction per iteration.
        cur_address = is_base_64byte_align? payload_addr: first_base_addr; // Align to 64B
        // Split the DMA request into AXI transactions.
        while(cur_address <= last_base_addr) {
            base_addr    = cur_address;
            size_in_byte = AXI_TRANSACTION_ATOM_SIZE;
            // Grow the transaction while the next 64B beat still belongs to this
            // command and does not cross a CVIF_MAX_MEM_TRANSACTION_SIZE boundary.
            while (((cur_address + AXI_TRANSACTION_ATOM_SIZE) < (first_base_addr + total_axi_size)) && ((cur_address + AXI_TRANSACTION_ATOM_SIZE) % CVIF_MAX_MEM_TRANSACTION_SIZE != 0)) {
                size_in_byte += AXI_TRANSACTION_ATOM_SIZE;
                cur_address  += AXI_TRANSACTION_ATOM_SIZE;
            }
            // Start address of the next AXI transaction.
            cur_address += AXI_TRANSACTION_ATOM_SIZE;

            atom_num = size_in_byte / DMA_TRANSACTION_ATOM_SIZE;

            bt_payload = new dla_b_transport_payload(size_in_byte, dla_b_transport_payload::DLA_B_TRANSPORT_PAYLOAD_TYPE_MC);
            axi_byte_enable_ptr = bt_payload->gp.get_byte_enable_ptr();
            cslDebug((50, "NV_NVDLA_cvif::WriteRequest_rbk2cvif, base_addr=0x%lx size_in_byte=0x%x atom_num=0x%x\x0A", base_addr, size_in_byte, atom_num));

            for (byte_iter=0; byte_iter < size_in_byte; byte_iter++) {
                if ( (base_addr == first_base_addr) && (false == is_base_64byte_align) && (byte_iter < DMA_TRANSACTION_ATOM_SIZE)) {
                    // Disable the padding atom at the head of an unaligned first beat.
                    axi_byte_enable_ptr[byte_iter] = TLM_BYTE_DISABLED;
                } else if (( (base_addr + size_in_byte) == (last_base_addr+AXI_TRANSACTION_ATOM_SIZE)) && (false == is_rear_64byte_align) && (byte_iter >= size_in_byte - DMA_TRANSACTION_ATOM_SIZE)) {
                    // Disable the padding atom at the tail of an unaligned last beat.
                    axi_byte_enable_ptr[byte_iter] = TLM_BYTE_DISABLED;
                } else {
                    // Payload byte carrying real data.
                    axi_byte_enable_ptr[byte_iter] = TLM_BYTE_ENABLED;
                }
            }
            cslDebug((50, "NV_NVDLA_cvif::WriteRequest_rbk2cvif, TLM_BYTE_ENABLE is done\x0A"));

            for (atom_iter=0; atom_iter < atom_num; atom_iter++) {
                if ( (base_addr == first_base_addr) && (false == is_base_64byte_align) && (0 == atom_iter)) {
                    // Zero-fill the head padding atom of the unaligned first beat
                    // (the unaligned address is kept as required by DBB_PV).
                    memset(&bt_payload->data[atom_iter*DMA_TRANSACTION_ATOM_SIZE], 0, DMA_TRANSACTION_ATOM_SIZE);
                } else if (((base_addr + size_in_byte) == (last_base_addr+AXI_TRANSACTION_ATOM_SIZE)) && (false == is_rear_64byte_align) && ( (atom_iter + 1) == atom_num)) {
                    // Zero-fill the tail padding atom of the unaligned last beat.
                    memset(&bt_payload->data[atom_iter*DMA_TRANSACTION_ATOM_SIZE], 0, DMA_TRANSACTION_ATOM_SIZE);
                } else {
                    cslDebug((50, "NV_NVDLA_cvif::WriteRequest_rbk2cvif, before read an atom from rbk2cvif_wr_data_fifo_, base_addr = 0x%lx, atom_iter=0x%x\x0A", base_addr, atom_iter));
                    // Ownership of the atom buffer transfers from the FIFO to us.
                    axi_atom_ptr = rbk2cvif_wr_data_fifo_->read();
                    for(int i=0; i<DMA_TRANSACTION_ATOM_SIZE; i++) {
                        cslDebug((50, "%02x ", axi_atom_ptr[i]));
                    }
                    cslDebug((50, "\x0A"));
                    cslDebug((50, "NV_NVDLA_cvif::WriteRequest_rbk2cvif, after read an atom from rbk2cvif_wr_data_fifo_\x0A"));
                    memcpy(&bt_payload->data[atom_iter*DMA_TRANSACTION_ATOM_SIZE], axi_atom_ptr, DMA_TRANSACTION_ATOM_SIZE);
                    delete[] axi_atom_ptr;
                }
            }
这部分代码是WriteRequest_rbk2cvif函数中的循环部分,用于拆分RBK到CVIF的写请求命令为多个AXI事务并生成相应的dla_b_transport_payload,以便发送到CVIF接口。

具体操作如下:

  1. 首先,它从rbk2cvif_wr_cmd_fifo_中读取一个写请求命令,获取请求的起始地址(payload_addr)和大小(payload_size)。

  2. 然后,它确定请求是否以64字节对齐,并计算请求的起始地址(first_base_addr)和结束地址(last_base_addr)。

  3. 接下来,它以64字节对齐的方式初始化当前地址(cur_address)。

  4. 循环处理,根据请求的大小和对齐情况,将请求分割成一个或多个AXI事务。它检查是否下一个数据原子(DMA_TRANSACTION_ATOM_SIZE)仍然属于当前AXI事务,如果是,则增加事务的大小(size_in_byte)。

  5. 对于每个AXI事务,它为事务创建一个dla_b_transport_payload对象,并配置字节启用位(axi_byte_enable_ptr)以处理不对齐的情况。

  6. 根据是否处于不对齐的情况以及是否为第一个或最后一个数据原子,设置字节启用位。如果是不对齐的情况,它会禁用相应的字节。

  7. 对于每个数据原子,它从rbk2cvif_wr_data_fifo_中读取数据(axi_atom_ptr)并将数据复制到AXI事务中。

  8. 最后,它将生成的AXI事务的信息写入rbk_wr_req_fifo_,包括地址、大小和其他相关信息。

这个循环部分的主要目的是将一个写请求拆分为多个AXI事务,以确保数据的正确传输和对齐,并生成相应的AXI事务数据。这些AXI事务将发送到CVIF接口进行处理。


            if ( (base_addr == first_base_addr) && (false == is_base_64byte_align) ) {
                // Restore the true (unaligned) start address as required by DBB_PV.
                base_addr += DMA_TRANSACTION_ATOM_SIZE;
            }
            cslDebug((50, "NV_NVDLA_cvif::WriteRequest_rbk2cvif, base_address=0x%lx size in byte=0x%x\x0A", base_addr, size_in_byte));
            // Prepare the write payload.
            bt_payload->configure_gp(base_addr, size_in_byte, is_read);
            bt_payload->gp.get_extension(nvdla_dbb_ext);
            cslDebug((50, "NV_NVDLA_cvif::WriteRequest_rbk2cvif, sending write command to rbk_wr_req_fifo_.\x0A"));
            cslDebug((50, "    addr: 0x%016lx\x0A", base_addr));
            cslDebug((50, "    size: %d\x0A", size_in_byte));
            nvdla_dbb_ext->set_id(RBK_AXI_ID);
            nvdla_dbb_ext->set_size(64);    // AXI beat size in bytes
            nvdla_dbb_ext->set_length(size_in_byte/AXI_TRANSACTION_ATOM_SIZE);

            // Hand the payload to the write arbiter FIFO.
            rbk_wr_req_fifo_->write(bt_payload);

            // On the last split request, record whether an ack is required;
            // intermediate splits always record "no ack".
            if (cur_address >= (payload_addr + payload_size)) {
                if(rbk_wr_req->require_ack!=0) {
                    cslDebug((50, "NV_NVDLA_cvif::WriteRequest_rbk2cvif, require ack.\x0A"));
                    rbk_wr_required_ack_fifo_->write(true);
                }
                else {
                    cslDebug((50, "NV_NVDLA_cvif::WriteRequest_rbk2cvif, does not require ack.\x0A"));
                    rbk_wr_required_ack_fifo_->write(false);
                }
            }
            else {
                cslDebug((50, "NV_NVDLA_cvif::WriteRequest_rbk2cvif, does not require ack.\x0A"));
                rbk_wr_required_ack_fifo_->write(false);
            }
        }
        // All splits for this command issued; release the command descriptor.
        delete rbk_wr_req;
        cslDebug((50, "NV_NVDLA_cvif::WriteRequest_rbk2cvif, write command processing done\x0A"));
    }
}

这段代码是 WriteRequest_rbk2cvif 函数的后续部分,处理了生成的 AXI 事务的尾部以及根据是否需要 ACK 来向 rbk_wr_required_ack_fifo_ 写入信息。

具体步骤如下:

  1. 首先,它检查是否在一个不对齐的事务的开头,如果是,则增加 base_addr 以处理下一个 DMA 原子。

  2. 然后,它将 bt_payload 的属性配置,包括地址、大小和读写模式。

  3. 接着,它获取 nvdla_dbb_ext 扩展,用于设置 AXI 事务的 ID、大小和长度。

  4. 然后,它将 bt_payload 写入 rbk_wr_req_fifo_,将生成的 AXI 事务添加到 FIFO 队列中,以便之后将其发送到 CVIF 接口。

  5. 接下来,它检查当前是否已经处理了请求的最后一个 AXI 事务。如果是,它会根据请求是否需要 ACK 来向 rbk_wr_required_ack_fifo_ 写入相应的信息。如果需要 ACK,它将写入 true,否则写入 false

  6. 最后,它删除了用于生成 AXI 事务的 rbk_wr_req 对象,释放内存。

这段代码主要用于处理 AXI 事务的尾部,配置 AXI 事务属性并将其写入 FIFO,以及确定是否需要 ACK。这样就完成了从 RBK 到 CVIF 的写请求的处理。

猜你喜欢

转载自blog.csdn.net/lincolnjunior_lj/article/details/134006340