[HLS] array reshape

array reshape结合了ARRAY_PARTITION(将数组拆分为更小的数组)的效果和垂直类型的ARRAY_MAP(通过增加位宽度连接数组元素)的效果。这减少了所消耗的块RAM的数量,同时提供了分区的主要好处:并行访问数据。这个pragma创建了一个新的数组,它具有更少的元素,但具有更大的位宽度,允许在单个时钟周期中访问更多的数据。


#pragma HLS array_reshape variable=<name>  <type>  factor=<int>  dim=<int>

<type> 可取 block、cyclic 或 complete,默认为 complete;类型为 complete 时不需要指定 factor。
dim 指定要重塑(reshape)的维度:0 表示所有维度,1 表示第一维,依此类推。
例如:8-bit 数组 AB[4][2][2] 以 complete、dim=0 重塑后,变为一个 128 bit(4×2×2×8)宽的单一元素(一个寄存器)。
在这里插入图片描述

1 二维矩阵乘法 优化

#include <cmath>
using namespace std;

// Compare TB vs HW C-model and/or RTL
#define HW_COSIM

// Matrix dimensions. MAT_A_COLS must equal MAT_B_ROWS for a * b to be defined.
#define MAT_A_ROWS 3
#define MAT_A_COLS 3
#define MAT_B_ROWS 3
#define MAT_B_COLS 3

// Element types of the two operands and of the product.
typedef char mat_a_t;
typedef char mat_b_t;
typedef short result_t;

/**
 * Baseline matrix multiply: res = a * b.
 *
 * @param a   left operand,  MAT_A_ROWS x MAT_A_COLS
 * @param b   right operand, MAT_B_ROWS x MAT_B_COLS
 * @param res output,        MAT_A_ROWS x MAT_B_COLS; every element is
 *            overwritten, then accumulated in place
 *
 * The ARRAY_RESHAPE pragmas reshape `a` completely along dimension 2 and `b`
 * completely along dimension 1, i.e. along the index `k` that the inner
 * product walks over. The PIPELINE pragma is applied to the body of the
 * Col loop.
 */
void matrixmul(
      mat_a_t a[MAT_A_ROWS][MAT_A_COLS],
      mat_b_t b[MAT_B_ROWS][MAT_B_COLS],
      result_t res[MAT_A_ROWS][MAT_B_COLS])
{
#pragma HLS ARRAY_RESHAPE variable=a complete dim=2
#pragma HLS ARRAY_RESHAPE variable=b complete dim=1
   // Iterate over the rows of the A matrix
   Row: for(int i = 0; i < MAT_A_ROWS; i++) {
      // Iterate over the columns of the B matrix
      Col: for(int j = 0; j < MAT_B_COLS; j++) {
#pragma HLS PIPELINE
         res[i][j] = 0;
         // Do the inner product of a row of A and col of B
         Product: for(int k = 0; k < MAT_B_ROWS; k++) {
            res[i][j] += a[i][k] * b[k][j];
         }
      }
   }
}

在这里插入图片描述

/**
 * Matrix multiply res = a * b, written so that every element of `a` and `b`
 * is read from its port exactly once and every element of `res` is written
 * exactly once, in row-major order of (i, j).
 *
 * @param a   left operand,  MAT_A_ROWS x MAT_A_COLS
 * @param b   right operand, MAT_B_ROWS x MAT_B_COLS
 * @param res output,        MAT_A_ROWS x MAT_B_COLS (write-only here)
 *
 * All three ports are declared with the ap_fifo interface pragma, so the
 * function copies what it needs into local buffers instead of re-reading the
 * arguments. MAT_A_COLS must equal MAT_B_ROWS for the product to be defined.
 */
void matrixmul(
      mat_a_t a[MAT_A_ROWS][MAT_A_COLS],
      mat_b_t b[MAT_B_ROWS][MAT_B_COLS],
      result_t res[MAT_A_ROWS][MAT_B_COLS])
{
#pragma HLS ARRAY_RESHAPE variable=b complete dim=1
#pragma HLS ARRAY_RESHAPE variable=a complete dim=2
#pragma HLS INTERFACE ap_fifo port=a
#pragma HLS INTERFACE ap_fifo port=b
#pragma HLS INTERFACE ap_fifo port=res
  // Holds the current row of a; a row has MAT_A_COLS elements.
  mat_a_t a_row[MAT_A_COLS];
  // Local copy of b, filled column by column during the first row pass.
  mat_b_t b_copy[MAT_B_ROWS][MAT_B_COLS];
  int tmp = 0;

  // Iterate over the rows of the A matrix
  Row: for(int i = 0; i < MAT_A_ROWS; i++) {
    // Iterate over the columns of the B matrix
    Col: for(int j = 0; j < MAT_B_COLS; j++) {
#pragma HLS PIPELINE rewind
      // Accumulate in a local so res is only ever written, never read back.
      tmp = 0;

      // Cache each row (so it's only read once per function):
      // at the first column of every row i, copy row i of a.
      if (j == 0)
        Cache_Row: for(int k = 0; k < MAT_A_COLS; k++)
          a_row[k] = a[i][k];

      // Cache all cols (so they are only read once per function):
      // during the first row pass, copy column j of b as j advances.
      if (i == 0)
        Cache_Col: for(int k = 0; k < MAT_B_ROWS; k++)
          b_copy[k][j] = b[k][j];

      // Do the inner product of a row of A and col of B
      Product: for(int k = 0; k < MAT_B_ROWS; k++) {
        tmp += a_row[k] * b_copy[k][j];
      }
      res[i][j] = tmp;
    }
  }
}

猜你喜欢

转载自blog.csdn.net/qq_35608277/article/details/113528007
HLS