Embedded and Reconfigurable Systems M之HLS

一开始我们从简单的开始,设计一个计数器,如下框图:

很简单,波形如下:(eg:RANGE = 3)

contatore.h
 

#ifndef CONTATORE_H
#define CONTATORE_H

#include "ap_int.h"

/* Single-bit unsigned value; used here for the LED output. */
typedef ap_uint<1> bit;

/* Number of calls between two consecutive toggles of the LED. */
#define RANGE 10

/**
 * Free-running counter that drives an LED.
 *
 * @param led_output Receives the current LED state on every call.
 */
void contatore_no_io(volatile bit *led_output);

#endif /* CONTATORE_H */

必须包含头文件 "ap_int.h"。ap_uint<5> 表示一个 5 bit 的无符号整数,ap_int<5> 表示一个 5 bit 的有符号整数。

contatore.cpp

#include "contatore.h"

void contatore_no_io(volatile	bit	*led_output)
{
static unsigned int	counter_value	=	0;
static bit	led_status	=	0;
counter_value++;
if	(counter_value>=RANGE)	{
	led_status	=	not(led_status);
	counter_value=0;
}
*led_output	=	led_status;
		return;
}

testbench.cpp

#include "contatore.h"
int main()
{
bit	led_output_variable=0;
for	(int	i=0;	i<100;	i++)
{
	contatore_no_io(&led_output_variable);
	printf("Iter	%d\t	Led	=	%d\n",	i,(int)led_output_variable);
}

printf("\n\n>>>	End	simulation	<<<\n\n");
return	0;
}

pragma HLS reset指令:

如果变量是静态变量或全局变量,则RESET pragma用于显式添加复位,或者可以通过off pragma从复位中删除变量。 当设计中存在静态或全局数组时,这可能特别有用。

ap_ovld:顾名思义,输出的有效信号。

Interfacce:

Latency and Interval

Latency:从输入到输出经过的时钟周期数。
Throughput/Interval:循环流水线操作中的一个重要术语是 Initiation Interval(II),它是相邻两次循环迭代开始时刻之间相隔的时钟周期数。

由下图一目了然:

接下来我们通过axi_lite来控制计数器的计数周期,

counter_base_axi_lite.cpp

#include "counter_base_axi_lite.h"

/**
 * Counter whose period can be reprogrammed at run time.
 *
 * The counter value, the LED status and the period are static, so they
 * persist across calls. The period is 5 until a new one is latched.
 *
 * @param enable_count  0: latch range_counter as the new period; the counter
 *                      value is left untouched. 1: advance the counter by one.
 * @param range_counter Period to latch while enable_count is 0.
 * @param output_value  Written with the current counter value on every call.
 * @param led_output    Written with the LED status on every call; the status
 *                      is inverted each time the counter reaches the period
 *                      and wraps back to 0.
 */
void counter_base_axi_lite(bit enable_count, volatile unsigned int range_counter,
							volatile int *output_value, volatile bit *led_output)
{

// range_counter and enable_count are placed in the "commands" s_axilite
// bundle; led_output and output_value are plain ap_none ports.
// NOTE(review): the two control ports carry both an ap_none and an s_axilite
// pragma -- presumably ap_none selects the port-level protocol inside the
// AXI4-Lite bundle; confirm against the HLS synthesis log.
#pragma HLS INTERFACE ap_none port=range_counter
#pragma HLS INTERFACE ap_none port=enable_count
#pragma HLS INTERFACE s_axilite register port=range_counter bundle=commands
#pragma HLS INTERFACE s_axilite register port=enable_count bundle=commands
#pragma HLS INTERFACE ap_none port=led_output
#pragma HLS INTERFACE ap_none port=output_value

	static unsigned int counter_value = 0;
	static bit led_status = 0;
	static unsigned int BASE_COUNT = 5;

	if (enable_count == 0)
	{
		// Counting is paused: take over the requested period.
		BASE_COUNT = range_counter;
	}
	else
	{
		counter_value++;
		// ">=" rather than "==" so that a period smaller than the current
		// counter value still wraps on the next enabled call.
		if (counter_value >= BASE_COUNT)
		{
			counter_value = 0;
			led_status = !led_status;
		}
	}

	*led_output = led_status;
	*output_value = counter_value;

	#ifndef __SYNTHESIS__
	// Simulation-only trace. Both values are unsigned int, hence %u.
	printf("Base count = %u\tCounter value = %u\n", BASE_COUNT, counter_value);
	#endif
}

test_bench.cpp

#include "counter_base_axi_lite.h"
/**
 * C-simulation testbench for counter_base_axi_lite.
 *
 * Phase 1 calls the counter with ENABLE = 0 so that RANGE is loaded as the
 * period. Phase 2 calls it with ENABLE = 1 and checks, after the i-th call,
 * that the reported counter value equals i % RANGE.
 *
 * @return 0 when every checked value matched, -1 otherwise.
 */
int main()
{
	int testbench_error = 0;
	int count_value = 0;
	bit led_output_variable = 0;
	unsigned int RANGE = 10;
	bit ENABLE = 0;

	// Phase 1: counter disabled, only the period is programmed.
	for (int i = 1; i < 20; i++)
	{
		counter_base_axi_lite(ENABLE, RANGE, &count_value, &led_output_variable);
		// RANGE is unsigned int, hence %u.
		printf("ENABLE = %d\tRANGE = %u\tConter value = %d\t Led = %d\n", (int)ENABLE, RANGE, count_value, (int)led_output_variable);
	}

	// Phase 2: counter enabled, cross-check the output port count_value.
	ENABLE = 1;
	for (int i = 1; i < 20; i++)
	{
		counter_base_axi_lite(ENABLE, RANGE, &count_value, &led_output_variable);
		printf("ENABLE = %d\tRANGE = %u\tConter value = %d\t Led = %d\n", (int)ENABLE, RANGE, count_value, (int)led_output_variable);
		// Compare in unsigned arithmetic explicitly instead of relying on
		// the implicit signed-to-unsigned conversion.
		if ((unsigned int)count_value != (unsigned int)i % RANGE)
		{
			testbench_error++;
		}
	}

	if (testbench_error != 0)
	{
		printf(">>> C simulation: FAILED <<<\n");
		return -1;
	}

	printf(">>> C simulation: OK <<<\n");
	return 0;
}

在 Vivado 中导入该 IP

导入SDK:

在这个文件夹下,有这个ip的驱动代码:

helloworld.c
 

#include <stdio.h>
#include "platform.h“
#include "xcounter_base_axi_lite.h“
#include <unistd.h>
int main()
{
init_platform();
XCounter_base_axi_lite InstancePtr;
u16 DeviceId = 0;
XCounter_base_axi_lite_LookupConfig(DeviceId);
int error = XCounter_base_axi_lite_Initialize(&InstancePtr, DeviceId);
if (error == 0)
printf("Custom AXI counter: initialization OK :-)\n");
else
printf("Custom AXI counter: initialization failed :-(\n");
useconds_t sleeping_time_us = 5000000;
unsigned int RANGE = 0x008FFFFF;
printf("Disabling counter...\n");
XCounter_base_axi_lite_Set_enable_count_V(&InstancePtr, 0);
printf("Setting range...\n");
XCounter_base_axi_lite_Set_range_counter(&InstancePtr, RANGE);
printf("Enabling counter with base %d...\n", RANGE);
XCounter_base_axi_lite_Set_enable_count_V(&InstancePtr, 1);
printf("Counter up and running\n");
printf("Sleeping for %2.1f seconds\n", sleeping_time_us/1000000.0);
usleep(sleeping_time_us);
RANGE = 0x001FFFFF;
printf("Disabling counter...\n");
XCounter_base_axi_lite_Set_enable_count_V(&InstancePtr, 0);
printf("Setting range...\n");
XCounter_base_axi_lite_Set_range_counter(&InstancePtr, RANGE);
printf("Enabling counter with base %d...\n", RANGE);
XCounter_base_axi_lite_Set_enable_count_V(&InstancePtr, 1);
printf("Counter up and running\n");
cleanup_platform();
return 0;
}

优化 in Vivado HLS

以一个普通的数组相加为例:

/**
 * Add up the LENGTH elements of input_array.
 *
 * @param input_array   Values to accumulate; each element is read once.
 * @param average_value Written once with the accumulated sum.
 *
 * NOTE(review): despite the name of the output, the sum is not divided by
 * LENGTH -- confirm whether an average was intended.
 */
void media(volatile int input_array[LENGTH], volatile int *average_value)
{
    int acc = 0;

    for (int idx = 0; idx < LENGTH; ++idx) {
        acc += input_array[idx];
    }

    *average_value = acc;
}

由图看出需要经过19个周期才输出一个结果;

接下来我们使用一个优化指令,#pragma HLS PIPELINE II=1

资源:

接口:

这里的输入数组被综合成一个 memory 接口,所以模块向外输出使能信号和地址信号,并从外部读入数据。

接下来我们使用另一个优化指令:#pragma HLS UNROLL(循环展开)

综合的资源消耗,明显用面积换速度了。

最后给出一个VGA的实例代码,由hls生成:

vga.h

#include "ap_int.h“
#define BIT_OUT 4
// VGA resolution (640x480) @ 60 Hz
// sync pulse: negative logic
#define WIDTH 640
#define HEIGHT 480
// VERTICAL timing (rows)
#define VERTICAL_FRONT_PORCH 10
#define VERTICAL_SYNC_PULSE 2
#define VERTICAL_BACK_PORCH 33
// HORIZONTAL timing (clocks @ 25.175 MHz)
#define HORIZONTAL_FRONT_PORCH 16
#define HORIZONTAL_SYNC_PULSE 96
#define HORIZONTAL_BACK_PORCH 48
void vga_base(volatile ap_uint<BIT_OUT> *R, volatile ap_uint<BIT_OUT> *G,
volatile ap_uint<BIT_OUT> *B, volatile ap_uint<1> *V_SYNC,
volatile ap_uint<1> *H_SYNC);

vga.cpp

#include "vga.h“
void vga_base(volatile ap_uint<BIT_OUT> *R, volatile ap_uint<BIT_OUT> *G,
                volatile ap_uint<BIT_OUT> *B, volatile ap_uint<1> *V_SYNC,
                volatile ap_uint<1> *H_SYNC)
{
#pragma HLS INTERFACE ap_ctrl_none port=return
#pragma HLS INTERFACE ap_none port=R
#pragma HLS INTERFACE ap_none port=B
#pragma HLS INTERFACE ap_none port=G
#pragma HLS INTERFACE ap_none port=V_SYNC
#pragma HLS INTERFACE ap_none port=H_SYNC

const int TOTAL_V = VERTICAL_SYNC_PULSE + VERTICAL_BACK_PORCH + HEIGHT                   +VERTICAL_FRONT_PORCH;
const int TOTAL_H = HORIZONTAL_SYNC_PULSE + HORIZONTAL_BACK_PORCH + WIDTH +
HORIZONTAL_FRONT_PORCH;
int x,y;
ap_uint<1> V_SYNC_temp = 1;
ap_uint<1> H_SYNC_temp = 1 ;
for (y=0; y<TOTAL_V; y++)
for (x=0; x<TOTAL_H; x++)
{
#pragma HLS PIPELINE II=1
if (y<VERTICAL_SYNC_PULSE) V_SYNC_temp=0;
else V_SYNC_temp=1;
if (x<HORIZONTAL_SYNC_PULSE) H_SYNC_temp=0;
else H_SYNC_temp=1;
if ((x>= HORIZONTAL_SYNC_PULSE + HORIZONTAL_BACK_PORCH) &&
    (x< HORIZONTAL_SYNC_PULSE + HORIZONTAL_BACK_PORCH + WIDTH) &&
    (y>= VERTICAL_SYNC_PULSE + VERTICAL_BACK_PORCH) &&
    (y< VERTICAL_SYNC_PULSE + VERTICAL_BACK_PORCH + HEIGHT))
    {
        // DISPLAY IMAGE
        *R=0;
        *G=15;
        *B=0;
        *V_SYNC = V_SYNC_temp;
        *H_SYNC = H_SYNC_temp;
    } else
    {
        *R=0;
        *G=0;
        *B=0;
        *V_SYNC = V_SYNC_temp;
        *H_SYNC = H_SYNC_temp;
    }
}
}

最后在vivado调用该ip

猜你喜欢

转载自blog.csdn.net/Laplace666/article/details/83792675