乘法器的verilog实现(并行、移位相加、查找表)
并行乘法器,也就是用乘法运算符实现,下面的代码实现8bit无符号数的乘法。
代码:
// Pipelined unsigned multiplier built on the `*` operator.
//
// Parameters:
//   DATA_SIZE - operand width in bits (default 8); the product is 2*DATA_SIZE bits.
// Ports:
//   rst_n - synchronous, active-low reset
//   clk   - clock; every register updates on its rising edge
//   a, b  - unsigned operands
//   p     - registered product; reflects the a/b values sampled two rising
//           clock edges earlier (one input stage + one output stage)
module mult_parrell(rst_n,
                    clk,
                    a,
                    b,
                    p
                    );
    parameter DATA_SIZE = 8;

    input                        rst_n;
    input                        clk;
    input  [DATA_SIZE - 1 : 0]   a;
    input  [DATA_SIZE - 1 : 0]   b;
    output [2*DATA_SIZE - 1 : 0] p;

    reg  [DATA_SIZE - 1 : 0]   a_r;
    reg  [DATA_SIZE - 1 : 0]   b_r;
    wire [2*DATA_SIZE - 1 : 0] p_tmp;
    reg  [2*DATA_SIZE - 1 : 0] p;

    // Input register stage.
    always @(posedge clk)
        if (!rst_n) begin
            a_r <= {DATA_SIZE{1'b0}};
            b_r <= {DATA_SIZE{1'b0}};
        end
        else begin
            a_r <= a;
            b_r <= b;
        end

    // Multiply the *registered* operands so the input stage is actually part
    // of the datapath. The operands are unsigned here; a signed multiply
    // requires the operands to be declared `signed`.
    assign p_tmp = a_r * b_r;

    // Output register stage.
    always @(posedge clk)
        if (!rst_n) begin
            p <= {(2*DATA_SIZE){1'b0}};
        end
        else begin
            p <= p_tmp;
        end
endmodule
移位相加乘法器
下面的代码可实现8bit有符号数的相乘,注意符号扩展以及MSB位的处理:
//输入数据取反
assign a_r_inv = ~a_r + 1;
assign a_shift0 = b_r[0] ? {{8{a_r[7]}},a_r} : 0;
assign a_shift1 = b_r[1] ? {{7{a_r[7]}},a_r,1'b0} : 0;
assign a_shift2 = b_r[2] ? {{6{a_r[7]}},a_r,2'b0} : 0;
assign a_shift3 = b_r[3] ? {{5{a_r[7]}},a_r,3'b0} : 0;
assign a_shift4 = b_r[4] ? {{4{a_r[7]}},a_r,4'b0} : 0;
assign a_shift5 = b_r[5] ? {{3{a_r[7]}},a_r,5'b0} : 0;
assign a_shift6 = b_r[6] ? {{2{a_r[7]}},a_r,6'b0} : 0;
assign a_shift7 = b_r[7] ? {{1{a_r_inv[7]}},a_r_inv,7'b0} : 0; //乘数最高位是符号位(权值为-2^7),因此这一项加的是被乘数的补码(取反加一)
代码:
// Signed (two's-complement) shift-and-add multiplier.
//
// Each bit of the multiplier b selects a sign-extended, left-shifted copy of
// the multiplicand a; the selected partial products are summed. The MSB of b
// is the sign bit (weight -2^7), so its partial product uses -a instead of a.
//
// The partial-product wiring below is written out for 8-bit operands, so
// DATA_SIZE is expected to stay at its default of 8.
//
// Ports:
//   rst_n - synchronous, active-low reset
//   clk   - clock; every register updates on its rising edge
//   a, b  - signed operands
//   p     - registered product, low 2*DATA_SIZE-1 bits of the signed result,
//           two rising clock edges after a/b are sampled.
//           The only product that does not fit in 15 signed bits is
//           (-128)*(-128) = +16384, which reads back as -16384.
module mult_shift_add(rst_n,
                      clk,
                      a,
                      b,
                      p
                      );
    parameter DATA_SIZE = 8;

    input                        rst_n;
    input                        clk;
    input  [DATA_SIZE - 1 : 0]   a;
    input  [DATA_SIZE - 1 : 0]   b;
    output [2*DATA_SIZE - 2 : 0] p;

    // Input register stage.
    reg [DATA_SIZE - 1 : 0] a_r;
    reg [DATA_SIZE - 1 : 0] b_r;

    // Two's-complement negation of a_r, kept one bit wider than a_r so that
    // negating the most negative value (-128 -> +128) does not overflow.
    wire [DATA_SIZE : 0] a_r_inv;

    // Partial products, one per multiplier bit.
    wire [2*DATA_SIZE - 1 : 0] a_shift0;
    wire [2*DATA_SIZE - 1 : 0] a_shift1;
    wire [2*DATA_SIZE - 1 : 0] a_shift2;
    wire [2*DATA_SIZE - 1 : 0] a_shift3;
    wire [2*DATA_SIZE - 1 : 0] a_shift4;
    wire [2*DATA_SIZE - 1 : 0] a_shift5;
    wire [2*DATA_SIZE - 1 : 0] a_shift6;
    wire [2*DATA_SIZE - 1 : 0] a_shift7;

    // Combinational sum and output register. The reg range matches the
    // output port range, as the language requires for a re-declared port.
    wire [2*DATA_SIZE - 1 : 0] p_tmp;
    reg  [2*DATA_SIZE - 2 : 0] p;

    // Input register stage.
    always @(posedge clk)
        if (!rst_n) begin
            a_r <= {DATA_SIZE{1'b0}};
            b_r <= {DATA_SIZE{1'b0}};
        end
        else begin
            a_r <= a;
            b_r <= b;
        end

    // -a_r computed on the sign-extended (DATA_SIZE+1)-bit value.
    assign a_r_inv = ~{a_r[DATA_SIZE - 1], a_r} + 1'b1;

    // Partial products: sign-extend across all upper bits, not just one.
    assign a_shift0 = b_r[0] ? {{8{a_r[7]}},a_r} : 0;
    assign a_shift1 = b_r[1] ? {{7{a_r[7]}},a_r,1'b0} : 0;
    assign a_shift2 = b_r[2] ? {{6{a_r[7]}},a_r,2'b0} : 0;
    assign a_shift3 = b_r[3] ? {{5{a_r[7]}},a_r,3'b0} : 0;
    assign a_shift4 = b_r[4] ? {{4{a_r[7]}},a_r,4'b0} : 0;
    assign a_shift5 = b_r[5] ? {{3{a_r[7]}},a_r,5'b0} : 0;
    assign a_shift6 = b_r[6] ? {{2{a_r[7]}},a_r,6'b0} : 0;
    // Sign bit of the multiplier: contributes -(a << 7).
    assign a_shift7 = b_r[7] ? {a_r_inv,7'b0} : 0;

    assign p_tmp = a_shift0 + a_shift1 + a_shift2 + a_shift3 + a_shift4
                 + a_shift5 + a_shift6 + a_shift7;

    // Output register stage (keeps the low 2*DATA_SIZE-1 bits of the sum).
    always @(posedge clk)
        if (!rst_n) begin
            p <= {(2*DATA_SIZE - 1){1'b0}};
        end
        else begin
            p <= p_tmp[2*DATA_SIZE - 2 : 0];
        end
endmodule
testbench:
// Self-checking testbench for mult_shift_add.
//
// After reset, a counts up and b counts down by one each clock, so over 256
// cycles the operands sweep (k, -k) for every 8-bit k. The DUT output is
// compared against a reference signed product delayed by the DUT's two
// register stages. The run ends on its own after NUM_CYCLES clocks.
module mult_shift_add_tb;
    // Inputs
    reg rst_n;
    reg clk;
    reg [7:0] a;
    reg [7:0] b;
    // Outputs
    wire [14:0] p;

    // Instantiate the Unit Under Test (UUT)
    mult_shift_add uut (
        .rst_n(rst_n),
        .clk(clk),
        .a(a),
        .b(b),
        .p(p)
    );

    parameter CLK_PERIOD = 10;
    parameter NUM_CYCLES = 300;   // enough to wrap the 8-bit stimulus counters

    // Stimulus delayed by two clocks, to line up with the DUT output.
    reg [7:0] a_d1, a_d2;
    reg [7:0] b_d1, b_d2;
    // Reference product; operands are sign-extended to 16 bits before the multiply.
    wire signed [15:0] p_expected = $signed(a_d2) * $signed(b_d2);
    integer errors;

    initial begin
        rst_n  = 0;
        clk    = 0;
        errors = 0;
        #100;
        rst_n = 1;
        repeat (NUM_CYCLES) @(posedge clk);
        if (errors == 0)
            $display("PASS: %0d cycles checked", NUM_CYCLES);
        else
            $display("FAIL: %0d mismatches", errors);
        $finish;
    end

    always #(CLK_PERIOD/2) clk = ~clk;

    // Stimulus. Nonblocking assignments keep the DUT, which samples a/b on
    // the same clock edge, from racing against these updates.
    always @(posedge clk)
        if (!rst_n) begin
            a    <= 8'd0;
            b    <= 8'd0;
            a_d1 <= 8'd0;
            a_d2 <= 8'd0;
            b_d1 <= 8'd0;
            b_d2 <= 8'd0;
        end
        else begin
            a    <= a + 1;
            b    <= b - 1;
            a_d1 <= a;
            a_d2 <= a_d1;
            b_d1 <= b;
            b_d2 <= b_d1;
        end

    // Checker: compare only the 15 bits the DUT exposes.
    always @(posedge clk)
        if (rst_n && (p !== p_expected[14:0])) begin
            $display("MISMATCH t=%0t: a=%0d b=%0d p=%h expected=%h",
                     $time, $signed(a_d2), $signed(b_d2), p, p_expected[14:0]);
            errors = errors + 1;
        end
endmodule
ISIM仿真结果:
移位相加乘法器树:
将assign p_tmp = a_shift0 + a_shift1 + a_shift2 + a_shift3 + a_shift4 + a_shift5 + a_shift6 + a_shift7;
换为:
assign sum_01 = a_shift0 + a_shift1;
assign sum_23 = a_shift2 + a_shift3;
assign sum_45 = a_shift4 + a_shift5;
assign sum_67 = a_shift6 + a_shift7;
assign sum_0123 = sum_01 + sum_23;
assign sum_4567 = sum_45 + sum_67;
assign p_tmp = sum_0123 + sum_4567;
就成为乘法器树。
原理是把7级串行的加法链改成3级的树形结构,缩短组合逻辑的关键路径,从而提高电路的运行频率。注意sum_01、sum_23等中间信号需要先声明为16位的wire;如果要真正“切断”关键路径,还需要在各级加法之间插入寄存器。
LUT乘法
下面的代码利用2bit的LUT实现4bit无符号数的乘法。
代码:
// 4x4 unsigned multiplier assembled from four 2x2 LUT multipliers.
//
// With a = {aH, aL} and b = {bH, bL} split into 2-bit halves:
//   a * b = aL*bL + ((aL*bH + aH*bL) << 2) + (aH*bH << 4)
//
// Ports:
//   rst_n - synchronous, active-low reset
//   clk   - clock; every register updates on its rising edge
//   a, b  - unsigned 4-bit operands
//   p     - registered 8-bit product
module mult_lut #(
    parameter DATA_SIZE = 4
) (
    input                            rst_n,
    input                            clk,
    input      [DATA_SIZE - 1 : 0]   a,
    input      [DATA_SIZE - 1 : 0]   b,
    output reg [2*DATA_SIZE - 1 : 0] p
);

    // Registered copies of the operands.
    reg [DATA_SIZE - 1 : 0] a_r, b_r;

    // 2x2 partial products; the suffix is {half of a, half of b}, 0 = low, 1 = high.
    wire [DATA_SIZE - 1 : 0] p_tmp00, p_tmp01, p_tmp10, p_tmp11;

    // Combined product before the output register.
    wire [2*DATA_SIZE - 1 : 0] p_tmp;

    // Input register stage.
    always @(posedge clk) begin
        if (!rst_n) begin
            a_r <= 4'd0;
            b_r <= 4'd0;
        end
        else begin
            a_r <= a;
            b_r <= b;
        end
    end

    // aL * bL
    mult_lut_2bit u0_mult_lut_2bit (
        .rst_n (rst_n),
        .clk   (clk),
        .a     (a_r[1:0]),
        .b     (b_r[1:0]),
        .p     (p_tmp00)
    );

    // aL * bH
    mult_lut_2bit u1_mult_lut_2bit (
        .rst_n (rst_n),
        .clk   (clk),
        .a     (a_r[1:0]),
        .b     (b_r[3:2]),
        .p     (p_tmp01)
    );

    // aH * bL
    mult_lut_2bit u2_mult_lut_2bit (
        .rst_n (rst_n),
        .clk   (clk),
        .a     (a_r[3:2]),
        .b     (b_r[1:0]),
        .p     (p_tmp10)
    );

    // aH * bH
    mult_lut_2bit u3_mult_lut_2bit (
        .rst_n (rst_n),
        .clk   (clk),
        .a     (a_r[3:2]),
        .b     (b_r[3:2]),
        .p     (p_tmp11)
    );

    // Weight each partial product by appending zero bits. An unparenthesised
    // form such as `p_tmp00 + p_tmp01<<2 + ...` does not compute this sum,
    // because `+` binds tighter than `<<`.
    assign p_tmp = p_tmp00 + {p_tmp01,2'b00} + {p_tmp10,2'b00} + {p_tmp11,4'b00};

    // Output register stage.
    always @(posedge clk) begin
        if (!rst_n)
            p <= 8'd0;
        else
            p <= p_tmp;
    end

endmodule
2bitLUT乘法器:
// 2x2 unsigned multiplier implemented as a 16-entry lookup table.
//
// Ports:
//   rst_n - synchronous, active-low reset
//   clk   - clock; every register updates on its rising edge
//   a, b  - unsigned 2-bit operands
//   p     - registered 4-bit product, two rising clock edges after a/b are
//           sampled (one input stage + one output stage)
//
// The lookup table is written out for 2-bit operands, so DATA_SIZE is
// expected to stay at its default of 2.
module mult_lut_2bit(rst_n,
                     clk,
                     a,
                     b,
                     p
                     );
    parameter DATA_SIZE = 2;

    input                        rst_n;
    input                        clk;
    input  [DATA_SIZE - 1 : 0]   a;
    input  [DATA_SIZE - 1 : 0]   b;
    output [2*DATA_SIZE - 1 : 0] p;

    // Input register stage.
    reg [DATA_SIZE - 1 : 0] a_r;
    reg [DATA_SIZE - 1 : 0] b_r;

    // Table output (combinational) and output register.
    reg [2*DATA_SIZE - 1 : 0] p_tmp;
    reg [2*DATA_SIZE - 1 : 0] p;

    // Input register stage; reset values are sized to the registers.
    always @(posedge clk)
        if (!rst_n) begin
            a_r <= {DATA_SIZE{1'b0}};
            b_r <= {DATA_SIZE{1'b0}};
        end
        else begin
            a_r <= a;
            b_r <= b;
        end

    // Product table indexed by {a_r, b_r}.
    always @(*) begin
        case ({a_r, b_r})
            4'b0000 : p_tmp = 4'b0000;   // 0 * 0
            4'b0001 : p_tmp = 4'b0000;   // 0 * 1
            4'b0010 : p_tmp = 4'b0000;   // 0 * 2
            4'b0011 : p_tmp = 4'b0000;   // 0 * 3
            4'b0100 : p_tmp = 4'b0000;   // 1 * 0
            4'b0101 : p_tmp = 4'b0001;   // 1 * 1
            4'b0110 : p_tmp = 4'b0010;   // 1 * 2
            4'b0111 : p_tmp = 4'b0011;   // 1 * 3
            4'b1000 : p_tmp = 4'b0000;   // 2 * 0
            4'b1001 : p_tmp = 4'b0010;   // 2 * 1
            4'b1010 : p_tmp = 4'b0100;   // 2 * 2
            4'b1011 : p_tmp = 4'b0110;   // 2 * 3
            4'b1100 : p_tmp = 4'b0000;   // 3 * 0
            4'b1101 : p_tmp = 4'b0011;   // 3 * 1
            4'b1110 : p_tmp = 4'b0110;   // 3 * 2
            4'b1111 : p_tmp = 4'b1001;   // 3 * 3
            // Only reachable when the selector contains X/Z: propagate X
            // instead of silently holding the previous value.
            default : p_tmp = 4'bxxxx;
        endcase
    end

    // Output register stage.
    always @(posedge clk)
        if (!rst_n) begin
            p <= {(2*DATA_SIZE){1'b0}};
        end
        else begin
            p <= p_tmp;
        end
endmodule
仿真结果与并行乘法一致。
上面的LUT乘法器求p_tmp的组合逻辑时延比较大,可以通过加入寄存器的方法进行拆分,将
assign p_tmp = p_tmp00 + {p_tmp01,2'b00} + {p_tmp10,2'b00} + {p_tmp11,4'b00};
替换为:
always@(posedge clk)
if(!rst_n)
begin
sum01 <= 8'd0;
sum23 <= 8'd0;
end
else
begin
sum01 <= p_tmp00 + {p_tmp01,2'b00};
sum23 <= {p_tmp10,2'b00} + {p_tmp11,4'b00};
end
assign p_tmp = sum01 + sum23;
这样就分割了组合逻辑,切断关键路径,从而提高电路的运行速度。虽然加入寄存器,对中间结果缓存,使得乘法器的输出对于输入的延时增加,但是提高了电路的整体运行频率,这是更重要的。
如下:
// 4x4 unsigned multiplier assembled from four 2x2 LUT multipliers, with an
// extra register stage between the two levels of partial-product addition.
//
// With a = {aH, aL} and b = {bH, bL} split into 2-bit halves:
//   sum01 = aL*bL + (aL*bH << 2)
//   sum23 = (aH*bL << 2) + (aH*bH << 4)
//   a * b = sum01 + sum23
//
// Ports:
//   rst_n - synchronous, active-low reset
//   clk   - clock; every register updates on its rising edge
//   a, b  - unsigned 4-bit operands
//   p     - registered 8-bit product
module mult_lut_reg #(
    parameter DATA_SIZE = 4
) (
    input                            rst_n,
    input                            clk,
    input      [DATA_SIZE - 1 : 0]   a,
    input      [DATA_SIZE - 1 : 0]   b,
    output reg [2*DATA_SIZE - 1 : 0] p
);

    // Registered copies of the operands.
    reg [DATA_SIZE - 1 : 0] a_r, b_r;

    // 2x2 partial products; the suffix is {half of a, half of b}, 0 = low, 1 = high.
    wire [DATA_SIZE - 1 : 0] p_tmp00, p_tmp01, p_tmp10, p_tmp11;

    // Registered first-level sums.
    reg [2*DATA_SIZE - 1 : 0] sum01, sum23;

    // Combined product before the output register.
    wire [2*DATA_SIZE - 1 : 0] p_tmp;

    // Input register stage.
    always @(posedge clk) begin
        if (!rst_n) begin
            a_r <= 4'd0;
            b_r <= 4'd0;
        end
        else begin
            a_r <= a;
            b_r <= b;
        end
    end

    // aL * bL
    mult_lut_2bit u0_mult_lut_2bit (
        .rst_n (rst_n),
        .clk   (clk),
        .a     (a_r[1:0]),
        .b     (b_r[1:0]),
        .p     (p_tmp00)
    );

    // aL * bH
    mult_lut_2bit u1_mult_lut_2bit (
        .rst_n (rst_n),
        .clk   (clk),
        .a     (a_r[1:0]),
        .b     (b_r[3:2]),
        .p     (p_tmp01)
    );

    // aH * bL
    mult_lut_2bit u2_mult_lut_2bit (
        .rst_n (rst_n),
        .clk   (clk),
        .a     (a_r[3:2]),
        .b     (b_r[1:0]),
        .p     (p_tmp10)
    );

    // aH * bH
    mult_lut_2bit u3_mult_lut_2bit (
        .rst_n (rst_n),
        .clk   (clk),
        .a     (a_r[3:2]),
        .b     (b_r[3:2]),
        .p     (p_tmp11)
    );

    // First adder level, registered so the second-level add starts from flops.
    always @(posedge clk) begin
        if (!rst_n) begin
            sum01 <= 8'd0;
            sum23 <= 8'd0;
        end
        else begin
            sum01 <= p_tmp00 + {p_tmp01,2'b00};
            sum23 <= {p_tmp10,2'b00} + {p_tmp11,4'b00};
        end
    end

    // Second adder level.
    assign p_tmp = sum01 + sum23;

    // Output register stage.
    always @(posedge clk) begin
        if (!rst_n)
            p <= 8'd0;
        else
            p <= p_tmp;
    end

endmodule