Single input, single output channel
# Manual 2-D convolution: single input channel, single output channel.
def my_conv2d(input_feature_map, kernel, stride=1, padding=0, bias=0):
    """Convolve a 2-D feature map with a 2-D kernel (cross-correlation).

    Args:
        input_feature_map: (H, W) float tensor.
        kernel: (kH, kW) float tensor.
        stride: step between window positions (same for both dims).
        padding: zero padding added to each side of both spatial dims.
        bias: scalar added to every output element.

    Returns:
        (out_H, out_W) tensor matching F.conv2d for the same arguments.
    """
    if padding > 0:
        # F.pad pads from the LAST dimension backwards, two values (before,
        # after) per dimension: (left, right, top, bottom) here.
        input_feature_map = F.pad(input_feature_map, (padding, padding, padding, padding))
    print(input_feature_map)
    input_h, input_w = input_feature_map.shape  # already includes the padding
    kernel_h, kernel_w = kernel.shape
    # Output size: floor((H + 2P - K) / S) + 1.  input_h / input_w already
    # contain the 2*padding, so padding must NOT be added again here — the
    # original formula added it once more, overestimating the size whenever
    # the floor did not happen to mask the error (e.g. H=6, K=3, S=2, P=1).
    output_h = math.floor((input_h - kernel_h) / stride) + 1
    output_w = math.floor((input_w - kernel_w) / stride) + 1
    # For stride 1, "same" output size requires padding = (kernel - 1) / 2.
    print(f"output_h:{output_h}, output_w:{output_w}")
    output = torch.zeros(output_h, output_w)
    for i in range(0, input_h - kernel_h + 1, stride):
        for j in range(0, input_w - kernel_w + 1, stride):
            # Window of the (padded) input currently covered by the kernel.
            region = input_feature_map[i:i + kernel_h, j:j + kernel_w]
            output[i // stride, j // stride] = torch.multiply(region, kernel).sum() + bias
    return output
Compare with `nn.functional.conv2d`:
# --- Compare the manual implementation against the PyTorch API (SISO case) ---
input_feature_map = torch.randint(0, 5, (5, 5)).float()  # random 5x5 integer-valued map
kernel = torch.ones(3, 3)
stride = 2
padding = 0
bias = torch.randn(1)  # one bias per output channel; a single channel here

# Manual implementation (implicitly one input and one output channel).
output_feature_map = my_conv2d(input_feature_map, kernel, stride, padding, bias)
print(output_feature_map)

# The functional API expects (B, C, H, W) input and (O, I, kH, kW) weights,
# so add singleton batch/channel dims, then strip them again for printing.
output_api = F.conv2d(input_feature_map[None, None],
                      kernel[None, None], stride=stride, padding=padding, bias=bias)
print(output_api[0, 0])
Result:
tensor([[2., 1., 1., 3., 1.],
[4., 2., 2., 2., 2.],
[1., 4., 1., 3., 1.],
[3., 2., 2., 2., 4.],
[3., 4., 2., 1., 2.]])
output_h:2, output_w:2
tensor([[19.1882, 17.1882],
[23.1882, 19.1882]])
tensor([[19.1882, 17.1882],
[23.1882, 19.1882]])
Multiple input, multiple output channels (MIMO)
# Multiple input channels, multiple output channels
import math
# Manual 2-D convolution over batched multi-channel input: (batch_size, C, H, W).
def my_conv2d(input_feature_map, kernel, stride=1, padding=0, bias=0):
    """Convolve a batched multi-channel feature map with a 4-D kernel.

    Args:
        input_feature_map: (B, in_channels, H, W) float tensor.
        kernel: (out_channels, in_channels, kH, kW) float tensor.
        stride: step between window positions (same for H and W).
        padding: zero padding added to each side of H and W.
        bias: scalar (broadcast to all output channels) or an
            (out_channels,) tensor, as in F.conv2d.

    Returns:
        (B, out_channels, out_H, out_W) tensor matching F.conv2d.
    """
    if padding > 0:
        # F.pad pads from the LAST dimension backwards, two values (before,
        # after) per dimension — this 4-tuple therefore pads only W and H.
        input_feature_map = F.pad(input_feature_map, (padding, padding, padding, padding))
    print(input_feature_map.shape)
    B, _, input_h, input_w = input_feature_map.shape
    out_channels, in_channels, kernel_h, kernel_w = kernel.shape
    # Accept both the scalar default and a per-channel bias tensor; the
    # original indexed bias[out_c] unconditionally, which crashed when the
    # default scalar bias=0 was used.
    if not torch.is_tensor(bias):
        bias = torch.full((out_channels,), float(bias))
    # Output size: floor((H + 2P - K) / S) + 1.  input_h / input_w already
    # include the 2*padding, so padding must not be added again here — the
    # original formula double-counted it, overestimating the size whenever
    # the floor did not happen to mask the error.
    output_h = math.floor((input_h - kernel_h) / stride) + 1
    output_w = math.floor((input_w - kernel_w) / stride) + 1
    print(f"output_h:{output_h}, output_w:{output_w}")
    output = torch.zeros(B, out_channels, output_h, output_w)
    for b in range(B):  # original iterated range(batch_size), a global — fixed
        for out_c in range(out_channels):
            # Each output channel accumulates correlations over ALL input channels.
            for in_c in range(in_channels):
                for i in range(0, input_h - kernel_h + 1, stride):
                    for j in range(0, input_w - kernel_w + 1, stride):
                        # Window of the (padded) input under the kernel.
                        region = input_feature_map[b, in_c, i:i + kernel_h, j:j + kernel_w]
                        output[b, out_c, i // stride, j // stride] += \
                            torch.multiply(region, kernel[out_c, in_c]).sum()
            # One bias scalar per output channel, added to every pixel of that
            # channel's output feature map.
            output[b, out_c] += bias[out_c]
    return output
Compare with `nn.functional.conv2d`:
# --- Compare the manual implementation against the PyTorch API (MIMO case) ---
# Problem dimensions (aliases kept so either spelling can be used below).
B = batch_size = 2
C = in_c = in_channels = 3
H = height = 5
W = weight = 5
input_feature_map = torch.randint(0, 5, (B, C, H, W)).float()

K = kernel_size = 3
out_c = out_channels = 2
kernel = torch.ones(out_c, in_c, K, K)  # (out_channels, in_channels, kH, kW)
stride = 2
padding = 1
bias = torch.randn(out_c)  # one bias scalar per output channel
# print(bias)

# Manual implementation.
output_feature_map = my_conv2d(input_feature_map, kernel, stride, padding, bias)
print(output_feature_map)

# PyTorch functional API.
api_output_feature_map = F.conv2d(input_feature_map, kernel,
                                  stride=stride, padding=padding, bias=bias)
print(api_output_feature_map)

# Equivalent module-based API (note: the kernel shape is independent of batch size):
# conv_layer = nn.Conv2d(in_channels, out_channels, kernel_size, bias=False)
# output_fm = conv_layer(input_feature_map)
# print(output_fm.shape)
# print(conv_layer.weight.shape)
# print(conv_layer.weight.shape) # kernel 的大小和 batch_size 无关
Result:
torch.Size([2, 3, 7, 7])
output_h:3, output_w:3
tensor([[[[13.8990, 25.8990, 20.8990],
[27.8990, 46.8990, 28.8990],
[23.8990, 33.8990, 17.8990]],
[[11.8909, 23.8909, 18.8909],
[25.8909, 44.8909, 26.8909],
[21.8909, 31.8909, 15.8909]]],
[[[21.8990, 32.8990, 21.8990],
[43.8990, 68.8990, 43.8990],
[23.8990, 42.8990, 31.8990]],
[[19.8909, 30.8909, 19.8909],
[41.8909, 66.8909, 41.8909],
[21.8909, 40.8909, 29.8909]]]])
tensor([[[[13.8990, 25.8990, 20.8990],
[27.8990, 46.8990, 28.8990],
[23.8990, 33.8990, 17.8990]],
[[11.8909, 23.8909, 18.8909],
[25.8909, 44.8909, 26.8909],
[21.8909, 31.8909, 15.8909]]],
[[[21.8990, 32.8990, 21.8990],
[43.8990, 68.8990, 43.8990],
[23.8990, 42.8990, 31.8990]],
[[19.8909, 30.8909, 19.8909],
[41.8909, 66.8909, 41.8909],
[21.8909, 40.8909, 29.8909]]]])
In conclusion
As the results above show, the manual implementation relies on explicit loops and matches the PyTorch API exactly. Two details deserve attention: how the output feature-map shape is computed from the input size, kernel size, stride, and padding; and the dimensionality of the bias — the bias has one scalar per output channel, and that scalar is added to every pixel of the corresponding output channel's feature map.