[Deep Learning] Manual implementation of two-dimensional CNN convolution (single/multiple input and output channels)

Single input, single output channel

# Manual convolution: single input channel, single output channel
import math

import torch
import torch.nn as nn
import torch.nn.functional as F

def my_conv2d(input_feature_map, kernel, stride=1, padding=0, bias=0):
    if padding > 0:
        input_feature_map = F.pad(input_feature_map, (padding, padding, padding, padding))
    print(input_feature_map)  # show the (padded) input

    input_h, input_w = input_feature_map.shape
    kernel_h, kernel_w = kernel.shape

    # Output size. Padding has already been applied above, so the standard
    # formula floor((W - K + 2P) / S) + 1 reduces to:
    output_w = (input_w - kernel_w) // stride + 1  # output width
    output_h = (input_h - kernel_h) // stride + 1  # output height
    # Note: with stride=1, the output matches the input size when the total
    # padding equals kernel_w - 1, i.e. (kernel_w - 1) // 2 per side for an
    # odd kernel (a quick check follows right after this function).
    print(f"output_h:{
      
      output_h}, output_w:{
      
      output_w}")

    output = torch.zeros(output_h, output_w)
    for i in range(0, input_h-kernel_h+1, stride):
        for j in range(0, input_w-kernel_w+1, stride):
            # Slide a kernel-sized window over the feature map
            region = input_feature_map[i:i+kernel_h, j:j+kernel_w]
            output[i // stride, j // stride] = torch.multiply(region, kernel).sum() + bias
    
    return output 
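
As a quick sanity check of the same-size note in the code above (a minimal sketch, not part of the original post): with stride 1 and a 3x3 kernel, one pixel of padding per side keeps a 5x5 input at 5x5.

same = my_conv2d(torch.randint(0, 5, (5, 5)).float(), torch.ones(3, 3), stride=1, padding=1)
print(same.shape)  # torch.Size([5, 5])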

Compare with nn.functional.conv2d

input_feature_map = torch.randint(0, 5, (5, 5)).float()
kernel = torch.ones(3, 3)
stride = 2
padding = 0
bias = torch.randn(1)  # one value per output channel (1 here)
# Manual implementation (single output channel)
output_feature_map = my_conv2d(input_feature_map, kernel, stride, padding, bias)
print(output_feature_map)

# PyTorch functional API
output_api = F.conv2d(input_feature_map.unsqueeze(0).unsqueeze(0),
                        kernel.unsqueeze(0).unsqueeze(0), stride=stride, padding=padding, bias=bias)
print(output_api.squeeze(0).squeeze(0))

Result

tensor([[2., 1., 1., 3., 1.],
        [4., 2., 2., 2., 2.],
        [1., 4., 1., 3., 1.],
        [3., 2., 2., 2., 4.],
        [3., 4., 2., 1., 2.]])
output_h:2, output_w:2
tensor([[19.1882, 17.1882],
        [23.1882, 19.1882]])
tensor([[19.1882, 17.1882],
        [23.1882, 19.1882]])

Multiple input, multiple output channels (MIMO)

# Multiple input and output channels
import math

# Manual convolution over (batch_size, C, H, W) tensors
def my_conv2d(input_feature_map, kernel, stride=1, padding=0, bias=0):
    if padding > 0:
        input_feature_map = F.pad(input_feature_map, (padding, padding, padding, padding))
    # F.pad pads starting from the last dimension: each pair in the tuple is the
    # (front, back) padding for one dimension, so this 4-tuple pads the last two
    # dimensions (W, then H); the padding value defaults to 0. A short demo
    # follows right after this function.
    print(input_feature_map.shape)

    B, _, input_h, input_w = input_feature_map.shape
    out_channels, in_channels, kernel_h, kernel_w = kernel.shape

    # Output size; padding has already been applied above, so
    # floor((W - K + 2P) / S) + 1 reduces to:
    output_w = (input_w - kernel_w) // stride + 1  # output width
    output_h = (input_h - kernel_h) // stride + 1  # output height
    print(f"output_h:{
      
      output_h}, output_w:{
      
      output_w}")
    output = torch.zeros(B, out_channels, output_h, output_w)

    for b in range(B):  # B unpacked from the input shape above
        for out_c in range(out_channels):
            for in_c in range(in_channels):
                for i in range(0, input_h-kernel_h+1, stride):
                    for j in range(0, input_w-kernel_w+1, stride):
                        # print(kernel[out_c, in_c, ...].shape)  # (K, K)
                        # Slide a kernel-sized window over this input channel
                        region = input_feature_map[b, in_c, i:i+kernel_h, j:j+kernel_w]
                        # print(region.shape)  # (K, K)
                        # Accumulate contributions across input channels
                        output[b, out_c, i // stride, j // stride] += torch.multiply(region, kernel[out_c, in_c, ...]).sum()
            output[b, out_c, ...] += bias[out_c]  # one bias scalar per output channel
    
    return output 
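
A short demo of the F.pad behavior noted inside the function (a minimal sketch, not from the original post): the 4-tuple pads the last two dimensions, width first, then height.

t = torch.ones(1, 1, 2, 2)
print(F.pad(t, (1, 1, 1, 1)).shape)  # torch.Size([1, 1, 4, 4]): W and H each grow by 2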

Compare with nn.functional.conv2d

B = batch_size = 2
C = in_c = in_channels = 3
H = height = 5
W = width = 5
input_feature_map = torch.randint(0, 5, (B, C, H, W)).float()
K = kernel_size = 3
out_c = out_channels = 2
kernel = torch.ones(out_c, in_c, K, K)
stride = 2
padding = 1
bias = torch.randn(out_c)  # one value per output channel
# print(bias)

# Manual implementation
output_feature_map = my_conv2d(input_feature_map, kernel, stride, padding, bias)
print(output_feature_map)

# PyTorch functional API
api_output_feature_map = F.conv2d(input_feature_map, kernel, stride=stride, padding=padding, bias=bias)
print(api_output_feature_map)

# class-based API: nn.Conv2d
# conv_layer = nn.Conv2d(in_channels, out_channels, kernel_size, bias=False)
# output_fm = conv_layer(input_feature_map)
# print(output_fm.shape)
# print(conv_layer.weight.shape)  # the kernel shape is independent of batch_size
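
For completeness, nn.Conv2d can reproduce the same numbers if its parameters are overwritten with the kernel and bias used above (a minimal sketch, not part of the original post):

conv_layer = nn.Conv2d(in_channels, out_channels, kernel_size, stride=stride, padding=padding)
with torch.no_grad():
    conv_layer.weight.copy_(kernel)  # (out_channels, in_channels, K, K)
    conv_layer.bias.copy_(bias)      # (out_channels,)
print(conv_layer(input_feature_map))  # matches the two outputs above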

Result

torch.Size([2, 3, 7, 7])
output_h:3, output_w:3
tensor([[[[13.8990, 25.8990, 20.8990],
          [27.8990, 46.8990, 28.8990],
          [23.8990, 33.8990, 17.8990]],

         [[11.8909, 23.8909, 18.8909],
          [25.8909, 44.8909, 26.8909],
          [21.8909, 31.8909, 15.8909]]],


        [[[21.8990, 32.8990, 21.8990],
          [43.8990, 68.8990, 43.8990],
          [23.8990, 42.8990, 31.8990]],

         [[19.8909, 30.8909, 19.8909],
          [41.8909, 66.8909, 41.8909],
          [21.8909, 40.8909, 29.8909]]]])
tensor([[[[13.8990, 25.8990, 20.8990],
          [27.8990, 46.8990, 28.8990],
          [23.8990, 33.8990, 17.8990]],

         [[11.8909, 23.8909, 18.8909],
          [25.8909, 44.8909, 26.8909],
          [21.8909, 31.8909, 15.8909]]],


        [[[21.8990, 32.8990, 21.8990],
          [43.8990, 68.8990, 43.8990],
          [23.8990, 42.8990, 31.8990]],

         [[19.8909, 30.8909, 19.8909],
          [41.8909, 66.8909, 41.8909],
          [21.8909, 40.8909, 29.8909]]]])

Conclusion

As the results above show, the manual implementation relies mainly on explicit loops, and its output matches the PyTorch API exactly. Two details deserve attention: how the output feature map's shape is computed, and the dimension of the bias. The bias has one value per output channel; each channel of the output feature map gets a single bias scalar, which is added to every pixel of that channel's feature map.
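
To make that bias broadcasting concrete (a minimal sketch, not from the original post): one scalar per output channel, added to every pixel of that channel.

fm = torch.zeros(1, 2, 3, 3)      # (B, out_channels, H, W)
b = torch.tensor([1.0, -1.0])     # one bias value per output channel
print(fm + b.view(1, -1, 1, 1))   # channel 0 becomes all 1s, channel 1 all -1s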

Origin: blog.csdn.net/qq_41139677/article/details/125239002