图像卷积操作
程序
import numpy as np
from matplotlib import pyplot as plt
import tensorflow as tf
from io import BytesIO
from PIL import Image
import base64
import pylab
def conv2d(input, filter, stride, padding):
    """NumPy reference implementation of a batched 2-D convolution.

    Mirrors the semantics of ``tf.nn.conv2d`` (the script below compares
    the two numerically).

    Args:
        input: array of shape (batch, height, width, in_channels).
        filter: array of shape (f_h, f_w, in_channels, out_channels).
        stride: int, the same stride for both spatial dimensions.
        padding: 'SAME' or 'VALID'.

    Returns:
        (output, input): the convolution result of shape
        (batch, out_h, out_w, out_channels), and the (possibly zero-padded)
        input actually convolved.

    Raises:
        AssertionError: on rank/channel/stride/padding violations.
        NOTE(review): ``assert`` is stripped under ``python -O``; kept to
        match the original contract.
    """
    in_s = input.shape   # (batch, height, width, channels)
    f_s = filter.shape   # (f_h, f_w, in_channels, out_channels)
    assert len(in_s) == 4, 'input size rank 4 required!'
    assert len(f_s) == 4, 'filter size rank 4 required!'
    assert f_s[2] == in_s[3], 'input channels not match filter channels.'
    assert f_s[0] >= stride and f_s[1] >= stride, 'filter should not be less than stride!'
    assert padding in [
        'SAME', 'VALID'], 'padding value[{0}] not allowed!'.format(padding)
    if padding != 'VALID':
        # TensorFlow 'SAME' semantics: out = ceil(in / stride) and
        # pad_total = max((out - 1) * stride + f - in, 0), with the extra
        # row/column (when pad_total is odd) going to the bottom/right.
        # (The original formula f - (in - f) % stride over-pads by a full
        # filter extent whenever (in - f) % stride == 0.)
        in_hw = np.array(in_s[1:3])
        f_hw = np.array(f_s[:2])
        out_hw = -(-in_hw // stride)  # ceil division on ints
        pad = np.maximum((out_hw - 1) * stride + f_hw - in_hw, 0)
        temp = np.zeros((in_s[0], in_s[1] + pad[0], in_s[2] + pad[1], in_s[3]))
        temp[:, pad[0] // 2:in_s[1] + pad[0] // 2,
             pad[1] // 2:in_s[2] + pad[1] // 2, :] = input
        input = temp
        in_s = input.shape
    # Output spatial size for a (now effectively VALID) convolution.
    out_shape = (np.array(in_s[1:3]) - np.array(f_s[:2])) // stride + 1
    out_shape = np.concatenate([in_s[:1], out_shape, f_s[-1:]])
    output = np.zeros(out_shape)
    # Slide each output-channel kernel over every spatial position; the
    # batch dimension is handled vectorized by the sum over axes (1, 2, 3).
    for k in range(out_shape[3]):
        for i in range(out_shape[1]):
            for j in range(out_shape[2]):
                window = input[:, i * stride:i * stride + f_s[0],
                               j * stride:j * stride + f_s[1], :]
                output[:, i, j, k] = np.sum(window * filter[:, :, :, k],
                                            axis=(1, 2, 3))
    return output, input
# Define a convolution kernel: (f_h=3, f_w=3, in_channels=3, out_channels=8)
filter = np.random.uniform(size=[3, 3, 3, 8])
#print("Filter:\n")
#print(filter)
padding = 'SAME'
stride = 3
# Initialize the input: batch=2, 5x5 spatial, 3 channels
test1=np.arange(150)
test1=test1.reshape((2,5,5,3))
# Run the hand-written convolution; also returns the padded input
output ,input_test= conv2d(test1, filter, stride, padding)
print(output.shape)
''''
#方便直观的显示数据
print("Data:\n")
print(input)
print("Area1:\n")
print(input_test[:,:,:,0])
print("Area2:\n")
print(input_test[:,:,:,1])
print("Area3:\n")
print(input_test[:,:,:,2])
print("OUT:\n")
print(output)
print("OUTArea1:\n")
print(output[:,:,:,0])
print("OUTArea1:\n")
print(output[:,:,:,1])
'''
# Cross-check the hand-written result against TensorFlow's convolution.
# NOTE(review): tf.placeholder / tf.Session are TF1-only APIs; under TF2
# this needs tf.compat.v1 plus tf.compat.v1.disable_eager_execution().
# Define graph inputs
input_tensor = tf.placeholder(
    tf.float32, shape=[None, None, None, None], name='input')
filter_tensor = tf.placeholder(
    tf.float32, shape=[None, None, None, None], name='filter')
# Run the convolution with the same stride/padding as conv2d above
output_tensor = tf.nn.conv2d(
    input_tensor, filter_tensor, padding=padding, strides=[1, stride, stride, 1])
with tf.Session() as sess:
    tf_output = sess.run(output_tensor,feed_dict={input_tensor:test1,filter_tensor:filter})
print(tf_output.shape)
print('TF_OUTPUT:\n')
print(tf_output)
print('OUTPUT:\n')
print(output)
# Mean/max absolute difference (small residual expected: TF runs in float32,
# the NumPy version in float64)
print(np.mean(np.abs(output - tf_output)))
print(np.max(np.abs(output - tf_output)))
程序输出
(2, 2, 2, 8)
(2, 2, 2, 8)
TF_OUTPUT:
[[[[ 309.2154541 270.60263062 260.41113281 221.46687317
227.54600525 230.96282959 230.40983582 267.36032104]
[ 316.57440186 278.57971191 279.80255127 215.61552429
223.49278259 219.557724 231.22906494 272.26052856]]
[[ 535.97070312 421.85943604 481.73876953 396.71484375
399.61108398 464.22451782 425.04989624 507.50418091]
[ 459.15145874 409.68347168 381.71875 324.01239014
309.20901489 339.55886841 294.88568115 415.53204346]]]
[[[ 1485.45361328 1277.75952148 1282.13769531 1087.30883789
1081.29650879 1132.98876953 1116.50012207 1298.37841797]
[ 1196.16577148 1045.47900391 1024.41357422 823.97753906
839.2565918 819.07800293 837.34814453 1025.48303223]]
[[ 1257.47180176 999.01391602 1129.00964355 935.97912598
928.44384766 1072.16748047 989.25793457 1177.82800293]
[ 1005.9899292 891.35040283 827.52270508 710.12542725
678.71563721 730.67132568 640.82513428 900.62762451]]]]
OUTPUT:
[[[[ 309.2154469 270.60261901 260.41110459 221.46688844
227.5460131 230.96283544 230.409845 267.36031789]
[ 316.57439972 278.57972561 279.80256835 215.61554813
223.49277778 219.55772748 231.22905518 272.26053512]]
[[ 535.97074334 421.85937476 481.73879734 396.71489093
399.61106346 464.22452494 425.04994261 507.50414106]
[ 459.15144808 409.68341065 381.71876253 324.01236221
309.20903186 339.55885264 294.88570362 415.53204988]]]
[[[ 1485.45362376 1277.7596486 1282.13778641 1087.30877457
1081.2966196 1132.98877566 1116.50016085 1298.37845353]
[ 1196.16573663 1045.47900751 1024.41354102 823.97749289
839.25663818 819.07797072 837.34816846 1025.48311481]]
[[ 1257.47182321 999.0138759 1129.00956589 935.97914041
928.4439143 1072.1674046 989.25805391 1177.82811744]
[ 1005.98992085 891.35041221 827.52268566 710.12543209
678.7156959 730.67132869 640.82513323 900.62764016]]]]
3.23503926709e-05
0.0001271201495
重点
程序重点的地方主要在SAME模式下的补0操作和卷积核操作。