图像卷积操作
程序
import numpy as np
from matplotlib import pyplot as plt
import tensorflow as tf
from io import BytesIO
from PIL import Image
import base64
import pylab
def conv2d(input, filter, stride, padding):
    """NumPy reference implementation of a batched 2-D convolution.

    Mirrors the semantics of ``tf.nn.conv2d`` (the script below compares
    the two numerically).

    Args:
        input: array of shape (batch, height, width, in_channels).
        filter: array of shape (f_h, f_w, in_channels, out_channels).
        stride: int, the same stride for both spatial dimensions.
        padding: 'SAME' or 'VALID'.

    Returns:
        (output, input): the convolution result of shape
        (batch, out_h, out_w, out_channels), and the (possibly zero-padded)
        input actually convolved.

    Raises:
        AssertionError: on rank/channel/stride/padding violations.
        NOTE(review): ``assert`` is stripped under ``python -O``; kept to
        match the original contract.
    """
    in_s = input.shape   # (batch, height, width, channels)
    f_s = filter.shape   # (f_h, f_w, in_channels, out_channels)
    assert len(in_s) == 4, 'input size rank 4 required!'
    assert len(f_s) == 4, 'filter size rank 4 required!'
    assert f_s[2] == in_s[3], 'input channels not match filter channels.'
    assert f_s[0] >= stride and f_s[1] >= stride, 'filter should not be less than stride!'
    assert padding in [
        'SAME', 'VALID'], 'padding value[{0}] not allowed!'.format(padding)
    if padding != 'VALID':
        # TensorFlow 'SAME' semantics: out = ceil(in / stride) and
        # pad_total = max((out - 1) * stride + f - in, 0), with the extra
        # row/column (when pad_total is odd) going to the bottom/right.
        # (The original formula f - (in - f) % stride over-pads by a full
        # filter extent whenever (in - f) % stride == 0.)
        in_hw = np.array(in_s[1:3])
        f_hw = np.array(f_s[:2])
        out_hw = -(-in_hw // stride)  # ceil division on ints
        pad = np.maximum((out_hw - 1) * stride + f_hw - in_hw, 0)
        temp = np.zeros((in_s[0], in_s[1] + pad[0], in_s[2] + pad[1], in_s[3]))
        temp[:, pad[0] // 2:in_s[1] + pad[0] // 2,
             pad[1] // 2:in_s[2] + pad[1] // 2, :] = input
        input = temp
        in_s = input.shape
    # Output spatial size for a (now effectively VALID) convolution.
    out_shape = (np.array(in_s[1:3]) - np.array(f_s[:2])) // stride + 1
    out_shape = np.concatenate([in_s[:1], out_shape, f_s[-1:]])
    output = np.zeros(out_shape)
    # Slide each output-channel kernel over every spatial position; the
    # batch dimension is handled vectorized by the sum over axes (1, 2, 3).
    for k in range(out_shape[3]):
        for i in range(out_shape[1]):
            for j in range(out_shape[2]):
                window = input[:, i * stride:i * stride + f_s[0],
                               j * stride:j * stride + f_s[1], :]
                output[:, i, j, k] = np.sum(window * filter[:, :, :, k],
                                            axis=(1, 2, 3))
    return output, input
# Define a convolution kernel: (f_h=3, f_w=3, in_channels=3, out_channels=8)
filter = np.random.uniform(size=[3, 3, 3, 8])
#print("Filter:\n")
#print(filter)
padding = 'SAME'
stride = 3
# Initialize the input: batch=2, 5x5 spatial, 3 channels
test1=np.arange(150)
test1=test1.reshape((2,5,5,3))
# Run the hand-written convolution; also returns the padded input
output ,input_test= conv2d(test1, filter, stride, padding)
print(output.shape)
''''
#方便直观的显示数据
print("Data:\n")
print(input)
print("Area1:\n")
print(input_test[:,:,:,0])
print("Area2:\n")
print(input_test[:,:,:,1])
print("Area3:\n")
print(input_test[:,:,:,2])
print("OUT:\n")
print(output)
print("OUTArea1:\n")
print(output[:,:,:,0])
print("OUTArea1:\n")
print(output[:,:,:,1])
'''
# Cross-check the hand-written result against TensorFlow's convolution.
# NOTE(review): tf.placeholder / tf.Session are TF1-only APIs; under TF2
# this needs tf.compat.v1 plus tf.compat.v1.disable_eager_execution().
# Define graph inputs
input_tensor = tf.placeholder(
    tf.float32, shape=[None, None, None, None], name='input')
filter_tensor = tf.placeholder(
    tf.float32, shape=[None, None, None, None], name='filter')
# Run the convolution with the same stride/padding as conv2d above
output_tensor = tf.nn.conv2d(
    input_tensor, filter_tensor, padding=padding, strides=[1, stride, stride, 1])
with tf.Session() as sess:
    tf_output = sess.run(output_tensor,feed_dict={input_tensor:test1,filter_tensor:filter})
print(tf_output.shape)
print('TF_OUTPUT:\n')
print(tf_output)
print('OUTPUT:\n')
print(output)
# Mean/max absolute difference (small residual expected: TF runs in float32,
# the NumPy version in float64)
print(np.mean(np.abs(output - tf_output)))
print(np.max(np.abs(output - tf_output)))
程序输出
(2, 2, 2, 8)
(2, 2, 2, 8)
TF_OUTPUT:
[[[[ 309.2154541 270.60263062 260.41113281 221.46687317
227.54600525 230.96282959 230.40983582 267.36032104]
[ 316.57440186 278.57971191 279.80255127 215.61552429
223.49278259 219.557724 231.22906494 272.26052856]]
[[ 535.97070312 421.85943604 481.73876953 396.71484375
399.61108398 464.22451782 425.04989624 507.50418091]
[ 459.15145874 409.68347168 381.71875 324.01239014
309.20901489 339.55886841 294.88568115 415.53204346]]]
[[[ 1485.45361328 1277.75952148 1282.13769531 1087.30883789
1081.29650879 1132.98876953 1116.50012207 1298.37841797]
[ 1196.16577148 1045.47900391 1024.41357422 823.97753906
839.2565918 819.07800293 837.34814453 1025.48303223]]
[[ 1257.47180176 999.01391602 1129.00964355 935.97912598
928.44384766 1072.16748047 989.25793457 1177.82800293]
[ 1005.9899292 891.35040283 827.52270508 710.12542725
678.71563721 730.67132568 640.82513428 900.62762451]]]]
OUTPUT:
[[[[ 309.2154469 270.60261901 260.41110459 221.46688844
227.5460131 230.96283544 230.409845 267.36031789]
[ 316.57439972 278.57972561 279.80256835 215.61554813
223.49277778 219.55772748 231.22905518 272.26053512]]
[[ 535.97074334 421.85937476 481.73879734 396.71489093
399.61106346 464.22452494 425.04994261 507.50414106]
[ 459.15144808 409.68341065 381.71876253 324.01236221
309.20903186 339.55885264 294.88570362 415.53204988]]]
[[[ 1485.45362376 1277.7596486 1282.13778641 1087.30877457
1081.2966196 1132.98877566 1116.50016085 1298.37845353]
[ 1196.16573663 1045.47900751 1024.41354102 823.97749289
839.25663818 819.07797072 837.34816846 1025.48311481]]
[[ 1257.47182321 999.0138759 1129.00956589 935.97914041
928.4439143 1072.1674046 989.25805391 1177.82811744]
[ 1005.98992085 891.35041221 827.52268566 710.12543209
678.7156959 730.67132869 640.82513323 900.62764016]]]]
3.23503926709e-05
0.0001271201495
重点
程序重点的地方主要在SAME模式下的补0操作和卷积核操作。