1.U-netモデル
U-net 構造図
これは論文のネットワーク モデル構造図です。図の出力は 2 次元であり、このモデルが主に画像セグメンテーションに使用されていることがわかります。図の構造に従って実装すると、512x512 の画像を入力した場合、各ピクセルについて 2 種類の出力予測 (エッジ、非エッジ) が得られます。
# U-Net for binary segmentation: 512x512 RGB input, 2-class (edge / non-edge)
# per-pixel softmax output.
# NOTE(review): the original U-Net paper starts the encoder at 64 filters;
# this variant starts at 128 and repeats 128 in the second stage — kept as-is
# to preserve the published model.
inputs = keras.layers.Input((512, 512, 3))

# --- Encoder (contracting path): two 3x3 convs then 2x2 max-pool per stage ---
conv1 = keras.layers.Conv2D(128, (3, 3), activation='relu', padding='same')(inputs)
conv1 = keras.layers.Conv2D(128, (3, 3), activation='relu', padding='same')(conv1)
pool1 = keras.layers.MaxPool2D((2, 2))(conv1)   # 256x256
conv2 = keras.layers.Conv2D(128, (3, 3), activation='relu', padding='same')(pool1)
conv2 = keras.layers.Conv2D(128, (3, 3), activation='relu', padding='same')(conv2)
pool2 = keras.layers.MaxPool2D((2, 2))(conv2)   # 128x128
conv3 = keras.layers.Conv2D(256, (3, 3), activation='relu', padding='same')(pool2)
conv3 = keras.layers.Conv2D(256, (3, 3), activation='relu', padding='same')(conv3)
pool3 = keras.layers.MaxPool2D((2, 2))(conv3)   # 64x64
conv4 = keras.layers.Conv2D(512, (3, 3), activation='relu', padding='same')(pool3)
conv4 = keras.layers.Conv2D(512, (3, 3), activation='relu', padding='same')(conv4)
pool4 = keras.layers.MaxPool2D((2, 2))(conv4)   # 32x32

# --- Bottom of the "U" ---
conv5 = keras.layers.Conv2D(1024, (3, 3), activation='relu', padding='same')(pool4)
conv5 = keras.layers.Conv2D(1024, (3, 3), activation='relu', padding='same')(conv5)

# --- Decoder (expanding path): 2x2 conv + upsample, concat skip, two 3x3 convs ---
deconv1 = keras.layers.Conv2D(512, (2, 2), activation='relu', padding='same')(conv5)
upsa1 = keras.layers.UpSampling2D((2, 2))(deconv1)
upsa1 = keras.layers.concatenate([conv4, upsa1], axis=3)  # skip connection from conv4
deconv1 = keras.layers.Conv2D(512, (3, 3), activation='relu', padding='same')(upsa1)
deconv1 = keras.layers.Conv2D(512, (3, 3), activation='relu', padding='same')(deconv1)
deconv2 = keras.layers.Conv2D(256, (2, 2), activation='relu', padding='same')(deconv1)
upsa2 = keras.layers.UpSampling2D((2, 2))(deconv2)
upsa2 = keras.layers.concatenate([conv3, upsa2], axis=3)  # skip connection from conv3
deconv2 = keras.layers.Conv2D(256, (3, 3), activation='relu', padding='same')(upsa2)
deconv2 = keras.layers.Conv2D(256, (3, 3), activation='relu', padding='same')(deconv2)
deconv3 = keras.layers.Conv2D(128, (2, 2), activation='relu', padding='same')(deconv2)
upsa3 = keras.layers.UpSampling2D((2, 2))(deconv3)
upsa3 = keras.layers.concatenate([conv2, upsa3], axis=3)  # skip connection from conv2
deconv3 = keras.layers.Conv2D(128, (3, 3), activation='relu', padding='same')(upsa3)
deconv3 = keras.layers.Conv2D(128, (3, 3), activation='relu', padding='same')(deconv3)
deconv4 = keras.layers.Conv2D(64, (2, 2), activation='relu', padding='same')(deconv3)
upsa4 = keras.layers.UpSampling2D((2, 2))(deconv4)
upsa4 = keras.layers.concatenate([conv1, upsa4], axis=3)  # skip connection from conv1
deconv4 = keras.layers.Conv2D(64, (3, 3), activation='relu', padding='same')(upsa4)
deconv4 = keras.layers.Conv2D(64, (3, 3), activation='relu', padding='same')(deconv4)

# Per-pixel 2-way softmax over (edge, non-edge).
output = keras.layers.Conv2D(2, (1, 1), activation='softmax', padding='same')(deconv4)

model = keras.models.Model(inputs=inputs, outputs=output)
model.summary()
# Fix: `lr` was deprecated and then removed from Keras optimizers; the
# supported keyword is `learning_rate`.
# NOTE(review): with a 2-channel softmax and one-hot masks, binary and
# categorical cross-entropy coincide up to a constant factor; the
# conventional pairing would be 'categorical_crossentropy'.
model.compile(optimizer=keras.optimizers.Adam(learning_rate=1e-4),
              loss='binary_crossentropy', metrics=['accuracy'])
イテレータを使用してデータを非同期的に読み取ると、実行速度が向上します。
NVIDIA 1660 グラフィックス カードでメモリを動的に割り当てる
# Enable on-demand GPU memory growth so TensorFlow does not grab all VRAM
# up front. Guarded: indexing [0] unconditionally raises IndexError on
# machines where no GPU is visible.
physical_devices = config.list_physical_devices('GPU')
if physical_devices:
    config.experimental.set_memory_growth(physical_devices[0], True)
2. DeepLab V3 モデル
DeepLab V3 モデルは主に画像のセマンティック セグメンテーションに使用され、出力サイズは元の画像の 1/16 になります (output stride = 16)。
DeepLab v3 は ResNet 50/101 モデルに基づいています。次の図は ResNet モデルの構造です。出力ストライド (output stride) は、元の画像サイズと出力サイズの比率を表します。DeepLab v3 の Block3 まで (Block3 を含む) は ResNet と同じ構造です。
ボトルネック構造
ResNet には、building block と bottleneck block という 2 つのブロック構造があります。DeepLab V3 モデルではボトルネック構造が使用されており、その構造は DeepLab v3 内で調整されています。
pytorch を使用して事前トレーニングされたモデルをダウンロードする
pytorchでダウンロードしたモデルと公式モデルには若干の違いがあります。
keras は Deeplab v3 を実装します
論文のモデルとは若干の違いがあります。
from tensorflow import keras as keras
from tensorflow import config as config
# Enable on-demand GPU memory growth. Guarded so the script still runs on
# CPU-only machines, where list_physical_devices('GPU') returns an empty list
# and [0] would raise IndexError.
physical_devices = config.list_physical_devices('GPU')
if physical_devices:
    config.experimental.set_memory_growth(physical_devices[0], True)
def deeplab3(input_size=(512, 512), num_classes=21):
    """Build a DeepLab v3-style segmentation model on a ResNet-101-like backbone.

    Args:
        input_size: spatial (height, width) of the RGB input image.
        num_classes: number of segmentation classes in the per-pixel output.

    Returns:
        A `keras.Model` mapping an RGB image to a per-pixel class map.
    """
    img = keras.Input(shape=input_size + (3,))

    # Stem: 7x7/2 conv followed by a 3x3/2 max-pool.
    x = keras.layers.Conv2D(64, (7, 7), strides=(2, 2), padding='same')(img)
    x = keras.layers.MaxPool2D((3, 3), strides=(2, 2), padding='same')(x)

    # Block 1: three bottleneck units, down-sampling on the last one.
    x = bottleneck(64, 64, 256, strides=(1, 1))(x)
    x = bottleneck(256, 64, 256, strides=(1, 1))(x)
    x = bottleneck(256, 64, 256, strides=(2, 2))(x)

    # Block 2: four bottleneck units, down-sampling on the last one.
    x = bottleneck(256, 128, 512, strides=(1, 1))(x)
    x = bottleneck(512, 128, 512, strides=(1, 1))(x)
    x = bottleneck(512, 128, 512, strides=(1, 1))(x)
    x = bottleneck(512, 128, 512, strides=(2, 2))(x)

    # Block 3: one widening unit followed by 23 identity-shaped units
    # (same 24-unit total as the original formulation of this file).
    x = bottleneck(512, 256, 1024, strides=(1, 1))(x)
    for _ in range(23):
        x = bottleneck(1024, 256, 1024, strides=(1, 1))(x)

    # Block 4: atrous bottlenecks with increasing dilation rates (multi-grid).
    x = bottleneck(1024, 512, 2048, strides=(1, 1), rate=1)(x)
    x = bottleneck(2048, 512, 2048, strides=(1, 1), rate=2)(x)
    x = bottleneck(2048, 512, 2048, strides=(1, 1), rate=4)(x)

    # Atrous Spatial Pyramid Pooling:
    # (a) parallel convs — one 1x1 plus three 3x3 at dilation rates 6/12/18.
    branch1 = keras.layers.Conv2D(256, (1, 1), padding='same')(x)
    branch2 = keras.layers.Conv2D(256, (3, 3), padding='same', dilation_rate=(6, 6))(x)
    branch3 = keras.layers.Conv2D(256, (3, 3), padding='same', dilation_rate=(12, 12))(x)
    branch4 = keras.layers.Conv2D(256, (3, 3), padding='same', dilation_rate=(18, 18))(x)

    # (b) image-level features: global average pool, project to 256 channels,
    # then upsample back to the feature-map resolution.
    spatial = x.shape[1:3]
    gp = keras.layers.GlobalAvgPool2D()(x)
    gp = keras.layers.Reshape(target_shape=(-1, 1, 2048))(gp)
    gp = keras.layers.Conv2D(256, (1, 1))(gp)
    gp = keras.layers.BatchNormalization()(gp)
    gp = keras.layers.UpSampling2D(spatial)(gp)

    # Fuse all ASPP branches and project to the class map.
    x = keras.layers.concatenate([branch1, branch2, branch3, branch4, gp])
    x = keras.layers.Conv2D(256, (3, 3), strides=(1, 1), padding='same')(x)
    out = keras.layers.Conv2D(num_classes, (1, 1), strides=(1, 1),
                              padding='same', activation='relu')(x)
    return keras.Model(img, out)
def bottleneck(depth_input, depth_bottleneck, depth_output, strides=(1, 1), rate=1):
    """Return a builder for a ResNet bottleneck unit (1x1 -> 3x3 -> 1x1 + shortcut).

    Args:
        depth_input: channel count of the incoming tensor.
        depth_bottleneck: channels inside the 1x1/3x3 squeeze.
        depth_output: channels produced by the unit.
        strides: spatial stride of the 3x3 conv (mirrored on the shortcut).
        rate: dilation rate of the 3x3 conv (atrous convolution).

    Returns:
        A function that applies the unit to a 4-D feature tensor.
    """
    def build(x):
        # Shortcut branch: project with a 1x1 conv when the channel count
        # changes; otherwise pass through (or 1x1 max-pool when striding).
        if depth_input != depth_output:
            skip = keras.layers.Conv2D(depth_output, (1, 1), strides=strides,
                                       padding='same')(x)
        elif strides == (1, 1):
            skip = x
        else:
            skip = keras.layers.MaxPool2D((1, 1), strides=strides, padding='same')(x)

        # Residual branch: 1x1 squeeze -> 3x3 (possibly atrous) -> 1x1 expand,
        # each conv followed by batch normalization.
        r = keras.layers.Conv2D(depth_bottleneck, (1, 1), padding='same')(x)
        r = keras.layers.BatchNormalization()(r)
        r = keras.layers.Conv2D(depth_bottleneck, (3, 3), strides=strides,
                                padding='same', dilation_rate=rate)(r)
        r = keras.layers.BatchNormalization()(r)
        r = keras.layers.Conv2D(depth_output, (1, 1), padding='same')(r)
        r = keras.layers.BatchNormalization()(r)

        # Merge and activate. NOTE(review): a BatchNormalization after the
        # final ReLU is unusual for ResNet, but it is reproduced here to keep
        # the graph identical to the original.
        merged = keras.layers.ReLU()(skip + r)
        return keras.layers.BatchNormalization()(merged)

    return build