1. SegNet
Segmentation is one of the main tasks of computer vision. For each pixel of the image, you must specify the class (including the background). Semantic segmentation only assigns a class to each pixel, whereas instance segmentation additionally separates each class into distinct object instances.
There are different neural architectures for segmentation models, but almost all of them have the same structure. The first part is the encoder that extracts features from the input image, and the second part is the decoder, which transforms these features into an image with the same height and width and some number of channels, possibly equal to the number of classes.
The simplest encoder-decoder architecture is called SegNet. It uses a standard CNN with convolution and pooling layers in the encoder, and a deconvolutional CNN with convolution and upsampling layers in the decoder. It also relies on batch normalization to train the multilayer network successfully.
Paper address
https://arxiv.org/pdf/1511.00561.pdf
2. The sample dataset
The increased incidence of melanoma has recently prompted the development of computer-aided diagnostic systems for the classification of dermoscopic images. The PH² dataset was developed for research and benchmarking purposes, to facilitate comparative studies of segmentation and classification algorithms for dermoscopic images. PH² is a dermoscopic image database acquired at the Dermatology Service of Hospital Pedro Hispano, Matosinhos, Portugal.
https://www.fc.up.pt/addi/ph2%20database.html
Baidu Netdisk download
Link: https://pan.baidu.com/s/1I8Zwwn8XZPkzRwchKBgWZg?pwd=uyi9
Extraction code: uyi9
3. PyTorch code reference
1. Import the package
import torch
import torchvision
import matplotlib.pyplot as plt
from torchvision import transforms
from torch import nn
from torch import optim
from tqdm import tqdm
import numpy as np
import torch.nn.functional as F
from skimage.io import imread
from skimage.transform import resize
import os
# Seed both random number generators used in this walkthrough so that the
# dataset split and the weight initialisation are repeatable.
np.random.seed(42)
torch.manual_seed(42)

# Run on the first CUDA device when one is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = 'cuda:0'
else:
    device = 'cpu'

# Hyperparameters.
train_size = 0.9      # fraction of the samples used for training
batch_size = 32       # mini-batch size of the training DataLoader
epochs = 30           # number of passes over the training set
lr = 1e-3             # Adam learning rate
weight_decay = 1e-6   # weight decay passed to Adam
2. Download and preprocess the data
The code below downloads the dataset from its original location and unpacks it. The unrar utility must be installed for this code to work: on Linux you can install it with sudo apt-get install unrar; a command-line version is also available for Windows.
# Notebook shell escapes: fetch the PH2 archive and unpack it into the
# current working directory.
# NOTE(review): the (commented-out) install line names the `rar` package while
# extraction below calls `unrar` -- confirm which package your environment needs.
#!apt-get install rar
!wget https://www.dropbox.com/s/k88qukc20ljnbuo/PH2Dataset.rar
!unrar x -Y PH2Dataset.rar
Now we will define the code to load the dataset. We converted all images to 256x256 size and split the dataset into training and testing parts. This function returns training and test datasets, each containing the original image and a mask outlining the mole.
def load_dataset(train_part, root='PH2Dataset'):
    """Load the PH2 images and lesion masks and split them into train/test parts.

    Walks ``<root>/PH2 Dataset images`` and, for every directory whose name
    ends in ``_Dermoscopic_Image`` (input image) or ``_lesion`` (mask), reads
    the alphabetically first file it contains. All images and masks are
    resized to 256x256; masks are binarised at 0.5 after resizing.

    Args:
        train_part: Fraction of the samples placed in the training set.
        root: Directory that contains the unpacked ``PH2 Dataset images`` folder.

    Returns:
        ``(train_dataset, test_dataset)``. Each is an ``(images, masks)`` tuple
        of float tensors: images are permuted from (N, H, W, C) to
        (N, C, H, W); masks get a singleton channel axis, i.e. (N, 1, H, W).

    Raises:
        ValueError: If no samples were found, or if the number of images and
            masks differs (the two lists are paired by position).
    """
    images = []
    masks = []
    data_dir = os.path.join(root, 'PH2 Dataset images')
    for dirpath, dirnames, filenames in os.walk(data_dir):
        # os.walk yields entries in arbitrary (filesystem) order. Sorting the
        # sub-directory list in place fixes the traversal order, so the seeded
        # permutation below produces the same split on every machine.
        dirnames.sort()
        if not filenames:
            continue
        # Deterministic replacement for "whatever file happens to come first".
        first_file = os.path.join(dirpath, min(filenames))
        if dirpath.endswith('_Dermoscopic_Image'):
            images.append(imread(first_file))
        if dirpath.endswith('_lesion'):
            masks.append(imread(first_file))

    if not images or len(images) != len(masks):
        raise ValueError(
            'Expected the same non-zero number of images and masks under '
            '{!r}, found {} image(s) and {} mask(s)'.format(
                data_dir, len(images), len(masks)))

    size = (256, 256)
    resized_images = np.array([
        resize(image, size, mode='constant', anti_aliasing=True)
        for image in images
    ])
    # Binarise after resizing so the masks contain only 0/1 values.
    resized_masks = np.array([
        resize(mask, size, mode='constant', anti_aliasing=False) > 0.5
        for mask in masks
    ])
    images = torch.permute(torch.FloatTensor(resized_images), (0, 3, 1, 2))
    masks = torch.FloatTensor(resized_masks).unsqueeze(1)

    indices = np.random.permutation(len(images))
    train_part = int(train_part * len(images))
    train_ind = indices[:train_part]
    test_ind = indices[train_part:]

    train_dataset = (images[train_ind, :, :, :], masks[train_ind, :, :, :])
    test_dataset = (images[test_ind, :, :, :], masks[test_ind, :, :, :])
    return train_dataset, test_dataset
# Load the data once, using the train fraction configured in `train_size`.
train_dataset, test_dataset = load_dataset(train_size)
Now let's plot some images from the dataset and see what they look like:
def plotn(n, data, only_mask=False):
    """Plot the first ``n`` samples of ``data`` in two figures.

    The first figure shows the images, the second one the matching masks,
    each as a single row of ``n`` axes.

    Args:
        n: Number of samples (columns) to draw.
        data: Pair ``(images, masks)`` of equally ordered sequences. Each mask
            is indexed as ``mask[0]`` before plotting.
        only_mask: If False, each image is permuted with ``(1, 2, 0)`` before
            plotting; if True, ``img[0]`` is plotted instead.
    """
    images, masks = data[0], data[1]
    # squeeze=False always returns a 2-D array of axes. Without it,
    # plt.subplots(1, 1) returns a bare Axes object and ax[i] fails for n == 1.
    _, image_axes = plt.subplots(1, n, squeeze=False)
    _, mask_axes = plt.subplots(1, n, squeeze=False)
    ax, ax1 = image_axes[0], mask_axes[0]
    for i, (img, mask) in enumerate(zip(images, masks)):
        if i == n:
            break
        if not only_mask:
            ax[i].imshow(torch.permute(img, (1, 2, 0)))
        else:
            ax[i].imshow(img[0])
        ax1[i].imshow(mask[0])
        ax[i].axis('off')
        ax1[i].axis('off')
    plt.show()
# Preview the first five training images together with their masks.
plotn(5, train_dataset)
We also need a data loader to feed data into our neural network.
# Each dataset is an (images, masks) pair of tensors; zipping them gives a list
# of (image, mask) samples that DataLoader can batch directly.
train_dataloader = torch.utils.data.DataLoader(
    dataset=list(zip(train_dataset[0], train_dataset[1])),
    batch_size=batch_size,
    shuffle=True,
)
# The test loader serves one sample at a time, in a fixed order.
test_dataloader = torch.utils.data.DataLoader(
    dataset=list(zip(test_dataset[0], test_dataset[1])),
    batch_size=1,
    shuffle=False,
)
dataloaders = (train_dataloader, test_dataloader)
3. Model definition and training
class SegNet(nn.Module):
    """Small SegNet-style encoder-decoder producing a one-channel probability map.

    Encoder: four ``conv3x3 -> ReLU -> batch-norm -> 2x2 max-pool`` stages
    (3 -> 16 -> 32 -> 64 -> 128 channels), followed by a 256-channel
    bottleneck convolution.
    Decoder: three ``bilinear upsample x2 -> conv3x3 -> ReLU -> batch-norm``
    stages (256 -> 128 -> 64 -> 32 channels), then a final upsample, a 1x1
    convolution to a single channel and a sigmoid.
    """

    def __init__(self):
        super().__init__()

        # ---- encoder ----
        self.enc_conv0 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
        self.act0 = nn.ReLU()
        self.bn0 = nn.BatchNorm2d(16)
        self.pool0 = nn.MaxPool2d(kernel_size=(2, 2))

        self.enc_conv1 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.act1 = nn.ReLU()
        self.bn1 = nn.BatchNorm2d(32)
        self.pool1 = nn.MaxPool2d(kernel_size=(2, 2))

        self.enc_conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.act2 = nn.ReLU()
        self.bn2 = nn.BatchNorm2d(64)
        self.pool2 = nn.MaxPool2d(kernel_size=(2, 2))

        self.enc_conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.act3 = nn.ReLU()
        self.bn3 = nn.BatchNorm2d(128)
        self.pool3 = nn.MaxPool2d(kernel_size=(2, 2))

        # ---- bottleneck ----
        self.bottleneck_conv = nn.Conv2d(128, 256, kernel_size=3, padding=1)

        # ---- decoder ----
        self.upsample0 = nn.UpsamplingBilinear2d(scale_factor=2)
        self.dec_conv0 = nn.Conv2d(256, 128, kernel_size=3, padding=1)
        self.dec_act0 = nn.ReLU()
        self.dec_bn0 = nn.BatchNorm2d(128)

        self.upsample1 = nn.UpsamplingBilinear2d(scale_factor=2)
        self.dec_conv1 = nn.Conv2d(128, 64, kernel_size=3, padding=1)
        self.dec_act1 = nn.ReLU()
        self.dec_bn1 = nn.BatchNorm2d(64)

        self.upsample2 = nn.UpsamplingBilinear2d(scale_factor=2)
        self.dec_conv2 = nn.Conv2d(64, 32, kernel_size=3, padding=1)
        self.dec_act2 = nn.ReLU()
        self.dec_bn2 = nn.BatchNorm2d(32)

        self.upsample3 = nn.UpsamplingBilinear2d(scale_factor=2)
        self.dec_conv3 = nn.Conv2d(32, 1, kernel_size=1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Map an image batch to per-pixel probabilities (sigmoid output)."""
        encoder_stages = (
            (self.enc_conv0, self.act0, self.bn0, self.pool0),
            (self.enc_conv1, self.act1, self.bn1, self.pool1),
            (self.enc_conv2, self.act2, self.bn2, self.pool2),
            (self.enc_conv3, self.act3, self.bn3, self.pool3),
        )
        decoder_stages = (
            (self.upsample0, self.dec_conv0, self.dec_act0, self.dec_bn0),
            (self.upsample1, self.dec_conv1, self.dec_act1, self.dec_bn1),
            (self.upsample2, self.dec_conv2, self.dec_act2, self.dec_bn2),
        )

        features = x
        for conv, act, norm, pool in encoder_stages:
            features = pool(norm(act(conv(features))))

        features = self.bottleneck_conv(features)

        for upsample, conv, act, norm in decoder_stages:
            features = norm(act(conv(upsample(features))))

        # Final stage has no ReLU/batch-norm: upsample, project to 1 channel, squash.
        return self.sigmoid(self.dec_conv3(self.upsample3(features)))
Special mention should be made of the loss function used for segmentation. In a classic autoencoder we need to measure the similarity between two images, which can be done using mean squared error. In segmentation, each pixel of the target mask represents a class number (one-hot encoded along the third dimension), so we need a classification-specific loss function - cross-entropy loss - averaged over all pixels. If the mask is binary, binary cross-entropy (BCE) loss is used.
# Build the network on the selected device together with its optimizer.
model = SegNet().to(device)
optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
# SegNet.forward already ends with a sigmoid, so the network outputs
# probabilities. BCEWithLogitsLoss applies its own sigmoid internally, which
# would squash the predictions twice; BCELoss is the criterion that matches
# probability outputs.
loss_fn = nn.BCELoss()
Training loop:
def train(dataloaders, model, loss_fn, optimizer, epochs, device):
    """Train ``model`` and report the mean train/test loss after every epoch.

    Args:
        dataloaders: Pair ``(train_dataloader, test_dataloader)``; both yield
            ``(images, labels)`` batches.
        model: Network to optimise (switched between train and eval mode here).
        loss_fn: Callable ``loss_fn(predictions, labels)`` returning a scalar loss.
        optimizer: Optimizer holding the parameters of ``model``.
        epochs: Number of passes over the training loader.
        device: Device that every batch is moved to before the forward pass.

    Returns:
        None. Per-epoch mean losses are shown in the tqdm progress bar.
    """
    progress = tqdm(range(epochs))
    train_loader, test_loader = dataloaders[0], dataloaders[1]

    for _ in progress:
        # ---- optimisation pass over the training data ----
        model.train()
        running_train = 0.0
        running_test = 0.0
        for inputs, targets in train_loader:
            inputs = inputs.to(device)
            targets = targets.to(device)
            batch_loss = loss_fn(model(inputs), targets)
            optimizer.zero_grad()
            batch_loss.backward()
            optimizer.step()
            running_train += batch_loss.item()

        # ---- evaluation pass, no gradients needed ----
        model.eval()
        with torch.no_grad():
            for inputs, targets in test_loader:
                inputs = inputs.to(device)
                targets = targets.to(device)
                running_test += loss_fn(model(inputs), targets).item()

        # Average the accumulated batch losses over the number of batches.
        summary = {
            'train loss:': running_train / len(train_loader),
            'test loss:': running_test / len(test_loader),
        }
        progress.set_postfix(summary, refresh=True)
        progress.refresh()
# Run the training loop with the objects configured above.
train(dataloaders, model, loss_fn, optimizer, epochs, device)
4. Evaluate the model
# Qualitative check: predict masks for the first few test images and plot
# them next to the ground-truth masks.
model.eval()
plots = 5
predictions = []
image_mask = []
images, masks = test_dataset[0], test_dataset[1]
for img, mask in zip(images[:plots], masks[:plots]):
    # The model expects a batch axis, so wrap the single image in one.
    batch = img.to(device).unsqueeze(0)
    probabilities = model(batch).detach().cpu()[0]
    # Binarise the network output at 0.5 to obtain the predicted mask.
    predictions.append((probabilities > 0.5).float())
    image_mask.append(mask)
plotn(plots, (predictions, image_mask), only_mask=True)
4. TensorFlow code reference
1. Import the package
import tensorflow as tf
import tensorflow.keras.layers as keras
import matplotlib.pyplot as plt
from tqdm import tqdm
import numpy as np
from skimage.io import imread
from skimage.transform import resize
import os
import tensorflow.keras.optimizers as optimizers
import tensorflow.keras.losses as losses
from tensorflow.keras.preprocessing.image import ImageDataGenerator
# Seed NumPy and TensorFlow so that the split and the initial weights are repeatable.
np.random.seed(42)
tf.random.set_seed(42)

# Hyperparameters.
train_size = 0.8      # fraction of the samples used for training
batch_size = 64       # mini-batch size passed to model.fit
epochs = 100          # number of passes over the training set
lr = 3e-4             # Adam learning rate
weight_decay = 8e-9   # value handed to the optimizer's `decay` argument
2. Process the dataset
# Notebook shell escapes: install the archiver, fetch the PH2 archive and
# unpack it into the current working directory.
# NOTE(review): this installs the `rar` package while extraction below calls
# `unrar` -- confirm which package your environment needs.
!apt-get install rar
!wget https://www.dropbox.com/s/k88qukc20ljnbuo/PH2Dataset.rar
!unrar x -Y PH2Dataset.rar
def load_dataset(train_part, root='PH2Dataset'):
    """Load the PH2 images and lesion masks and split them into train/test parts.

    Walks ``<root>/PH2 Dataset images`` and, for every directory whose name
    ends in ``_Dermoscopic_Image`` (input image) or ``_lesion`` (mask), reads
    the alphabetically first file it contains. All images and masks are
    resized to 256x256; masks are binarised at 0.5 after resizing.

    Args:
        train_part: Fraction of the samples placed in the training set.
        root: Directory that contains the unpacked ``PH2 Dataset images`` folder.

    Returns:
        ``((X_train, y_train), (X_test, y_test))`` as ``tf.float32`` tensors.
        Masks get a trailing singleton channel axis (``axis=3``).

    Raises:
        ValueError: If no samples were found, or if the number of images and
            masks differs (the two lists are paired by position).
    """
    images = []
    masks = []
    data_dir = os.path.join(root, 'PH2 Dataset images')
    for dirpath, dirnames, filenames in os.walk(data_dir):
        # os.walk yields entries in arbitrary (filesystem) order. Sorting the
        # sub-directory list in place fixes the traversal order, so the seeded
        # permutation below produces the same split on every machine.
        dirnames.sort()
        if not filenames:
            continue
        # Deterministic replacement for "whatever file happens to come first".
        first_file = os.path.join(dirpath, min(filenames))
        if dirpath.endswith('_Dermoscopic_Image'):
            images.append(imread(first_file))
        if dirpath.endswith('_lesion'):
            masks.append(imread(first_file))

    if not images or len(images) != len(masks):
        raise ValueError(
            'Expected the same non-zero number of images and masks under '
            '{!r}, found {} image(s) and {} mask(s)'.format(
                data_dir, len(images), len(masks)))

    size = (256, 256)
    images = np.array([
        resize(image, size, mode='constant', anti_aliasing=True)
        for image in images
    ])
    # Binarise after resizing, then append the channel axis.
    masks = np.expand_dims(
        np.array([
            resize(mask, size, mode='constant', anti_aliasing=False) > 0.5
            for mask in masks
        ]),
        axis=3,
    )

    indices = np.random.permutation(len(images))
    train_part = int(train_part * len(images))
    train_ind = indices[:train_part]
    test_ind = indices[train_part:]

    X_train = tf.cast(images[train_ind, :, :, :], tf.float32)
    y_train = tf.cast(masks[train_ind, :, :, :], tf.float32)
    X_test = tf.cast(images[test_ind, :, :, :], tf.float32)
    y_test = tf.cast(masks[test_ind, :, :, :], tf.float32)
    return (X_train, y_train), (X_test, y_test)
# Load the data once, using the train fraction configured in `train_size`.
(X_train, y_train), (X_test, y_test) = load_dataset(train_size)
def plotn(n, data):
    """Plot the first ``n`` samples of ``data`` in two figures.

    The first figure shows the images, the second one the matching masks,
    each as a single row of ``n`` axes.

    Args:
        n: Number of samples (columns) to draw.
        data: Pair ``(images, masks)`` of equally ordered sequences. Images are
            plotted as they are; masks are plotted as ``mask[:, :, 0]``.
    """
    images, masks = data[0], data[1]
    # squeeze=False always returns a 2-D array of axes. Without it,
    # plt.subplots(1, 1) returns a bare Axes object and ax[i] fails for n == 1.
    _, image_axes = plt.subplots(1, n, squeeze=False)
    _, mask_axes = plt.subplots(1, n, squeeze=False)
    ax, ax1 = image_axes[0], mask_axes[0]
    for i, (img, mask) in enumerate(zip(images, masks)):
        if i == n:
            break
        ax[i].imshow(img)
        ax1[i].imshow(mask[:, :, 0])
    plt.show()
3. Define the model and train it
class SegNet(tf.keras.Model):
    """Small SegNet-style encoder-decoder that outputs one channel of raw logits.

    Encoder: four ``conv3x3 -> batch-norm -> ReLU -> max-pool`` stages with
    16, 32, 64 and 128 filters, followed by a 256-filter bottleneck convolution.
    Decoder: three ``conv3x3 -> bilinear upsample -> batch-norm -> ReLU``
    stages with 128, 64 and 32 filters, then a final upsample and a 1x1
    convolution to a single channel (no activation is applied to it).
    """

    def __init__(self):
        super().__init__()

        # ---- encoder ----
        self.enc_conv0 = keras.Conv2D(filters=16, kernel_size=(3, 3), padding='same')
        self.bn0 = keras.BatchNormalization()
        self.relu0 = keras.Activation('relu')
        self.pool0 = keras.MaxPool2D()

        self.enc_conv1 = keras.Conv2D(filters=32, kernel_size=(3, 3), padding='same')
        self.relu1 = keras.Activation('relu')
        self.bn1 = keras.BatchNormalization()
        self.pool1 = keras.MaxPool2D()

        self.enc_conv2 = keras.Conv2D(filters=64, kernel_size=(3, 3), padding='same')
        self.relu2 = keras.Activation('relu')
        self.bn2 = keras.BatchNormalization()
        self.pool2 = keras.MaxPool2D()

        self.enc_conv3 = keras.Conv2D(filters=128, kernel_size=(3, 3), padding='same')
        self.relu3 = keras.Activation('relu')
        self.bn3 = keras.BatchNormalization()
        self.pool3 = keras.MaxPool2D()

        # ---- bottleneck ----
        self.bottleneck_conv = keras.Conv2D(filters=256, kernel_size=(3, 3), padding='same')

        # ---- decoder ----
        self.upsample0 = keras.UpSampling2D(interpolation='bilinear')
        self.dec_conv0 = keras.Conv2D(filters=128, kernel_size=(3, 3), padding='same')
        self.dec_relu0 = keras.Activation('relu')
        self.dec_bn0 = keras.BatchNormalization()

        self.upsample1 = keras.UpSampling2D(interpolation='bilinear')
        self.dec_conv1 = keras.Conv2D(filters=64, kernel_size=(3, 3), padding='same')
        self.dec_relu1 = keras.Activation('relu')
        self.dec_bn1 = keras.BatchNormalization()

        self.upsample2 = keras.UpSampling2D(interpolation='bilinear')
        self.dec_conv2 = keras.Conv2D(filters=32, kernel_size=(3, 3), padding='same')
        self.dec_relu2 = keras.Activation('relu')
        self.dec_bn2 = keras.BatchNormalization()

        self.upsample3 = keras.UpSampling2D(interpolation='bilinear')
        self.dec_conv3 = keras.Conv2D(filters=1, kernel_size=(1, 1))

    def call(self, input):
        """Run the encoder, bottleneck and decoder; return per-pixel logits."""
        encoder_stages = (
            (self.enc_conv0, self.bn0, self.relu0, self.pool0),
            (self.enc_conv1, self.bn1, self.relu1, self.pool1),
            (self.enc_conv2, self.bn2, self.relu2, self.pool2),
            (self.enc_conv3, self.bn3, self.relu3, self.pool3),
        )
        decoder_stages = (
            (self.dec_conv0, self.upsample0, self.dec_bn0, self.dec_relu0),
            (self.dec_conv1, self.upsample1, self.dec_bn1, self.dec_relu1),
            (self.dec_conv2, self.upsample2, self.dec_bn2, self.dec_relu2),
        )

        features = input
        for conv, norm, relu, pool in encoder_stages:
            features = pool(relu(norm(conv(features))))

        features = self.bottleneck_conv(features)

        # In the decoder the convolution runs before the upsampling.
        for conv, upsample, norm, relu in decoder_stages:
            features = relu(norm(upsample(conv(features))))

        # Final stage: upsample first, then project to a single logit channel.
        return self.dec_conv3(self.upsample3(features))
# Instantiate the network and configure it for training.
model = SegNet()
# from_logits=True: the loss applies the sigmoid itself, so the model is
# expected to output raw, unactivated scores.
loss_fn = losses.BinaryCrossentropy(from_logits=True)
# NOTE(review): the value named `weight_decay` is passed as Keras' `decay`
# argument. In Keras that name historically means learning-rate decay, not L2
# weight decay, and newer optimizer implementations may reject it -- confirm
# against the installed TensorFlow version.
optimizer = optimizers.Adam(learning_rate=lr, decay=weight_decay)
model.compile(optimizer=optimizer, loss=loss_fn)
def train(datasets, model, epochs, batch_size):
    """Fit ``model`` on the training split, validating on the test split.

    Args:
        datasets: Pair ``(train, test)``; each element is indexable as
            ``(inputs, targets)``.
        model: Object exposing a Keras-style ``fit`` method.
        epochs: Number of epochs forwarded to ``fit``.
        batch_size: Batch size forwarded to ``fit``.

    Returns:
        None; the value returned by ``fit`` is discarded.
    """
    train_split = datasets[0]
    validation_split = datasets[1]
    fit_options = dict(
        epochs=epochs,
        batch_size=batch_size,
        shuffle=True,
        validation_data=(validation_split[0], validation_split[1]),
    )
    model.fit(train_split[0], train_split[1], **fit_options)
# Train on the training split and validate on the held-out split.
train(((X_train, y_train), (X_test, y_test)), model, epochs, batch_size)
4. Verify the model
# Qualitative check: predict masks for the first few test images and plot
# them next to the ground-truth masks.
predictions = []
image_mask = []
plots = 5
for i, (img, mask) in enumerate(zip(X_test, y_test)):
    if i == plots:
        break
    # predict() expects a batch axis, so wrap the single image in one.
    img = tf.expand_dims(img, 0)
    pred = np.array(model.predict(img))
    # The network outputs raw logits (its last layer has no activation and the
    # loss is built with from_logits=True). A probability above 0.5 corresponds
    # to a logit above 0, so the decision threshold on logits is 0, not 0.5.
    predictions.append(pred[0, :, :, 0] > 0.0)
    image_mask.append(mask)
plotn(plots, (predictions, image_mask))