keras实现图像语义分割的子函数

查看模型权重列表中的第一个权重数组（通常是第一个带参数层的kernel）：

# Keras `get_weights()` returns a flat list of weight arrays; [0] picks the first one
# (presumably the kernel of the first layer that has weights — confirm for your model).
weights = model.get_weights()[0]

参考

图像augmentation及预训练参考

定义损失函数，dice_loss

调色盘

将包含训练、测试、验证集的图像文件夹CamVid放在当前目录下，导入图像数据代码如下：

import util
import numpy as np
import os
DataPath='/CamVid/'
def load_data(mode):
    """Load every image/label pair listed in ``<cwd>/CamVid/<mode>.txt``.

    Each non-empty line of the list file holds two whitespace-separated
    paths (image first, ground truth second). Both paths are appended to
    the current working directory, so they are expected to start with ``/``.

    Args:
        mode: Name of the list file without extension, e.g. ``"train"``.

    Returns:
        Tuple ``(data, label)`` of NumPy arrays stacking the per-file results
        of ``util.get_preprocessed_image`` and ``util.get_preprocessed_label``.

    Raises:
        IndexError: If a non-empty line contains fewer than two paths.
    """
    data = []
    label = []
    root = os.getcwd()
    with open(root + DataPath + mode + '.txt') as f:
        # split() without an argument collapses runs of blanks and drops the
        # trailing newline, so a double space between the two paths or a
        # missing newline on the last line no longer corrupts the label path.
        pairs = [line.split() for line in f if line.strip()]
    for fields in pairs:
        image_path, label_path = fields[0], fields[1]
        img_data, img_h, img_w = util.get_preprocessed_image(root + image_path)
        data.append(img_data)
        img_label, y_h, y_w = util.get_preprocessed_label(root + label_path)
        label.append(img_label)
    return np.array(data), np.array(label)

其中训练样本保存在文件夹train中，其ground truth保存在trainannot文件夹中，ground truth是一通道图像，每个像素是类别索引（0~11）。保存训练图像和ground truth路径和名字的文本为train.txt，内容格式为：

/CamVid/train/0001TP_006690.png /CamVid/trainannot/0001TP_006690.png

如果导入的是训练集，则

# Load the training split listed in train.txt as two arrays (images, one-hot labels).
train_data, train_label = load_data("train")
# NOTE(review): only train_data is written here; train_label is not saved by this
# snippet, and the "data/" directory presumably has to exist already — confirm.
np.save("data/train_data.npy", train_data)

util如下：

import numpy as np
from PIL import Image
from keras.applications.vgg16 import preprocess_input

def one_hot_it(labels, h, w, num_classes=12):
    """Convert an ``(h, w)`` class-index map into a one-hot array.

    Args:
        labels: 2-D array-like of class indices; values are truncated to int.
        h: Number of rows to encode.
        w: Number of columns to encode.
        num_classes: Length of the one-hot axis. Defaults to 12, the value
            that used to be hard-coded.

    Returns:
        ``float64`` array of shape ``(h, w, num_classes)`` holding a single 1
        per pixel at the position of that pixel's class index.

    Raises:
        IndexError: If ``labels`` does not provide ``h`` x ``w`` entries or a
            class index does not fit into ``num_classes`` channels.
    """
    idx = np.asarray(labels)[:h, :w].astype(int)
    if idx.shape != (h, w):
        raise IndexError("labels must provide at least h x w entries")
    x = np.zeros([h, w, num_classes])
    # One fancy-indexing assignment replaces the per-pixel Python loop:
    # for every (row, col) pair set the channel named by the label to 1.
    rows = np.arange(h)[:, None]
    cols = np.arange(w)[None, :]
    x[rows, cols, idx] = 1
    return x

def get_preprocessed_label(file_name):
    """Load a 1-channel index map and return it one-hot encoded, padded to 500x500.

    Args:
        file_name: Path of a single-channel label image whose pixel values
            are class indices.

    Returns:
        Tuple ``(label, img_h, img_w)``. ``label`` has shape ``(500, 500, 12)``;
        the area added by padding (bottom and right) is zero in every channel.
        ``img_h`` and ``img_w`` are the size of the image before padding.

    Raises:
        ValueError: If the image is not 2-D or is larger than 500 x 500.
    """
    im = np.array(Image.open(file_name))
    if im.ndim != 2:
        raise ValueError("Only single-channel index maps are supported")
    img_h, img_w = im.shape
    # Reject oversized inputs before the per-pixel one-hot conversion so the
    # error is reported without first doing the expensive work.
    if img_h > 500 or img_w > 500:
        raise ValueError("Please resize your images to be not bigger than 500 x 500")
    im = one_hot_it(im, img_h, img_w)
    pad_h = 500 - img_h
    pad_w = 500 - img_w
    im = np.pad(im, pad_width=((0, pad_h), (0, pad_w), (0, 0)), mode='constant', constant_values=0)
    return im.reshape(500, 500, 12), img_h, img_w

def get_preprocessed_image(file_name):
    """Read an RGB image, run ``preprocess_input`` on it and zero-pad to 500x500.

    Args:
        file_name: Path of the image to load.

    Returns:
        Tuple ``(image, img_h, img_w)``: a ``float32`` array of shape
        ``(500, 500, 3)`` (channels last, padded at the bottom and right)
        and the height and width of the image before padding.

    Raises:
        AssertionError: If the image does not have exactly three channels.
        ValueError: If the image is larger than 500 x 500.
    """
    pixels = np.array(Image.open(file_name)).astype(np.float32)
    assert pixels.ndim == 3, "Only RGB images are supported"
    img_h, img_w, img_c = pixels.shape
    assert img_c == 3, "Only RGB images are supported"
    if max(img_h, img_w) > 500:
        raise ValueError("Please resize your images to be not bigger than 500 x 500")
    pixels = preprocess_input(pixels)
    # Pad only after the last row / column; the channel axis is left alone.
    padding = ((0, 500 - img_h), (0, 500 - img_w), (0, 0))
    padded = np.pad(pixels, pad_width=padding, mode='constant', constant_values=0)
    return padded.astype(np.float32).reshape(500, 500, 3), img_h, img_w

def get_label_image(probs, img_h, img_w):
    """Build a palettised label image from per-pixel class scores (channels last).

    For every pixel the index of the highest-scoring channel is taken, the
    result is cropped to its top-left ``img_h`` x ``img_w`` region and turned
    into a mode "P" image coloured with the module-level ``_PALETTE``.

    Args:
        probs: Array of shape (rows, cols, classes).
        img_h: Number of rows to keep.
        img_w: Number of columns to keep.

    Returns:
        The PIL image carrying the palette.
    """
    class_map = probs.argmax(axis=2)
    cropped = class_map.astype("uint8")[:img_h, :img_w]
    label_im = Image.fromarray(cropped, "P")
    label_im.putpalette(_PALETTE)
    return label_im

其中one_hot_it可用keras自带to_categorical实现：

# NOTE(review): newer Keras releases expose this helper as `keras.utils.to_categorical`;
# confirm the import path against the installed version.
from keras.utils.np_utils import to_categorical
# One-hot encode the integer labels in `imgs` using `num_classes` classes.
categorical_labels = to_categorical(imgs, num_classes)
# Arrange the result as (number of images, rows, cols, classes).
label=categorical_labels.reshape(imgs_num,img_rows,img_cols,num_classes)

将ground truth显示为彩色的代码如下：

import numpy as np
import os
from PIL import Image
import util

# Flat R, G, B colour table: entry k occupies positions 3k .. 3k+2.
# Kept flat because it is handed to PIL's putpalette() as is.
_PALETTE = [
    0, 0, 0,
    128, 0, 0,
    0, 128, 0,
    128, 128, 0,
    0, 0, 128,
    128, 0, 128,
    0, 128, 128,
    128, 128, 128,
    64, 0, 0,
    192, 0, 0,
    64, 128, 0,
    192, 128, 0,
    64, 0, 128,
    192, 0, 128,
    64, 128, 128,
    192, 128, 128,
    0, 64, 0,
    128, 64, 0,
    0, 192, 0,
    128, 192, 0,
    0, 64, 128,
    128, 64, 128,
    0, 192, 128,
    128, 192, 128,
    64, 64, 0,
    192, 64, 0,
    64, 192, 0,
    192, 192, 0,
]
# The same table as a list of [r, g, b] triples, indexable by class id.
# reshape(-1, 3) derives the row count from the table itself, so adding a
# colour to _PALETTE no longer requires updating a second magic number.
_COLOR_LIST = np.array(_PALETTE).reshape(-1, 3).tolist()

def get_rgb(single, h, w):
    """Render a class-index map as an RGB image using ``_COLOR_LIST``.

    Args:
        single: 2-D array of class indices; values are truncated to int.
        h: Number of rows to render.
        w: Number of columns to render.

    Returns:
        PIL image in mode "RGB" in which every pixel carries the colour that
        ``_COLOR_LIST`` assigns to its class index.

    Raises:
        IndexError: If ``single`` does not provide ``h`` x ``w`` entries or a
            class index has no entry in ``_COLOR_LIST``.
    """
    indices = np.asarray(single)[:h, :w].astype(int)
    if indices.shape != (h, w):
        raise IndexError("single must provide at least h x w entries")
    # A single palette lookup replaces the per-pixel Python loop and the
    # three separate float -> 'L' channel conversions.
    rgb = np.asarray(_COLOR_LIST, dtype=np.uint8)[indices]
    return Image.fromarray(rgb)

# Convert every ground-truth index map listed in <cwd>/CamVid/<mode>.txt into
# colour images written below <cwd>/colorlabel/.
DataPath = '/CamVid/'
mode = 'test'
output_file = '/colorlabel/'
with open(os.getcwd() + DataPath + mode + '.txt') as f:
    # split() without an argument tolerates repeated blanks and strips the
    # trailing newline, so no manual [:-1] slicing of the path is needed.
    txt = [line.split() for line in f if line.strip()]
for fields in txt:
    label_path = fields[1]
    # basename() replaces the fixed-width [-18:-1] slice, which only worked
    # for file names of exactly 17 characters.
    out_name = os.path.basename(label_path)
    index_map = np.array(Image.open(os.getcwd() + label_path)).astype(np.float32)
    img_h, img_w = index_map.shape[0], index_map.shape[1]

    # Variant 1: 1-channel index map -> 3-channel RGB image.
    rgb_image = get_rgb(index_map, img_h, img_w)
    rgb_image.save(os.getcwd() + output_file + out_name)

    # Variant 2: shown in colour but still 1 channel (palette image).
    # The index map is kept in its own variable; previously it was overwritten
    # by the PIL image from variant 1, so this step could not run.
    one_hot = util.one_hot_it(index_map, img_h, img_w)
    segmentation = util.get_label_image(one_hot, img_h, img_w)
    # Distinct name so that variant 2 does not overwrite variant 1.
    segmentation.save(os.getcwd() + output_file + 'palette_' + out_name)

将RGB的ground truth转化为多通道二值索引图：

def label_colormap(label_rgb):
    """Map an RGB ground-truth image to a 2-D map of ``_COLOR_LIST`` indices.

    Args:
        label_rgb: Array of shape ``(h, w, 3)`` holding one colour per pixel.

    Returns:
        ``float64`` array of shape ``(h, w)``; each entry is the position of
        the pixel's colour in ``_COLOR_LIST``.

    Raises:
        ValueError: If the input is not ``(h, w, 3)`` or contains a colour
            that is missing from ``_COLOR_LIST``.
    """
    label_rgb = np.asarray(label_rgb)
    if label_rgb.ndim != 3 or label_rgb.shape[2] != 3:
        raise ValueError("Expected an RGB array of shape (h, w, 3)")
    h, w = label_rgb.shape[:2]
    cmap = np.full([h, w], -1.0)
    # One vectorised mask per palette colour instead of a list.index() call
    # per pixel. Walking the palette backwards lets the lowest index win if a
    # colour were listed twice, which is what list.index() returned.
    for index in range(len(_COLOR_LIST) - 1, -1, -1):
        matches = np.all(label_rgb == _COLOR_LIST[index], axis=-1)
        cmap[matches] = index
    if (cmap < 0).any():
        raise ValueError("Label image contains a colour that is not in _COLOR_LIST")
    return cmap

def get_preprocessed_label_MSRC(file_name):
    """Load an RGB ground truth and return it one-hot encoded, padded to 500x500.

    The colours are first translated to class indices with ``label_colormap``
    and then expanded with ``one_hot_it`` (channels last).

    Args:
        file_name: Path of the RGB label image.

    Returns:
        Tuple ``(label, img_h, img_w)``. ``label`` has shape ``(500, 500, 12)``
        and is zero in the area added by padding; ``img_h`` and ``img_w`` are
        the size of the image before padding.

    Raises:
        ValueError: If the image is larger than 500 x 500 (or, from
            ``label_colormap``, if a colour is unknown).
        IndexError: From ``one_hot_it`` if a colour maps to an index that does
            not fit into its 12 channels.
    """
    rgb = np.array(Image.open(file_name))
    img_h, img_w = rgb.shape[0], rgb.shape[1]
    # Reject oversized inputs before the two per-pixel conversions below.
    if img_h > 500 or img_w > 500:
        raise ValueError("Please resize your images to be not bigger than 500 x 500.")
    im = label_colormap(rgb)
    im = one_hot_it(im, img_h, img_w)
    pad_h = 500 - img_h
    pad_w = 500 - img_w
    im = np.pad(im, pad_width=((0, pad_h), (0, pad_w), (0, 0)), mode='constant', constant_values=0)
    return im.reshape(500, 500, 12), img_h, img_w

用单幅图像测试：

# Segment a single image with trained weights and save the coloured label map.
input_file = "image.jpg"
output_file = "out.png"
saved_model_path="my_weights.h5"
# get_model() is not defined in this snippet; presumably it builds the
# segmentation network whose weights are stored in my_weights.h5 — confirm.
model = get_model()
model.load_weights(saved_model_path)
# img_h / img_w are the size before padding, returned next to the padded array.
img_data, img_h, img_w = util.get_preprocessed_image(input_file)
# Add a leading axis so the model receives a batch containing one image.
temp=np.expand_dims(img_data,axis=0)
# Keep the first (only) prediction of the batch.
probs = model.predict(temp, verbose=False)[0, :, :, :]
# Crop back to the original size and colour the result with the palette.
segmentation = util.get_label_image(probs, img_h, img_w)
segmentation.save(output_file)

使用VGG预训练模型提取图像特征并显示：

from keras.applications.vgg16 import VGG16
from keras.preprocessing import image
from keras.applications.vgg16 import preprocess_input
from keras.models import Model
import numpy as np
import matplotlib.pyplot as plt
import os
import util

# Load one image at the 224x224 size used here, add a batch axis and apply
# the VGG16 preprocessing.
img_path = 'image.jpg'
img = image.load_img(img_path, target_size=(224, 224))
x = image.img_to_array(img)
x = np.expand_dims(x, axis=0)
x = preprocess_input(x)

# Truncate the ImageNet-pretrained VGG16 (without its classifier head) at
# block3_pool and take the feature maps of the single input image.
base_model = VGG16(weights='imagenet', include_top=False)
model = Model(inputs=base_model.input, outputs=base_model.get_layer('block3_pool').output)
block3_pool_features = model.predict(x)[0, :, :, :]

# List the layers of the truncated model with their position.
for i, layer in enumerate(model.layers):
    print(i, layer.name)

plt.imshow(block3_pool_features[:, :, 2])   # show single feature map
plt.axis('off')
plt.show()

for i in range(20):         # show more feature maps
    plt.figure()
    plt.imshow(block3_pool_features[:, :, i])
# The figures created in the loop were never displayed when run as a script;
# show them all at once.
plt.show()

直方图均衡化（图像处理时常将r与b通道互换）：

from skimage import exposure
def normalized(rgb):
    """Histogram-equalise each of the first three channels independently.

    Args:
        rgb: Array indexed as (row, column, channel) with at least three channels.

    Returns:
        ``float32`` array of shape ``(rows, cols, 3)``; channel ``k`` holds
        ``exposure.equalize_hist`` applied to channel ``k`` of the input, so
        the channel order of the input is preserved.
    """
    rows, cols = rgb.shape[0], rgb.shape[1]
    norm = np.zeros((rows, cols, 3), np.float32)
    for channel in range(3):
        norm[:, :, channel] = exposure.equalize_hist(rgb[:, :, channel])
    return norm

调用时：

from keras.preprocessing import image
# Convert x to an array with img_to_array, then equalise each channel.
# NOTE(review): x is presumably a PIL image loaded beforehand — confirm.
x=normalized(image.img_to_array(x))

训练：

# `import ... import ...` is a syntax error; the names come from the module.
from keras.callbacks import ModelCheckpoint, Callback, EarlyStopping
# SGD is used by the compile() call of this section.
from keras.optimizers import SGD


class LossHistory(Callback):
    """Keras callback that records the training loss after every batch.

    After training, ``losses`` holds one entry per processed batch (``None``
    when the logs of a batch carried no ``'loss'`` key).
    """

    def on_train_begin(self, logs=None):
        # Start with an empty history for each training run.
        self.losses = []

    def on_batch_end(self, batch, logs=None):
        # None instead of a shared mutable {} default; fall back to an empty
        # dict so a missing logs argument is recorded as None.
        logs = logs or {}
        self.losses.append(logs.get('loss'))

# Compile with Nesterov-momentum SGD and binary cross-entropy.
# NOTE(review): lr=1e-13 is extremely small — confirm it is intended for this model/loss.
model.compile(optimizer=SGD(lr=1e-13,momentum=0.99,nesterov=True),loss='binary_crossentropy',metrics=['accuracy'])
# Keep only the weights of the best epoch (judged by 'val_acc') in my.h5.
# NOTE(review): newer Keras versions presumably name this metric 'val_accuracy' — confirm.
checkpointer=ModelCheckpoint(filepath='my.h5',monitor='val_acc',verbose=1,save_best_only=True,
                             save_weights_only=True,mode='auto')
history=LossHistory()
# Stop when 'val_acc' has not improved for 10 epochs.
earlystop=EarlyStopping(monitor='val_acc',patience=10,verbose=1,mode='auto')
    
# `memory` is the History object returned by fit(); its .history dict is plotted below.
memory=model.fit(train_data,train_label,batch_size=1,epochs=100,verbose=1,
          callbacks=[checkpointer,earlystop,history],validation_split=0.1,shuffle=True) # the validation split is taken first, then the remaining training data is shuffled (validation data is not shuffled)
# val_data / val_label are not defined in this snippet; presumably loaded like the training set — confirm.
scores=model.evaluate(val_data,val_label,batch_size=1,verbose=1)
# scores[1] is the first metric after the loss, printed as a percentage.
print("%s:%.2f%%" % (model.metrics_names[1],scores[1]*100))

显示acc和loss：

import matplotlib.pyplot as plt
print(memory.history.keys())   # list everything recorded in memory
# One figure per metric: the training curve first, then its validation
# counterpart, each with its own title, y label and legend position.
for train_key, val_key, title, y_label, legend_loc in (
        ('acc', 'val_acc', 'model accuracy', 'accuracy', 'upper left'),
        ('loss', 'val_loss', 'model loss', 'loss', 'upper right')):
    plt.plot(memory.history[train_key])
    plt.plot(memory.history[val_key])
    plt.title(title)
    plt.ylabel(y_label)
    plt.xlabel('epoch')
    plt.legend(['train', 'val'], loc=legend_loc)
    plt.show()

data augmentation：使用图像生成器ImageDataGenerator，训练时该函数会无限生成数据，直到达到指定的epoch为止。

from keras.preprocessing.image import ImageDataGenerator

# Augmenting generator for training: all normalisation options are switched off,
# images are randomly rotated, shifted, sheared, zoomed and flipped, and pixel
# values are multiplied by 1/255.
train_datagen=ImageDataGenerator(featurewise_center=False,# set input mean to 0 over the dataset
				samplewise_center=False, #set each sample mean to 0
				featurewise_std_normalization=False, #divide inputs by std of the dataset
				samplewise_std_normalization=False, #divide each input by its std
				zca_whitening=False, #apply ZCA whitening
				rotation_range=10, #randomly rotate images in the range (degrees, 0 to 180)
				width_shift_range=0.1, #randomly shift images horizontally (fraction of total width)
				height_shift_range=0.1, #randomly shift images vertically (fraction of total height)
				rescale=1./255,
				shear_range=0.2, 
				zoom_range=0.2, 
				horizontal_flip=True)
# Validation/test data is only rescaled, never augmented.
test_datagen=ImageDataGenerator(rescale=1./255)

# flow_from_directory expects one sub-directory per class below the given
# root. Because the labels come from the folder names, this function does not
# seem applicable to semantic segmentation.
train_generator = train_datagen.flow_from_directory(
    train_data_dir,
    # Keras expects (height, width); the two were swapped, which only went
    # unnoticed for square targets.
    target_size=(img_height, img_width),
    batch_size=32,
    class_mode='binary')
validation_generator = test_datagen.flow_from_directory(
    validation_data_dir,
    target_size=(img_height, img_width),
    batch_size=32,
    class_mode='binary')

# Keras 2 argument names, matching the other fit calls in this file. Keras 2
# counts batches, not samples, so the sample counts are divided by the batch
# size of the corresponding generator (at least one step each).
model.fit_generator(
    train_generator,
    steps_per_epoch=max(1, nb_train_samples // train_generator.batch_size),
    epochs=nb_epoch,
    validation_data=validation_generator,
    validation_steps=max(1, nb_validation_samples // validation_generator.batch_size))

或

# steps_per_epoch is the number of batches drawn per epoch, so it has to be
# derived from the data set size; it was set to the batch size itself, which
# made the epoch length unrelated to the amount of training data.
model.fit_generator(datagen.flow(x_train, y_train, batch_size=batch_size),
                    steps_per_epoch=max(1, len(x_train) // batch_size),
                    epochs=epochs,
                    validation_data=(x_test, y_test))

显示augmentation后的图像：

from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
# Generator used only to preview what the random transformations produce.
datagen = ImageDataGenerator(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest')
img = load_img('image.jpg')
x = img_to_array(img)
x = x[None, ...]   # prepend a batch axis of length 1

# flow() never stops on its own, so count the batches and leave the loop once
# the counter exceeds 50; every drawn batch is written to the 'preview' folder.
preview_batches = datagen.flow(x, batch_size=1, save_to_dir='preview',
                               save_prefix='augmentation', save_format='jpg')
for i, batch in enumerate(preview_batches, start=1):
    if i > 50:
        break

使用预训练权重训练新网络有两种方法。一是将网络分成两个子模型：前一个子模型导入已训练的权重，其输出结果作为后一个子模型的输入，只训练后一个子模型（只适用于用add逐层堆叠的Sequential网络）。前一个子模型输出的保存和载入代码如下：

# Pass the file name instead of a text-mode file object: NumPy then opens the
# file in binary mode itself and closes it again. The text-mode handles used
# before fail on Python 3 and were never closed.
np.save('features_train.npy', features_train)
train_data = np.load('features_train.npy')

二是将已有权重的网络层固定，不再训练，代码如下：

# Mark the first 25 layers as non-trainable so their weights are kept fixed.
# NOTE(review): Keras presumably picks up `trainable` changes only at compile
# time — make sure model.compile() is called after this loop.
for layer in model.layers[:25]:
    layer.trainable=False

将模型保存为图片：

from keras.utils import plot_model
# Write a diagram of the model to model1.png.
# NOTE(review): plot_model presumably needs pydot and Graphviz installed — confirm.
plot_model(model,'model1.png')

在原图上显示分割结果：

from PIL import Image
import matplotlib.pyplot as plt
# Overlay a segmentation mask on the original image with 50 % blending.
mask = Image.open("mask.png")# single-channel VOC-style mask, pixel values are class labels; opened in mode "P"
image = Image.open("image.jpg")# opened in mode "RGB"
outputImage = Image.new("RGB",(image.size[0],image.size[1]),(0,0,0))# black RGB canvas of the image's size so both modes match
outputImage.paste(mask)
FullHdOutImage = Image.blend(outputImage, image, 0.5)# with 0.9 `image` dominates; with 0.1 `outputImage` dominates
plt.imshow(FullHdOutImage)

其它读图工具：

from skimage import io
# Read the mask without converting it to grayscale and display it.
# NOTE(review): later scikit-image releases presumably spell this keyword
# `as_gray` — confirm against the installed version.
img=io.imread("mask.png", as_grey=False)
io.imshow(img)

keras实现图像语义分割的子函数

猜你喜欢