FCN Semantic Segmentation on a Handbag Dataset with PyTorch 1.2

Preface

As a complete beginner in deep learning, this was my first time writing FCN code for semantic segmentation, and the process was quite a headache: other people's code is easy to follow at a glance, but writing my own left me stuck. This post simply records my experience.

The handbag dataset is explained in detail at the link below; this post only modifies the original code according to my own understanding.
Original article: https://blog.csdn.net/u014453898/article/details/92080859

Code

  1. Custom dataset code
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import os
import cv2
import torch
from onehot import onehot


class BagDataset(Dataset):
    def __init__(self, mode):
        # ImageNet normalization, since the model below uses pretrained VGG16 weights
        self.transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ])

        # sort the listing so the 60%/20%/20% train/val/test split is deterministic
        self.img = sorted(os.listdir('last'))

        if mode == 'train':
            self.img = self.img[:int(0.6*len(self.img))]
        elif mode == 'val':
            self.img = self.img[int(0.6*len(self.img)):int(0.8*len(self.img))]
        else:
            self.img = self.img[int(0.8*len(self.img)):]

    def __len__(self):
        return len(self.img)

    def __getitem__(self, idx):

        img_name = self.img[idx]

        # image and mask share the same file name in 'last' and 'last_msk'
        imgA = cv2.imread('last/' + img_name)
        imgA = cv2.cvtColor(imgA, cv2.COLOR_BGR2RGB)  # OpenCV loads BGR; the pretrained VGG expects RGB
        imgA = cv2.resize(imgA, (160, 160))
        imgB = cv2.imread('last_msk/' + img_name, 0)  # read the mask as grayscale
        imgB = cv2.resize(imgB, (160, 160))
        imgB = imgB / 255                          # map {0, 255} to {0, 1}
        imgB = imgB.astype('uint8')
        imgB = onehot(imgB, 2)                     # (H, W, 2)
        imgB = imgB.swapaxes(0, 2).swapaxes(1, 2)  # (2, H, W)
        imgB = torch.FloatTensor(imgB)
        imgA = self.transform(imgA)

        return imgA, imgB


train_db = BagDataset(mode='train')
val_db = BagDataset(mode='val')
test_db = BagDataset(mode='test')

train_loader = DataLoader(train_db, batch_size=4, shuffle=True, num_workers=4)
val_loader = DataLoader(val_db, batch_size=4, shuffle=True, num_workers=4)
test_loader = DataLoader(test_db, batch_size=4, shuffle=True, num_workers=4)


if __name__ == '__main__':
    # smoke test: one batch should be [4, 3, 160, 160] images and [4, 2, 160, 160] one-hot masks
    imgA, imgB = next(iter(train_loader))
    print(imgA.shape, imgB.shape)
  2. The onehot function
import numpy as np

def onehot(data, n):
    """Expand an integer label map of shape (H, W) into one-hot form (H, W, n)."""
    buf = np.zeros(data.shape + (n, ))
    # flat index of each (pixel, class) pair: channel k is set where data == k
    nmsk = np.arange(data.size)*n + data.ravel()
    buf.ravel()[nmsk] = 1  # the original 'nmsk-1' shifted every label one element back
    return buf
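
For intuition, here is a tiny worked example of what onehot produces after this fix (class value k turns on channel k along the trailing axis):

import numpy as np
from onehot import onehot

mask = np.array([[0, 1],
                 [1, 0]], dtype='uint8')
print(onehot(mask, 2))
# [[[1. 0.]    value 0 -> channel 0
#   [0. 1.]]   value 1 -> channel 1
#  [[0. 1.]
#   [1. 0.]]]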
  3. FCN model code
import torch
import torch.nn as nn
from torchvision import models


class FCN32s(nn.Module):
    def __init__(self, n_class):
        super(FCN32s, self).__init__()
        self.n_class = n_class
        self.feature = models.vgg16(pretrained=True).features
        self.feature[0] = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=100)
        self.module = nn.Sequential(
            nn.Conv2d(512, 4096, kernel_size=7),
            nn.ReLU(inplace=True),
            nn.Dropout2d(),
            nn.Conv2d(4096, 4096, kernel_size=1),
            nn.ReLU(inplace=True),
            nn.Dropout2d()
        )

        self.classifier = nn.Conv2d(4096, self.n_class, kernel_size=1)
        self.deconv1 = nn.ConvTranspose2d(self.n_class, self.n_class, kernel_size=64, stride=32, bias=False)

    def forward(self, x):
        h = x  # keep the input so we can crop back to its spatial size
        x = self.feature(x)
        x = self.module(x)
        score = self.classifier(x)
        upsample = self.deconv1(score)
        # crop to the input size (offset 19 pairs with the padding=100 trick);
        # the original cropped with x.size() after x had been reassigned to the feature map
        upsample = upsample[:, :, 19:19 + h.size()[2], 19:19 + h.size()[3]].contiguous()
        return upsample


class FCN16s(nn.Module):
    def __init__(self, n_class):
        super(FCN16s, self).__init__()
        self.n_class = n_class
        self.feature = list(models.vgg16(pretrained=True).features)
        self.feature[0] = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=100)
        self.feature1 = nn.Sequential(*self.feature[:24])
        self.feature2 = nn.Sequential(*self.feature[24:])
        self.module = nn.Sequential(
            nn.Conv2d(512, 4096, kernel_size=7),
            nn.ReLU(inplace=True),
            nn.Dropout2d(),
            nn.Conv2d(4096, 4096, kernel_size=1),
            nn.ReLU(inplace=True),
            nn.Dropout2d()
        )
        self.classifier1 = nn.Conv2d(4096, self.n_class, kernel_size=1)
        self.classifier2 = nn.Conv2d(512, self.n_class, kernel_size=1)

        self.deconv1 = nn.ConvTranspose2d(self.n_class, self.n_class, kernel_size=4, stride=2, bias=False)
        self.deconv2 = nn.ConvTranspose2d(self.n_class, self.n_class, kernel_size=32, stride=16, bias=False)

        self.bn = nn.BatchNorm2d(self.n_class)

    def forward(self, x):
        num = self.feature1(x)     # output after pool4
        num1 = self.feature2(num)  # output after pool5
        num2 = self.module(num1)

        score1 = self.classifier1(num2)
        score2 = self.classifier2(num)

        upsample1 = self.bn(self.deconv1(score1))  # 2x upsample of the pool5 score
        score2 = score2[:, :, 5:5 + upsample1.size()[2], 5:5 + upsample1.size()[3]]
        upsample1 += score2                        # fuse with the pool4 score
        upsample = self.bn(self.deconv2(upsample1))  # 16x upsample back towards the input size
        upsample = upsample[:, :, 27:27 + x.size()[2], 27:27 + x.size()[3]].contiguous()

        return upsample


class FCN8s(nn.Module):
    def __init__(self, n_class):
        super(FCN8s, self).__init__()
        self.n_class = n_class
        self.feature = list(models.vgg16(pretrained=True).features)
        self.feature[0] = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=100)
        self.feature1 = nn.Sequential(*self.feature[:17])
        self.feature2 = nn.Sequential(*self.feature[17:24])
        self.feature3 = nn.Sequential(*self.feature[24:])
        self.module = nn.Sequential(
            nn.Conv2d(512, 4096, kernel_size=7),
            nn.ReLU(inplace=True),
            nn.Dropout2d(p=0.5),
            nn.Conv2d(4096, 4096, kernel_size=1),
            nn.ReLU(inplace=True),
            nn.Dropout2d(p=0.5)
        )
        self.classifier1 = nn.Conv2d(4096, self.n_class, kernel_size=1)
        self.classifier2 = nn.Conv2d(512, self.n_class, kernel_size=1)
        self.classifier3 = nn.Conv2d(256, self.n_class, kernel_size=1)

        self.deconv1 = nn.ConvTranspose2d(self.n_class, self.n_class, kernel_size=4, stride=2, bias=False)
        self.deconv2 = nn.ConvTranspose2d(self.n_class, self.n_class, kernel_size=4, stride=2, bias=False)
        self.deconv3 = nn.ConvTranspose2d(self.n_class, self.n_class, kernel_size=16, stride=8, bias=False)

        self.bn = nn.BatchNorm2d(self.n_class)

    def forward(self, x):

        num = self.feature1(x)
        num1 = self.feature2(num)
        num2 = self.feature3(num1)
        num3 = self.module(num2)

        score1 = self.classifier1(num3)  # score from the pool5 branch
        score2 = self.classifier2(num1)  # score from the pool4 branch
        score3 = self.classifier3(num)   # score from the pool3 branch

        upsample1 = self.bn(self.deconv1(score1))  # 2x upsample of the pool5 score
        score2 = score2[:, :, 5:5 + upsample1.size()[2], 5:5 + upsample1.size()[3]]
        upsample1 += score2
        upsample2 = self.bn(self.deconv2(upsample1))  # 2x upsample of the fused result
        score3 = score3[:, :, 9:9 + upsample2.size()[2], 9:9 + upsample2.size()[3]]
        upsample2 += score3
        upsample = self.bn(self.deconv3(upsample2))  # 8x upsample back to the input size
        upsample = upsample[:, :, 31:31 + x.size()[2], 31:31 + x.size()[3]].contiguous()

        return upsample


if __name__ == '__main__':
    x = torch.rand(3, 3, 224, 224)
    fcn = FCN8s(20)
    out = fcn(x)
    print(out.shape)  # expect torch.Size([3, 20, 224, 224])
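
A note on the magic numbers: padding=100 on the first conv layer (a trick from the original FCN implementation) keeps the 7x7 fully-convolutional layer valid for small inputs, and the fixed crop offsets (19, 27, 31) then cut the upsampled output back to the input resolution. As a quick sketch, appending the following at the bottom of the model file should confirm that all three variants return the input size (pretrained VGG16 weights are downloaded on first use):

for Net in (FCN32s, FCN16s, FCN8s):
    net = Net(n_class=2)
    out = net(torch.rand(1, 3, 160, 160))
    print(Net.__name__, out.shape)  # each prints torch.Size([1, 2, 160, 160])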
  4. Main training script
import torch
import torch.nn as nn
import visdom
from BagData import train_loader, val_loader, test_loader
import torch.optim as optim
import numpy as np
from Myfcn import FCN8s


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# torch.manual_seed(1234)


def evaluate(model, loader):
    model.eval()
    correct = 0
    total = 0

    for step, (x, y) in enumerate(loader):
        x, y = x.to(device), y.to(device)
        with torch.no_grad():
            logits = model(x)
            output = torch.sigmoid(logits)
            pred = output.argmax(dim=1)  # channel index equals class index with the fixed onehot
            y = y.argmax(dim=1)
        correct += torch.eq(pred, y).sum().float().item()
        total += y.numel()  # count pixels per batch, so a smaller last batch is handled correctly

    acc = correct / total
    print('acc:', acc)
    return acc


def main():

    vis = visdom.Visdom()
    fcn_model = FCN8s(n_class=2).to(device)
    optimizer = optim.SGD(fcn_model.parameters(), lr=1e-2, momentum=0.7)
    criteon = nn.BCELoss().to(device)

    best_acc, best_epoch = 0, 0
    global_step = 0
    vis.line([0], [-1], win='loss', opts=dict(title='loss'))
    vis.line([0], [-1], win='val_acc', opts=dict(title='val_acc'))
    for epoch in range(100):
        for step, (x, y) in enumerate(train_loader):

            # Variable wrappers are obsolete since PyTorch 0.4; tensors suffice
            x = x.to(device)
            y = y.to(device)

            fcn_model.train()

            logits = fcn_model(x)
            output = torch.sigmoid(logits)
            loss = criteon(output, y)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # argmax over the class channel yields the predicted label map
            output_np = output.cpu().data.numpy().copy()
            output_np = np.argmax(output_np, axis=1)
            y_np = y.cpu().data.numpy().copy()
            y_np = np.argmax(y_np, axis=1)

            if step % 20 == 0:
                print('epoch {}, {}/{}, loss is {}'.format(epoch, step, len(train_loader), loss.item()))
                vis.close(win='pred')
                vis.close(win='label')
                vis.images(output_np[:, None, :, :], win='pred', opts=dict(title='pred'))
                vis.images(y_np[:, None, :, :], win='label', opts=dict(title='label'))
            vis.line([loss.item()], [global_step], win='loss', update='append')
            global_step += 1

        # validate after every epoch; plot the curve and keep the best checkpoint
        val_acc = evaluate(fcn_model, val_loader)
        vis.line([val_acc], [global_step], win='val_acc', update='append')
        if val_acc > best_acc:
            best_epoch = epoch
            best_acc = val_acc
            torch.save(fcn_model.state_dict(), 'best.mdl')

    print('best acc:', best_acc, 'best epoch:', best_epoch)

    fcn_model.load_state_dict(torch.load('best.mdl'))
    print('loaded from ckpt!')

    test_acc = evaluate(fcn_model, test_loader)
    print('test acc:', test_acc)


if __name__ == '__main__':
    main()
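
After training, the saved weights can be reused for single-image inference. Below is a minimal sketch (the file name 'demo.jpg' is a hypothetical placeholder; the preprocessing must match the dataset class):

import cv2
import torch
from torchvision import transforms
from Myfcn import FCN8s

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = FCN8s(n_class=2).to(device)
model.load_state_dict(torch.load('best.mdl', map_location=device))
model.eval()

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

img = cv2.cvtColor(cv2.resize(cv2.imread('demo.jpg'), (160, 160)), cv2.COLOR_BGR2RGB)
x = transform(img).unsqueeze(0).to(device)
with torch.no_grad():
    pred = torch.sigmoid(model(x)).argmax(dim=1)[0]  # (160, 160) label map
cv2.imwrite('pred_mask.png', (pred.cpu().numpy() * 255).astype('uint8'))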

Results


From the final output, the best validation pixel accuracy (PA) is 0.879546875, reached at epoch 20, and the pixel accuracy (PA) on the test set is 0.8733546875.

Loss curve

Pixel accuracy (PA) curve


Comparison between labels and segmentation results


Summary

FCN uses skip connections that combine global semantic information with local location information, recovering each pixel's class from abstract features. This extends image-level classification down to pixel-level classification and successfully turns a network designed for image classification into one for image segmentation.

Although FCN can recover the class of each pixel during segmentation, two problems remain:
① after repeated pooling, the resolution of the feature maps keeps shrinking, and the spatial location information of some pixels is lost;
② the segmentation process does not effectively use image context, so the rich spatial information is underexploited and the use of local versus global features is unbalanced.

FCN does not effectively solve these two problems, which leads to coarse results and discontinuous segmentation boundaries. Follow-up work can improve on this by refining the convolution structure, adding a conditional random field (CRF) as post-processing, and similar methods.
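
As an illustration of the CRF idea, here is a rough post-processing sketch using the third-party pydensecrf package (the calls follow its commonly published usage example; treat the exact API as an assumption to verify against the library's docs). It refines the network's per-pixel probabilities with image-dependent pairwise terms:

import numpy as np
import pydensecrf.densecrf as dcrf
from pydensecrf.utils import unary_from_softmax

def crf_refine(img_rgb, probs, n_iters=5):
    # img_rgb: (H, W, 3) uint8 image; probs: (n_class, H, W) float probabilities
    n_class, h, w = probs.shape
    d = dcrf.DenseCRF2D(w, h, n_class)
    d.setUnaryEnergy(unary_from_softmax(probs))  # unary term: -log(prob)
    d.addPairwiseGaussian(sxy=3, compat=3)       # smoothness term
    d.addPairwiseBilateral(sxy=80, srgb=13,      # appearance term tied to the image
                           rgbim=np.ascontiguousarray(img_rgb), compat=10)
    q = d.inference(n_iters)
    return np.argmax(q, axis=0).reshape(h, w)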
