训练文件：train.py

引入头部文件

数据转换：

网络的调用：

训练文件：train.py

引入头部文件

import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn

数据转换：

将其他格式的数据转为Torch类型的数据，举例：

# Example data: a single (x, y) sample stored as float32 arrays of shape (1, 1).
x_train = np.array([[3.3]], dtype=np.float32)

y_train = np.array([[1.7]], dtype=np.float32)

# torch.from_numpy shares memory with the NumPy array and keeps its dtype.
x_train = torch.from_numpy(x_train)
y_train = torch.from_numpy(y_train)

网络的调用：

实例化定义好的网络：如果CUDA可用，就把模型放到GPU上计算；否则直接在CPU上运行

# Build the network once, then move it to the GPU only when CUDA is usable.
model = LinearRegression()
if torch.cuda.is_available():
    model = model.cuda()
    print("使用的是： CUDA加速")
else:
    print("使用的是： CPU加速")

其中LinearRegression为自定义的网络结构

class LinearRegression(nn.Module):
    """Single-layer linear model mapping one input feature to one output."""

    def __init__(self):
        super().__init__()
        # One input feature, one output feature.
        self.line = nn.Linear(1, 1)

    def forward(self, x):
        """Apply the linear layer to ``x`` and return the result."""
        return self.line(x)

损失函数和优化函数：

损失函数和优化器可以根据任务自行选择；这里的示例使用均方误差（MSE）作为损失函数，优化器为带动量的SGD，学习率为1e-3，动量系数为0.9

# Mean squared error as the loss; SGD with momentum as the optimizer.
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=1e-3,
    momentum=0.9,
)

epoch训练：

自行定义要训练多少轮epoch，并把数据放到与模型相同的设备上。旧版本的PyTorch需要先用Variable包装数据才能参与求导；自0.4版本起Tensor本身就支持自动求导，Variable包装可以省略

## Train the model
num_epochs = 100
for epoch in range(num_epochs):
    # Tensors take part in autograd directly (PyTorch >= 0.4), so the
    # deprecated torch.autograd.Variable wrapper is not needed; the data only
    # has to be placed on the GPU when CUDA is available.
    if torch.cuda.is_available():
        inputs = x_train.cuda()
        target = y_train.cuda()
    else:
        inputs = x_train
        target = y_train

送入网络：

    ## Forward pass: run the model on the inputs, then compute the loss
    ## between the model output and the targets.
    out = model(inputs)
    loss = criterion(out,target)

反向传播更新模型参数：

    ## Backward pass
    # Reset accumulated gradients, backpropagate the loss, then let the
    # optimizer update the model parameters.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

保存参数：

训练结束后的后续工作，例如保存模型参数（如 torch.save(model.state_dict(), 'model.pth')）或者保存结果图片之类的

测试

将需要预测的数据送入模型中得到预测数据，然后进行比对：

## Prediction
model.eval()
# Feed the input on whichever device the model's parameters live on, so the
# same code works both with and without CUDA, then bring the result back to
# the CPU for NumPy / matplotlib.
device = next(model.parameters()).device
# Inference only: no gradient tracking needed, which also allows .numpy().
with torch.no_grad():
    predict = model(x_train.to(device)).cpu()
predict = predict.numpy()

plt.plot(x_train.numpy(), predict, 'r.')

plt.show()

语义分割的混淆矩阵：

    def _generate_matrix(self, gt_image, pre_image):
        """Build the confusion matrix for one ground-truth / prediction pair.

        Args:
            gt_image: array of ground-truth class indices.
            pre_image: array of predicted class indices, same shape as
                ``gt_image``.

        Returns:
            A ``(num_class, num_class)`` integer array whose entry ``[i, j]``
            counts pixels with ground-truth class ``i`` and predicted class
            ``j``.
        """
        # Only pixels whose ground-truth label is a valid class index take
        # part; anything outside [0, num_class) is dropped.
        mask = (gt_image >= 0) & (gt_image < self.num_class)
        # Encode each (gt, pred) pair as one integer gt * num_class + pred.
        # Both sides are cast to int because np.bincount rejects float input.
        label = self.num_class * gt_image[mask].astype('int') + pre_image[mask].astype('int')
        count = np.bincount(label, minlength=self.num_class**2)
        confusion_matrix = count.reshape(self.num_class, self.num_class)
        return confusion_matrix

参考：语义分割评价指标代码：混淆矩阵计算详解_zinc_abc的博客-CSDN博客_语义分割混淆矩阵

MIoU：

    def Mean_Intersection_over_Union(self):
        """Return the per-class IoU averaged over classes, skipping NaN entries.

        Reads ``self.confusion_matrix``; per-class IoU is the diagonal entry
        divided by (row sum + column sum - diagonal entry).
        """
        cm = self.confusion_matrix
        intersection = np.diag(cm)
        union = np.sum(cm, axis=1) + np.sum(cm, axis=0) - intersection
        per_class_iou = intersection / union
        return np.nanmean(per_class_iou)

FWIoU：

    def Frequency_Weighted_Intersection_over_Union(self):
        """Return the IoU summed over classes, weighted by class frequency.

        Reads ``self.confusion_matrix``. A class's frequency is its row sum
        divided by the matrix total; classes with zero frequency are left out
        of the weighted sum.
        """
        cm = self.confusion_matrix
        row_totals = np.sum(cm, axis=1)
        freq = row_totals / np.sum(cm)
        hits = np.diag(cm)
        iu = hits / (row_totals + np.sum(cm, axis=0) - hits)
        present = freq > 0
        return (freq[present] * iu[present]).sum()

DataSet：

#  开发人员：    骆根强
#  开发时间：    2022/8/15 17:04
#  功能作用：    未知

import torch
import os
import cv2

from torch.utils.data import Dataset
from torchvision.transforms import transforms
from torchvision.utils import save_image

# Sub-directory names under the dataset root: source photos and their
# segmentation masks, respectively.
Origin = 'JPEGImages'
Segmen = 'SegmentationClass'

# Transform applied to every image and mask before it is returned by the
# dataset; normalisation is currently disabled.
data_tf = transforms.Compose([
    transforms.ToTensor(),
    # transforms.Normalize([0.48, 0.46, 0.49], [0.48, 0.46, 0.49])
])

# Disabled hand-written alternative to the Compose pipeline above:
# def data_tf(x):
#     x = np.array(x, dtype='float32') / 255
#     x = (x - 0.5) / 0.5  # standardise
#     x = x.transpose((2, 0, 1))  # move channels to the first axis, as PyTorch expects
#     x = torch.from_numpy(x)
#     return x

class MyDataSet(Dataset):
    """Dataset of (image, segmentation mask) pairs under a common root folder.

    Sample names come from the files listed in ``<path>/<Segmen>``; the
    matching photo is looked up in ``<path>/<Origin>`` with a ``.jpg``
    extension. Both are letterboxed to 512x512 and passed through ``data_tf``.
    """

    def __init__(self, path):
        """Store the dataset root and list the mask files below it."""
        self.path = path
        self.name = os.listdir(os.path.join(path, Segmen))

    def __len__(self):
        """Return the number of mask files found at construction time."""
        return len(self.name)

    def __trans__(self, img, size, interpolation=None):
        """Letterbox ``img`` into a ``size`` x ``size`` square.

        The image is scaled without changing its aspect ratio, centred, and
        the remaining border is filled with black.

        Args:
            img: image array whose first two axes are height and width.
            size: side length of the square output, in pixels.
            interpolation: OpenCV interpolation flag for the resize;
                defaults to ``cv2.INTER_CUBIC``.

        Returns:
            The padded ``size`` x ``size`` image.
        """
        if interpolation is None:
            interpolation = cv2.INTER_CUBIC
        h, w = img.shape[0:2]
        _w = _h = size
        # Scale by the tighter of the two ratios so the image fits entirely.
        scale = min(_h / h, _w / w)
        # Guard against a side rounding down to 0 for extreme aspect ratios,
        # which cv2.resize would reject.
        h = max(1, int(h * scale))
        w = max(1, int(w * scale))
        img = cv2.resize(img, (w, h), interpolation=interpolation)
        # Split the leftover space so the image sits in the centre; any odd
        # pixel goes to the bottom / right.
        top = (_h - h) // 2
        left = (_w - w) // 2
        bottom = _h - h - top
        right = _w - w - left
        new_img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=(0, 0, 0))
        return new_img

    def __getitem__(self, item):
        """Load sample ``item`` and return ``(image_tensor, mask_tensor)``.

        Raises:
            IndexError: if ``item`` is outside the list of mask files.
            FileNotFoundError: if the image or the mask cannot be read.
        """
        se_name = self.name[item]
        se_path = os.path.join(self.path, Segmen, se_name)

        # Swap only the extension; a plain str.replace('png', 'jpg') would
        # also rewrite "png" occurring elsewhere in the file name.
        stem = os.path.splitext(se_name)[0]
        or_path = os.path.join(self.path, Origin, stem + '.jpg')

        # cv2.imread signals failure by returning None instead of raising.
        im = cv2.imread(or_path)
        if im is None:
            raise FileNotFoundError('cannot read image: {}'.format(or_path))
        se = cv2.imread(se_path)
        if se is None:
            raise FileNotFoundError('cannot read mask: {}'.format(se_path))

        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        # NOTE(review): the mask is left in OpenCV's BGR channel order while
        # the image is converted to RGB - confirm this is intended.

        im = self.__trans__(im, 512)
        # Nearest-neighbour keeps the mask's original colours; cubic
        # interpolation would blend neighbouring classes at their borders.
        se = self.__trans__(se, 512, interpolation=cv2.INTER_NEAREST)

        return data_tf(im), data_tf(se)

if __name__ == '__main__':
    # Manual smoke test: walk the dataset, print each image tensor's shape and
    # write the first channel of the image to im.png (overwritten each time).
    data = MyDataSet('D:\\test\\Python\\unet\\train1\\data\\VOCdevkit\\VOC2012')
    for sample in data:
        a, b = sample
        print(a.shape)
        save_image(a[0], 'im.png', nrow=1)

设定参数文件config.py

import argparse

parser = argparse.ArgumentParser(description='文件说明书')

# Hardware options
parser.add_argument('--cpu', action='store_true', help='use cpu only')
# One or more integer GPU indices, e.g. ``--gpu_id 0 1``. With ``type=list``
# argparse would call list() on the raw string and yield single-character
# strings instead of integers.
parser.add_argument('--gpu_id', type=int, nargs='+', default=[0],
                    help='GPU device id(s) to use, e.g. --gpu_id 0 1')

# parse_known_args returns (namespace, leftover_args); unknown arguments are
# ignored rather than treated as an error.
args = parser.parse_known_args()[0]

'''
直接调用
import config
args = config.args
'''

pytorch网络的模板

训练文件：train.py

引入头部文件

数据转换：

网络的调用：

损失函数和优化函数：

epoch训练：

送入网络：

反向传播更新模型参数：

保存参数：

测试

语义分割的混淆矩阵：

MIou：

FIou：

DataSet：

设定参数文件config.py

猜你喜欢