目录
训练文件:train.py
引入头部文件
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
数据转换:
将其他格式的数据转为Torch类型的数据,举例:
# Build the sample data as float32 NumPy arrays of shape (1, 1); the nested
# list must be closed with "]]" or the literal is a syntax error.
x_train = np.array([[3.3]], dtype=np.float32)
y_train = np.array([[1.7]], dtype=np.float32)
# torch.from_numpy shares memory with the NumPy array instead of copying it.
x_train = torch.from_numpy(x_train)
y_train = torch.from_numpy(y_train)
网络的调用:
实例化自定义的网络:如果CUDA可用,就把模型放到GPU上计算;否则留在CPU上计算
# Instantiate the network (LinearRegression must already be defined when this
# runs) and place it on the GPU when CUDA is available, otherwise keep it on
# the CPU.
if torch.cuda.is_available():
    model = LinearRegression().cuda()
    print("使用的是: CUDA加速")
else:
    model = LinearRegression()
    print("使用的是: CPU加速")
其中LinearRegression为自定义的网络结构
class LinearRegression(nn.Module):
    """Single-feature linear regression: one ``nn.Linear(1, 1)`` layer."""

    def __init__(self):
        super().__init__()
        # Both the input and the output are one-dimensional.
        self.line = nn.Linear(1, 1)

    def forward(self, x):
        """Apply the linear layer to ``x`` and return the result."""
        out = self.line(x)
        return out
损失函数和优化函数:
损失函数和优化器可以自行选择。这个示例使用均方误差(MSE)损失和带动量的SGD优化器,学习率为1e-3,动量为0.9
# Mean squared error serves as the loss function.
criterion = nn.MSELoss()
# SGD with momentum updates every parameter of the model.
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=1e-3,
    momentum=0.9,
)
epoch训练:
自行设定训练的epoch轮数,并把数据包装成Variable类型以便后面求导(PyTorch 0.4起Variable已与Tensor合并,直接使用Tensor即可)
# Train the model for a fixed number of epochs.
num_epochs = 100
for epoch in range(num_epochs):
    # Wrap the training tensors and move them to the GPU when one is
    # available. torch.autograd.Variable is a deprecated wrapper (Tensor and
    # Variable were merged in PyTorch 0.4); it is kept here to match the
    # surrounding notes.
    if torch.cuda.is_available():
        inputs = torch.autograd.Variable(x_train).cuda()
        target = torch.autograd.Variable(y_train).cuda()
    else:
        inputs = torch.autograd.Variable(x_train)
        target = torch.autograd.Variable(y_train)
送入网络:
# Forward pass: run the network on the inputs, then score the output against
# the target with the loss function.
# NOTE(review): these statements read `inputs`/`target`, so they presumably
# belong inside the epoch loop body — confirm indentation when assembling.
out = model(inputs)
loss = criterion(out,target)
反向传播更新模型参数:
# Backward pass: clear the gradients left from the previous step, compute new
# gradients from the loss, then let the optimizer update the parameters.
# NOTE(review): presumably part of the epoch loop body — confirm indentation
# when assembling the script.
optimizer.zero_grad()
loss.backward()
optimizer.step()
保存参数:
后续工作,保存参数或者是保存图片之类的
测试
将需要预测的数据送入模型得到预测结果,然后与真实数据进行比对:
# Predict with the trained model.
model.eval()
# Feed the input on whichever device holds the model parameters; calling
# .cuda() unconditionally fails on a CPU-only machine, and a CPU tensor fed to
# a GPU model fails as well.
device = next(model.parameters()).device
# Inference needs no gradient graph.
with torch.no_grad():
    predict = model(x_train.to(device)).cpu()
# Convert to NumPy on the CPU so matplotlib can plot it.
predict = predict.numpy()
plt.plot(x_train.numpy(), predict, 'r.')
plt.show()
语义分割的混淆矩阵:
def _generate_matrix(self, gt_image, pre_image):
    """Build the confusion matrix for one ground-truth / prediction pair.

    Args:
        gt_image: array of ground-truth class indices.
        pre_image: array of predicted class indices, same shape as
            ``gt_image``.

    Returns:
        A ``(num_class, num_class)`` integer array whose entry ``[i, j]``
        counts pixels with ground-truth class ``i`` predicted as class ``j``.
    """
    # Keep only pixels whose ground-truth label is a valid class index;
    # anything outside [0, num_class) is ignored.
    mask = (gt_image >= 0) & (gt_image < self.num_class)
    # Encode each (gt, prediction) pair as one flat index. Both operands are
    # cast to int because np.bincount rejects float input.
    label = self.num_class * gt_image[mask].astype('int') + pre_image[mask].astype('int')
    count = np.bincount(label, minlength=self.num_class**2)
    confusion_matrix = count.reshape(self.num_class, self.num_class)
    return confusion_matrix
MIoU:
def Mean_Intersection_over_Union(self):
    """Return the mean IoU over all classes from ``self.confusion_matrix``.

    Per-class IoU is the diagonal entry divided by (row sum + column sum -
    diagonal entry). Classes whose union is zero yield NaN and are skipped by
    ``np.nanmean``.
    """
    matrix = self.confusion_matrix
    intersection = np.diag(matrix)
    union = np.sum(matrix, axis=1) + np.sum(matrix, axis=0) - intersection
    # A class absent from both axes gives 0/0; silence the warning here since
    # the resulting NaN is handled by nanmean below.
    with np.errstate(divide='ignore', invalid='ignore'):
        MIoU = intersection / union
    MIoU = np.nanmean(MIoU)
    return MIoU
FWIoU:
def Frequency_Weighted_Intersection_over_Union(self):
    """Return the frequency-weighted IoU from ``self.confusion_matrix``.

    Each class IoU is weighted by that class's share of all counted pixels
    (row sum divided by the matrix total); classes with zero frequency are
    left out of the sum.
    """
    matrix = self.confusion_matrix
    row_totals = np.sum(matrix, axis=1)
    intersection = np.diag(matrix)
    union = row_totals + np.sum(matrix, axis=0) - intersection
    # Absent classes (or an empty matrix) give 0/0; the NaN entries never
    # reach the sum because they are filtered by the freq > 0 mask.
    with np.errstate(divide='ignore', invalid='ignore'):
        freq = row_totals / np.sum(matrix)
        iu = intersection / union
        present = freq > 0
    FWIoU = (freq[present] * iu[present]).sum()
    return FWIoU
DataSet:
# 开发人员: 骆根强
# 开发时间: 2022/8/15 17:04
# 功能作用: 未知
import torch
import os
import cv2
from torch.utils.data import Dataset
from torchvision.transforms import transforms
from torchvision.utils import save_image
# Sub-directory names under the dataset root: the source photos and the
# segmentation masks.
Origin = "JPEGImages"
Segmen = "SegmentationClass"

# Preprocessing applied to every sample: conversion to a tensor only.
# Normalisation is currently disabled:
# transforms.Normalize([0.48, 0.46, 0.49], [0.48, 0.46, 0.49])
data_tf = transforms.Compose([transforms.ToTensor()])
# def data_tf(x):
# x = np.array(x, dtype='float32') / 255
# x = (x - 0.5) / 0.5 # 标准化,这个技巧之后会讲到
# x = x.transpose((2, 0, 1)) # 将 channel 放到第一维,只是 pytorch 要求的输入方式
# x = torch.from_numpy(x)
# return x
class MyDataSet(Dataset):
    """Paired photo / segmentation-mask dataset.

    Samples are enumerated from the ``Segmen`` directory under ``path``; the
    matching photo is looked up by file stem, with a ``.jpg`` extension, in
    the ``Origin`` directory.
    """

    def __init__(self, path):
        """Index every file found in ``<path>/<Segmen>``."""
        self.path = path
        self.name = os.listdir(os.path.join(path, Segmen))

    def __len__(self):
        """Return the number of mask files indexed."""
        return len(self.name)

    def __trans__(self, img, size, interpolation=cv2.INTER_CUBIC):
        """Letterbox ``img`` into a ``size`` x ``size`` square.

        The image is scaled without changing its aspect ratio, centred, and
        the remaining border is filled with black, so nothing is distorted.

        Args:
            img: image array as returned by ``cv2.imread``.
            size: edge length of the square output in pixels.
            interpolation: OpenCV interpolation flag used for the resize.

        Returns:
            The padded ``size`` x ``size`` image.
        """
        # Current height and width of the image.
        h, w = img.shape[0:2]
        # Target dimensions.
        _w = _h = size
        # One scale factor for both axes keeps the aspect ratio.
        scale = min(_h / h, _w / w)
        # Clamp to 1 so an extreme aspect ratio cannot produce a zero-sized
        # resize target, which cv2.resize rejects.
        h = max(1, int(h * scale))
        w = max(1, int(w * scale))
        img = cv2.resize(img, (w, h), interpolation=interpolation)
        # Pixels to add on each side so the content ends up centred.
        top = (_h - h) // 2
        left = (_w - w) // 2
        bottom = _h - h - top
        right = _w - w - left
        # Pad with pure black (0, 0, 0).
        new_img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=(0, 0, 0))
        return new_img

    def __getitem__(self, item):
        """Return ``(image_tensor, mask_tensor)`` for sample ``item``.

        Raises:
            IndexError: if ``item`` is past the end of the file list.
            FileNotFoundError: if the photo or the mask cannot be read.
        """
        se_name = self.name[item]
        se_path = os.path.join(self.path, Segmen, se_name)
        # Swap only the extension; str.replace('png', 'jpg') would also
        # rewrite a 'png' that happens to occur inside the file stem.
        stem = os.path.splitext(se_name)[0]
        or_path = os.path.join(self.path, Origin, stem + '.jpg')
        # cv2.imread returns None instead of raising when a file is missing
        # or unreadable, so check explicitly.
        im = cv2.imread(or_path)
        if im is None:
            raise FileNotFoundError(f"cannot read image file: {or_path}")
        se = cv2.imread(se_path)
        if se is None:
            raise FileNotFoundError(f"cannot read mask file: {se_path}")
        # NOTE(review): only the photo is converted to RGB; the mask keeps the
        # BGR channel order from cv2.imread — confirm this is intended.
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        im = self.__trans__(im, 512)
        # Nearest-neighbour keeps mask values exact; cubic interpolation
        # would blend neighbouring label colours along class boundaries.
        se = self.__trans__(se, 512, interpolation=cv2.INTER_NEAREST)
        return data_tf(im), data_tf(se)
if __name__ == '__main__':
    # Manual smoke test: load the dataset, print each image tensor's shape
    # and dump the first channel of the image to im.png.
    # NOTE(review): the original indentation was lost; placing save_image
    # inside the loop (overwriting im.png each iteration) is an assumption.
    data = MyDataSet('D:\\test\\Python\\unet\\train1\\data\\VOCdevkit\\VOC2012')
    for a, _ in data:
        print(a.shape)
        save_image(a[0], 'im.png', nrow=1)
设定参数文件config.py
import argparse
# Command-line configuration; other modules read the parsed values through
# the module-level `args`.
parser = argparse.ArgumentParser(description='文件说明书')
# Hardware options
parser.add_argument('--cpu', action='store_true', help='use cpu only')
# type=int with nargs='+' turns "--gpu_id 0 1" into [0, 1]. The previous
# type=list split the raw argument string into single characters
# ("01" -> ['0', '1']), which did not match the integer default.
parser.add_argument('--gpu_id', type=int, nargs='+', default=[0],
                    help='GPU id(s) to use, e.g. --gpu_id 0 1')
# parse_known_args ignores options this parser does not define, so importing
# this module from a script with its own flags does not abort.
args = parser.parse_known_args()[0]
'''
直接调用
import config
args = config.args
'''