关于训练集的介绍和数据划分可以参照上一个博客:
https://blog.csdn.net/qq_41685265/article/details/104895273
数据加载
class DogCat(data.Dataset):
    """Dataset for the Kaggle dogs-vs-cats images.

    Collects every image path under ``root`` and splits it by mode:

    * test  -- all images (file names like ``data/test1/8973.jpg``)
    * train -- first 70% of the id-sorted images (``data/train/cat.10004.jpg``)
    * val   -- remaining 30%

    Labels: dog -> 1, cat -> 0.  For the test split, ``__getitem__`` returns
    the numeric image id instead of a class label.
    """

    def __init__(self, root, transforms=None, train=True, test=False):
        self.test = test
        imgs = [os.path.join(root, img) for img in os.listdir(root)]
        # Sort by the numeric id embedded in the file name so the
        # train/val split is deterministic across runs.
        # test1: data/test1/8973.jpg        -> id is the whole stem
        # train: data/train/cat.10004.jpg   -> id is the middle component
        if self.test:
            imgs = sorted(imgs, key=lambda x: int(x.split('.')[-2].split('/')[-1]))
        else:
            imgs = sorted(imgs, key=lambda x: int(x.split('.')[-2]))

        imgs_num = len(imgs)
        if self.test:
            self.imgs = imgs
        elif train:
            self.imgs = imgs[:int(0.7 * imgs_num)]
        else:
            self.imgs = imgs[int(0.7 * imgs_num):]

        if transforms is None:
            normalize = T.Normalize(mean=[0.485, 0.456, 0.406],
                                    std=[0.229, 0.224, 0.225])
            if self.test or not train:
                # Deterministic pipeline for evaluation.
                self.transforms = T.Compose([
                    T.Resize(224),
                    T.CenterCrop(224),
                    T.ToTensor(),
                    normalize
                ])
            else:
                # Light augmentation (random horizontal flip) for training.
                self.transforms = T.Compose([
                    T.Resize(256),
                    T.CenterCrop(224),
                    T.RandomHorizontalFlip(),
                    T.ToTensor(),
                    normalize
                ])
        else:
            # BUG FIX: a caller-supplied transform was previously ignored,
            # leaving self.transforms undefined and crashing __getitem__.
            self.transforms = transforms

    def __getitem__(self, index):
        """Return ``(image_tensor, label)`` for one sample.

        For the test split the "label" is the numeric image id so predictions
        can be matched back to the submission file.
        """
        img_path = self.imgs[index]
        if self.test:
            label = int(self.imgs[index].split('.')[-2].split('/')[-1])
        else:
            label = 1 if 'dog' in img_path.split('/')[-1] else 0
        # BUG FIX: force 3-channel RGB so grayscale/RGBA files do not break
        # the 3-channel Normalize transform.
        data = Image.open(img_path).convert('RGB')
        data = self.transforms(data)
        return data, label

    def __len__(self):
        return len(self.imgs)
参数设置
有一些参数没有用得上
class DefaultConfig(object):
    """Hyper-parameters and paths for training (some fields are unused)."""

    # Visualization (visdom) settings.
    env = 'default'
    vis_port = 8097

    # Model selection; must match a name exported by models/__init__.py.
    model = 'ResNet34'

    # Dataset locations.
    train_data_root = '/media/cyq/CU/Ubuntu system files/dogs-vs-cats/train/'
    test_data_root = '/media/cyq/CU/Ubuntu system files/dogs-vs-cats/test/'

    # Checkpoint to resume from; None means train from scratch.
    load_model_path = None

    # Runtime settings.
    batch_size = 48
    use_gpu = True
    num_workers = 4            # dataloader worker processes
    print_freq = 20            # print info every N batches

    debug_file = '/tmp/debug'  # if os.path.exists(debug_file): enter ipdb
    result_file = 'result.csv'

    # Optimization settings.
    max_epoch = 10
    lr = 0.001                 # initial learning rate
    lr_decay = 0.5             # when loss increases: lr = lr * lr_decay
    weight_decay = 0e-5        # L2 regularization strength


opt = DefaultConfig()
定义网络结构
class ResNet34(nn.Module):
    """ResNet34 backbone (ImageNet-pretrained) with a fresh 2-way head.

    The torchvision resnet34 is reused up to and including its global
    average-pooling layer; only the final fully-connected layer is replaced
    so the network emits ``num_classes`` logits.
    """

    def __init__(self, num_classes=2):
        super(ResNet34, self).__init__()
        self.model_name = 'resnet34'
        backbone = models.resnet34(pretrained=True)
        # Drop the original 1000-class fc layer; keep everything before it.
        self.resnet = nn.Sequential(*list(backbone.children())[:-1])
        self.fc = nn.Linear(in_features=512, out_features=num_classes)

    def forward(self, x):
        features = self.resnet(x)
        flat = features.view(features.size(0), -1)
        return self.fc(flat)
# BUG FIX: fall back to CPU when CUDA is unavailable instead of crashing
# later at net.to(device); also drop the non-idiomatic ``== True``.
device = "cuda" if opt.use_gpu and torch.cuda.is_available() else "cpu"
net = ResNet34()
net.to(device)
模型训练
def val(model, dataloader):
    """Evaluate ``model`` on ``dataloader``.

    Returns a ``(ConfusionMeter, accuracy_percentage)`` pair.  The model is
    switched back into training mode before returning.
    """
    model.eval()
    cm = meter.ConfusionMeter(2)
    for _, (inputs, labels) in tqdm(enumerate(dataloader)):
        inputs = inputs.to(device)
        with torch.no_grad():
            scores = model(inputs)
        cm.add(scores.detach().squeeze(), labels.type(t.LongTensor))
    model.train()
    values = cm.value()
    # Accuracy = trace of the 2x2 confusion matrix over its total count.
    accuracy = 100. * (values[0][0] + values[1][1]) / values.sum()
    return cm, accuracy
def train():
    """Train the module-level ``net`` on DogCat data, printing per-epoch metrics.

    BUG FIX: the original tracked ``previous_loss`` but never used it, so the
    lr decay documented on ``DefaultConfig.lr_decay`` was dead code.  The
    learning rate is now multiplied by ``opt.lr_decay`` whenever the epoch's
    mean training loss increases.  ``opt.weight_decay`` (0.0 here, so
    numerically identical) is also forwarded to the optimizer.
    """
    # step1: model is the module-level ``net``
    # step2: data
    train_data = DogCat(opt.train_data_root, train=True)
    val_data = DogCat(opt.train_data_root, train=False)
    train_dataloader = DataLoader(train_data, opt.batch_size,
                                  shuffle=True, num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data, opt.batch_size,
                                shuffle=False, num_workers=opt.num_workers)

    # step3: criterion and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(
        filter(lambda p: p.requires_grad, net.parameters()),
        lr=opt.lr, momentum=0.9, weight_decay=opt.weight_decay)
    lr = opt.lr

    # step4: meters
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(2)
    previous_loss = 1e10

    for epoch in range(opt.max_epoch):
        loss_meter.reset()
        confusion_matrix.reset()

        # ``batch`` instead of ``input`` to avoid shadowing the builtin.
        for batch, label in train_dataloader:
            batch = batch.to(device)
            target = label.to(device)

            optimizer.zero_grad()
            score = net(batch)
            loss = criterion(score, target)
            loss.backward()
            optimizer.step()

            loss_meter.add(loss.item())
            # detach so the meter never retains the autograd graph
            confusion_matrix.add(score.detach(), target.detach())

        # validate and report
        val_cm, val_accuracy = val(net, val_dataloader)
        print("epoch:{epoch},lr:{lr},loss:{loss},train_cm:{train_cm},val_cm:{val_cm}".format(
            epoch=epoch, loss=loss_meter.value()[0], val_cm=str(val_cm.value()),
            train_cm=str(confusion_matrix.value()), lr=lr))
        cm_train = confusion_matrix.value()
        train_accuracy = 100. * (cm_train[0][0] + cm_train[1][1]) / cm_train.sum()
        print("train-ACC: %.3f" % train_accuracy)
        print("val-ACC: %.3f" % val_accuracy)

        # Decay the learning rate when the mean loss stopped improving.
        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
        previous_loss = loss_meter.value()[0]
结果输出
由于使用了预训练的模型,因此网络在迭代到第二代时,验证集正确率已经超过了99%,在5-10代时,训练集精度已经在99.5%左右,下边从测试集中选出几个检验:
模型的输出结果为:
1,4.009683834738098e-06
2,1.8512298538553296e-06
3,1.0709993148338981e-05
4,1.2812791283067781e-05
5,0.9999998807907104
6,0.999983549118042
7,0.9999879598617554
8,0.9999997615814209
9,0.9999997615814209
10,0.9999998807907104
11,0.9999719858169556
12,4.395487849251367e-06
13,0.9999877214431763
14,0.999725878238678
15,0.9999864101409912
16,0.9999921321868896
0代表猫,1代表狗(代码中 label = 1 if 'dog' else 0),很显然上边的数据的分类效果是很好的
代码
# coding:utf8
import os
from PIL import Image
from torch.utils import data
import numpy as np
from torchvision import transforms as T
import torch as t
import torch.nn as nn
from torchvision import datasets, models, transforms
from torch.utils.data import DataLoader
from torchnet import meter
from tqdm import tqdm
import torch
import csv
class DogCat(data.Dataset):
    """Dataset for the Kaggle dogs-vs-cats images.

    Collects every image path under ``root`` and splits it by mode:

    * test  -- all images (file names like ``data/test1/8973.jpg``)
    * train -- first 70% of the id-sorted images (``data/train/cat.10004.jpg``)
    * val   -- remaining 30%

    Labels: dog -> 1, cat -> 0.  For the test split, ``__getitem__`` returns
    the numeric image id instead of a class label.
    """

    def __init__(self, root, transforms=None, train=True, test=False):
        self.test = test
        imgs = [os.path.join(root, img) for img in os.listdir(root)]
        # Sort by the numeric id embedded in the file name so the
        # train/val split is deterministic across runs.
        # test1: data/test1/8973.jpg        -> id is the whole stem
        # train: data/train/cat.10004.jpg   -> id is the middle component
        if self.test:
            imgs = sorted(imgs, key=lambda x: int(x.split('.')[-2].split('/')[-1]))
        else:
            imgs = sorted(imgs, key=lambda x: int(x.split('.')[-2]))

        imgs_num = len(imgs)
        if self.test:
            self.imgs = imgs
        elif train:
            self.imgs = imgs[:int(0.7 * imgs_num)]
        else:
            self.imgs = imgs[int(0.7 * imgs_num):]

        if transforms is None:
            normalize = T.Normalize(mean=[0.485, 0.456, 0.406],
                                    std=[0.229, 0.224, 0.225])
            if self.test or not train:
                # Deterministic pipeline for evaluation.
                self.transforms = T.Compose([
                    T.Resize(224),
                    T.CenterCrop(224),
                    T.ToTensor(),
                    normalize
                ])
            else:
                # Light augmentation (random horizontal flip) for training.
                self.transforms = T.Compose([
                    T.Resize(256),
                    T.CenterCrop(224),
                    T.RandomHorizontalFlip(),
                    T.ToTensor(),
                    normalize
                ])
        else:
            # BUG FIX: a caller-supplied transform was previously ignored,
            # leaving self.transforms undefined and crashing __getitem__.
            self.transforms = transforms

    def __getitem__(self, index):
        """Return ``(image_tensor, label)`` for one sample.

        For the test split the "label" is the numeric image id so predictions
        can be matched back to the submission file.
        """
        img_path = self.imgs[index]
        if self.test:
            label = int(self.imgs[index].split('.')[-2].split('/')[-1])
        else:
            label = 1 if 'dog' in img_path.split('/')[-1] else 0
        # BUG FIX: force 3-channel RGB so grayscale/RGBA files do not break
        # the 3-channel Normalize transform.
        data = Image.open(img_path).convert('RGB')
        data = self.transforms(data)
        return data, label

    def __len__(self):
        return len(self.imgs)
class DefaultConfig(object):
    """Hyper-parameters and paths for training (some fields are unused)."""

    # Visualization (visdom) settings.
    env = 'default'
    vis_port = 8097

    # Model selection; must match a name exported by models/__init__.py.
    model = 'ResNet34'

    # Dataset locations.
    train_data_root = '/media/cyq/CU/Ubuntu system files/dogs-vs-cats/train/'
    test_data_root = '/media/cyq/CU/Ubuntu system files/dogs-vs-cats/test/'

    # Checkpoint to resume from; None means train from scratch.
    load_model_path = None

    # Runtime settings.
    batch_size = 48
    use_gpu = True
    num_workers = 4            # dataloader worker processes
    print_freq = 20            # print info every N batches

    debug_file = '/tmp/debug'  # if os.path.exists(debug_file): enter ipdb
    result_file = 'result.csv'

    # Optimization settings.
    max_epoch = 10
    lr = 0.001                 # initial learning rate
    lr_decay = 0.5             # when loss increases: lr = lr * lr_decay
    weight_decay = 0e-5        # L2 regularization strength


opt = DefaultConfig()
class ResNet34(nn.Module):
    """ResNet34 backbone (ImageNet-pretrained) with a fresh 2-way head.

    The torchvision resnet34 is reused up to and including its global
    average-pooling layer; only the final fully-connected layer is replaced
    so the network emits ``num_classes`` logits.
    """

    def __init__(self, num_classes=2):
        super(ResNet34, self).__init__()
        self.model_name = 'resnet34'
        backbone = models.resnet34(pretrained=True)
        # Drop the original 1000-class fc layer; keep everything before it.
        self.resnet = nn.Sequential(*list(backbone.children())[:-1])
        self.fc = nn.Linear(in_features=512, out_features=num_classes)

    def forward(self, x):
        features = self.resnet(x)
        flat = features.view(features.size(0), -1)
        return self.fc(flat)
# BUG FIX: fall back to CPU when CUDA is unavailable instead of crashing
# later at net.to(device); also drop the non-idiomatic ``== True``.
device = "cuda" if opt.use_gpu and torch.cuda.is_available() else "cpu"
net = ResNet34()
net.to(device)
def val(model, dataloader):
    """Evaluate ``model`` on ``dataloader``.

    Returns a ``(ConfusionMeter, accuracy_percentage)`` pair.  The model is
    switched back into training mode before returning.
    """
    model.eval()
    cm = meter.ConfusionMeter(2)
    for _, (inputs, labels) in tqdm(enumerate(dataloader)):
        inputs = inputs.to(device)
        with torch.no_grad():
            scores = model(inputs)
        cm.add(scores.detach().squeeze(), labels.type(t.LongTensor))
    model.train()
    values = cm.value()
    # Accuracy = trace of the 2x2 confusion matrix over its total count.
    accuracy = 100. * (values[0][0] + values[1][1]) / values.sum()
    return cm, accuracy
def train():
    """Train the module-level ``net`` on DogCat data, printing per-epoch metrics.

    BUG FIX: the original tracked ``previous_loss`` but never used it, so the
    lr decay documented on ``DefaultConfig.lr_decay`` was dead code.  The
    learning rate is now multiplied by ``opt.lr_decay`` whenever the epoch's
    mean training loss increases.  ``opt.weight_decay`` (0.0 here, so
    numerically identical) is also forwarded to the optimizer.
    """
    # step1: model is the module-level ``net``
    # step2: data
    train_data = DogCat(opt.train_data_root, train=True)
    val_data = DogCat(opt.train_data_root, train=False)
    train_dataloader = DataLoader(train_data, opt.batch_size,
                                  shuffle=True, num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data, opt.batch_size,
                                shuffle=False, num_workers=opt.num_workers)

    # step3: criterion and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(
        filter(lambda p: p.requires_grad, net.parameters()),
        lr=opt.lr, momentum=0.9, weight_decay=opt.weight_decay)
    lr = opt.lr

    # step4: meters
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(2)
    previous_loss = 1e10

    for epoch in range(opt.max_epoch):
        loss_meter.reset()
        confusion_matrix.reset()

        # ``batch`` instead of ``input`` to avoid shadowing the builtin.
        for batch, label in train_dataloader:
            batch = batch.to(device)
            target = label.to(device)

            optimizer.zero_grad()
            score = net(batch)
            loss = criterion(score, target)
            loss.backward()
            optimizer.step()

            loss_meter.add(loss.item())
            # detach so the meter never retains the autograd graph
            confusion_matrix.add(score.detach(), target.detach())

        # validate and report
        val_cm, val_accuracy = val(net, val_dataloader)
        print("epoch:{epoch},lr:{lr},loss:{loss},train_cm:{train_cm},val_cm:{val_cm}".format(
            epoch=epoch, loss=loss_meter.value()[0], val_cm=str(val_cm.value()),
            train_cm=str(confusion_matrix.value()), lr=lr))
        cm_train = confusion_matrix.value()
        train_accuracy = 100. * (cm_train[0][0] + cm_train[1][1]) / cm_train.sum()
        print("train-ACC: %.3f" % train_accuracy)
        print("val-ACC: %.3f" % val_accuracy)

        # Decay the learning rate when the mean loss stopped improving.
        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
        previous_loss = loss_meter.value()[0]
if __name__ == '__main__':
    # Guard the entry point so importing this module (e.g. for the DogCat
    # dataset class) does not kick off a full training run.
    train()
    # NOTE(review): saving the whole module object ties the checkpoint to this
    # exact class definition; torch.save(net.state_dict(), ...) is the more
    # robust format, but is a different file layout for existing loaders.
    torch.save(net, 'model.pkl')