1.定义Dataset类
在定义自己的Dataset类时,必须重写父类(torch.utils.data.Dataset)的以下两个特殊成员函数(即Python的魔法方法,并非私有函数):
# Skeleton: a custom Dataset must override these two special methods
# inherited from torch.utils.data.Dataset.
class dog_cat_datastes(torch.utils.data.Dataset):
    def __getitem__(self, index):  # return (data, label) of the sample at `index`
    def __len__(self):  # return the number of samples in the dataset
__len__(): 返回值为训练集的长度,有多少个样本,返回值就是多少。
__getitem__(index): 返回值为每个样本的数据和标签。
index参数表示样本的索引值。当你使用torch.utils.data.DataLoader对数据集进行装载时,会通过前面所说的__len__()函数得到训练集的大小;torch.utils.data.DataLoader将自动根据__len__()函数的返回值生成一个[0,返回值)的列表;DataLoader里面有个参数shuffle,当shuffle=True时,意思就是将训练集打乱进行训练。实际上就是将[0,返回值)的列表进行打乱,然后以打乱后的顺序一个一个赋值给index,实现了对数据集的乱序训练。其实通过__init__()初始化Dataset类时,需要定义一个训练集路径的属性,该属性需要包含训练集所有样本的路径,可以是list或者ndarray,在__getitem__函数中需要通过self.list[index]来得到单个样本的路径,之后再进行读取、预处理等操作,返回这个样本的数据和标签。mini_batch的相关操作在DataLoader中进行,在__getitem__函数中只需要给定index返回相应的单个样本的数据和标签即可。
import torch
class dog_cat_datastes(torch.utils.data.Dataset):
    """Skeleton of a custom Dataset: override __getitem__ and __len__."""

    def __init__(self):
        # TODO: initialize file paths or a list of file names.
        # In other words, this is where the basic attributes of the class are set up.
        pass

    def __getitem__(self, index):
        # TODO:
        # 1. Read ONE sample from file (e.g. numpy.fromfile, PIL.Image.open).
        # 2. Preprocess the data (e.g. torchvision.transforms).
        # 3. Return a data pair (e.g. image and label).
        # Note that step 1 reads a single sample, not the whole set.
        raise NotImplementedError

    def __len__(self):
        # Return the total size of the dataset.
        raise NotImplementedError
example
import torch
import torchvision
from torch.autograd import Variable
import numpy as np
from PIL import Image
import os
import glob
import time
class dog_cat_datastes(torch.utils.data.Dataset):
    """Dog-vs-cat dataset: loads one image per path, labelled from its file name.

    Labels: 0 = cat, 1 = dog (decided by whether 'cat' or 'dog' appears in the
    file's basename).
    """

    def __init__(self, train_paths):
        # train_paths: list/ndarray of image file paths, one per sample.
        self.train_paths = train_paths
        # Preprocessing: resize to 64x64 and convert to a CHW float tensor in [0, 1].
        # NOTE: transforms.Scale was a deprecated alias of transforms.Resize and
        # has been removed from recent torchvision, so Resize is used here.
        self.transform_patch = torchvision.transforms.Compose(
            [torchvision.transforms.Resize([64, 64]),
             torchvision.transforms.ToTensor()])

    def __getitem__(self, index):
        patch_path = self.train_paths[index]
        basename = os.path.basename(patch_path)
        # Derive the label from the file name; fail loudly on unexpected names
        # (the original left `label` unbound, producing a confusing NameError).
        if 'cat' in basename:
            label = np.array(0)
        elif 'dog' in basename:
            label = np.array(1)
        else:
            raise ValueError(f'cannot infer label from file name: {basename}')
        patch = Image.open(patch_path).convert('RGB')
        patch = self.transform_patch(patch)
        return patch.clone(), label.copy()

    def __len__(self):
        # Number of samples = number of paths.
        return len(self.train_paths)
2.数据集装载
在进行数据集装载时,需要用到torch.utils.data.DataLoader函数。
# Collect the paths of all training samples (tutorial uses a Windows directory).
train_paths = glob.glob(os.path.join(r'C:\Users\YC\Desktop\dog_cat\train','*'))
# Instantiate the custom Dataset class
train_datagen = dog_cat_datastes(train_paths)
# Instantiate the DataLoader class (mini-batches of 16, shuffled each epoch)
dataloader = torch.utils.data.DataLoader(dataset=train_datagen,batch_size = 16,shuffle= True)
参数介绍:
①dataset:训练集,必须是torch.utils.data.Dataset类或者它的子类。
②batch_size: mini batch 训练时,batch的大小。
③shuffle:在一个epoch中,利用训练集训练时训练集是否乱序。
当利用torch.utils.data.DataLoader函数装载好数据后,如果训练集样本数量train_dataset_num不是batch_size的整数倍时,那么最后一个batch的样本数量为num = train_dataset_num%batch_size。(eg:假设train_dataset_num = 50,batch_size = 16,那么在一个epoch中batch的数量batch_num = 4,前三个batch中样本数量为16,最后一个batch样本数量为2)
3.构建神经网络模型
在构建神经网络模型时,需要定义自己的Module类,必须继承父类(torch.nn.Module),在定义自己的Module类时,需要自己重新定义以下两个函数:
class Models(torch.nn.Module):
    """Skeleton of a custom network: override __init__ and forward."""

    def __init__(self):
        # Call the parent-class initializer first.
        super(Models, self).__init__()
        # TODO: define the network modules, e.g. with torch.nn.Sequential().

    def forward(self, input):
        # TODO: define the forward-propagation process and return its result.
        raise NotImplementedError
example
#VGG16神经网络模型自定义
class Models(torch.nn.Module):
    """VGG16-style CNN for 64x64 RGB input, classifying two classes (cat/dog)."""

    def __init__(self):
        super(Models, self).__init__()
        # Four conv stages; each is two 3x3 same-padding convs with ReLU,
        # followed by a 2x2 max-pool that halves the spatial size (64 -> 4).
        stage_channels = [(3, 64), (64, 128), (128, 256), (256, 512)]
        feature_layers = []
        for in_ch, out_ch in stage_channels:
            feature_layers += [
                torch.nn.Conv2d(in_ch, out_ch, kernel_size=3, stride=1, padding=1),
                torch.nn.ReLU(),
                torch.nn.Conv2d(out_ch, out_ch, kernel_size=3, stride=1, padding=1),
                torch.nn.ReLU(),
                torch.nn.MaxPool2d(kernel_size=2, stride=2),
            ]
        self.Conv = torch.nn.Sequential(*feature_layers)
        # Classifier head: flattened 4x4x512 features -> 1024 -> 1024 -> 2 logits.
        self.Classes = torch.nn.Sequential(
            torch.nn.Linear(4 * 4 * 512, 1024),
            torch.nn.ReLU(),
            torch.nn.Dropout(p=0.5),
            torch.nn.Linear(1024, 1024),
            torch.nn.ReLU(),
            torch.nn.Dropout(p=0.5),
            torch.nn.Linear(1024, 2),
        )

    def forward(self, input):
        features = self.Conv(input)
        flat = features.view(-1, 4 * 4 * 512)  # equivalent to a reshape
        return self.Classes(flat)
4.定义损失函数和优化器
# Instantiate the CrossEntropyLoss class (combines LogSoftmax + NLLLoss,
# so the model may output raw logits).
loss_f = torch.nn.CrossEntropyLoss()
# Instantiate the Adam optimizer.
# NOTE(review): in this tutorial `model` is only created in the next section
# (model = Models()) — it must exist before this line can actually run.
optimizer = torch.optim.Adam(model.parameters(),lr = 0.00001)
torch.optim.Adam参数介绍:
①model.parameters():表示模型里需要优化的参数,即各个节点的权重值。
②lr:表示学习率
5.训练神经网络模型
model = Models()  # instantiate the model
epoch_n = 10  # number of epochs
time_open = time.time()
for epoch in range(epoch_n):
    print("training...........")
    running_loss = 0.0
    running_corrects = 0
    running_samples = 0  # true number of samples seen; the last batch may be < 16
    for batch, data in enumerate(dataloader, 1):
        # data is one batch from the DataLoader: (inputs, labels)
        x, y = data
        y = y.type(torch.LongTensor)
        x, y = Variable(x), Variable(y)
        y_pred = model(x)  # forward pass: raw logits
        _, pred = torch.max(y_pred.data, 1)  # predicted class per sample
        optimizer.zero_grad()  # reset gradients; otherwise they accumulate
        loss = loss_f(y_pred, y)  # compute the batch loss
        loss.backward()  # backprop: compute gradients of all weights
        optimizer.step()  # update the weights
        running_loss += loss.item()  # loss.item() is this batch's loss
        # .item() keeps the counter a plain Python number instead of a tensor.
        running_corrects += torch.sum(pred == y.data).item()
        running_samples += y.size(0)
        print('batch:', batch)
        if batch % 1 == 0:
            # Divide by the real sample count, not 16*batch: the final batch of
            # an epoch can be smaller than the batch size.
            print("batch{},train loss:{:.4f},train acc:{:.4f}"
                  .format(batch, running_loss / batch, 100 * running_corrects / running_samples))
    # Mean loss per batch and epoch accuracy (the original multiplied by a
    # hard-coded 16, which over-counts when the dataset size is not a multiple
    # of the batch size).
    epoch_loss = running_loss / batch
    epoch_acc = 100 * running_corrects / len(train_datagen)
    print("train (loss:{:.4f} acc:{:.4f})".format(epoch_loss, epoch_acc))
time_end = time.time() - time_open
print(time_end)
6.整体代码
整理后分成了两个python文件:dog_vs_cat.py和 Model_dataset.py
dog_vs_cat.py
import torch
import torchvision
from torch.autograd import Variable
import os
import glob
import argparse
import time
from PIL import Image
from Model_dataset import dog_cat_datastes,Models
def main(args):
    """Train and/or test the dog-vs-cat classifier, depending on CLI flags.

    args.train: train on the hard-coded training directory, save to 'model.pkl'.
    args.test: load 'model.pkl' and print a cat/dog prediction for every image
        in the hard-coded test directory.
    """
    if args.train:
        train_paths = glob.glob(os.path.join(r'C:\Users\youchao\Desktop\dog_cat\train_all', '*'))
        train_datagen = dog_cat_datastes(train_paths)
        dataloader = torch.utils.data.DataLoader(dataset=train_datagen, batch_size=16, shuffle=True)
        model = Models()
        model.train()
        print(torch.cuda.is_available())
        if torch.cuda.is_available():
            model = model.cuda()
        loss_f = torch.nn.CrossEntropyLoss()
        optimizer = torch.optim.Adam(model.parameters(), lr=0.00001)
        epoch_n = 1
        time_open = time.time()
        for epoch in range(epoch_n):
            print("training...........")
            running_loss = 0.0
            running_corrects = 0
            running_samples = 0  # true number of samples seen; last batch may be < 16
            for batch, data in enumerate(dataloader, 1):
                x, y = data
                y = y.type(torch.LongTensor)
                if torch.cuda.is_available():
                    x, y = Variable(x.cuda()), Variable(y.cuda())
                else:
                    x, y = Variable(x), Variable(y)
                y_pred = model(x)
                _, pred = torch.max(y_pred.data, 1)
                optimizer.zero_grad()
                loss = loss_f(y_pred, y)
                loss.backward()
                optimizer.step()
                running_loss += loss.item()
                # .item() keeps the counter a plain number instead of a tensor.
                running_corrects += torch.sum(pred == y.data).item()
                running_samples += y.size(0)
                if batch % 10 == 0:
                    # Use the true sample count as denominator; 16*batch is wrong
                    # whenever the final batch is smaller than the batch size.
                    print("batch{},train loss:{:.4f},train acc:{:.4f}"
                          .format(batch, running_loss / batch, 100 * running_corrects / running_samples))
            epoch_loss = running_loss / batch  # mean loss per batch
            epoch_acc = 100 * running_corrects / len(train_datagen)
            print("train (loss:{:.4f} acc:{:.4f})".format(epoch_loss, epoch_acc))
        time_end = time.time() - time_open
        # NOTE(review): this pickles the entire model object; saving
        # model.state_dict() is the recommended, more portable approach.
        torch.save(model, 'model.pkl')
        print(time_end)
    if args.test:
        test_paths = glob.glob(os.path.join(r'C:\Users\youchao\Desktop\dog_cat\test', '*'))
        model = torch.load('model.pkl')
        model.eval()
        # Build the preprocessing pipeline once, outside the loop.
        # (transforms.Scale was removed from torchvision; Resize replaces it.)
        transform_patch = torchvision.transforms.Compose(
            [torchvision.transforms.Resize([64, 64]),
             torchvision.transforms.ToTensor()])
        if torch.cuda.is_available():
            model.cuda()
        for test_path in test_paths:
            test_path_basename = os.path.basename(test_path)
            patch = Image.open(test_path).convert('RGB')
            x = transform_patch(patch).unsqueeze(0)  # add the batch dimension
            if torch.cuda.is_available():
                x = Variable(x.cuda())
            else:
                x = Variable(x)
            y_pred = model(x)
            # Batch size is 1, so pred holds exactly one predicted class index.
            _, pred = torch.max(y_pred.data, 1)
            label = 'cat' if pred.item() == 0 else 'dog'
            print(f'patch {test_path_basename} : {label}')
if __name__ == "__main__":
parser = argparse.ArgumentParser(description='Train or test model(s)')
parser.add_argument('--train', dest='train', action='store_true', help='train new models')
parser.add_argument('--test', dest='test', action='store_true', help='test saved models')
args = parser.parse_args()
main(args)
Model_dataset.py
import torch
import torchvision
import numpy as np
from PIL import Image
import os
class dog_cat_datastes(torch.utils.data.Dataset):
    """Dog-vs-cat dataset: loads one image per path, labelled from its file name.

    Labels: 0 = cat, 1 = dog (decided by whether 'cat' or 'dog' appears in the
    file's basename).
    """

    def __init__(self, train_paths):
        # train_paths: list/ndarray of image file paths, one per sample.
        self.train_paths = train_paths
        # Preprocessing: resize to 64x64 and convert to a CHW float tensor in [0, 1].
        # NOTE: transforms.Scale was a deprecated alias of transforms.Resize and
        # has been removed from recent torchvision, so Resize is used here.
        self.transform_patch = torchvision.transforms.Compose(
            [torchvision.transforms.Resize([64, 64]),
             torchvision.transforms.ToTensor()])

    def __getitem__(self, index):
        patch_path = self.train_paths[index]
        basename = os.path.basename(patch_path)
        # Derive the label from the file name; fail loudly on unexpected names
        # (the original left `label` unbound, producing a confusing NameError).
        if 'cat' in basename:
            label = np.array(0)
        elif 'dog' in basename:
            label = np.array(1)
        else:
            raise ValueError(f'cannot infer label from file name: {basename}')
        patch = Image.open(patch_path).convert('RGB')
        patch = self.transform_patch(patch)
        return patch.clone(), label.copy()

    def __len__(self):
        # Number of samples = number of paths.
        return len(self.train_paths)
class Models(torch.nn.Module):
    """VGG16-style CNN for 64x64 RGB input, classifying two classes (cat/dog).

    Architecture: four stages of (Conv3x3 + ReLU) x2 + MaxPool2x2, growing the
    channels 3 -> 64 -> 128 -> 256 -> 512 while halving the spatial size
    64 -> 32 -> 16 -> 8 -> 4, followed by a 3-layer fully-connected classifier
    with dropout that outputs 2 raw logits.
    """

    def __init__(self):
        super(Models, self).__init__()
        # Feature extractor: all convs are 3x3, stride 1, padding 1 ("same" size).
        self.Conv = torch.nn.Sequential(
            torch.nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2),
            torch.nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2),
            torch.nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2),
            torch.nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2),
        )
        # Classifier head: flattened 4x4x512 features -> 1024 -> 1024 -> 2 logits.
        self.Classes = torch.nn.Sequential(
            torch.nn.Linear(4 * 4 * 512, 1024),
            torch.nn.ReLU(),
            torch.nn.Dropout(p=0.5),
            torch.nn.Linear(1024, 1024),
            torch.nn.ReLU(),
            torch.nn.Dropout(p=0.5),
            torch.nn.Linear(1024, 2),
        )

    def forward(self, input):
        x = self.Conv(input)
        # Flatten per sample. Keeping the batch dimension explicit makes a wrong
        # input size fail loudly in the first Linear layer, whereas the original
        # view(-1, 4*4*512) could silently regroup samples across the batch.
        x = x.view(input.size(0), -1)
        x = self.Classes(x)
        return x