PyTorch Learning Notes (4): MNIST Digit Recognition with a Multi-Layer Convolutional Network

2020.2.5 PyTorch learning notes

1. The spatial extent a neuron connects to is called its receptive field, which is simply the width and height of the filter. Along the depth axis the filter always spans the full depth of the input, so the spatial dimensions (width and height) and the depth dimension are treated differently: connectivity is local in space but complete in depth.
2. The output depth of a convolutional layer is a hyperparameter equal to the number of filters used; each filter searches the input for one kind of feature.
3. Output size formula (checked numerically in the sketch after this list):
K = (W - F + 2P)/S + 1
where W is the input size, F is the kernel size, P is the number of zeros padded on each border, and S is the stride.
4. Parameter sharing works because a feature that is useful at one position in the image is equally useful at another.
5. Image features are invariant to translation, so downsampling does not discard the features an image contains.
6. Prefer small filter sizes whenever possible.
7. Zero padding lets a convolutional layer keep the same spatial dimensions for its input and output. Without padding, the size shrinks slightly at every layer, and repeated convolutions lose edge information too quickly.
8. torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True) — see the sketch after this list:
in_channels: input depth of the convolutional layer
out_channels: output depth, i.e. the number of kernels
kernel_size: kernel size
bias: whether to use a bias term
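
Tying points 3, 7, and 8 together, here is a minimal sketch (the shapes and parameter values are my own illustration, not from the original note):

import torch
from torch import nn

x = torch.randn(1, 1, 28, 28)                       # one 28x28 grayscale image
conv = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=3)  # no padding
print(conv(x).shape)                                # [1, 16, 26, 26]: (28-3+0)/1+1 = 26
same = nn.Conv2d(1, 16, kernel_size=3, padding=1)   # one ring of zero padding
print(same(x).shape)                                # [1, 16, 28, 28]: (28-3+2)/1+1 = 28
pool = nn.MaxPool2d(kernel_size=2, stride=2)        # downsampling halves width/height
print(pool(x).shape)                                # [1, 1, 14, 14]

With padding=1 and a 3x3 kernel, input and output sizes match, which is exactly the effect point 7 describes.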
9. MNIST digit recognition with a multi-layer CNN

import torch
from torch import nn, optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=3),   # 28x28 -> 26x26
            nn.BatchNorm2d(16),
            nn.ReLU(inplace=True)
        )

        self.layer2 = nn.Sequential(
            nn.Conv2d(16, 32, kernel_size=3),  # 26x26 -> 24x24
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2)  # 24x24 -> 12x12
        )

        self.layer3 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=3),  # 12x12 -> 10x10
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True)
        )

        self.layer4 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=3),  # 10x10 -> 8x8
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2)  # 8x8 -> 4x4
        )

        self.fc = nn.Sequential(
            nn.Linear(128*4*4, 1024),  # 128 channels at 4x4 after layer4
            nn.ReLU(inplace=True),
            nn.Linear(1024, 128),
            nn.ReLU(inplace=True),
            nn.Linear(128, 10)
        )

    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = x.view(x.size(0), -1)  # flatten to (N, 128*4*4) before the fully connected layers
        x = self.fc(x)
        return x
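
The 128*4*4 input of the fc block comes from tracing the spatial size through the layers: 28 → 26 → 24 → pool → 12 → 10 → 8 → pool → 4. A quick sketch (assuming the CNN class above) confirms it with a dummy forward pass:

net = CNN()
dummy = torch.randn(1, 1, 28, 28)
feats = net.layer4(net.layer3(net.layer2(net.layer1(dummy))))
print(feats.shape)  # torch.Size([1, 128, 4, 4]), so flattening yields 128*4*4 = 2048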

batch_size = 64
learning_rate = 1e-2
num_epoches = 20
data_tf = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])  # MNIST mean and std
train_dataset = datasets.MNIST(root='./data', train=True, transform=data_tf, download=True)
test_dataset = datasets.MNIST(root='./data', train=False, transform=data_tf)
train_loader = DataLoader(train_dataset, batch_size=batch_size,shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

model = CNN()  # swap in the convolutional model here
if torch.cuda.is_available():
    model = model.cuda()
criterion = nn.CrossEntropyLoss()  # cross-entropy loss
optimizer = optim.SGD(model.parameters(), lr=learning_rate)  # stochastic gradient descent

# train
def train(model, train_loader, optimizer):
    model.train()
    for batch_idx, data in enumerate(train_loader):
        img, label = data
        # no flattening needed: conv layers take (N, C, H, W) input directly
        if torch.cuda.is_available():
            img = img.cuda()
            label = label.cuda()
        out = model(img)
        optimizer.zero_grad()
        loss = criterion(out, label)
        loss.backward()
        optimizer.step()
        if (batch_idx + 1) % 100 == 0:
            print('loss: ', loss.item())

for epoch in range(num_epoches):
    print('train epoch: ', epoch)
    train(model, train_loader, optimizer)

def test(model, test_loader):
    model.eval()
    eval_loss = 0
    eval_acc = 0
    with torch.no_grad():  # no gradients needed during evaluation
        for data in test_loader:
            img, label = data
            # img = img.view(img.size(0), -1) is not needed here: the CNN consumes (N, C, H, W) directly
            if torch.cuda.is_available():
                img = img.cuda()
                label = label.cuda()
            out = model(img)
            loss = criterion(out, label)
            eval_loss += loss.item() * label.size(0)
            _, pred = torch.max(out, 1)
            eval_acc += (pred == label).sum().item()
    # report average loss and accuracy over the whole test set
    print('test loss:{:.6f}, acc:{:.6f}'.format(eval_loss / len(test_dataset), eval_acc / len(test_dataset)))

test(model, test_loader)
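
A side note: the repeated torch.cuda.is_available() branches in train and test can be collapsed with the device idiom. A minimal sketch of that refactor (my own variation, not from the original code):

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = CNN().to(device)
# then, inside the train/test loops:
# img, label = img.to(device), label.to(device)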

Training time increases noticeably.

test loss:0.023492, acc:0.991500

Accuracy reaches 99.15%!


Reprinted from blog.csdn.net/qq_35306281/article/details/104190963