PyTorch:使用循环神经网络LSTM训练MNIST数据集

完整代码:

import torch
from torch import nn
from torch.nn import init
import torchvision
import torchvision.transforms as transforms
import sys
import d2lzh_pytorch as d2l
import time
from torch.autograd import Variable

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Batching and model-size hyperparameters. The training loop further down
# reshapes every image batch to (-1, time_step, input_size) before feeding
# it to the network.
batch_size = 128
input_size = 28
time_step = 28
hidden_size = 64
num_layers = 1

# Optimisation hyperparameters.
lr = 0.03
num_epochs = 20

# MNIST train / test splits, converted to tensors. download=False means the
# data is not fetched here, so it has to be present under `root` already.
trainset = torchvision.datasets.MNIST(
    root="D:/pythonlearning",
    train=True,
    transform=transforms.ToTensor(),
    download=False,
)
testset = torchvision.datasets.MNIST(
    root="D:/pythonlearning",
    train=False,
    transform=transforms.ToTensor(),
)

# Only the training data is shuffled; the test set is read in order.
trainloader = torch.utils.data.DataLoader(
    dataset=trainset,
    batch_size=batch_size,
    shuffle=True,
)
testloader = torch.utils.data.DataLoader(
    dataset=testset,
    batch_size=batch_size,
    shuffle=False,
)

class RNNModel(nn.Module):
    """LSTM sequence classifier producing 10 class scores per sample.

    The input sequence is run through a (possibly stacked) LSTM and the
    output at the last time step is projected to 10 logits by a linear layer.

    Args:
        input_size: Number of features per time step.
        hidden_size: Size of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, input_size: int, hidden_size: int, num_layers: int):
        super(RNNModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # Pass num_layers through to the LSTM; it used to be hard-coded to 1,
        # which silently ignored the constructor argument.
        self.rnn = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.Linear = nn.Linear(hidden_size, 10)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return class scores of shape (batch, 10).

        Args:
            x: Input of shape (batch, time_step, input_size); the batch
                dimension comes first because the LSTM uses batch_first=True.
        """
        # No initial state is given, so the LSTM starts from zeros.
        # r_out: (batch, time_step, hidden_size); the final hidden and cell
        # states returned alongside it are not needed here.
        r_out, _ = self.rnn(x, None)
        # Classify from the output of the last time step only.
        last_step = r_out[:, -1, :]
        return self.Linear(last_step)


# Build the network and move its parameters to the selected device.
model = RNNModel(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
)
model = model.to(device)

# Adam over all model parameters with the learning rate configured above.
optimizer = torch.optim.Adam(params=model.parameters(), lr=lr)

# Cross-entropy on raw scores; targets here are class indices, not one-hot
# vectors.
loss = nn.CrossEntropyLoss()


print("training on ", device)

# One pass over the training set followed by a full evaluation on the test
# set, repeated for num_epochs epochs.
for epoch in range(num_epochs):
    start = time.time()

    # ---- training pass ----
    model.train()
    for images, labels in trainloader:
        # Reshape each batch to (batch, time_step, input_size) and move it to
        # the same device as the model. `.to(device)` replaces the previous
        # hard-coded `.cuda()`, which crashed on CPU-only machines.
        images = images.view(-1, time_step, input_size).to(device)
        labels = labels.to(device)

        outputs = model(images)
        l = loss(outputs, labels)
        optimizer.zero_grad()
        l.backward()
        optimizer.step()

    # NOTE: the loss printed here is that of the last mini-batch of the
    # epoch, not an average over the epoch.
    print('epoch %d, loss %.4f, time %.1f sec'
          % (epoch + 1, l.item(), time.time() - start))

    # ---- evaluation pass ----
    model.eval()
    correct = 0
    total = 0
    # Gradients are not needed for evaluation; disabling them avoids building
    # the autograd graph.
    with torch.no_grad():
        for images, labels in testloader:
            images = images.view(-1, time_step, input_size).to(device)
            labels = labels.to(device)
            predicted = model(images).argmax(dim=1)
            total += labels.size(0)
            # .item() keeps `correct` a plain Python int so the percentage
            # below is computed in floating point rather than on an integer
            # tensor.
            correct += (predicted == labels).sum().item()

    # Accuracy is reported as a percentage.
    print('test acc: %.4f' % (100.0 * correct / total))

部分训练结果:
(原文此处为训练结果截图,图片未能保留。)

猜你喜欢

转载自blog.csdn.net/weixin_45850972/article/details/105731043