完整代码:
import torch
from torch import nn
from torch.nn import init
import torchvision
import torchvision.transforms as transforms
import sys
import d2lzh_pytorch as d2l
import time
from torch.autograd import Variable
# Run on the GPU when one is visible to PyTorch, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Hyperparameters. Images are reshaped to (time_step, input_size) sequences
# before being fed to the recurrent model.
batch_size = 128
input_size = 28
time_step = 28
hidden_size = 64
num_layers = 1
lr = 0.03
num_epochs = 20

# MNIST is read from a local directory; the training split is never
# downloaded here (download=False), so the files must already exist.
trainset = torchvision.datasets.MNIST(
    root="D:/pythonlearning",
    train=True,
    transform=transforms.ToTensor(),
    download=False,
)
testset = torchvision.datasets.MNIST(
    root="D:/pythonlearning",
    train=False,
    transform=transforms.ToTensor(),
)

# Only the training batches are shuffled; the test order stays fixed.
trainloader = torch.utils.data.DataLoader(
    dataset=trainset,
    batch_size=batch_size,
    shuffle=True,
)
testloader = torch.utils.data.DataLoader(
    dataset=testset,
    batch_size=batch_size,
    shuffle=False,
)
class RNNModel(nn.Module):
    """LSTM sequence classifier: an LSTM followed by a linear output layer.

    Args:
        input_size: Number of features per time step.
        hidden_size: Size of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        num_classes: Number of output classes (defaults to 10).
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes=10):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # Forward num_layers to the LSTM; it used to be hard-coded to 1,
        # which silently ignored the constructor argument.
        self.rnn = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.Linear = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return class scores of shape (batch, num_classes).

        Args:
            x: Input of shape (batch, time_step, input_size); the LSTM is
                built with batch_first=True.
        """
        # With no initial state given, the LSTM starts from zeros. The final
        # (h_n, c_n) state is not needed for classification.
        r_out, _ = self.rnn(x)
        # r_out is (batch, time_step, hidden_size); classify from the output
        # of the last time step only.
        output = self.Linear(r_out[:, -1, :])
        return output
# Build the classifier and move its parameters to the selected device.
model = RNNModel(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
)
model = model.to(device)
# print(model)

# Adam over all model parameters with the configured learning rate.
optimizer = torch.optim.Adam(params=model.parameters(), lr=lr)

# CrossEntropyLoss takes integer class indices as targets, not one-hot vectors.
loss = nn.CrossEntropyLoss()

print("training on ", device)
# Train for num_epochs passes over the training set, printing the loss of the
# last mini-batch and the wall-clock time after each epoch.
for epoch in range(num_epochs):
    start = time.time()
    model.train()
    for images, labels in trainloader:
        # Reshape each batch to (batch, time_step, input_size) and move it to
        # the same device as the model. Using .to(device) instead of .cuda()
        # keeps the script runnable on CPU-only machines.
        images = images.view(-1, time_step, input_size).to(device)
        labels = labels.to(device)

        outputs = model(images)
        batch_loss = loss(outputs, labels)

        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

    # The reported loss is that of the final mini-batch of the epoch, not an
    # average over the epoch.
    print('epoch %d, loss %.4f, time %.1f sec'
          % (epoch + 1, batch_loss.item(), time.time() - start))

# Evaluate classification accuracy (in percent) on the test set.
# NOTE(review): the original paste lost its indentation; evaluation is placed
# after training finishes rather than once per epoch -- confirm this is the
# intended placement.
correct = 0
total = 0
model.eval()
with torch.no_grad():  # no gradients are needed for evaluation
    for images, labels in testloader:
        images = images.view(-1, time_step, input_size).to(device)
        labels = labels.to(device)
        outputs = model(images)
        # The predicted class is the index of the largest score per sample.
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
print('test acc: %.4f' % (100.0 * correct / total))
部分训练结果: