# Method 1: nn.Sequential model, parameters updated by hand.
import torch
from torch.autograd import Variable
# Problem sizes: N rows of data, D_in input features, H hidden units,
# D_out output features.
N, D_in, H, D_out = 64, 1000, 100, 10

# Random inputs and random regression targets; the targets need no gradient.
x = Variable(torch.randn(N, D_in))
y = Variable(torch.randn(N, D_out), requires_grad=False)

# Define our model as a sequence of layers.
model = torch.nn.Sequential(
    torch.nn.Linear(D_in, H),
    torch.nn.ReLU(),
    torch.nn.Linear(H, D_out),
)

# nn defines common loss functions. reduction="sum" is the supported
# replacement for the deprecated size_average=False: the squared errors are
# summed rather than averaged.
loss_fn = torch.nn.MSELoss(reduction="sum")

learning_rate = 1e-4
for t in range(500):
    # Forward pass: feed data to the model, and its prediction to the loss.
    y_pred = model(x)
    loss = loss_fn(y_pred, y)

    # Backward pass: reset the gradients left over from the previous
    # iteration, then compute all gradients for this one.
    model.zero_grad()
    loss.backward()

    # Make a gradient step on each model parameter. Updating through .data
    # modifies the weights in place without recording the update in autograd.
    for param in model.parameters():
        param.data -= learning_rate * param.grad.data
# Method 2: nn.Sequential model, parameters updated by a torch.optim optimizer (Adam).
import torch
from torch.autograd import Variable
# Problem sizes: N rows of data, D_in input features, H hidden units,
# D_out output features.
N, D_in, H, D_out = 64, 1000, 100, 10

# Random inputs and random regression targets; the targets need no gradient.
x = Variable(torch.randn(N, D_in))
y = Variable(torch.randn(N, D_out), requires_grad=False)

# Define our model as a sequence of layers.
model = torch.nn.Sequential(
    torch.nn.Linear(D_in, H),
    torch.nn.ReLU(),
    torch.nn.Linear(H, D_out),
)

# nn defines common loss functions. reduction="sum" is the supported
# replacement for the deprecated size_average=False: the squared errors are
# summed rather than averaged.
loss_fn = torch.nn.MSELoss(reduction="sum")

learning_rate = 1e-4
# Use an optimizer for different update rules; it is handed every model
# parameter so that step() can update all of them.
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

for t in range(500):
    # Forward pass: feed data to the model, and its prediction to the loss.
    y_pred = model(x)
    loss = loss_fn(y_pred, y)

    # Backward pass: reset the gradients left over from the previous
    # iteration, then compute all gradients for this one.
    model.zero_grad()
    loss.backward()

    # Update all parameters after computing gradients.
    optimizer.step()
# Method 3: model written as a custom nn.Module subclass, trained with SGD.
import torch
from torch.autograd import Variable
import torch.nn as nn
# define our whole model as a single Module
class TwoLayerNet(nn.Module):
    """Fully connected network: Linear -> ReLU -> Linear."""

    def __init__(self, D_in, H, D_out):
        """Set up the two child Linear modules (Modules can contain Modules).

        Args:
            D_in: number of input features of the first layer.
            H: number of hidden units between the two layers.
            D_out: number of output features of the second layer.
        """
        # The constructor must be spelled __init__ (double underscores);
        # otherwise Python never calls it and the layers are never created.
        super(TwoLayerNet, self).__init__()
        self.linear1 = torch.nn.Linear(D_in, H)
        self.linear2 = torch.nn.Linear(H, D_out)

    def forward(self, x):
        """Compute the prediction for ``x`` using the child modules.

        There is no need to define a backward pass; autograd handles it.

        Args:
            x: input whose last dimension has ``D_in`` features.

        Returns:
            The output of the second linear layer.
        """
        # clamp(min=0) zeroes the negative activations, i.e. it acts as ReLU.
        h_relu = self.linear1(x).clamp(min=0)
        y_pred = self.linear2(h_relu)
        return y_pred
# Problem sizes: N rows of data, D_in input features, H hidden units,
# D_out output features.
N, D_in, H, D_out = 64, 1000, 100, 10

# Random inputs and random regression targets; the targets need no gradient.
x = Variable(torch.randn(N, D_in))
y = Variable(torch.randn(N, D_out), requires_grad=False)

# Construct and train an instance of our model.
model = TwoLayerNet(D_in, H, D_out)

# reduction="sum" is the supported replacement for the deprecated
# size_average=False: the squared errors are summed rather than averaged.
criterion = torch.nn.MSELoss(reduction="sum")
optimizer = torch.optim.SGD(model.parameters(), lr=1e-4)

for t in range(500):
    # Forward pass: prediction, then loss against the targets.
    y_pred = model(x)
    loss = criterion(y_pred, y)

    # Backward pass: reset the gradients left over from the previous
    # iteration, then compute all gradients for this one.
    model.zero_grad()
    loss.backward()

    # Update all parameters using the gradients just computed.
    optimizer.step()