import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
'''
Autograd mechanics: calling backward() on a scalar computes gradients for the
leaf nodes used in the computation graph. If retain_grad() is set on a
non-leaf node, its intermediate gradient is recorded as well. A node's
grad_fn records the function that produced it.
'''
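# A minimal sketch of the mechanics described above (x is a leaf node, y is an
# intermediate node that only keeps its gradient because of retain_grad()):
x = torch.tensor([2.0], requires_grad=True)
y = x * 3              # y.grad_fn records the multiply that produced y
y.retain_grad()        # keep the gradient on this non-leaf node
z = (y ** 2).sum()     # scalar output, so backward() needs no gradient argument
z.backward()
print(y.grad_fn)       # e.g. <MulBackward0 object ...>
print(x.grad, y.grad)  # dz/dx = 18*x = 36, dz/dy = 2*y = 12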
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    # Once forward is defined, calling backward() computes gradients automatically.
    def forward(self, x):
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        x = x.view(x.size()[0], -1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

    def num_flat_features(self, x):
        # Number of features per sample: product of all dims except the batch dim.
        size = x.size()[1:]
        num_features = 1
        for s in size:
            num_features *= s
        return num_features
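# Shape sanity check for a 1x1x32x32 input:
# 32x32 --conv1(5x5)--> 28x28 --pool(2)--> 14x14 --conv2(5x5)--> 10x10 --pool(2)--> 5x5
# so the flattened vector has 16 * 5 * 5 = 400 features, matching fc1's input size.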
net = Net()
print(net)
input = torch.randn(1, 1, 32, 32)  # dummy batch: (N=1, C=1, H=32, W=32)
output = net(input)
target = torch.randn(10)  # a dummy target
# Add a batch dimension so target matches output's shape (1, 10)
target = target.view(1, -1)
criterion = nn.MSELoss()
loss = criterion(output, target)
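# The loss node records how it was computed; following grad_fn.next_functions
# walks the graph back toward the leaves (a sketch; the exact class names can
# vary slightly across PyTorch versions):
print(loss.grad_fn)                       # e.g. <MseLossBackward0 ...>
print(loss.grad_fn.next_functions[0][0])  # e.g. <AddmmBackward0 ...>, the fc3 linear op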
net.zero_grad()             # clear any previously accumulated gradients
print(net.conv1.bias.grad)  # None: backward() has not run yet
loss.backward()
print(net.conv1.bias.grad)  # now populated with d(loss)/d(bias)
params = list(net.parameters())
for p in params:
    print(p.size())
# A hand-rolled, simplified SGD update
learning_rate = 0.1
for f in net.parameters():
    # in-place subtraction via sub_ (the trailing underscore marks in-place ops)
    f.data.sub_(f.grad.data * learning_rate)
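# The same update without going through .data, using torch.no_grad() (the form
# recommended by current PyTorch docs); left commented out so the parameters
# are not updated twice:
# with torch.no_grad():
#     for f in net.parameters():
#         f -= learning_rate * f.grad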
# Or use an optimizer provided by torch.optim
optimizer = optim.SGD(net.parameters(), lr=0.01)
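# A typical single training step with the optimizer (a sketch; in practice
# this runs once per batch inside the training loop):
optimizer.zero_grad()             # clear gradients; they accumulate otherwise
output = net(input)
loss = criterion(output, target)
loss.backward()                   # backprop through the graph
optimizer.step()                  # apply the SGD update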
# Output of the net.parameters() loop above, in registration order:
# torch.Size([6, 1, 5, 5])    conv1.weight
# torch.Size([6])             conv1.bias
# torch.Size([16, 6, 5, 5])   conv2.weight
# torch.Size([16])            conv2.bias
# torch.Size([120, 400])      fc1.weight   (400 = 16 * 5 * 5)
# torch.Size([120])           fc1.bias
# torch.Size([84, 120])       fc2.weight
# torch.Size([84])            fc2.bias
# torch.Size([10, 84])        fc3.weight
# torch.Size([10])            fc3.bias