使用Sequential方式进行建模
import numpy as np
import torch
from torch import nn
# 定义一个 Sequential 模型
# Define a Sequential model: 30 -> 40 -> 50 -> 10 with ReLU activations.
net1 = nn.Sequential(
    nn.Linear(30, 40),
    nn.ReLU(),
    nn.Linear(40, 50),
    nn.ReLU(),
    nn.Linear(50, 10)
)

# Re-initialize every Linear layer's weights from a normal distribution
# with mean 0 and standard deviation 0.5 (note: np.random.normal's second
# argument is the std-dev, not the variance).
for layer in net1:
    print(layer)
    if isinstance(layer, nn.Linear):  # only Linear layers carry weights here
        param_shape = layer.weight.shape
        # Cast to float32 before from_numpy: np.random.normal returns
        # float64 and torch.from_numpy preserves dtype, which would
        # silently switch this layer's parameters to double precision
        # and break float32 forward passes later.
        layer.weight.data = torch.from_numpy(
            np.random.normal(0, 0.5, size=param_shape).astype(np.float32)
        )
输出结果如下:
Linear(in_features=30, out_features=40, bias=True)
ReLU()
Linear(in_features=40, out_features=50, bias=True)
ReLU()
Linear(in_features=50, out_features=10, bias=True)
使用Module方式进行建模
对于 Module 的参数初始化,其实也非常简单,如果想对其中的某层进行初始化,可以直接像 Sequential 一样对其 Tensor 进行重新定义,其唯一不同的地方在于,如果要用循环的方式访问,需要介绍两个属性,children 和 modules,下面我们举例来说明
class sim_net(nn.Module):
    """Three-stage MLP (30 -> 40 -> 50 -> 10); each stage is Linear + ReLU."""

    def __init__(self):
        super().__init__()
        self.l1 = nn.Sequential(nn.Linear(30, 40), nn.ReLU())
        # Directly overwrite one specific layer's weights with a
        # standard-normal draw, as an example of per-layer initialization.
        self.l1[0].weight.data = torch.randn(40, 30)
        self.l2 = nn.Sequential(nn.Linear(40, 50), nn.ReLU())
        self.l3 = nn.Sequential(nn.Linear(50, 10), nn.ReLU())

    def forward(self, x):
        # Apply the three stages in order.
        for stage in (self.l1, self.l2, self.l3):
            x = stage(x)
        return x
# Instantiate the model before traversing it (the original listing used
# net2 below without ever creating it).
net2 = sim_net()

# modules() yields the module itself plus every submodule, recursively,
# so the nested Linear layers are reached directly.
i = 0
for layer in net2.modules():
    print(i)
    i = i + 1
    if isinstance(layer, nn.Linear):
        param_shape = layer.weight.shape
        # Cast to float32 before from_numpy: np.random.normal returns
        # float64 and torch.from_numpy preserves that dtype, which would
        # silently convert the parameters to double precision.
        layer.weight.data = torch.from_numpy(
            np.random.normal(0, 0.5, size=param_shape).astype(np.float32)
        )
# children() yields only the direct children (here the three Sequential
# stages), not their inner layers, so index into each stage to reach the
# Linear at position 0.
i = 0
for layer in net2.children():
    print(i)
    i = i + 1
    if isinstance(layer[0], nn.Linear):
        param_shape = layer[0].weight.shape
        # Cast to float32 before from_numpy to keep the parameter dtype:
        # np.random.normal returns float64 and torch.from_numpy preserves it.
        layer[0].weight.data = torch.from_numpy(
            np.random.normal(0, 0.5, size=param_shape).astype(np.float32)
        )
遍历modules属性的时候,输出结果如下:
0
1
2
3
4
5
6
7
8
9
遍历children属性的时候,输出结果如下:
0
1
2
比较上述两种输出结果,不难发现,使用modules方式进行遍历的时候,我们是从顶层到底层对模型进行遍历,而采用children方式则只能对模型的直接子模块(这里是三个Sequential)进行遍历。下面我们就具体来看看两种遍历方式的不同:
# 访问 modules
for i in net2.modules():
print(i)
sim_net(
(l1): Sequential(
(0): Linear(in_features=30, out_features=40)
(1): ReLU()
)
(l2): Sequential(
(0): Linear(in_features=40, out_features=50)
(1): ReLU()
)
(l3): Sequential(
(0): Linear(in_features=50, out_features=10)
(1): ReLU()
)
)
Sequential(
(0): Linear(in_features=30, out_features=40)
(1): ReLU()
)
Linear(in_features=30, out_features=40)
ReLU()
Sequential(
(0): Linear(in_features=40, out_features=50)
(1): ReLU()
)
Linear(in_features=40, out_features=50)
ReLU()
Sequential(
(0): Linear(in_features=50, out_features=10)
(1): ReLU()
)
Linear(in_features=50, out_features=10)
ReLU()
# 访问 children
for i in net2.children():
print(i)
Sequential(
(0): Linear(in_features=30, out_features=40, bias=True)
(1): ReLU()
)
Sequential(
(0): Linear(in_features=40, out_features=50, bias=True)
(1): ReLU()
)
Sequential(
(0): Linear(in_features=50, out_features=10, bias=True)
(1): ReLU()
)