import torch
from torch import nn
# Build an MLP; the Sequential indexes as net[0] (Linear), net[1] (ReLU), net[2] (Linear).
layers = [nn.Linear(4, 8), nn.ReLU(), nn.Linear(8, 1)]
net = nn.Sequential(*layers)
X = torch.rand(size=(2, 4))
# Ausgabe (output):
print(X)
# tensor([[0.2999, 0.0203, 0.3420, 0.7971],[0.8667, 0.5062, 0.8066, 0.2629]])
print(net(X))
# tensor([[0.2376],[0.3675]], grad_fn=<AddmmBackward0>)
# Print the weights inside a single layer.
# net is an nn.Sequential; a Sequential can loosely be treated as a Python list of layers.
print(net[2].state_dict()) # state_dict holds the weight and bias of the final Linear layer
# OrderedDict([('weight', tensor([[-0.2090, 0.2708, 0.3308, 0.2240, 0.3504, -0.2484, -0.2670, 0.2866]])), ('bias', tensor([0.1088]))])
# Inspect the bias of the final Linear layer.
print(type(net[2].bias))         # torch.nn.parameter.Parameter
print(net[2].bias)               # the Parameter (tracks gradients)
print(net[2].bias.data)          # the raw tensor values
# No backward pass has run yet, so the gradient is still unset.
# Use `is None` (identity), not `== None`: if .grad were a tensor,
# `== None` would attempt an elementwise comparison instead.
print(net[2].weight.grad is None)
# <class 'torch.nn.parameter.Parameter'>
# Parameter containing:
# tensor([-0.3492], requires_grad=True)
# tensor([-0.3492])
# True
# Access all parameters at once via named_parameters().
print(*[(name,param.shape) for name,param in net[0].named_parameters()])  # one layer only
print(*[(name,param.shape) for name,param in net.named_parameters()])  # whole net; names are prefixed with the layer index, e.g. '0.weight'
# ('weight', torch.Size([8, 4])) ('bias', torch.Size([8]))
# ('0.weight', torch.Size([8, 4])) ('0.bias', torch.Size([8])) ('2.weight', torch.Size([1, 8])) ('2.bias', torch.Size([1]))
def block1():
    """Build one reusable sub-block: Linear(4->8), ReLU, Linear(8->4), ReLU."""
    layers = [nn.Linear(4, 8), nn.ReLU(), nn.Linear(8, 4), nn.ReLU()]
    return nn.Sequential(*layers)
def block2():
    """Nest four copies of block1 inside a single Sequential.

    Fix: the module name was f'block{i},' with a stray trailing comma,
    which produced names like 'block0,'; use clean names 'block0'..'block3'.
    """
    net = nn.Sequential()
    for i in range(4):
        net.add_module(f'block{i}', block1())
    return net
# Compose the nested blocks with a final scalar head, run a forward
# pass, and print the module tree.
body = block2()
head = nn.Linear(4, 1)
rgnet = nn.Sequential(body, head)
rgnet(X)
print(rgnet)
# Ausgabe (output):
# Sequential(
# (0): Sequential(
# (block0,): Sequential(
# (0): Linear(in_features=4, out_features=8, bias=True)
# (1): ReLU()
# (2): Linear(in_features=8, out_features=4, bias=True)
# (3): ReLU()
# )
# (block1,): Sequential(
# (0): Linear(in_features=4, out_features=8, bias=True)
# (1): ReLU()
# (2): Linear(in_features=8, out_features=4, bias=True)
# (3): ReLU()
# )
# (block2,): Sequential(
# (0): Linear(in_features=4, out_features=8, bias=True)
# (1): ReLU()
# (2): Linear(in_features=8, out_features=4, bias=True)
# (3): ReLU()
# )
# (block3,): Sequential(
# (0): Linear(in_features=4, out_features=8, bias=True)
# (1): ReLU()
# (2): Linear(in_features=8, out_features=4, bias=True)
# (3): ReLU()
# )
# )
# (1): Linear(in_features=4, out_features=1, bias=True)
# )
# Built-in initialization: Gaussian weights, zero biases.
def init_normal(m):
    """Initialize a Linear layer with N(0, 0.01) weights and zero bias.

    Intended to be passed to net.apply(), which visits every submodule.
    Uses isinstance() rather than `type(m) ==` so subclasses of
    nn.Linear are initialized as well; non-Linear modules are ignored.
    """
    if isinstance(m, nn.Linear):
        nn.init.normal_(m.weight, mean=0, std=0.01)
        nn.init.zeros_(m.bias)
net.apply(init_normal)  # apply() visits every submodule recursively
print(net[0].weight.data[0],net[0].bias.data[0])  # first weight row and first bias
# Ausgabe (output):
# tensor([ 0.0069, 0.0174, 0.0089, -0.0098]) tensor(0.)
def init_constant(m):
    """Set a Linear layer's weights to 1 and bias to 0 (demonstration only).

    Constant-initializing all weights is a bad idea in practice: every
    unit then computes the same function and gradients stay symmetric.
    """
    if isinstance(m, nn.Linear):
        nn.init.constant_(m.weight, 1)
        nn.init.zeros_(m.bias)
net.apply(init_constant)
# A bare tuple expression does nothing in a script (notebook leftover);
# print it explicitly so the values are actually shown, matching the
# init_normal example above.
print(net[0].weight.data[0], net[0].bias.data[0])
# Apply different initializers to different layers.
def xavier(m):
    """Xavier/Glorot uniform initialization for a Linear layer's weights."""
    if isinstance(m, nn.Linear):
        nn.init.xavier_uniform_(m.weight)
def init_42(m):
    """Fill a Linear layer's weights with the constant 42."""
    if isinstance(m, nn.Linear):
        nn.init.constant_(m.weight, 42)
net[0].apply(xavier)  # Xavier init on the first layer only
net[2].apply(init_42)  # constant 42 on the last layer only
print(net[0].weight.data[0])
print(net[2].weight.data)
# Custom initialization:
def my_init(m):
    """Custom init: U(-10, 10) weights, then zero out entries with |w| < 5.

    The surviving entries therefore have magnitude in [5, 10]; the rest
    of the weight matrix is exactly zero.

    Fix: the original called nn.init.uniform_(m.weight, -10, -10) — a
    degenerate range that set every weight to exactly -10.
    """
    if isinstance(m, nn.Linear):
        # [0] selects the first (name, shape) pair, i.e. the weight entry.
        print(
            "init",
            *[(name, param.shape) for name, param in m.named_parameters()][0]
        )
        nn.init.uniform_(m.weight, -10, 10)
        # Multiply by a boolean mask: entries with |w| < 5 become 0.
        m.weight.data *= m.weight.data.abs() >= 5
net.apply(my_init)
net[0].weight[:2]  # bare expression: displays nothing outside a REPL
net[0].weight.data[:]+=1  # shift every weight by 1 (mutates in place)
net[0].weight.data[0,0]=42  # overwrite a single entry directly
net[0].weight.data[0]  # bare expression: displays nothing outside a REPL
# Shared layers / parameter tying: the SAME Linear object is inserted
# twice, so positions 2 and 4 refer to one set of parameters.
shared = nn.Linear(8, 8)
net = nn.Sequential(
    nn.Linear(4, 8), nn.ReLU(),
    shared, nn.ReLU(),
    shared, nn.ReLU(),
    nn.Linear(8, 1),
)
net(X)
print(net[2].weight.data[0] == net[4].weight.data[0])
# Mutating one "copy" mutates the other — same underlying tensor.
net[2].weight.data[0, 0] = 100
print(net[2].weight.data[0] == net[4].weight.data[0])
# D:\anaconda3\envs\pythonProject6\python.exe C:/Users/Dell/PycharmProjects/pythonProject6/Parameters.py
# tensor([[0.3637, 0.2694, 0.3710, 0.3239],
# [0.6063, 0.0185, 0.6333, 0.1290]])
# tensor([[0.2764],
# [0.2935]], grad_fn=<AddmmBackward0>)
# OrderedDict([('weight', tensor([[-0.1440, 0.2390, 0.3281, 0.2522, 0.2373, 0.3361, -0.1059, 0.2527]])), ('bias', tensor([0.1953]))])
# <class 'torch.nn.parameter.Parameter'>
# Parameter containing:
# tensor([0.1953], requires_grad=True)
# tensor([0.1953])
# True
# ('weight', torch.Size([8, 4])) ('bias', torch.Size([8]))
# ('0.weight', torch.Size([8, 4])) ('0.bias', torch.Size([8])) ('2.weight', torch.Size([1, 8])) ('2.bias', torch.Size([1]))
# Sequential(
# (0): Sequential(
# (block0,): Sequential(
# (0): Linear(in_features=4, out_features=8, bias=True)
# (1): ReLU()
# (2): Linear(in_features=8, out_features=4, bias=True)
# (3): ReLU()
# )
# (block1,): Sequential(
# (0): Linear(in_features=4, out_features=8, bias=True)
# (1): ReLU()
# (2): Linear(in_features=8, out_features=4, bias=True)
# (3): ReLU()
# )
# (block2,): Sequential(
# (0): Linear(in_features=4, out_features=8, bias=True)
# (1): ReLU()
# (2): Linear(in_features=8, out_features=4, bias=True)
# (3): ReLU()
# )
# (block3,): Sequential(
# (0): Linear(in_features=4, out_features=8, bias=True)
# (1): ReLU()
# (2): Linear(in_features=8, out_features=4, bias=True)
# (3): ReLU()
# )
# )
# (1): Linear(in_features=4, out_features=1, bias=True)
# )
# tensor([ 0.0046, 0.0317, -0.0041, 0.0032]) tensor(0.)
# tensor([-0.5886, -0.4940, 0.6852, 0.1372])
# tensor([[42., 42., 42., 42., 42., 42., 42., 42.]])
# init weight torch.Size([8, 4])
# init weight torch.Size([1, 8])
# tensor([True, True, True, True, True, True, True, True])
# tensor([True, True, True, True, True, True, True, True])
# 进程已结束,退出代码0 (process finished with exit code 0)