1. Basic operations of PyTorch
1. Import the package
import torch
2. Check the version number
torch.__version__
"""
'2.0.1+cpu'
"""
3. Create an uninitialized matrix (torch.empty allocates memory without zeroing it, so the values are arbitrary)
x = torch.empty(3,2)
x
"""
tensor([[7.2868e-44, 8.1275e-44],
[6.7262e-44, 7.5670e-44],
[8.1275e-44, 6.7262e-44]])
"""
4. Randomly create the initialization matrix
4.1 Conforming to normal distribution
x_1 = torch.randn(3,4)
x_1
"""
tensor([[ 0.1605, -0.9290, -0.0501, -0.0723],
[ 0.6792, 0.1977, -0.7773, 0.6927],
[ 0.7576, -1.4204, 0.1976, -2.2545]])
"""
4.2 Conforming to a uniform distribution
x_2 = torch.rand(3,4)
x_2
"""
tensor([[0.5876, 0.5991, 0.9678, 0.8188],
[0.2934, 0.4345, 0.1316, 0.8469],
[0.0042, 0.3754, 0.3141, 0.8362]])
"""
5. Initialize the all-zero matrix
x1 = torch.zeros(5,2,dtype=torch.long)
x1
"""
tensor([[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0]])
"""
6. Initialize the all-one matrix
x2 = torch.ones(3,4)
x2
"""
tensor([[1., 1., 1., 1.],
[1., 1., 1., 1.],
[1., 1., 1., 1.]])
"""
7. Check the matrix size specification
x2.size()
"""
torch.Size([3, 4])
"""
8. Change the matrix dimension
y = torch.randn(3,4)
y
"""
tensor([[-1.3152, 0.2621, -0.7739, 0.1728],
[-1.3887, 1.0964, 0.7797, 2.0587],
[ 0.4726, -0.2367, 0.8845, 0.9405]])
"""
y1 = y.view(12)
y1
"""
tensor([-1.3152, 0.2621, -0.7739, 0.1728, -1.3887, 1.0964, 0.7797, 2.0587, 0.4726, -0.2367, 0.8845, 0.9405])
"""
y2 = y.view(2,6)
y2
"""
tensor([[-1.3152, 0.2621, -0.7739, 0.1728, -1.3887, 1.0964],
[ 0.7797, 2.0587, 0.4726, -0.2367, 0.8845, 0.9405]])
"""
y3 = y.view(6,-1)
y3
"""
tensor([[-1.3152, 0.2621],
[-0.7739, 0.1728],
[-1.3887, 1.0964],
[ 0.7797, 2.0587],
[ 0.4726, -0.2367],
[ 0.8845, 0.9405]])
"""
9. Converting between NumPy arrays and Tensors
9.1 Tensor to NumPy
z1 = torch.ones(2,5)
z1
"""
tensor([[1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1.]])
"""
z2 = z1.numpy()
z2
"""
array([[1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1.]], dtype=float32)
"""
9.2 NumPy to Tensor
import numpy as np
a1 = np.ones([2,4])
a1
"""
array([[1., 1., 1., 1.],
[1., 1., 1., 1.]])
"""
a2 = torch.from_numpy(a1)
a2
"""
tensor([[1., 1., 1., 1.],
[1., 1., 1., 1.]], dtype=torch.float64)
"""
10. Common forms of Tensor
import torch
from torch import tensor
10.1 scalar
As long as it is a single number (a single value), it is a scalar.
x = tensor(22)
x
"""
tensor(22)
"""
x.dim() # 0
2*x # tensor(44)
x.item() # 22
10.2 vector
A vector represents a set of features. A vector is not a single value but a collection of real values, for example [age, height, weight] = [25, 178, 60].
My understanding is: multiple scalars form a vector
y = tensor([25,178,60])
y
"""
tensor([ 25, 178, 60])
"""
y.dim() # 1
y.size() # torch.Size([3])
10.3 matrix
A matrix is a two-dimensional array of values.
For example: there are three students, Zhang San, Li Si, and Wang Ermazi, and each has their own feature vector ([age, height, weight]).
When these vectors are stacked together they become a matrix: [[25,178,60], [22,180,62], [21,177,61]].
My understanding is: multiple vectors form a matrix
m = tensor([[1,2,3], [2,1,3], [3,1,2]])
m
"""
tensor([[1, 2, 3],
[2, 1, 3],
[3, 1, 2]])
"""
m.matmul(m)
"""
tensor([[14, 7, 15],
[13, 8, 15],
[11, 9, 16]])
"""
tensor([1,0,1]).matmul(m)
"""
tensor([4, 3, 5])
"""
tensor([1,2,1]).matmul(m)
"""
tensor([ 8, 5, 11])
"""
m*m
"""
tensor([[1, 4, 9],
[4, 1, 9],
[9, 1, 4]])
"""
10.4 n-dimensional tensor
PyTorch often uses a four-dimensional tensor of shape [N, C, H, W] to process images:
N: the number of images in each batch
C: the number of channels in each image
H: the number of pixels in the vertical dimension of each image (height)
W: the number of pixels in the horizontal dimension of each image (width)
11. Model Zoo
Reuse network architectures and weight parameters trained by others; this can often be done with a single line of code.
For convenience, PyTorch exposes this through its hub module.
See the related GitHub repository and the API documentation on the official PyTorch website.
For example, open any project on the official PyTorch website and copy and paste its snippet to run it. This downloads the relevant weight parameter files, which may require a proxy/VPN in some regions.
2. Autograd: the automatic differentiation mechanism
Case 1: Backpropagation. The function is y = w*x*x + b*x + c, where w=2, x=3, b=5, c=4.
The expected gradients are dy/dw = x*x = 9, dy/dx = 2*w*x + b = 17, dy/db = x = 3, dy/dc = 1.
import torch
w = torch.tensor(2, dtype = torch.float32, requires_grad = True)
x = torch.tensor(3, dtype = torch.float32, requires_grad = True)
b = torch.tensor(5, dtype = torch.float32, requires_grad = True)
c = torch.tensor(4, dtype = torch.float32, requires_grad = True)
w,x,b,c
"""
(tensor(2., requires_grad=True),
tensor(3., requires_grad=True),
tensor(5., requires_grad=True),
tensor(4., requires_grad=True))
"""
y = w * x**2 + b * x + c
y
"""
tensor(37., grad_fn=<AddBackward0>)
"""
y.backward() # backpropagation
w.grad
"""
tensor(9.)
"""
x.grad
"""
tensor(17.)
"""
b.grad
"""
tensor(3.)
"""
c.grad
"""
tensor(1.)
"""
3. Demonstration of the complete steps of the most basic model training
Requirement: supervised learning; train a model that fits y = 2*x + 5
import torch
import numpy as np
1. Label data preparation
① x sample
Ⅰ, 0-9, 10 numbers
Here for simplicity, the x samples are 0-9, 10 numbers, stored in a list
x = [i for i in range(10)]
x # [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Ⅱ. Convert to array format for easy operation
x_arr = np.array(x,dtype=np.float32)
x_arr # array([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.], dtype=float32)
Ⅲ. Convert to a column of data to facilitate subsequent operations
x_train = x_arr.reshape(-1,1)
x_train
"""
array([[0.],
[1.],
[2.],
[3.],
[4.],
[5.],
[6.],
[7.],
[8.],
[9.]], dtype=float32)
"""
x_train.shape # (10, 1)
② y sample
Ⅰ. y=2*x+5
Generate the corresponding result y through the function
y = [2*x+5 for x in range(10)]
y # [5, 7, 9, 11, 13, 15, 17, 19, 21, 23]
Ⅱ. Convert to array format for easy operation
y_arr = np.array(y,dtype=np.float32)
y_arr # array([ 5., 7., 9., 11., 13., 15., 17., 19., 21., 23.], dtype=float32)
Ⅲ. Convert to a column of data to facilitate subsequent operations
y_train = y_arr.reshape(-1,1)
y_train
"""
array([[ 5.],
[ 7.],
[ 9.],
[11.],
[13.],
[15.],
[17.],
[19.],
[21.],
[23.]], dtype=float32)
"""
y_train.shape # (10, 1)
2. Design model
Here we build the model from the simplest possible structure, two stacked linear layers; each training sample is a single value (one input feature and one output).
class Linear_yy(torch.nn.Module):
    """Two stacked fully connected layers with no activation in between.

    Args:
        in_dim: Number of input features.
        media_dim: Number of features produced by the first layer and
            consumed by the second one.
        out_dim: Number of output features.
    """

    def __init__(self, in_dim, media_dim, out_dim):
        super().__init__()
        self.linear_1 = torch.nn.Linear(in_dim, media_dim)
        self.linear_2 = torch.nn.Linear(media_dim, out_dim)

    def forward(self, x):
        """Return ``linear_2(linear_1(x))``."""
        x = self.linear_1(x)
        x = self.linear_2(x)
        return x
in_dim = 1
media_dim = 2
out_dim = 1
model = Linear_yy(in_dim=in_dim,media_dim=media_dim,out_dim=out_dim)
model
"""
Linear_yy(
(linear_1): Linear(in_features=1, out_features=2, bias=True)
(linear_2): Linear(in_features=2, out_features=1, bias=True)
)
"""
3. Specify parameters such as epoch, learning rate, optimizer, loss function, etc.
# NOTE(review): the predictions printed in step 5 (about 0.70-1.27) are still
# far from the labels (5-23), so this learning rate / epoch count looks too
# small for the model to converge -- consider raising one of them.
epochs = 1000 # number of training epochs
learning_rate = 0.0001 # learning rate
optimizer = torch.optim.Adam(model.parameters(),lr=learning_rate) # use Adam as the optimizer
loss_faction = torch.nn.MSELoss() # use mean squared error (MSE) as the loss function
4. Training model
# The training set never changes, so convert it to tensors once up front
# instead of repeating the conversion on every epoch.
inputs = torch.from_numpy(x_train)
labels = torch.from_numpy(y_train)

# Count epochs from 1 so the log reads "epoch 50", "epoch 100", ...
for epoch in range(1, epochs + 1):
    # Gradients accumulate across backward() calls, so clear them each iteration.
    optimizer.zero_grad()
    # Forward pass.
    outputs = model(inputs)
    # Compute the loss.
    loss = loss_faction(outputs, labels)
    # Backward pass.
    loss.backward()
    # Update the weight parameters.
    optimizer.step()
    if epoch % 50 == 0:  # print the loss every 50 epochs
        print('epoch {}, loss {}'.format(epoch, loss.item()))
5. Model prediction
# Inference needs no gradients: run the forward pass under no_grad() rather
# than marking the input as requiring grad and then stripping the autograd
# graph with the legacy `.data` attribute.
with torch.no_grad():
    predicted = model(torch.from_numpy(x_train)).numpy()
predicted
"""
array([[0.6956282 ],
[0.75930536],
[0.82298255],
[0.88665974],
[0.9503369 ],
[1.014014 ],
[1.0776913 ],
[1.1413685 ],
[1.2050457 ],
[1.2687228 ]], dtype=float32)
"""
6. Save the model weight
torch.save(model.state_dict(), 'model.pth')
7. Model weight loading
Loading model weights is typically used to resume training after an interruption: the model weights must have been saved first, and are then loaded so that training can continue from the last saved parameters.
# weights_only=True restricts unpickling to tensors and plain containers, so a
# tampered checkpoint file cannot execute arbitrary code while being loaded.
model.load_state_dict(torch.load('model.pth', weights_only=True))
8. Complete code (CPU)
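Note that this is only the complete code for training the model; it assumes x_train and y_train from step 1 (label data preparation) are already defined. For the final prediction and for saving and loading the model weights, refer to steps 5, 6, and 7.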
import torch
import torch.nn as nn
import numpy as np
class Linear_yy(torch.nn.Module):
    """Two stacked fully connected layers with no activation in between.

    Args:
        in_dim: Number of input features.
        media_dim: Number of features produced by the first layer and
            consumed by the second one.
        out_dim: Number of output features.
    """

    def __init__(self, in_dim, media_dim, out_dim):
        super().__init__()
        self.linear_1 = torch.nn.Linear(in_dim, media_dim)
        self.linear_2 = torch.nn.Linear(media_dim, out_dim)

    def forward(self, x):
        """Return ``linear_2(linear_1(x))``."""
        x = self.linear_1(x)
        x = self.linear_2(x)
        return x
# Build the model: 1 input feature -> 2 intermediate features -> 1 output.
in_dim = 1
media_dim = 2
out_dim = 1
model = Linear_yy(in_dim=in_dim, media_dim=media_dim, out_dim=out_dim)

# Training configuration.
# NOTE(review): the predictions shown earlier in this file for these settings
# are still far from the labels -- consider a larger learning rate or more epochs.
epochs = 1000
learning_rate = 0.0001
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
loss_faction = torch.nn.MSELoss()

# x_train / y_train are the NumPy arrays prepared in the data-preparation step.
# They never change, so convert them to tensors once instead of every epoch.
inputs = torch.from_numpy(x_train)
labels = torch.from_numpy(y_train)

# Count epochs from 1 so the log reads "epoch 50", "epoch 100", ...
for epoch in range(1, epochs + 1):
    # Gradients accumulate across backward() calls, so clear them each iteration.
    optimizer.zero_grad()
    # Forward pass.
    outputs = model(inputs)
    # Compute the loss.
    loss = loss_faction(outputs, labels)
    # Backward pass.
    loss.backward()
    # Update the weight parameters.
    optimizer.step()
    if epoch % 50 == 0:  # print the loss every 50 epochs
        print('epoch {}, loss {}'.format(epoch, loss.item()))
9. Complete code (GPU)
To train on the GPU, you only need to move the training data and the model onto the GPU.
Select the device: use the GPU if one is available, otherwise fall back to the CPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
Move the model onto the selected device
model.to(device)
Move the data onto the selected device
inputs = torch.from_numpy(x_train).to(device)
labels = torch.from_numpy(y_train).to(device)
import torch
import torch.nn as nn
import numpy as np
class Linear_yy(torch.nn.Module):
    """Two stacked fully connected layers with no activation in between.

    Args:
        in_dim: Number of input features.
        media_dim: Number of features produced by the first layer and
            consumed by the second one.
        out_dim: Number of output features.
    """

    def __init__(self, in_dim, media_dim, out_dim):
        super().__init__()
        self.linear_1 = torch.nn.Linear(in_dim, media_dim)
        self.linear_2 = torch.nn.Linear(media_dim, out_dim)

    def forward(self, x):
        """Return ``linear_2(linear_1(x))``."""
        x = self.linear_1(x)
        x = self.linear_2(x)
        return x
# Build the model: 1 input feature -> 2 intermediate features -> 1 output.
in_dim = 1
media_dim = 2
out_dim = 1
model = Linear_yy(in_dim=in_dim, media_dim=media_dim, out_dim=out_dim)

# Use the first CUDA device when available, otherwise stay on the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model.to(device)

# Training configuration.
# NOTE(review): the predictions shown earlier in this file for these settings
# are still far from the labels -- consider a larger learning rate or more epochs.
epochs = 1000
learning_rate = 0.0001
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
loss_faction = torch.nn.MSELoss()

# x_train / y_train are the NumPy arrays prepared in the data-preparation step.
# They never change, so convert and copy them to the device once rather than
# repeating the host-to-device transfer on every epoch.
inputs = torch.from_numpy(x_train).to(device)
labels = torch.from_numpy(y_train).to(device)

# Count epochs from 1 so the log reads "epoch 50", "epoch 100", ...
for epoch in range(1, epochs + 1):
    # Gradients accumulate across backward() calls, so clear them each iteration.
    optimizer.zero_grad()
    # Forward pass.
    outputs = model(inputs)
    # Compute the loss.
    loss = loss_faction(outputs, labels)
    # Backward pass.
    loss.backward()
    # Update the weight parameters.
    optimizer.step()
    if epoch % 50 == 0:  # print the loss every 50 epochs
        print('epoch {}, loss {}'.format(epoch, loss.item()))