PyTorch ~ Neural Network Toolbox

Here is a comprehensive introduction to PyTorch's neural network toolbox, covering core components, a neural network example, construction methods, optimizer comparisons, and more.

1 Neural Network Core Components

Core components include:

  1. Layer: the basic building block of a neural network, transforming an input tensor into an output tensor

  2. Model: a network composed of layers

  3. Loss function: the objective function of parameter learning; parameters are learned by minimizing it

  4. Optimizer: the algorithm that minimizes the loss function

Multiple layers are linked together to form a model (network). Input data passes through the model to produce a predicted value; the prediction is compared with the ground truth to compute the loss; the optimizer then uses the loss to update the weight parameters, driving the loss down. This cycle repeats until the loss falls below a threshold or the specified number of epochs is reached.
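To make this cycle concrete, here is a minimal sketch of the four components working together (the toy model, random data, and hyperparameters are illustrative placeholders, not part of the MNIST example below):

import torch
from torch import nn, optim

# Minimal sketch of the layer/model/loss/optimizer cycle described above
model = nn.Sequential(nn.Linear(4, 8), nn.ReLU(), nn.Linear(8, 1))  # layers -> model
criterion = nn.MSELoss()                                            # loss function
optimizer = optim.SGD(model.parameters(), lr=0.01)                  # optimizer

x, y = torch.randn(16, 4), torch.randn(16, 1)  # a fake batch of data
for step in range(100):                        # the training cycle
    pred = model(x)                            # forward pass: predicted value
    loss = criterion(pred, y)                  # compare prediction with ground truth
    optimizer.zero_grad()                      # clear accumulated gradients
    loss.backward()                            # backpropagate
    optimizer.step()                           # update weights to reduce the loss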

2 Neural Network Example

If you are a beginner, it is recommended to skip ahead to Section 3 directly to avoid errors in the running results.

Neural Network Tools and Interrelationships

Background Note

How can a neural network be used to recognize handwritten digits? The steps are:

  1. Download the data using PyTorch's built-in mnist dataset

  2. Preprocess the data with torchvision and create a data iterator with torch.utils

  3. Visualize the source data

  4. Build the neural network model with the nn toolbox

  5. Instantiate the model, and define the loss function and optimizer

  6. Train the model

  7. Visualize the results

The network uses 2 hidden layers, each with a ReLU activation. Finally, torch.max(out, 1) returns the maximum value of the tensor out along dimension 1 together with its index, and the index is used as the predicted class.
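As a quick illustration of how torch.max(out, 1) produces predictions (the logit values below are made up):

import torch

# Hypothetical batch of logits for 3 samples and 4 classes
out = torch.tensor([[0.1, 2.0, 0.3, 0.5],
                    [1.5, 0.2, 0.1, 0.9],
                    [0.0, 0.1, 3.2, 0.4]])
values, pred = torch.max(out, 1)  # max over dim 1: per-row maximum and its index
print(pred)  # tensor([1, 0, 2]) -> the predicted class labels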

Prepare data

## (1) Import the necessary modules
import numpy as np
import torch
# Import the built-in MNIST data
from torchvision.datasets import mnist
# Import the preprocessing module
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
# Import nn and the optimizers
import torch.nn.functional as F
import torch.optim as optim
from torch import nn
## (2) Define some hyperparameters
train_batch_size = 64
test_batch_size = 128
num_epoches = 20
lr = 0.01
momentum = 0.5
## (3) Download the data and preprocess it
# Define the preprocessing steps; they are applied in order by Compose
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize([0.5], [0.5])])
# Download the data and apply the preprocessing
train_dataset = mnist.MNIST('./data', train=True, transform=transform, download=True)
test_dataset = mnist.MNIST('./data', train=False, transform=transform)
# A DataLoader is an iterable object and can be used like an iterator
train_loader = DataLoader(train_dataset, batch_size=train_batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=test_batch_size, shuffle=False)

Visualize data

import matplotlib.pyplot as plt
%matplotlib inline
examples = enumerate(test_loader)
batch_idx, (example_data, example_targets) = next(examples)
fig = plt.figure()
for i in range(6):
  plt.subplot(2,3,i+1)
  plt.tight_layout()
  plt.imshow(example_data[i][0], cmap='gray', interpolation='none')
  plt.title("Ground Truth: {}".format(example_targets[i]))
  plt.xticks([])
  plt.yticks([])


Build model

## (1) Build the network
class Net(nn.Module):
    """
    Build the network with nn.Sequential, which chains layers together
    """
    def __init__(self, in_dim, n_hidden_1, n_hidden_2, out_dim):
        super(Net, self).__init__()
        self.layer1 = nn.Sequential(nn.Linear(in_dim, n_hidden_1), nn.BatchNorm1d(n_hidden_1))
        self.layer2 = nn.Sequential(nn.Linear(n_hidden_1, n_hidden_2), nn.BatchNorm1d(n_hidden_2))
        self.layer3 = nn.Sequential(nn.Linear(n_hidden_2, out_dim))

    def forward(self, x):
        x = F.relu(self.layer1(x))
        x = F.relu(self.layer2(x))
        x = self.layer3(x)
        return x

## (2) Instantiate the network
# Use the GPU if one is available, otherwise the CPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Instantiate the network
model = Net(28*28, 300, 100, 10)
model.to(device)
# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)

Train the model

## Train the model
# Start training
losses = []
acces = []
eval_losses = []
eval_acces = []
print("Starting the training loop, please be patient.....")
for epoch in range(num_epoches):
    train_loss = 0
    train_acc = 0
    model.train()
    # Dynamically adjust the learning rate
    if epoch % 5 == 0:
        optimizer.param_groups[0]['lr'] *= 0.1
    for img, label in train_loader:
        img = img.to(device)
        label = label.to(device)
        img = img.view(img.size(0), -1)
        # Forward propagation
        out = model(img)
        loss = criterion(out, label)
        # Backward propagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # Record the loss
        train_loss += loss.item()
        # Compute the classification accuracy
        _, pred = out.max(1)
        num_correct = (pred == label).sum().item()
        acc = num_correct / img.shape[0]
        train_acc += acc

    print("Training loop for this epoch finished, please keep waiting....")

    losses.append(train_loss / len(train_loader))
    acces.append(train_acc / len(train_loader))
    # Evaluate on the test set
    eval_loss = 0
    eval_acc = 0
    # Switch the model to evaluation mode
    model.eval()
    for img, label in test_loader:
        img = img.to(device)
        label = label.to(device)
        img = img.view(img.size(0), -1)
        out = model(img)
        loss = criterion(out, label)
        # Record the loss
        eval_loss += loss.item()
        # Record the accuracy
        _, pred = out.max(1)
        num_correct = (pred == label).sum().item()
        acc = num_correct / img.shape[0]
        eval_acc += acc

    print("Evaluation loop for this epoch finished")
    eval_losses.append(eval_loss / len(test_loader))
    eval_acces.append(eval_acc / len(test_loader))
    print('epoch: {}, Train Loss: {:.4f}, Train Acc: {:.4f}, Test Loss: {:.4f}, Test Acc: {:.4f}'.format(epoch, train_loss / len(train_loader), train_acc / len(train_loader), eval_loss / len(test_loader), eval_acc / len(test_loader)))

Training on the training data and validating on the test data


## Visualize the training results
plt.title('train loss')
plt.plot(np.arange(len(losses)), losses)
plt.legend(['Train Loss'], loc='upper right')

3 Fully Connected Neural Network for MNIST Recognition

Data

import numpy as np
import torch
from torchvision.datasets import mnist
from torch import nn
from torch.autograd import Variable

def data_tf(x):
    x = np.array(x, dtype="float32") / 255
    x = (x - 0.5) / 0.5
    x = x.reshape((-1))   # flatten into one row of m columns
    x = torch.from_numpy(x)
    return x

# Download the dataset (skip the download if it is already present)
train_set = mnist.MNIST("./data", train=True, transform=data_tf, download=False)
test_set = mnist.MNIST("./data", train=False, transform=data_tf, download=False)
a, a_label = train_set[0]
print(a.shape)
print(a_label)

Visualize data

import matplotlib.pyplot as plt
for i in range(1, 37):
    plt.subplot(6, 6, i)
    plt.xticks([])  # hide the axis ticks
    plt.yticks([])
    plt.imshow(train_set.data[i].numpy(), cmap="gray")
    plt.title("%i" % train_set.targets[i])
plt.subplots_adjust(wspace=0, hspace=1)  # adjust the spacing
plt.show()

Load the data with DataLoader

from torch.utils.data import DataLoader
train_data = DataLoader(train_set, batch_size=64, shuffle= True)
test_data = DataLoader(test_set, batch_size=128, shuffle=False)
a, a_label = next(iter(train_data))
print(a.shape)
print(a_label.shape)

Define a neural network

The neural network has four fully connected layers

net = nn.Sequential(
    nn.Linear(784, 400),
    nn.ReLU(),
    nn.Linear(400, 200),
    nn.ReLU(),
    nn.Linear(200, 100),
    nn.ReLU(),
    nn.Linear(100, 10),
    nn.ReLU()
)

Use CUDA

if torch.cuda.is_available():
    net = net.cuda()

Define the loss function and optimization algorithm

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(net.parameters(), 1e-1)

Train

losses = []
acces  = []
eval_losses = []
eval_acces  = []
# Train for 20 epochs in total
for e in range(20):
    train_loss = 0
    train_acc = 0
    net.train()
    for im, label in train_data:
        if torch.cuda.is_available():
            im = Variable(im).cuda()
            label = Variable(label).cuda()
        else:
            im = Variable(im)
            label = Variable(label)

        # Forward propagation
        out = net(im)
        loss = criterion(out, label)

        # Backward propagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Record the loss
        train_loss += loss.item()

        # Compute the classification accuracy
        # max with argument 1 takes the maximum along each row;
        # the first return value is the value, the second is the index
        # pred is a vector of length batch_size (64 here)
        _, pred = out.max(1)
        num_correct = (pred == label).sum().item()
        acc = num_correct / im.shape[0]
        train_acc += acc

    # One training epoch is finished at this point
    losses.append(train_loss / len(train_data))
    acces.append(train_acc / len(train_data))

    # Evaluate on the test set
    eval_loss = 0
    eval_acc = 0
    net.eval()
    for im, label in test_data:
        if torch.cuda.is_available():
            im = Variable(im).cuda()
            label = Variable(label).cuda()
        else:
            im = Variable(im)
            label = Variable(label)

        # Forward propagation
        out = net(im)
        # Compute the loss
        loss = criterion(out, label)
        eval_loss += loss.item()

        # Compute the accuracy
        _, pred = out.max(1)
        num_correct = (pred == label).sum().item()
        acc = num_correct / im.shape[0]
        eval_acc += acc

    eval_losses.append(eval_loss / len(test_data))
    eval_acces.append(eval_acc / len(test_data))

    print('epoch: {}, Train Loss: {:.6f}, Train Acc: {:.6f}, Eval Loss: {:.6f}, Eval Acc: {:.6f}'.format(e, train_loss / len(train_data), train_acc / len(train_data), eval_loss / len(test_data), eval_acc / len(test_data)))

Display the results

%matplotlib inline
plt.subplot(2, 2, 1)
plt.title("train loss")
plt.plot(np.arange(len(losses)), losses)
plt.grid()
plt.subplot(2, 2, 2)
plt.title("train acc")
plt.plot(np.arange(len(acces)), acces)
plt.grid()
plt.subplot(2, 2, 3)
plt.title("test loss")
plt.plot(np.arange(len(eval_losses)), eval_losses)
plt.grid()
plt.subplot(2, 2, 4)
plt.title("test acc")
plt.plot(np.arange(len(eval_acces)), eval_acces)
plt.grid()
plt.subplots_adjust(wspace =0.5, hspace =0.5)

for i in range(1, 5):
    im = test_set.data[i]
    label = test_set.targets[i]
    plt.subplot(2, 2, i)
    plt.imshow(im.numpy(), cmap="gray")
    plt.xticks([])
    plt.yticks([])
    im = data_tf(im)
    if torch.cuda.is_available():
        im = Variable(im).cuda()
    else:
        im = Variable(im)
    out = net(im)
    _, pred = out.max(0)
    plt.title("outcome=%i" % pred.item())
plt.show()

4 How to Build a Neural Network?

Building a neural network mainly involves selecting the network layers, assembling the network, and choosing a loss function and an optimizer.

The layers in the nn toolbox can be used directly: fully connected layers, convolutional layers, recurrent layers, regularization layers, and activation layers.
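For instance, here is a small sketch instantiating one representative layer of each kind (the layer sizes are arbitrary examples):

import torch
from torch import nn

fc   = nn.Linear(128, 64)        # fully connected layer
conv = nn.Conv2d(3, 16, 3)       # convolutional layer
rnn  = nn.LSTM(32, 64)           # recurrent layer
norm = nn.BatchNorm1d(64)        # normalization layer
drop = nn.Dropout(0.5)           # regularization layer
act  = nn.ReLU()                 # activation layer

x = torch.randn(8, 128)          # a batch of 8 arbitrary feature vectors
print(act(norm(fc(x))).shape)    # torch.Size([8, 64])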

Build the network layer

torch.nn.Sequential() builds the network layer by layer

By default each layer is identified only by its index number, which makes layers hard to tell apart;

If you want to give each layer a name:

  • You can add each layer with add_module() on top of Sequential, giving each layer its own name (see the sketch after this list)

  • Add the layers in dictionary form, setting an individual name for each layer
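A minimal sketch of the add_module() approach (the layer names and sizes are arbitrary):

import torch
from torch import nn

# Build an empty Sequential and name each layer explicitly via add_module()
model = nn.Sequential()
model.add_module("dense1", nn.Linear(784, 128))
model.add_module("relu1", nn.ReLU())
model.add_module("dense2", nn.Linear(128, 10))

print(model.dense1)  # layers can now be accessed by name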

Example code for building a network in dictionary mode:

from collections import OrderedDict
import torch

class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv = torch.nn.Sequential(
            OrderedDict(
                [
                    ("conv1", torch.nn.Conv2d(3, 32, 3, 1, 1)),
                    ("relu1", torch.nn.ReLU()),
                    ("pool", torch.nn.MaxPool2d(2))
                ]
            ))

        self.dense = torch.nn.Sequential(
            OrderedDict([
                ("dense1", torch.nn.Linear(32 * 3 * 3, 128)),
                ("relu2", torch.nn.ReLU()),
                ("dense2", torch.nn.Linear(128, 10))
            ])
        )

    def forward(self, x):
        x = self.conv(x)
        x = x.view(x.size(0), -1)
        x = self.dense(x)
        return x

Forward and backward propagation

The forward function links the input layer, the hidden layers, and the output layer, realizing the forward flow of information

Backpropagation is performed by calling backward() on the loss

Training the model

Calling model.train() sets all modules to training mode;

In the test/validation phase, calling model.eval() sets the training attribute of all modules to False
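For example, a minimal sketch using Dropout, a layer that behaves differently in the two modes:

import torch
from torch import nn

net = nn.Sequential(nn.Linear(10, 10), nn.Dropout(0.5))

net.train()             # training mode: dropout randomly zeroes activations
print(net[1].training)  # True

net.eval()              # evaluation mode: dropout becomes a no-op
print(net[1].training)  # False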

5 Neural Network Toolbox nn

The nn toolbox has two important modules: nn.Module and nn.functional

5.1 nn.Module

Inherit from nn.Module to create your own network layers

In class Net(torch.nn.Module), the layers used are themselves subclasses of nn.Module

Naming convention: nn.Xxx (first letter uppercase): nn.Linear, nn.Conv2d, nn.CrossEntropyLoss

5.2 nn.functional

Naming convention: nn.functional.xxx (lowercase)

Its functionality is similar to nn.Xxx, but there are concrete differences between the two:

(1) nn.Xxx inherits from nn.Module. An nn.Xxx layer must be instantiated first, passing in its parameters; the instantiated object is then called like a function on the input data. nn.Xxx works well with nn.Sequential, while nn.functional.xxx does not

(2) With nn.Xxx you do not need to define and manage the weight and bias parameters yourself. With nn.functional.xxx you must define the weight and bias parameters yourself and pass them in manually on every call, which is not conducive to code reuse

(3) Dropout behaves differently in the training and testing stages. nn.Xxx switches state automatically after model.eval() is called; nn.functional.xxx has no such mechanism

Rule of thumb: use nn.Xxx for operations with learnable parameters; for operations without learnable parameters, either nn.functional.xxx or nn.Xxx works
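A small sketch contrasting the two styles (the tensor shapes are arbitrary):

import torch
from torch import nn
import torch.nn.functional as F

x = torch.randn(4, 8)

# nn.Xxx style: instantiate first; parameters are created and managed for you
linear = nn.Linear(8, 3)
y1 = linear(x)

# nn.functional style: you own the parameters and pass them in on every call
weight = torch.randn(3, 8, requires_grad=True)
bias = torch.zeros(3, requires_grad=True)
y2 = F.linear(x, weight, bias)

# Parameter-free ops like ReLU work equally well in either style
print(F.relu(y1).shape, nn.ReLU()(y2).shape)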

6 Optimizers

The optimization algorithms commonly used in PyTorch are encapsulated in torch.optim

All optimization methods inherit from the base class optim.Optimizer and implement their own optimization step

Stochastic gradient descent (SGD) is the most common optimizer

General steps to use the optimizer:

(1) Create an optimizer instance

Import the optim module and instantiate an SGD optimizer. This example uses the momentum parameter momentum, an improved variant of SGD

import torch.optim as optim
optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)

(2) Forward propagation
Pass the input data to model, the instantiated object of the network Net; its forward function is executed automatically, producing the output out. Then out and the label are used to compute the loss value loss

out = model(img)
loss = criterion(out, label)

(3) Clear the gradient.
By default, gradients are accumulated, so they must be cleared before backpropagation

optimizer.zero_grad()

(4) Backpropagation

loss.backward()

(5) Update parameters
Update parameters based on the current gradient

optimizer.step()

Dynamically modify learning rate parameters

You can modify optimizer.param_groups or create a new optimizer

Note: although creating a new optimizer is simple and lightweight, the new optimizer starts from empty state (e.g., momentum buffers), so the loss may oscillate for a while

optimizer.param_groups

  • list of length 1

  • optimizer.param_groups[0]: a dictionary with 6 keys, including params, lr, momentum, dampening, weight_decay, and nesterov

Modify the learning rate

for epoch in range(num_epoches):
    # Dynamically adjust the learning rate
    if epoch % 5 == 0:
        optimizer.param_groups[0]['lr'] *= 0.1
        print(optimizer.param_groups[0]['lr'])
    for img, label in train_loader:
        ...  # the training steps continue as before
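The alternative mentioned above, creating a new optimizer, might look like this sketch (reusing model, optim, and momentum from the earlier example); note the fresh optimizer starts with empty momentum buffers:

# Alternative: build a fresh optimizer with the new learning rate.
# Simple and lightweight, but the new optimizer starts from empty state,
# which can cause the loss to oscillate for a few steps.
new_lr = optimizer.param_groups[0]['lr'] * 0.1
optimizer = optim.SGD(model.parameters(), lr=new_lr, momentum=momentum)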

7 Comparison of Optimizers

Different optimizers are suited to different scenarios

But adaptive optimizers are the more popular choice in practice

## (1) Import the required modules
import torch
import torch.utils.data as Data
import torch.nn.functional as F
import matplotlib.pyplot as plt
%matplotlib inline
# Hyperparameters
LR = 0.01
BATCH_SIZE = 32
EPOCH = 12
## (2) Generate the data
# Generate the training data
# torch.unsqueeze() turns the 1-D tensor into a 2-D one, since nn.Linear expects 2-D input
x = torch.unsqueeze(torch.linspace(-1, 1, 1000), dim=1)
# 0.1 * torch.normal(torch.zeros(*x.size())) adds noise
y = x.pow(2) + 0.1 * torch.normal(torch.zeros(*x.size()))
torch_dataset = Data.TensorDataset(x, y)
# A generator that yields one batch at a time
loader = Data.DataLoader(dataset=torch_dataset, batch_size=BATCH_SIZE, shuffle=True)
## (3) Build the neural network
class Net(torch.nn.Module):
    # Initialization
    def __init__(self):
        super(Net, self).__init__()
        self.hidden = torch.nn.Linear(1, 20)
        self.predict = torch.nn.Linear(20, 1)

    # Forward pass
    def forward(self, x):
        x = F.relu(self.hidden(x))
        x = self.predict(x)
        return x

## (4) Use several optimizers
net_SGD = Net()
net_Momentum = Net()
net_RMSProp = Net()
net_Adam = Net()
nets = [net_SGD, net_Momentum, net_RMSProp, net_Adam]
opt_SGD =torch.optim.SGD(net_SGD.parameters(), lr=LR)
opt_Momentum =torch.optim.SGD(net_Momentum.parameters(), lr=LR, momentum = 0.9)
opt_RMSProp =torch.optim.RMSprop(net_RMSProp.parameters(), lr=LR, alpha = 0.9)
opt_Adam =torch.optim.Adam(net_Adam.parameters(), lr=LR, betas=(0.9, 0.99))
optimizers = [opt_SGD, opt_Momentum, opt_RMSProp, opt_Adam]

## (5) Train the models
loss_func = torch.nn.MSELoss()
loss_his = [[], [], [], []]
for epoch in range(EPOCH):
    for step, (batch_x, batch_y) in enumerate(loader):
        for net, opt, l_his in zip(nets, optimizers, loss_his):
            output = net(batch_x)
            loss = loss_func(output, batch_y)
            opt.zero_grad()
            loss.backward()
            opt.step()
            l_his.append(loss.item())  # record the scalar loss value
labels = ['SGD', 'Momentum', 'RMSProp', 'Adam']
## (6) Visualize the results
for i, l_his in enumerate(loss_his):
    plt.plot(l_his, label=labels[i])
plt.legend(loc='best')
plt.xlabel('Steps')
plt.ylabel('Loss')
plt.ylim((0, 0.2))
plt.show()

Origin: blog.csdn.net/qq_29788741/article/details/131266157