学习资源

PyTorch 动态神经网络

前文链接

《PyTorch | MorvanZhou 》learning notes（上）

文章目录

11 LSTM for MNIST（GPU）
12 RNN for Regression（GPU）
13 Autoencoder（GPU）
14 GAN（GPU）
15 Dropout
16 Batch Normalization（略）
17 DQN（略）

11 LSTM for MNIST（GPU）

在 9、10 小节的基础上，直接改成 GPU 的版本跑跑

"""
View more, visit my tutorial page: https://morvanzhou.github.io/tutorials/
My Youtube Channel: https://www.youtube.com/user/MorvanZhou
Dependencies:
torch: 0.4
matplotlib
torchvision
"""
import os
import torch
from torch import nn
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
torch.cuda.set_device(1) # make GPU index 1 the current CUDA device; NOTE(review): adjust the index to your machine

# torch.manual_seed(1)    # reproducible

# Hyper Parameters
EPOCH = 1               # number of passes over the training data; a single epoch keeps the demo short
BATCH_SIZE = 64
TIME_STEP = 28          # rnn time step / image height
INPUT_SIZE = 28         # rnn input size / image width
LR = 0.01               # learning rate
DOWNLOAD_MNIST = True   # set to True if the data has not been downloaded yet


# MNIST handwritten-digit dataset (training split)
train_data = dsets.MNIST(
    root='./mnist/',
    train=True,                         # this is training data
    transform=transforms.ToTensor(),    # Converts a PIL.Image or numpy.ndarray to
                                        # torch.FloatTensor of shape (C x H x W) and normalize in the range [0.0, 1.0]
    download=DOWNLOAD_MNIST,            # download it if you don't have it
)

# Print the dataset sizes and show the first training image with its label as the title
print(train_data.train_data.size())     # (60000, 28, 28)
print(train_data.train_labels.size())   # (60000)
plt.imshow(train_data.train_data[0].numpy(), cmap='gray')
plt.title('%i' % train_data.train_labels[0])
plt.show()

# Data Loader for easy mini-batch return in training
train_loader = torch.utils.data.DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)

# Load the test split and keep only the first 2000 samples to speed up evaluation.
# The images are read straight from the dataset's tensor here, so they are cast to
# float and divided by 255 by hand before being moved to the GPU.
test_data = dsets.MNIST(root='./mnist/', train=False, transform=transforms.ToTensor())
test_x = test_data.test_data.type(torch.FloatTensor)[:2000].cuda()/255.   # Tensor on GPU
test_y = test_data.test_labels[:2000].cuda()

class RNN(nn.Module):
    """LSTM classifier that reads an image one row per time step.

    A single-layer LSTM with 64 hidden units consumes the sequence, and a
    linear layer turns the output of the final time step into 10 class scores.
    """

    def __init__(self):
        super().__init__()

        # nn.LSTM is used on purpose: the tutorial notes that a plain nn.RNN hardly learns here.
        lstm_options = dict(
            input_size=INPUT_SIZE,
            hidden_size=64,     # number of hidden units
            num_layers=1,       # a single recurrent layer
            batch_first=True,   # tensors are laid out as (batch, time_step, feature)
        )
        self.rnn = nn.LSTM(**lstm_options)
        self.out = nn.Linear(64, 10)

    def forward(self, x):
        """Return class scores for a batch of sequences.

        Args:
            x: tensor laid out as (batch, time_step, input_size), i.e.
                (batch, image rows, image columns) in this tutorial.

        Returns:
            Tensor of shape (batch, 10) with one unnormalised score per class.
        """
        # Passing None as the initial state means "start from zeros".
        # The LSTM also returns the final (h_n, c_n) pair, which is not needed here.
        sequence_out, _final_states = self.rnn(x, None)

        # Only the last time step is classified: by then the whole image has been read.
        last_step = sequence_out[:, -1, :]
        return self.out(last_step)


# Build the LSTM classifier, move it to the current GPU and print its layers.
rnn = RNN()
rnn.cuda()
print(rnn)

optimizer = torch.optim.Adam(rnn.parameters(), lr=LR)   # optimize all rnn parameters
loss_func = nn.CrossEntropyLoss()                       # takes integer class labels, not one-hot vectors

# training and testing
for epoch in range(EPOCH):
    for step, (b_x, b_y) in enumerate(train_loader):        # gives batch data
        # Move the batch to the GPU and drop the channel axis:
        # (batch, 1, 28, 28) -> (batch, time_step, input_size)
        b_x = b_x.cuda().view(-1, TIME_STEP, INPUT_SIZE)
        b_y = b_y.cuda()

        output = rnn(b_x)                               # rnn output
        loss = loss_func(output, b_y)                   # cross entropy loss
        optimizer.zero_grad()                           # clear gradients for this training step
        loss.backward()                                 # backpropagation, compute gradients
        optimizer.step()                                # apply gradients

        if step % 50 == 0:
            # Evaluation needs no gradients; no_grad avoids building an autograd
            # graph over the whole 2000-sample test tensor every 50 steps.
            with torch.no_grad():
                test_output = rnn(test_x)               # (samples, 10) class scores
                pred_y = torch.max(test_output, 1)[1]   # index of the best score; already on the GPU
            accuracy = (pred_y == test_y).sum().item() / float(test_y.size(0))
            print('Epoch: ', epoch, '| train loss: %.4f' % loss.item(), '| test accuracy: %.2f' % accuracy)

# print 10 predictions from test data
with torch.no_grad():
    test_output = rnn(test_x[:10].view(-1, 28, 28))
pred_y = torch.max(test_output, 1)[1]
print(pred_y, 'prediction number')
print(test_y[:10], 'real number')

output

RNN(
  (rnn): LSTM(28, 64, batch_first=True)
  (out): Linear(in_features=64, out_features=10, bias=True)
)
Epoch:  0 | train loss: 2.3073 | test accuracy: 0.10
Epoch:  0 | train loss: 1.1458 | test accuracy: 0.62
Epoch:  0 | train loss: 0.8296 | test accuracy: 0.73
Epoch:  0 | train loss: 0.4943 | test accuracy: 0.81
Epoch:  0 | train loss: 0.5121 | test accuracy: 0.85
Epoch:  0 | train loss: 0.3138 | test accuracy: 0.87
Epoch:  0 | train loss: 0.2804 | test accuracy: 0.86
Epoch:  0 | train loss: 0.2422 | test accuracy: 0.90
Epoch:  0 | train loss: 0.2091 | test accuracy: 0.92
Epoch:  0 | train loss: 0.1030 | test accuracy: 0.93
Epoch:  0 | train loss: 0.2337 | test accuracy: 0.92
Epoch:  0 | train loss: 0.1778 | test accuracy: 0.93
Epoch:  0 | train loss: 0.4187 | test accuracy: 0.93
Epoch:  0 | train loss: 0.2747 | test accuracy: 0.94
Epoch:  0 | train loss: 0.2407 | test accuracy: 0.93
Epoch:  0 | train loss: 0.1944 | test accuracy: 0.94
Epoch:  0 | train loss: 0.3314 | test accuracy: 0.94
Epoch:  0 | train loss: 0.1331 | test accuracy: 0.95
Epoch:  0 | train loss: 0.1707 | test accuracy: 0.95
tensor([7, 2, 1, 0, 4, 1, 4, 9, 6, 9], device='cuda:1') prediction number
tensor([7, 2, 1, 0, 4, 1, 4, 9, 5, 9], device='cuda:1') real number

r_out, (h_n, h_c) = self.rnn(x, None) 这句话的理解
在这里插入图片描述

图片来源：https://www.jianshu.com/p/043083d114d4

配合【Keras-LSTM】IMDb 前面的示例图，茅塞顿开！！！

12 RNN for Regression（GPU）

相比 LSTM，RNN 只有一个 hidden state。我们用 sin 来拟合 cos：每个 pi 的区间内取 10 个点作为 time_steps，batch 为 1，input_size 为 1
一共 100 pi 迭代 100 steps，epoch 为 1

"""
View more, visit my tutorial page: https://morvanzhou.github.io/tutorials/
My Youtube Channel: https://www.youtube.com/user/MorvanZhou

Dependencies:
torch: 0.4
matplotlib
numpy
"""
import torch
from torch import nn
import numpy as np
import matplotlib.pyplot as plt
torch.cuda.set_device(1) # make GPU index 1 the current CUDA device; NOTE(review): adjust the index to your machine

# torch.manual_seed(1)    # reproducible

# Hyper Parameters
TIME_STEP = 10      # rnn time step
INPUT_SIZE = 1      # rnn input size
LR = 0.02           # learning rate

# Preview the task on one full period: the network gets sin(t) as input and must output cos(t)
steps = np.linspace(0, np.pi*2, 100, dtype=np.float32)  # float32 for converting torch FloatTensor
x_np = np.sin(steps)
y_np = np.cos(steps)
plt.plot(steps, y_np, 'r-', label='target (cos)')
plt.plot(steps, x_np, 'b-', label='input (sin)')
plt.legend(loc='best')
#plt.savefig("1.png")
plt.show()


class RNN(nn.Module):
    """Vanilla RNN regressor that emits one scalar prediction per time step.

    A single-layer nn.RNN with 32 hidden units is followed by a linear layer
    that maps each time step's hidden vector to one output value.
    """

    def __init__(self):
        super().__init__()

        self.rnn = nn.RNN(
            input_size=INPUT_SIZE,
            hidden_size=32,
            num_layers=1,
            batch_first=True,   # tensors are laid out as (batch, time_step, feature)
        )
        self.out = nn.Linear(32, 1)

    def forward(self, x, h_state):
        """Run the RNN over a sequence and project every time step.

        Args:
            x: input laid out as (batch, time_step, input_size).
            h_state: initial hidden state laid out as
                (n_layers, batch, hidden_size), or None.

        Returns:
            A pair ``(predictions, h_state)``: the per-step predictions stacked
            along dim 1, and the hidden state returned by the RNN so the caller
            can feed it into the next call.
        """
        hidden_seq, h_state = self.rnn(x, h_state)

        # Apply the read-out layer to each time step separately, then stack the
        # per-step results back into a single tensor along the time axis.
        per_step = [
            self.out(hidden_seq[:, t, :])
            for t in range(hidden_seq.size(1))
        ]
        return torch.stack(per_step, dim=1), h_state

# Build the regressor, move it to the current GPU and print its layers.
rnn = RNN()
rnn.cuda()
print(rnn)

optimizer = torch.optim.Adam(rnn.parameters(), lr=LR)   # optimize all rnn parameters
loss_func = nn.MSELoss()

h_state = None      # passed to the RNN as the initial hidden state on the first step

plt.figure(1, figsize=(12, 5))
plt.ion()           # interactive mode: keep redrawing the figure during training

for step in range(100):
    # Each iteration trains on the next window of length pi along the time axis.
    start, end = step * np.pi, (step+1)*np.pi   # time range
    # use sin predicts cos
    steps = np.linspace(start, end, TIME_STEP, dtype=np.float32, endpoint=False)  # float32 for converting torch FloatTensor
    x_np = np.sin(steps)
    y_np = np.cos(steps)

    # Add batch and feature axes -> (batch=1, time_step, input_size=1), then move to the GPU.
    x = torch.from_numpy(x_np[np.newaxis, :, np.newaxis]).cuda()
    y = torch.from_numpy(y_np[np.newaxis, :, np.newaxis]).cuda()

    prediction, h_state = rnn(x, h_state)   # rnn output
    # Important: cut the hidden state off from this iteration's graph so the next
    # backward pass does not try to propagate into earlier windows.
    # .detach() is the autograd-aware replacement for the older `.data` idiom.
    h_state = h_state.detach()

    loss = loss_func(prediction, y)         # calculate loss
    optimizer.zero_grad()                   # clear gradients for this training step
    loss.backward()                         # backpropagation, compute gradients
    optimizer.step()                        # apply gradients

    # plotting: target in red, prediction in blue (copied back to the CPU for matplotlib)
    plt.plot(steps, y_np.flatten(), 'r-')
    plt.plot(steps, prediction.detach().cpu().numpy().flatten(), 'b-')
    plt.draw()
    plt.pause(0.05)

plt.ioff()
#plt.savefig("2.png")
plt.show()

output
在这里插入图片描述

RNN(
  (rnn): RNN(1, 32, batch_first=True)
  (out): Linear(in_features=32, out_features=1, bias=True)
)

在这里插入图片描述
可以看出一开始效果一般，后面越来越好

扫描二维码关注公众号，回复： 13163927 查看本文章

13 Autoencoder（GPU）

图片 28*28 传进来，依次 encode 成 128，64，12，3，decode 是逆向的过程！
在这里插入图片描述
图片来源：https://www.bilibili.com/video/av15997678/?p=24

"""
View more, visit my tutorial page: https://morvanzhou.github.io/tutorials/
My Youtube Channel: https://www.youtube.com/user/MorvanZhou

Dependencies:
torch: 0.4
matplotlib
numpy
"""
import torch
import torch.nn as nn
import torch.utils.data as Data
import torchvision
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
import numpy as np

torch.cuda.set_device(1) # make GPU index 1 the current CUDA device; NOTE(review): adjust the index to your machine

# torch.manual_seed(1)    # reproducible

# Hyper Parameters
EPOCH = 10
BATCH_SIZE = 64
LR = 0.005         # learning rate
DOWNLOAD_MNIST = False     # NOTE(review): presumably the data is already under ./mnist/ from an earlier run; set to True otherwise
N_TEST_IMG = 5             # number of images shown in the original/reconstruction preview

# Mnist digits dataset
train_data = torchvision.datasets.MNIST(
    root='./mnist/',
    train=True,                                     # this is training data
    transform=torchvision.transforms.ToTensor(),    # Converts a PIL.Image or numpy.ndarray to
                                                    # torch.FloatTensor of shape (C x H x W) and normalize in the range [0.0, 1.0]
    download=DOWNLOAD_MNIST,                        # download it if you don't have it
)

# Print the dataset sizes and show the third training image with its label as the title
print(train_data.train_data.size())     # (60000, 28, 28)
print(train_data.train_labels.size())   # (60000)
plt.imshow(train_data.train_data[2].numpy(), cmap='gray')
plt.title('%i' % train_data.train_labels[2])
plt.show()

# Data Loader for easy mini-batch return in training, the image batch shape will be (BATCH_SIZE, 1, 28, 28)
train_loader = Data.DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)


class AutoEncoder(nn.Module):
    """Fully connected autoencoder for flattened 28x28 images.

    The encoder narrows 784 -> 128 -> 64 -> 12 -> 3 with Tanh between layers
    (the 3-value code itself is left linear so it can be plotted directly).
    The decoder mirrors those widths and ends in a Sigmoid so that outputs lie
    in the same 0..1 range as the inputs.
    """

    def __init__(self):
        super().__init__()
        widths = (28 * 28, 128, 64, 12, 3)

        # Encoder: Linear + Tanh per stage, then drop the Tanh after the code layer.
        encoder_layers = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            encoder_layers += [nn.Linear(fan_in, fan_out), nn.Tanh()]
        encoder_layers.pop()
        self.encoder = nn.Sequential(*encoder_layers)

        # Decoder: same widths in reverse; the final activation is a Sigmoid instead of Tanh.
        mirrored = widths[::-1]
        decoder_layers = []
        for fan_in, fan_out in zip(mirrored[:-1], mirrored[1:]):
            decoder_layers += [nn.Linear(fan_in, fan_out), nn.Tanh()]
        decoder_layers[-1] = nn.Sigmoid()
        self.decoder = nn.Sequential(*decoder_layers)

    def forward(self, x):
        """Encode and reconstruct a batch.

        Args:
            x: tensor whose last dimension is 28*28.

        Returns:
            A pair ``(encoded, decoded)``: the 3-value code and the
            reconstruction with the same last dimension as ``x``.
        """
        code = self.encoder(x)
        reconstruction = self.decoder(code)
        return code, reconstruction


# Build the autoencoder and move its parameters to the current GPU.
autoencoder = AutoEncoder()
autoencoder.cuda()

optimizer = torch.optim.Adam(autoencoder.parameters(), lr=LR)
loss_func = nn.MSELoss()

# Figure with two rows: originals on top, current reconstructions below.
f, a = plt.subplots(2, N_TEST_IMG, figsize=(5, 2))
plt.ion()   # continuously plot

# The first N_TEST_IMG training images, flattened and scaled to 0..1, kept on the
# GPU so they can be pushed through the model for the preview row.
view_data = train_data.train_data[:N_TEST_IMG].view(-1, 28*28).type(torch.FloatTensor).cuda()/255.

originals = view_data.cpu().numpy()     # one CPU copy is enough for drawing the top row
for i in range(N_TEST_IMG):
    a[0][i].imshow(np.reshape(originals[i], (28, 28)), cmap='gray')
    a[0][i].set_xticks(())
    a[0][i].set_yticks(())

for epoch in range(EPOCH):
    for step, (x, b_label) in enumerate(train_loader):
        # The reconstruction target is the input itself, so flatten and transfer
        # the batch once and use the same tensor for both roles.
        b_x = x.view(-1, 28*28).cuda()  # batch x, shape (batch, 28*28)
        b_y = b_x                       # batch y, same data as b_x

        encoded, decoded = autoencoder(b_x)

        loss = loss_func(decoded, b_y)      # mean square error
        optimizer.zero_grad()               # clear gradients for this training step
        loss.backward()                     # backpropagation, compute gradients
        optimizer.step()                    # apply gradients

        if step % 100 == 0:
            print('Epoch: ', epoch, '| train loss: %.4f' % loss.item())

            # Redraw the second row with the current reconstructions.
            # no_grad: this forward pass is only for display, no graph is needed.
            with torch.no_grad():
                _, decoded_data = autoencoder(view_data)
            reconstructions = decoded_data.cpu().numpy()
            for i in range(N_TEST_IMG):
                a[1][i].clear()
                a[1][i].imshow(np.reshape(reconstructions[i], (28, 28)), cmap='gray')
                a[1][i].set_xticks(())
                a[1][i].set_yticks(())
            plt.draw()
            plt.pause(0.05)

plt.ioff()
plt.show()

我们看看训练开始的 loss 和最后时候的 loss

Epoch:  0 | train loss: 0.2328
Epoch:  0 | train loss: 0.0652
Epoch:  0 | train loss: 0.0686
……
Epoch:  9 | train loss: 0.0352
Epoch:  9 | train loss: 0.0338
Epoch:  9 | train loss: 0.0334

最后看看生成的效果图，4 和 9 界限还是比较模糊的，哈哈哈
在这里插入图片描述

14 GAN（GPU）

先看结果
在这里插入图片描述
我们用 GAN 来画二次曲线，label 是画家用 15 个点画的（一元二次函数，用十五个点表示， $ax^2+(a-1)$ ， $a$ 在 1 和 2 之间随机取值），我们（新手）从 5 个随机数（N_IDEAS，即生成器的输入噪声）出发，同样生成 15 个点来拟合画家的作品！上面的结果显示了画家结果的上（ $a = 2$ ）下（ $a = 1$ ）限，我们学到的结果（绿色）在这个中间就说明很 OK！

"""
View more, visit my tutorial page: https://morvanzhou.github.io/tutorials/
My Youtube Channel: https://www.youtube.com/user/MorvanZhou

Dependencies:
torch: 0.4
numpy
matplotlib
"""
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt
torch.cuda.set_device(1) # make GPU index 1 the current CUDA device; NOTE(review): adjust the index to your machine

# torch.manual_seed(1)    # reproducible
# np.random.seed(1)

# Hyper Parameters
BATCH_SIZE = 64
LR_G = 0.0001           # learning rate for generator
LR_D = 0.0001           # learning rate for discriminator
N_IDEAS = 5             # think of this as number of ideas for generating an art work (Generator)
ART_COMPONENTS = 15     # it could be total point G can draw in the canvas
PAINT_POINTS = np.vstack([np.linspace(-1, 1, ART_COMPONENTS) for _ in range(BATCH_SIZE)]) # stack identical rows vertically: BATCH_SIZE rows, each with ART_COMPONENTS x positions in [-1, 1]

# show our beautiful painting range
# plt.plot(PAINT_POINTS[0], 2 * np.power(PAINT_POINTS[0], 2) + 1, c='#74BCFF', lw=3, label='upper bound')
# plt.plot(PAINT_POINTS[0], 1 * np.power(PAINT_POINTS[0], 2) + 0, c='#FF9359', lw=3, label='lower bound')
# plt.legend(loc='upper right')
# plt.show()


def artist_works():
    """Sample one batch of "real" paintings from the famous artist.

    Every painting is the quadratic curve ``a * x**2 + (a - 1)`` evaluated at
    one row of ``PAINT_POINTS``, with a separate coefficient ``a`` drawn
    uniformly between 1 and 2 for each of the ``BATCH_SIZE`` rows.

    Returns:
        A float torch tensor with one painting per row.
    """
    coeff = np.random.uniform(1, 2, size=BATCH_SIZE)
    coeff = coeff[:, np.newaxis]                # column vector so it broadcasts over each row's points
    squared_x = np.power(PAINT_POINTS, 2)
    curves = coeff * squared_x + (coeff - 1)
    return torch.from_numpy(curves).float()

# Generator: maps N_IDEAS random numbers to a painting of ART_COMPONENTS points.
G = nn.Sequential(
    nn.Linear(N_IDEAS, 128),            # random ideas (could from normal distribution)
    nn.ReLU(),
    nn.Linear(128, ART_COMPONENTS),     # making a painting from these random ideas
)

# Discriminator: maps a painting to the probability that the artist made it.
D = nn.Sequential(
    nn.Linear(ART_COMPONENTS, 128),     # receive art work either from the famous artist or a newbie like G
    nn.ReLU(),
    nn.Linear(128, 1),
    nn.Sigmoid(),                       # tell the probability that the art work is made by artist
)
G.cuda()
D.cuda()

opt_D = torch.optim.Adam(D.parameters(), lr=LR_D)
opt_G = torch.optim.Adam(G.parameters(), lr=LR_G)

plt.ion()   # interactive mode: keep redrawing the figure during training

for step in range(10000):
    artist_paintings = artist_works().cuda()            # real paintings, (BATCH_SIZE, ART_COMPONENTS)

    G_ideas = torch.randn(BATCH_SIZE, N_IDEAS).cuda()   # random ideas, (BATCH_SIZE, N_IDEAS)
    G_paintings = G(G_ideas)                            # fake paintings, (BATCH_SIZE, ART_COMPONENTS)

    # ---- discriminator step ----
    # The fakes are detached so this backward pass stops at D and never touches G.
    prob_artist0 = D(artist_paintings)          # D tries to increase this prob, (BATCH_SIZE, 1)
    prob_artist1 = D(G_paintings.detach())      # D tries to reduce this prob, (BATCH_SIZE, 1)
    D_loss = - torch.mean(torch.log(prob_artist0) + torch.log(1. - prob_artist1))

    opt_D.zero_grad()
    D_loss.backward()
    opt_D.step()

    # ---- generator step ----
    # Score the fakes again with the just-updated D. Backpropagating G's loss
    # through the graph built before opt_D.step() would use weights that the
    # optimizer has since modified in place, which newer PyTorch versions reject
    # with a RuntimeError.
    prob_artist1 = D(G_paintings)
    G_loss = torch.mean(torch.log(1. - prob_artist1))

    opt_G.zero_grad()
    G_loss.backward()       # also leaves gradients on D; opt_D.zero_grad() clears them next iteration
    opt_G.step()

    if step % 50 == 0:  # plotting
        plt.cla()
        plt.plot(PAINT_POINTS[0], G_paintings.detach().cpu().numpy()[0], c='#4AD631', lw=3, label='Generated painting',)
        plt.plot(PAINT_POINTS[0], 2 * np.power(PAINT_POINTS[0], 2) + 1, c='#74BCFF', lw=3, label='upper bound')
        plt.plot(PAINT_POINTS[0], 1 * np.power(PAINT_POINTS[0], 2) + 0, c='#FF9359', lw=3, label='lower bound')
        plt.text(-.5, 2.3, 'D accuracy=%.2f' % prob_artist0.mean().item(), fontdict={'size': 13})
        plt.text(-.5, 2, 'D score= %.2f' % -D_loss.item(), fontdict={'size': 13})
        plt.ylim((-0.5, 4))
        plt.legend(loc='upper right', fontsize=10)
        plt.draw()
        plt.pause(0.01)

plt.ioff()
plt.savefig("1.png")
plt.show()

15 Dropout

注意训练的时候用，测试的时候要关闭，用 model.eval()！！！
数据集是 y = x 加入了高斯噪声，对比加入和不加入drop out 来看拟合情况，模型是几个全连接层！

"""
View more, visit my tutorial page: https://morvanzhou.github.io/tutorials/
My Youtube Channel: https://www.youtube.com/user/MorvanZhou

Dependencies:
torch: 0.4
matplotlib
"""
import torch
import matplotlib.pyplot as plt
torch.cuda.set_device(0) # make GPU index 0 the current CUDA device; NOTE(review): adjust the index to your machine

plt.rcParams['figure.figsize'] = (8.0, 6.0) # default figure size for every plot below
# torch.manual_seed(1)    # reproducible

N_SAMPLES = 20      # number of points in the training set and in the test set
N_HIDDEN = 300      # width of the hidden layers

# training data: y = x plus Gaussian noise (standard deviation 0.3), one column of N_SAMPLES points
x = torch.unsqueeze(torch.linspace(-1, 1, N_SAMPLES), 1)
y = x + 0.3*torch.normal(torch.zeros(N_SAMPLES, 1), torch.ones(N_SAMPLES, 1))

# test data: same x grid, independently drawn noise
test_x = torch.unsqueeze(torch.linspace(-1, 1, N_SAMPLES), 1)
test_y = test_x + 0.3*torch.normal(torch.zeros(N_SAMPLES, 1), torch.ones(N_SAMPLES, 1))

# show data
plt.scatter(x.data.numpy(), y.data.numpy(), c='magenta', s=50, alpha=0.5, label='train')
plt.scatter(test_x.data.numpy(), test_y.data.numpy(), c='cyan', s=50, alpha=0.5, label='test')
plt.legend(loc='upper left')
plt.ylim((-2.5, 2.5))
#plt.savefig("1.png")
plt.show()

# Baseline: a wide fully connected net with no regularisation (expected to overfit 20 points).
net_overfitting = torch.nn.Sequential(
    torch.nn.Linear(1, N_HIDDEN),
    torch.nn.ReLU(),
    torch.nn.Linear(N_HIDDEN, N_HIDDEN),
    torch.nn.ReLU(),
    torch.nn.Linear(N_HIDDEN, 1),
)

# Same architecture with 50% dropout after each hidden linear layer.
net_dropped = torch.nn.Sequential(
    torch.nn.Linear(1, N_HIDDEN),
    torch.nn.Dropout(0.5),  # drop 50% of the neuron
    torch.nn.ReLU(),
    torch.nn.Linear(N_HIDDEN, N_HIDDEN),
    torch.nn.Dropout(0.5),  # drop 50% of the neuron
    torch.nn.ReLU(),
    torch.nn.Linear(N_HIDDEN, 1),
)
net_overfitting.cuda()
net_dropped.cuda()

print(net_overfitting)  # net architecture
print(net_dropped)

optimizer_ofit = torch.optim.Adam(net_overfitting.parameters(), lr=0.01)
optimizer_drop = torch.optim.Adam(net_dropped.parameters(), lr=0.01)
loss_func = torch.nn.MSELoss()

# The datasets never change, so move them to the GPU once here instead of on
# every iteration, and keep CPU/numpy copies for matplotlib.
x, y = x.cuda(), y.cuda()
test_x, test_y = test_x.cuda(), test_y.cuda()
x_np, y_np = x.cpu().numpy(), y.cpu().numpy()
test_x_np, test_y_np = test_x.cpu().numpy(), test_y.cpu().numpy()

plt.ion()   # interactive mode: keep redrawing the figure during training

for t in range(500):
    pred_ofit = net_overfitting(x)
    pred_drop = net_dropped(x)
    loss_ofit = loss_func(pred_ofit, y)
    loss_drop = loss_func(pred_drop, y)

    optimizer_ofit.zero_grad()
    optimizer_drop.zero_grad()
    loss_ofit.backward()
    loss_drop.backward()
    optimizer_ofit.step()
    optimizer_drop.step()

    if t % 10 == 0:
        # Key step: switch to eval mode so dropout is disabled while testing.
        net_overfitting.eval()
        net_dropped.eval()  # dropout behaves differently from train mode

        # Evaluation is for display only, so skip autograd bookkeeping.
        with torch.no_grad():
            test_pred_ofit = net_overfitting(test_x)
            test_pred_drop = net_dropped(test_x)
            test_loss_ofit = loss_func(test_pred_ofit, test_y).item()
            test_loss_drop = loss_func(test_pred_drop, test_y).item()

        # plotting
        plt.cla()
        plt.scatter(x_np, y_np, c='magenta', s=50, alpha=0.3, label='train')
        plt.scatter(test_x_np, test_y_np, c='cyan', s=50, alpha=0.3, label='test')
        plt.plot(test_x_np, test_pred_ofit.cpu().numpy(), 'r-', lw=3, label='overfitting')
        plt.plot(test_x_np, test_pred_drop.cpu().numpy(), 'b--', lw=3, label='dropout(50%)')
        plt.text(0, -1.2, 'overfit loss=%.4f' % test_loss_ofit, fontdict={'size': 20, 'color':  'red'})
        plt.text(0, -1.5, 'dropout loss=%.4f' % test_loss_drop, fontdict={'size': 20, 'color': 'blue'})
        plt.legend(loc='upper left')
        plt.ylim((-2.5, 2.5))
        plt.pause(0.1)

        # Switch back to train mode so dropout is active again for the next updates.
        net_overfitting.train()
        net_dropped.train()
#plt.savefig("2.png")
plt.ioff()
plt.show()

模型如下

Sequential(
  (0): Linear(in_features=1, out_features=300, bias=True)
  (1): ReLU()
  (2): Linear(in_features=300, out_features=300, bias=True)
  (3): ReLU()
  (4): Linear(in_features=300, out_features=1, bias=True)
)
Sequential(
  (0): Linear(in_features=1, out_features=300, bias=True)
  (1): Dropout(p=0.5)
  (2): ReLU()
  (3): Linear(in_features=300, out_features=300, bias=True)
  (4): Dropout(p=0.5)
  (5): ReLU()
  (6): Linear(in_features=300, out_features=1, bias=True)
)

训练集和测试集如下
在这里插入图片描述
结果如下

可以看到，dropout 后（蓝色的），虽然在 train data 上表现一般，但是在 test 上结果还行

反观不用 dropout（红色的），在 train data 上表现非常好，但是在 test 上表现就差一些

16 Batch Normalization（略）

可视化的图没有调出来，而且感觉很冗余，日后补个 CNN 的

《PyTorch | MorvanZhou 》learning notes（下）