学习资源
前文链接
文章目录
11 LSTM for MNIST(GPU)
在 9、10 小节的基础上,直接改成 GPU 的版本跑跑
"""
View more, visit my tutorial page: https://morvanzhou.github.io/tutorials/
My Youtube Channel: https://www.youtube.com/user/MorvanZhou
Dependencies:
torch: 0.4
matplotlib
torchvision
"""
import os
import torch
from torch import nn
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
# Make GPU index 1 the default CUDA device for every later .cuda() call.
# NOTE(review): presumably this needs at least two visible GPUs -- adjust the index for your machine.
torch.cuda.set_device(1) # change
# torch.manual_seed(1) # reproducible
# Hyper Parameters
EPOCH = 1 # number of passes over the training set; kept at 1 to save time
BATCH_SIZE = 64
TIME_STEP = 28 # rnn time step / image height
INPUT_SIZE = 28 # rnn input size / image width
LR = 0.01 # learning rate
DOWNLOAD_MNIST = True # set to True if the data has not been downloaded yet
# MNIST handwritten-digit dataset (training split)
train_data = dsets.MNIST(
root='./mnist/',
train=True, # this is training data
transform=transforms.ToTensor(), # Converts a PIL.Image or numpy.ndarray to
# torch.FloatTensor of shape (C x H x W) and normalize in the range [0.0, 1.0]
download=DOWNLOAD_MNIST, # download it if you don't have it
)
# plot one example
print(train_data.train_data.size()) # (60000, 28, 28)
print(train_data.train_labels.size()) # (60000)
plt.imshow(train_data.train_data[0].numpy(), cmap='gray')
plt.title('%i' % train_data.train_labels[0])
plt.show()
# Data Loader for easy mini-batch return in training
train_loader = torch.utils.data.DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)
# Test split: keep only the first 2000 samples to speed up evaluation
test_data = dsets.MNIST(root='./mnist/', train=False, transform=transforms.ToTensor())
# The raw image tensor is read directly, so it is cast to float and divided by 255 here
# to land in the same [0, 1] range that ToTensor gives the training batches.
test_x = test_data.test_data.type(torch.FloatTensor)[:2000].cuda()/255. # Tensor on GPU
test_y = test_data.test_labels[:2000].cuda()
class RNN(nn.Module):
    """LSTM classifier that reads an image row by row and predicts its class.

    Each image row is one time step; the logits are computed from the LSTM
    output at the last time step, i.e. after the whole image has been seen.

    Args:
        input_size: Number of features per time step. When omitted, the
            module-level ``INPUT_SIZE`` is used (original behaviour).
        hidden_size: Width of the LSTM hidden state.
        num_classes: Number of output logits.
    """

    def __init__(self, input_size=None, hidden_size=64, num_classes=10):
        super(RNN, self).__init__()
        if input_size is None:
            # Fall back to the script-wide hyper-parameter, as the original did.
            input_size = INPUT_SIZE
        self.rnn = nn.LSTM(  # with nn.RNN() in its place the model hardly learns
            input_size=input_size,
            hidden_size=hidden_size,  # rnn hidden units
            num_layers=1,             # number of stacked rnn layers
            batch_first=True,         # tensors are (batch, time_step, feature)
        )
        self.out = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return class logits of shape (batch, num_classes).

        Args:
            x: Input of shape (batch, time_step, input_size).
        """
        # r_out: (batch, time_step, hidden_size). The final (h_n, c_n) state is
        # not needed here; None means a zero initial hidden/cell state.
        r_out, _ = self.rnn(x, None)
        # Classify from the output at the last time step only.
        return self.out(r_out[:, -1, :])
# Build the model on the GPU and train it, reporting test accuracy every 50 steps.
rnn = RNN()
rnn.cuda()
print(rnn)

optimizer = torch.optim.Adam(rnn.parameters(), lr=LR)  # optimize all rnn parameters
loss_func = nn.CrossEntropyLoss()  # targets are integer class indices, not one-hot

# training and testing
for epoch in range(EPOCH):
    for step, (b_x, b_y) in enumerate(train_loader):  # one mini-batch per step
        # (batch, 1, 28, 28) -> (batch, time_step, input_size), on the GPU
        b_x = b_x.cuda().view(-1, TIME_STEP, INPUT_SIZE)
        b_y = b_y.cuda()

        output = rnn(b_x)              # (batch, 10) logits
        loss = loss_func(output, b_y)  # cross entropy loss
        optimizer.zero_grad()          # clear gradients for this training step
        loss.backward()                # backpropagation, compute gradients
        optimizer.step()               # apply gradients

        if step % 50 == 0:
            # Evaluation needs no gradients; no_grad avoids building a graph
            # over the 2000 test samples every time.
            with torch.no_grad():
                test_output = rnn(test_x)          # (samples, 10) logits
            pred_y = torch.max(test_output, 1)[1]  # predicted class, stays on the GPU
            accuracy = (pred_y == test_y).sum().item() / float(test_y.size(0))
            print('Epoch: ', epoch, '| train loss: %.4f' % loss.item(), '| test accuracy: %.2f' % accuracy)

# print 10 predictions from test data
with torch.no_grad():
    test_output = rnn(test_x[:10].view(-1, TIME_STEP, INPUT_SIZE))
pred_y = torch.max(test_output, 1)[1]
print(pred_y, 'prediction number')
print(test_y[:10], 'real number')
output
RNN(
(rnn): LSTM(28, 64, batch_first=True)
(out): Linear(in_features=64, out_features=10, bias=True)
)
Epoch: 0 | train loss: 2.3073 | test accuracy: 0.10
Epoch: 0 | train loss: 1.1458 | test accuracy: 0.62
Epoch: 0 | train loss: 0.8296 | test accuracy: 0.73
Epoch: 0 | train loss: 0.4943 | test accuracy: 0.81
Epoch: 0 | train loss: 0.5121 | test accuracy: 0.85
Epoch: 0 | train loss: 0.3138 | test accuracy: 0.87
Epoch: 0 | train loss: 0.2804 | test accuracy: 0.86
Epoch: 0 | train loss: 0.2422 | test accuracy: 0.90
Epoch: 0 | train loss: 0.2091 | test accuracy: 0.92
Epoch: 0 | train loss: 0.1030 | test accuracy: 0.93
Epoch: 0 | train loss: 0.2337 | test accuracy: 0.92
Epoch: 0 | train loss: 0.1778 | test accuracy: 0.93
Epoch: 0 | train loss: 0.4187 | test accuracy: 0.93
Epoch: 0 | train loss: 0.2747 | test accuracy: 0.94
Epoch: 0 | train loss: 0.2407 | test accuracy: 0.93
Epoch: 0 | train loss: 0.1944 | test accuracy: 0.94
Epoch: 0 | train loss: 0.3314 | test accuracy: 0.94
Epoch: 0 | train loss: 0.1331 | test accuracy: 0.95
Epoch: 0 | train loss: 0.1707 | test accuracy: 0.95
tensor([7, 2, 1, 0, 4, 1, 4, 9, 6, 9], device='cuda:1') prediction number
tensor([7, 2, 1, 0, 4, 1, 4, 9, 5, 9], device='cuda:1') real number
r_out, (h_n, h_c) = self.rnn(x, None)
这句话的理解
图片来源:https://www.jianshu.com/p/043083d114d4
配合 【Keras-LSTM】IMDb 前面的示例图,茅塞顿开!!!
12 RNN for Regression(GPU)
相比 LSTM,RNN 只有一个 hidden state。我们用 sin 来拟合 cos,每个 pi 区间内取 10 个点作为 time_steps,batch 为 1,input_size 为 1
一共 100 pi 迭代 100 steps,epoch 为 1
"""
View more, visit my tutorial page: https://morvanzhou.github.io/tutorials/
My Youtube Channel: https://www.youtube.com/user/MorvanZhou
Dependencies:
torch: 0.4
matplotlib
numpy
"""
import torch
from torch import nn
import numpy as np
import matplotlib.pyplot as plt
# Make GPU index 1 the default CUDA device for every later .cuda() call.
# NOTE(review): presumably this needs at least two visible GPUs -- adjust the index for your machine.
torch.cuda.set_device(1) # change
# torch.manual_seed(1) # reproducible
# Hyper Parameters
TIME_STEP = 10 # rnn time step (points sampled per training segment)
INPUT_SIZE = 1 # rnn input size (one scalar per time step)
LR = 0.02 # learning rate
# Preview of the task: 100 points over [0, 2*pi]; sin is the input curve, cos the target
steps = np.linspace(0, np.pi*2, 100, dtype=np.float32) # float32 for converting torch FloatTensor
x_np = np.sin(steps)
y_np = np.cos(steps)
plt.plot(steps, y_np, 'r-', label='target (cos)')
plt.plot(steps, x_np, 'b-', label='input (sin)')
plt.legend(loc='best')
#plt.savefig("1.png")
plt.show()
class RNN(nn.Module):
    """Vanilla RNN that maps an input sequence to one scalar per time step.

    Args:
        input_size: Number of features per time step. When omitted, the
            module-level ``INPUT_SIZE`` is used (original behaviour).
        hidden_size: Width of the RNN hidden state.
    """

    def __init__(self, input_size=None, hidden_size=32):
        super(RNN, self).__init__()
        if input_size is None:
            # Fall back to the script-wide hyper-parameter, as the original did.
            input_size = INPUT_SIZE
        self.rnn = nn.RNN(
            input_size=input_size,
            hidden_size=hidden_size,  # rnn hidden units
            num_layers=1,             # number of stacked rnn layers
            batch_first=True,         # tensors are (batch, time_step, feature)
        )
        self.out = nn.Linear(hidden_size, 1)

    def forward(self, x, h_state):
        """Run the sequence through the RNN and project every time step.

        Args:
            x: Input of shape (batch, time_step, input_size).
            h_state: Initial hidden state of shape (n_layers, batch,
                hidden_size), or None for a zero initial state.

        Returns:
            A tuple ``(outs, h_state)`` where ``outs`` has shape
            (batch, time_step, 1) and ``h_state`` is the final hidden state.
        """
        # r_out: (batch, time_step, hidden_size)
        r_out, h_state = self.rnn(x, h_state)
        # nn.Linear acts on the last dimension only, so one call projects all
        # time steps at once; no per-step Python loop and torch.stack needed.
        outs = self.out(r_out)
        return outs, h_state
# Build the model on the GPU and fit cos from sin, one pi-long segment per step.
rnn = RNN()
rnn.cuda()  # move the model parameters to the GPU
print(rnn)

optimizer = torch.optim.Adam(rnn.parameters(), lr=LR)  # optimize all rnn parameters
loss_func = nn.MSELoss()

h_state = None  # None -> zero initial hidden state on the first segment

plt.figure(1, figsize=(12, 5))
plt.ion()  # interactive mode: keep redrawing the same figure

for step in range(100):
    start, end = step * np.pi, (step + 1) * np.pi  # time range of this segment
    # use sin to predict cos; float32 so the arrays convert to FloatTensor
    steps = np.linspace(start, end, TIME_STEP, dtype=np.float32, endpoint=False)
    x_np = np.sin(steps)
    y_np = np.cos(steps)

    # shape (batch, time_step, input_size) = (1, TIME_STEP, 1), on the GPU
    x = torch.from_numpy(x_np[np.newaxis, :, np.newaxis]).cuda()
    y = torch.from_numpy(y_np[np.newaxis, :, np.newaxis]).cuda()

    prediction, h_state = rnn(x, h_state)  # rnn output
    # !! important !! Carry the hidden state forward but cut it out of the
    # previous segment's graph, otherwise backward() would try to reach into
    # graphs that were already freed. detach() is used instead of .data so
    # autograd can still detect in-place modifications.
    h_state = h_state.detach()

    loss = loss_func(prediction, y)  # calculate loss
    optimizer.zero_grad()            # clear gradients for this training step
    loss.backward()                  # backpropagation, compute gradients
    optimizer.step()                 # apply gradients

    # plotting: target in red, prediction in blue (copied back to the CPU)
    plt.plot(steps, y_np.flatten(), 'r-')
    plt.plot(steps, prediction.detach().cpu().numpy().flatten(), 'b-')
    plt.draw()
    plt.pause(0.05)

plt.ioff()
#plt.savefig("2.png")
plt.show()
output
RNN(
(rnn): RNN(1, 32, batch_first=True)
(out): Linear(in_features=32, out_features=1, bias=True)
)
可以看出一开始效果一般,后面越来越好
13 Autoencoder(GPU)
图片 28*28 传进来,依次 encode 成 128,64,12,3(与下面代码中的网络结构一致),decode 是逆向的过程!
图片来源:https://www.bilibili.com/video/av15997678/?p=24
"""
View more, visit my tutorial page: https://morvanzhou.github.io/tutorials/
My Youtube Channel: https://www.youtube.com/user/MorvanZhou
Dependencies:
torch: 0.4
matplotlib
numpy
"""
import torch
import torch.nn as nn
import torch.utils.data as Data
import torchvision
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
import numpy as np
# Make GPU index 1 the default CUDA device for every later .cuda() call.
# NOTE(review): presumably this needs at least two visible GPUs -- adjust the index for your machine.
torch.cuda.set_device(1) # change
# torch.manual_seed(1) # reproducible
# Hyper Parameters
EPOCH = 10
BATCH_SIZE = 64
LR = 0.005 # learning rate
DOWNLOAD_MNIST = False # set to True if ./mnist/ does not hold the data yet
N_TEST_IMG = 5 # number of images shown in the original/reconstruction preview
# Mnist digits dataset
train_data = torchvision.datasets.MNIST(
root='./mnist/',
train=True, # this is training data
transform=torchvision.transforms.ToTensor(), # Converts a PIL.Image or numpy.ndarray to
# torch.FloatTensor of shape (C x H x W) and normalize in the range [0.0, 1.0]
download=DOWNLOAD_MNIST, # download it if you don't have it
)
# plot one example
print(train_data.train_data.size()) # (60000, 28, 28)
print(train_data.train_labels.size()) # (60000)
plt.imshow(train_data.train_data[2].numpy(), cmap='gray')
plt.title('%i' % train_data.train_labels[2])
plt.show()
# Data Loader for easy mini-batch return in training, the image batch shape will be (BATCH_SIZE, 1, 28, 28)
train_loader = Data.DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)
class AutoEncoder(nn.Module):
    """Fully connected autoencoder for flattened 28x28 images.

    The encoder narrows 784 -> 128 -> 64 -> 12 -> 3 with Tanh between the
    linear layers; the decoder mirrors those widths and ends in a Sigmoid so
    reconstructions lie in (0, 1).
    """

    def __init__(self):
        super(AutoEncoder, self).__init__()
        widths = [28 * 28, 128, 64, 12, 3]

        # Encoder: Linear/Tanh pairs, with no activation after the 3-d code
        # so it can be visualised directly.
        enc_layers = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            enc_layers.append(nn.Linear(fan_in, fan_out))
            enc_layers.append(nn.Tanh())
        enc_layers.pop()
        self.encoder = nn.Sequential(*enc_layers)

        # Decoder: the same widths in reverse; the final activation is a
        # Sigmoid instead of Tanh to squash outputs into (0, 1).
        mirrored = widths[::-1]
        dec_layers = []
        for fan_in, fan_out in zip(mirrored[:-1], mirrored[1:]):
            dec_layers.append(nn.Linear(fan_in, fan_out))
            dec_layers.append(nn.Tanh())
        dec_layers[-1] = nn.Sigmoid()
        self.decoder = nn.Sequential(*dec_layers)

    def forward(self, x):
        """Return ``(encoded, decoded)`` for a batch ``x`` of shape (batch, 784)."""
        code = self.encoder(x)
        reconstruction = self.decoder(code)
        return code, reconstruction
# Train the autoencoder on the GPU and periodically show reconstructions of a
# fixed set of N_TEST_IMG training images.
autoencoder = AutoEncoder()
autoencoder.cuda()

optimizer = torch.optim.Adam(autoencoder.parameters(), lr=LR)
loss_func = nn.MSELoss()

# initialize figure: row 0 = originals, row 1 = reconstructions
f, a = plt.subplots(2, N_TEST_IMG, figsize=(5, 2))
plt.ion()  # continuously plot

# original data (first row) for viewing; raw pixels are scaled to [0, 1] by hand
view_data = train_data.train_data[:N_TEST_IMG].view(-1, 28*28).type(torch.FloatTensor).cuda()/255.
view_images = view_data.cpu().numpy()
for i in range(N_TEST_IMG):
    a[0][i].imshow(np.reshape(view_images[i], (28, 28)), cmap='gray')
    a[0][i].set_xticks(())
    a[0][i].set_yticks(())

for epoch in range(EPOCH):
    for step, (x, b_label) in enumerate(train_loader):
        # Flatten to (batch, 28*28) and copy to the GPU once; the target of an
        # autoencoder is its own input, so the same tensor serves as both.
        b_x = x.view(-1, 28*28).cuda()
        b_y = b_x

        encoded, decoded = autoencoder(b_x)
        loss = loss_func(decoded, b_y)  # mean square error
        optimizer.zero_grad()           # clear gradients for this training step
        loss.backward()                 # backpropagation, compute gradients
        optimizer.step()                # apply gradients

        if step % 100 == 0:
            print('Epoch: ', epoch, '| train loss: %.4f' % loss.item())

            # plotting decoded image (second row); no gradients are needed
            with torch.no_grad():
                _, decoded_data = autoencoder(view_data)
            decoded_images = decoded_data.cpu().numpy()
            for i in range(N_TEST_IMG):
                a[1][i].clear()
                a[1][i].imshow(np.reshape(decoded_images[i], (28, 28)), cmap='gray')
                a[1][i].set_xticks(())
                a[1][i].set_yticks(())
            plt.draw()
            plt.pause(0.05)

plt.ioff()
plt.show()
我们看看训练开始的 loss 和最后时候的 loss
Epoch: 0 | train loss: 0.2328
Epoch: 0 | train loss: 0.0652
Epoch: 0 | train loss: 0.0686
……
Epoch: 9 | train loss: 0.0352
Epoch: 9 | train loss: 0.0338
Epoch: 9 | train loss: 0.0334
最后看看生成的效果图,4 和 9 界限还是比较模糊的,哈哈哈
14 GAN(GPU)
先看结果
我们用 GAN 来画二次曲线,label 是画家用 15 个点画的(一元二次函数,用十五个点表示,$ax^2+(a-1)$,$a$ 在 1 和 2 之间随机取值),我们(新手)用 5 个随机 idea 作为输入,生成 15 个点来拟合画家的作品!上面的结果显示了画家结果的上($a=2$)下($a=1$)限,我们学到的结果(绿色)在这个中间就说明很 OK!
"""
View more, visit my tutorial page: https://morvanzhou.github.io/tutorials/
My Youtube Channel: https://www.youtube.com/user/MorvanZhou
Dependencies:
torch: 0.4
numpy
matplotlib
"""
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt
# Make GPU index 1 the default CUDA device for every later .cuda() call.
# NOTE(review): presumably this needs at least two visible GPUs -- adjust the index for your machine.
torch.cuda.set_device(1) # change
# torch.manual_seed(1) # reproducible
# np.random.seed(1)
# Hyper Parameters
BATCH_SIZE = 64
LR_G = 0.0001 # learning rate for generator
LR_D = 0.0001 # learning rate for discriminator
N_IDEAS = 5 # think of this as number of ideas for generating an art work (Generator)
ART_COMPONENTS = 15 # it could be total point G can draw in the canvas
# x coordinates shared by every painting: the same ART_COMPONENTS points in [-1, 1],
# stacked vertically once per batch row -> shape (BATCH_SIZE, ART_COMPONENTS)
PAINT_POINTS = np.vstack([np.linspace(-1, 1, ART_COMPONENTS) for _ in range(BATCH_SIZE)])
# show our beautiful painting range
# plt.plot(PAINT_POINTS[0], 2 * np.power(PAINT_POINTS[0], 2) + 1, c='#74BCFF', lw=3, label='upper bound')
# plt.plot(PAINT_POINTS[0], 1 * np.power(PAINT_POINTS[0], 2) + 0, c='#FF9359', lw=3, label='lower bound')
# plt.legend(loc='upper right')
# plt.show()
def artist_works():
    """Sample one batch of "real" paintings from the famous artist.

    Each row is the curve ``a * x**2 + (a - 1)`` evaluated at the matching row
    of ``PAINT_POINTS``, with one coefficient ``a`` drawn uniformly from
    [1, 2) per row.

    Returns:
        A float tensor of shape (BATCH_SIZE, ART_COMPONENTS).
    """
    # One coefficient per batch row, as a column so it broadcasts over the points.
    coeffs = np.random.uniform(1, 2, size=BATCH_SIZE).reshape(-1, 1)
    curves = coeffs * PAINT_POINTS ** 2 + (coeffs - 1)
    return torch.from_numpy(curves).float()
# Generator: maps N_IDEAS random numbers to a painting of ART_COMPONENTS points
G = nn.Sequential( # Generator
nn.Linear(N_IDEAS, 128), # random ideas (could come from a normal distribution)
nn.ReLU(),
nn.Linear(128, ART_COMPONENTS), # making a painting from these random ideas
)
# Discriminator: maps a painting of ART_COMPONENTS points to one probability
D = nn.Sequential( # Discriminator
nn.Linear(ART_COMPONENTS, 128), # receive art work either from the famous artist or a newbie like G
nn.ReLU(),
nn.Linear(128, 1),
nn.Sigmoid(), # tell the probability that the art work is made by artist
)
# Move both networks to the GPU before their optimizers are created
G.cuda()
D.cuda()
# One Adam optimizer per network so each can be stepped independently
opt_D = torch.optim.Adam(D.parameters(), lr=LR_D)
opt_G = torch.optim.Adam(G.parameters(), lr=LR_G)
# Adversarial training loop: alternate one discriminator step and one
# generator step per iteration, redrawing the generated curve every 50 steps.
plt.ion()  # interactive mode so the figure refreshes during training

for step in range(10000):
    artist_paintings = artist_works().cuda()           # real paintings, (BATCH_SIZE, ART_COMPONENTS)
    G_ideas = torch.randn(BATCH_SIZE, N_IDEAS).cuda()  # random ideas, (BATCH_SIZE, N_IDEAS)
    G_paintings = G(G_ideas)                           # fake paintings, (BATCH_SIZE, ART_COMPONENTS)

    # ---- discriminator update ----
    # The fake batch is detached so this backward pass stops at D and never
    # touches G's graph.
    prob_artist0 = D(artist_paintings)      # D tries to increase this prob, (BATCH_SIZE, 1)
    prob_artist1 = D(G_paintings.detach())  # D tries to reduce this prob, (BATCH_SIZE, 1)
    D_loss = - torch.mean(torch.log(prob_artist0) + torch.log(1. - prob_artist1))
    opt_D.zero_grad()
    D_loss.backward()
    opt_D.step()

    # ---- generator update ----
    # Re-run D on the fake batch AFTER opt_D.step(). Back-propagating a graph
    # recorded before the step would read D weights that the optimizer has
    # since modified in place, which autograd rejects with a RuntimeError on
    # current PyTorch versions.
    G_loss = torch.mean(torch.log(1. - D(G_paintings)))
    opt_G.zero_grad()
    G_loss.backward()  # also leaves grads on D; opt_D.zero_grad() clears them next iteration
    opt_G.step()

    if step % 50 == 0:  # plotting
        plt.cla()
        plt.plot(PAINT_POINTS[0], G_paintings.detach().cpu().numpy()[0], c='#4AD631', lw=3, label='Generated painting',)
        plt.plot(PAINT_POINTS[0], 2 * np.power(PAINT_POINTS[0], 2) + 1, c='#74BCFF', lw=3, label='upper bound')
        plt.plot(PAINT_POINTS[0], 1 * np.power(PAINT_POINTS[0], 2) + 0, c='#FF9359', lw=3, label='lower bound')
        plt.text(-.5, 2.3, 'D accuracy=%.2f' % prob_artist0.detach().cpu().numpy().mean(), fontdict={'size': 13})
        plt.text(-.5, 2, 'D score= %.2f' % -D_loss.item(), fontdict={'size': 13})
        plt.ylim((-0.5, 4))
        plt.legend(loc='upper right', fontsize=10)
        plt.draw()
        plt.pause(0.01)

plt.ioff()
plt.savefig("1.png")
plt.show()
15 Dropout
注意训练的时候用,测试的时候要关闭,用 model.eval()
!!!
数据集是 y = x 加入了高斯噪声,对比加入和不加入drop out 来看拟合情况,模型是几个全连接层!
"""
View more, visit my tutorial page: https://morvanzhou.github.io/tutorials/
My Youtube Channel: https://www.youtube.com/user/MorvanZhou
Dependencies:
torch: 0.4
matplotlib
"""
import torch
import matplotlib.pyplot as plt
# Make GPU index 0 the default CUDA device for every later .cuda() call.
torch.cuda.set_device(0) # change
plt.rcParams['figure.figsize'] = (8.0, 6.0) # set the default figure size
# torch.manual_seed(1) # reproducible
N_SAMPLES = 20 # points in each of the train and test sets
N_HIDDEN = 300 # width of the hidden layers
# training data: y = x plus Gaussian noise (std 1, scaled by 0.3); both of shape (N_SAMPLES, 1)
x = torch.unsqueeze(torch.linspace(-1, 1, N_SAMPLES), 1)
y = x + 0.3*torch.normal(torch.zeros(N_SAMPLES, 1), torch.ones(N_SAMPLES, 1))
# test data: same x grid with an independent noise draw
test_x = torch.unsqueeze(torch.linspace(-1, 1, N_SAMPLES), 1)
test_y = test_x + 0.3*torch.normal(torch.zeros(N_SAMPLES, 1), torch.ones(N_SAMPLES, 1))
# show data (the tensors are still on the CPU here, so .numpy() works directly)
plt.scatter(x.data.numpy(), y.data.numpy(), c='magenta', s=50, alpha=0.5, label='train')
plt.scatter(test_x.data.numpy(), test_y.data.numpy(), c='cyan', s=50, alpha=0.5, label='test')
plt.legend(loc='upper left')
plt.ylim((-2.5, 2.5))
#plt.savefig("1.png")
plt.show()
# Baseline network without any regularisation: 1 -> N_HIDDEN -> N_HIDDEN -> 1
net_overfitting = torch.nn.Sequential(
torch.nn.Linear(1, N_HIDDEN),
torch.nn.ReLU(),
torch.nn.Linear(N_HIDDEN, N_HIDDEN),
torch.nn.ReLU(),
torch.nn.Linear(N_HIDDEN, 1),
)
# Same layer sizes, with a Dropout(0.5) after each of the first two linear layers
net_dropped = torch.nn.Sequential(
torch.nn.Linear(1, N_HIDDEN),
torch.nn.Dropout(0.5), # drop 50% of the neuron
torch.nn.ReLU(),
torch.nn.Linear(N_HIDDEN, N_HIDDEN),
torch.nn.Dropout(0.5), # drop 50% of the neuron
torch.nn.ReLU(),
torch.nn.Linear(N_HIDDEN, 1),
)
# Move both networks to the GPU before their optimizers are created
net_overfitting.cuda()
net_dropped.cuda()
print(net_overfitting) # net architecture
print(net_dropped)
# Identical optimizer settings and loss for both networks, so the comparison differs only in dropout
optimizer_ofit = torch.optim.Adam(net_overfitting.parameters(), lr=0.01)
optimizer_drop = torch.optim.Adam(net_dropped.parameters(), lr=0.01)
loss_func = torch.nn.MSELoss()
# Train both networks side by side and, every 10 steps, plot their predictions
# on the test grid together with their test losses.
plt.ion()  # interactive mode so the figure refreshes during training

# The data never changes, so move it to the GPU once instead of on every iteration.
x, y = x.cuda(), y.cuda()
test_x, test_y = test_x.cuda(), test_y.cuda()

for t in range(500):
    pred_ofit = net_overfitting(x)
    pred_drop = net_dropped(x)
    loss_ofit = loss_func(pred_ofit, y)
    loss_drop = loss_func(pred_drop, y)

    optimizer_ofit.zero_grad()
    optimizer_drop.zero_grad()
    loss_ofit.backward()
    loss_drop.backward()
    optimizer_ofit.step()
    optimizer_drop.step()

    if t % 10 == 0:
        # ------------------------------------------------------------ #
        # Key step: switch to eval mode so dropout is disabled while    #
        # the networks are being tested.                                #
        # ------------------------------------------------------------ #
        net_overfitting.eval()
        net_dropped.eval()  # dropout behaves differently from train mode

        # Evaluation needs no gradients.
        with torch.no_grad():
            test_pred_ofit = net_overfitting(test_x)
            test_pred_drop = net_dropped(test_x)
            test_loss_ofit = loss_func(test_pred_ofit, test_y).item()
            test_loss_drop = loss_func(test_pred_drop, test_y).item()

        # plotting (matplotlib needs CPU numpy arrays)
        plt.cla()
        test_x_np = test_x.cpu().numpy()
        plt.scatter(x.cpu().numpy(), y.cpu().numpy(), c='magenta', s=50, alpha=0.3, label='train')
        plt.scatter(test_x_np, test_y.cpu().numpy(), c='cyan', s=50, alpha=0.3, label='test')
        plt.plot(test_x_np, test_pred_ofit.cpu().numpy(), 'r-', lw=3, label='overfitting')
        plt.plot(test_x_np, test_pred_drop.cpu().numpy(), 'b--', lw=3, label='dropout(50%)')
        plt.text(0, -1.2, 'overfit loss=%.4f' % test_loss_ofit, fontdict={'size': 20, 'color': 'red'})
        plt.text(0, -1.5, 'dropout loss=%.4f' % test_loss_drop, fontdict={'size': 20, 'color': 'blue'})
        plt.legend(loc='upper left')
        plt.ylim((-2.5, 2.5))
        plt.pause(0.1)

        # change back to train mode so dropout is active again
        net_overfitting.train()
        net_dropped.train()

#plt.savefig("2.png")
plt.ioff()
plt.show()
模型如下
Sequential(
(0): Linear(in_features=1, out_features=300, bias=True)
(1): ReLU()
(2): Linear(in_features=300, out_features=300, bias=True)
(3): ReLU()
(4): Linear(in_features=300, out_features=1, bias=True)
)
Sequential(
(0): Linear(in_features=1, out_features=300, bias=True)
(1): Dropout(p=0.5)
(2): ReLU()
(3): Linear(in_features=300, out_features=300, bias=True)
(4): Dropout(p=0.5)
(5): ReLU()
(6): Linear(in_features=300, out_features=1, bias=True)
)
训练集和测试集如下
结果如下
可以看到,dropout 后(蓝色的),虽然在 train data 上表现一般,但是在 test 上结果还行
反观不用 dropout(红色的),在 train data 上表现非常好,但是在 test 上表现就差一些
16 Batch Normalization(略)
可视化的图没有调出来,而且感觉很冗余,日后补个 CNN 的