[PyTorch in Practice] 1. A Two-Layer Neural Network Example

PyTorch in Practice series

[PyTorch in Practice] 1. A Two-Layer Neural Network Example
[PyTorch in Practice] 2. Playing a Fizz_Buzz Mini-Game with PyTorch

1. Implementation in NumPy

import numpy as np

N, D_in, H, D_out = 64, 1000, 100, 10
x = np.random.randn(N, D_in)
y = np.random.randn(N, D_out)

w1 = np.random.randn(D_in, H)
w2 = np.random.randn(H, D_out)

learning_rate = 1e-6
for it in range(500):
	h = x.dot(w1) # N H
	h_relu = np.maximum(0, h) # N H
	y_pred = h_relu.dot(w2) # N D_out
	
	loss = np.square(y_pred - y).sum()
	
	
	grad_y_pred = 2 * (y_pred - y)
	grad_w2 = h_relu.T.dot(grad_y_pred)
	grad_h_relu = grad_y_pred.dot(w2.T)
	grad_h_relu[h < 0] = 0 # ReLU gradient: where h < 0 the forward output was clipped to a constant 0, so no gradient flows back through those entries
	grad_w1 = x.T.dot(grad_h_relu)
	
	w2 -= learning_rate * grad_w2
	w1 -= learning_rate * grad_w1
	
	if loss<1:
		print(it, loss)
		break
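For reference, writing W1 and W2 for the weight matrices w1 and w2, the loop above implements the following forward pass and hand-derived gradients:

h = x W_1,\qquad h_{\mathrm{relu}} = \max(0, h),\qquad \hat{y} = h_{\mathrm{relu}} W_2,\qquad L = \sum_{i,j} (\hat{y}_{ij} - y_{ij})^2

\frac{\partial L}{\partial \hat{y}} = 2(\hat{y} - y),\qquad \frac{\partial L}{\partial W_2} = h_{\mathrm{relu}}^{\top}\,\frac{\partial L}{\partial \hat{y}},\qquad \frac{\partial L}{\partial h} = \Big(\frac{\partial L}{\partial \hat{y}}\,W_2^{\top}\Big)\odot \mathbf{1}[h > 0],\qquad \frac{\partial L}{\partial W_1} = x^{\top}\,\frac{\partial L}{\partial h}

The indicator \mathbf{1}[h > 0] is exactly the grad_h_relu[h < 0] = 0 step: where the ReLU clipped the activation to zero, no gradient flows back.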

2. A Low-Level Implementation in PyTorch

import numpy as np
import torch

N, D_in, H, D_out = 64, 1000, 100, 10
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)

w1 = torch.randn(D_in, H)
w2 = torch.randn(H, D_out)

learning_rate = 1e-6
for it in range(500):
  h = x.mm(w1) # N H
  h_relu = h.clamp(min=0) # N H
  y_pred = h_relu.mm(w2) # N D_out

  # compute loss
  loss = (y_pred - y).pow(2).sum()
  print(it, loss.item())

  # Backward pass
  grad_y_pred = 2 * (y_pred - y)
  grad_w2 = h_relu.T.mm(grad_y_pred)
  grad_h_relu = grad_y_pred.mm(w2.T)
  grad_h_relu[h < 0] = 0 # ReLU gradient: where h < 0 the forward output was clipped to a constant 0, so no gradient flows back through those entries
  grad_w1 = x.T.mm(grad_h_relu)

  w2 -= learning_rate * grad_w2
  w1 -= learning_rate * grad_w1
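The structure is identical to the NumPy version; only the array API changes. A quick reference for the tensor operations used above (a minimal sketch, not part of the original code):

import torch

a = torch.randn(2, 3)
b = torch.randn(3, 4)
c = a.mm(b)           # matrix multiplication, like a.dot(b) on NumPy arrays
r = a.clamp(min=0)    # elementwise max(0, a), like np.maximum(0, a)
s = a.pow(2).sum()    # like np.square(a).sum(); the result is a 0-dim tensor
v = s.item()          # .item() extracts the plain Python number from a 0-dim tensor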

3. Letting PyTorch Compute Gradients and Backpropagation Automatically

import numpy as np
import torch

N, D_in, H, D_out = 64, 1000, 100, 10
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)

w1 = torch.randn(D_in, H, requires_grad=True)
w2 = torch.randn(H, D_out, requires_grad=True)

learning_rate = 1e-6
for it in range(500):
  y_pred = x.mm(w1).clamp(min=0).mm(w2) 

  # compute loss
  loss = (y_pred - y).pow(2).sum()
  print(it, loss.item())

  # Backward pass 
  loss.backward() # compute the gradient of loss with respect to every tensor that has requires_grad=True

  with torch.no_grad():
    w1 -= learning_rate * w1.grad
    w2 -= learning_rate * w2.grad
    w1.grad.zero_()
    w2.grad.zero_()
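Two details in the update step matter: torch.no_grad() keeps the in-place weight updates out of the autograd graph, and the .grad buffers must be cleared by hand because backward() accumulates gradients instead of overwriting them. A minimal sketch of that accumulation behaviour (toy values, just for illustration):

import torch

w = torch.tensor([1.0], requires_grad=True)
(2 * w).sum().backward()
print(w.grad)   # tensor([2.])
(2 * w).sum().backward()
print(w.grad)   # tensor([4.]), gradients add up across backward() calls
w.grad.zero_()
print(w.grad)   # tensor([0.]), cleared and ready for the next iteration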

4. Building the Model and Loss Function with torch.nn

import numpy as np
import torch.nn as nn
import torch

N, D_in, H, D_out = 64, 1000, 100, 10
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)

# w1 = torch.randn(D_in, H,requires_grad=True)
# w2 = torch.randn(H, D_out, requires_grad=True)

model = torch.nn.Sequential(
    torch.nn.Linear(D_in, H),
    torch.nn.ReLU(),
    torch.nn.Linear(H, D_out),
)


# model = model.cuda()

loss_fn = nn.MSELoss(reduction = 'sum')


learning_rate = 1e-6
for it in range(500):
  # y_pred = x.mm(w1).clamp(min=0).mm(w2)
  y_pred = model(x) # model.forward()

  # compute loss
  loss = loss_fn(y_pred, y)
  print(it, loss.item())

  # Backward pass
  loss.backward()

  # update weight of w1, w2
  with torch.no_grad():
    # w1 -= learning_rate * w1.grad
    # w2 -= learning_rate * w2.grad
    # w1.grad.zero_()
    # w2.grad.zero_()
    for param in model.parameters(): # each param is a tensor that carries its gradient in param.grad
      param -= learning_rate * param.grad
  
  
  model.zero_grad()
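If the loss barely decreases in this version, a likely reason is that nn.Linear initializes its weights to much smaller values than the torch.randn initialization used in the earlier versions, so the gradients are far smaller and learning_rate = 1e-6 takes tiny steps. One possible tweak, shown only as a sketch (the re-initialization below is an assumption, not something the original code does), is to re-initialize the layers right after building the model, or simply to use a larger learning rate with an optimizer as in the next section:

# Sketch: re-initialize the two Linear layers with a standard normal,
# matching the torch.randn(...) initialization of the manual versions.
# (Assumed fix; alternatively keep the default init and raise the learning rate.)
torch.nn.init.normal_(model[0].weight)
torch.nn.init.normal_(model[2].weight)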

5. Adding an Optimizer

import numpy as np
import torch.nn as nn
import torch

N, D_in, H, D_out = 64, 1000, 100, 10
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)

# w1 = torch.randn(D_in, H,requires_grad=True)
# w2 = torch.randn(H, D_out, requires_grad=True)

model = torch.nn.Sequential(
    torch.nn.Linear(D_in, H),
    torch.nn.ReLU(),
    torch.nn.Linear(H, D_out),
)


# model = model.cuda()

loss_fn = nn.MSELoss(reduction = 'sum')
learning_rate = 1e-4
optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate)
# torch.optim also provides many other optimizers, e.g. SGD (see the sketch after this block)

for it in range(500):
  # y_pred = x.mm(w1).clamp(min=0).mm(w2)
  y_pred = model(x) # model.forward()

  # compute loss
  loss = loss_fn(y_pred, y)
  print(it, loss.item())

  optimizer.zero_grad()
  # Backward pass
  loss.backward()

  # update model parameters
  optimizer.step()
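As the comment above notes, swapping in another optimizer only changes the construction line; the training loop stays exactly the same. For example, plain SGD (a sketch; the learning rate usually needs retuning when switching optimizers):

optimizer = torch.optim.SGD(model.parameters(), lr=1e-6)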

6. Wrapping the Model in a Class

import numpy as np
import torch.nn as nn
import torch

N, D_in, H, D_out = 64, 1000, 100, 10
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)

class TwoLayerNet(torch.nn.Module):
  # define the model architecture
  def __init__(self, D_in, H, D_out):
    super(TwoLayerNet, self).__init__()
    self.linear1 = torch.nn.Linear(D_in, H)
    self.linear2 = torch.nn.Linear(H, D_out)

  def forward(self, x):
    temp = self.linear1(x).clamp(min=0)
    y_pred = self.linear2(temp)
    return y_pred



model = TwoLayerNet(D_in, H, D_out) # model = model.cuda()
loss_fn = nn.MSELoss(reduction = 'sum')
learning_rate = 1e-4
optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate)

for it in range(500):
  # y_pred = x.mm(w1).clamp(min=0).mm(w2)
  y_pred = model(x) # model.forward()

  # compute loss
  loss = loss_fn(y_pred, y)
  print(it, loss.item())

  optimizer.zero_grad()
  # Backward pass
  loss.backward()

  # update model parameters
  optimizer.step()
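The commented-out model.cuda() calls in the earlier sections hint at GPU training. A minimal sketch of moving the same model and data onto a GPU, assuming a CUDA device is available:

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = TwoLayerNet(D_in, H, D_out).to(device)   # equivalent to model.cuda() when device is "cuda"
x, y = x.to(device), y.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)  # rebuild the optimizer for the moved parameters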

References and Acknowledgements

[1] 褚则伟's PyTorch hands-on course at 七月在线 (JulyEdu)

Reposted from blog.csdn.net/xiangduixuexi/article/details/106676656