Hands-on learning 1: linear regression

1 Loss function and optimization function

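In symbols, the model, the squared loss on a single sample, and the optimization objective used by the code in section 3 can be written as

\[
\hat{y}^{(i)} = \mathbf{x}^{(i)}\mathbf{w} + b, \qquad
\ell^{(i)}(\mathbf{w}, b) = \tfrac{1}{2}\bigl(\hat{y}^{(i)} - y^{(i)}\bigr)^{2}, \qquad
L(\mathbf{w}, b) = \frac{1}{n}\sum_{i=1}^{n}\ell^{(i)}(\mathbf{w}, b),
\]

and training searches for the parameters that minimize the loss, \(\mathbf{w}^{*}, b^{*} = \operatorname{arg\,min}_{\mathbf{w},\,b} L(\mathbf{w}, b)\).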

2 Gradient update methods

Batch gradient descent computes the gradient over all samples and performs a single parameter update with the averaged gradient.
Stochastic gradient descent computes the gradient of a single sample point and updates the parameters after each sample.
Mini-batch gradient descent splits the data into small batches, computes the average gradient over each mini-batch, and updates the parameters once per batch.
The basic update step for each variant is written out below.
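A compact way to write the three update rules (here θ denotes the parameters, η the learning rate, ℓ(i) the loss on sample i, and B a mini-batch) is

\[
\begin{aligned}
\text{batch GD:} \quad & \theta \leftarrow \theta - \frac{\eta}{n}\sum_{i=1}^{n}\nabla_\theta\,\ell^{(i)}(\theta) \\
\text{stochastic GD:} \quad & \theta \leftarrow \theta - \eta\,\nabla_\theta\,\ell^{(i)}(\theta) \\
\text{mini-batch GD:} \quad & \theta \leftarrow \theta - \frac{\eta}{|\mathcal{B}|}\sum_{i\in\mathcal{B}}\nabla_\theta\,\ell^{(i)}(\theta)
\end{aligned}
\]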

3 Implementing linear regression from scratch with PyTorch

# import packages and modules
%matplotlib inline
import torch
from IPython import display
from matplotlib import pyplot as plt
import numpy as np
import random

print(torch.__version__)

# Generate the dataset, assuming a two-dimensional linear model
# set input feature number: 2 features
num_inputs = 2
# set example number: 1000 sample points
num_examples = 1000

# set true weight and bias in order to generate corresponded label
true_w = [2, -3.4]
true_b = 4.2

# torch.randn(n1, n2) generates an n1*n2 tensor of standard-normal random numbers
features = torch.randn(num_examples, num_inputs, dtype=torch.float32)
print(features.shape)
# labels that are strictly linear in the features
labels = true_w[0] * features[:, 0] + true_w[1] * features[:, 1] + true_b

# add random noise on top of the strictly linear labels to better match real data
# torch.tensor(data, dtype=None, device=None, requires_grad=False)
# where data can be a list, tuple, array, scalar, etc.
# torch.tensor() copies the data (rather than referencing it) and produces a torch.LongTensor, torch.FloatTensor, or torch.DoubleTensor depending on the original data type
# np.random.normal(loc, scale, size): loc is the mean, scale is the standard deviation, size is the shape of the generated samples
labels += torch.tensor(np.random.normal(0, 0.01, size=labels.size()), dtype=torch.float32)

# Read the dataset, splitting it into mini-batches of size batch_size
def data_iter(batch_size, features, labels):
    num_examples = len(features)
    indices = list(range(num_examples))
    random.shuffle(indices)                      # shuffle the sample indices so batches are read in random order; random.shuffle() reorders the sequence in place
    for i in range(0, num_examples, batch_size):
        
        # select batch_size indices
        j = torch.LongTensor(indices[i: min(i + batch_size, num_examples)]) # the last batch may not be full
        
        # tensor.index_select(0, index): dim 0 selects rows, dim 1 selects columns; index gives the positions to select
        yield  features.index_select(0, j), labels.index_select(0, j)     # select the rows indexed by j to form a mini-batch of samples
        
        # yield turns data_iter into a generator, which can be consumed with next(f), f.send(), etc.
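As a quick sanity check of data_iter (the batch size 10 below is only an illustrative value), we can read one mini-batch and inspect its shape:

# read a single mini-batch and look at its shape
for X, y in data_iter(10, features, labels):
    print(X.shape, y.shape)    # expected: torch.Size([10, 2]) torch.Size([10])
    break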

# Initialize model parameters
# np.random.normal(loc, scale, size): loc is the mean, scale is the standard deviation, size is the shape of the generated samples
w = torch.tensor(np.random.normal(0, 0.01, (num_inputs, 1)), dtype=torch.float32)     # 2*1
b = torch.zeros(1, dtype=torch.float32)    # 1

w.requires_grad_(requires_grad=True)   # enable autograd for w and b
b.requires_grad_(requires_grad=True)

# Define the model
def linreg(X, w, b):
    # torch.mm() is matrix multiplication: if X is n*2 and w is a 2*1 vector, torch.mm(X, w) returns an n*1 tensor
    return torch.mm(X, w) + b
    
# Define the loss function
def squared_loss(y_hat, y): 
    # y.view(size) returns a new tensor with the same data as y but a different shape; here it makes y's shape match y_hat's shape
    # suppose y has n elements in total
    # y.view(-1) reshapes y to shape (n,)
    # y.view(-1, m) reshapes y to shape (n/m, m)
    # i.e. -1 means the size of that dimension is inferred automatically from n
    return (y_hat - y.view(y_hat.size())) ** 2 / 2
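For instance, the reshaping behaviour described in the comments above can be checked directly with a small throwaway tensor:

# view() demo: a 1-D tensor with 6 elements
y_demo = torch.arange(6)
print(y_demo.view(-1).shape)      # torch.Size([6])
print(y_demo.view(-1, 2).shape)   # torch.Size([3, 2])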

# Define the optimization function
# using the mini-batch stochastic gradient descent algorithm
# lr: learning rate; param.grad: gradient of the loss with respect to the parameter
def sgd(params, lr, batch_size): 
    for param in params:
        param.data -= lr * param.grad / batch_size # use .data to update param without tracking the operation in autograd
#         print('param type:', type(param))  # each param is a torch.Tensor

# Train the model
# hyperparameter initialization
lr = 0.03
num_epochs = 5   # number of training epochs
batch_size = 10  # mini-batch size

net = linreg          # single-layer linear regression network
loss = squared_loss   

# training: 5 passes over the dataset in total
for epoch in range(num_epochs):  # training repeats num_epochs times
    # in each epoch, all the samples in dataset will be used once
    
    # X is the feature and y is the label of a batch sample
    for X, y in data_iter(batch_size, features, labels):
        l = loss(net(X, w, b), y).sum()                  # w and b were given initial values above before the first update
        # calculate the gradient of batch sample loss 
        l.backward()                                     # differentiate the loss with respect to w and b
        # use mini-batch stochastic gradient descent to update (optimize) the model parameters
        sgd([w, b], lr, batch_size)  
        # reset the parameter gradients to zero: PyTorch accumulates gradients across backward() calls, and each update should not reuse values from previous iterations
        w.grad.data.zero_()
        b.grad.data.zero_()
        
    # the loop above performs one parameter update per mini-batch, i.e. ceil(num_examples / batch_size) updates of (w, b) per epoch
    
    # after one full pass over the samples, compute the new loss with the updated parameters (w, b)
    train_l = loss(net(features, w, b), labels)     
    print('epoch %d, loss %f' % (epoch + 1, train_l.mean().item()))

# Inspect the model parameters after training
w, true_w, b, true_b
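If training converged, the learned w and b printed here should be close to the true values used to generate the data, true_w = [2, -3.4] and true_b = 4.2; the exact numbers vary slightly with the random noise and the random initialization.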