Detailed explanation of the least squares method

The principle of the least squares method and the problem it solves

 
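A minimal restatement of the criterion, in notation introduced here: given observations $(x_i, y_i)$, $i = 1, \dots, N$, and a model $h(x; w)$ with parameter vector $w$ (the roles played by fit_func and p in the code below), least squares chooses the parameters that minimize the sum of squared residuals:

$$\min_{w} \; Q(w) = \sum_{i=1}^{N} \bigl(h(x_i; w) - y_i\bigr)^2$$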

Matrix solution of the least squares method

 
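A brief sketch of the closed-form solution for models that are linear in the parameters (polynomials included): stack the samples into a design matrix $X$, one row per sample, so that the predictions are $X\beta$. Setting the gradient of $Q$ to zero yields the normal equations and hence the estimator

$$X^{\top}X\,\beta = X^{\top}y \quad\Rightarrow\quad \hat{\beta} = (X^{\top}X)^{-1}X^{\top}y$$

provided $X^{\top}X$ is invertible; otherwise the pseudo-inverse gives the minimum-norm solution.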

Geometric Interpretation of Least Squares

 
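In the same notation, $X\hat{\beta}$ is the orthogonal projection of $y$ onto the column space of $X$: the residual vector is orthogonal to every column of $X$, which is the normal equations read geometrically,

$$X^{\top}\bigl(y - X\hat{\beta}\bigr) = 0$$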

Limitations and Applicable Scenarios of Least Squares 

Python implementation of the least squares method

 

import numpy as np
import scipy as sp
from scipy.optimize import leastsq
import matplotlib.pyplot as plt
%matplotlib inline

 

# objective function
def real_func(x):
    return np.sin(2 * np.pi * x)

# polynomial
# ps: np.poly1d([1, 2, 3]) generates $1x^2 + 2x^1 + 3x^0$
def fit_func(p, x):
    f = np.poly1d(p)
    return f(x)

# residuals
def residuals_func(p, x, y):
    ret = fit_func(p, x) - y
    return ret

 

# ten points
x = np.linspace(0, 1, 10)
x_points = np.linspace(0, 1, 1000)
# add normally distributed noise to the values of the objective function
y_ = real_func(x)
y = [np.random.normal(0, 0.1) + y1 for y1 in y_]
def fitting(M=0):
    """
    M is the degree of the polynomial
    """
    # Randomly initialize polynomial parameters
    p_init = np.random.rand(M+1)
    # least squares
    p_lsq = leastsq(residuals_func, p_init, args=(x, y))
    print('Fitting Parameters:', p_lsq[0])
    # visualize
    plt.plot(x_points, real_func(x_points), label='real')
    plt.plot(x_points, fit_func(p_lsq[0], x_points), label='fitted curve')
    plt.plot(x, y, 'bo', label='noise')
    plt.legend()
    return p_lsq

 

# M=0
p_lsq_0 = fitting(M=0)
Fitting Parameters: [0.01914362]

 

# M=1
p_lsq_1 = fitting(M=1)
Fitting Parameters: [-1.44035975  0.73932349]

 

# M=3
p_lsq_3 = fitting(M=3)
Fitting Parameters: [ 23.32730356 -34.84982011  11.69490865  -0.04614352]

 

# M=9
p_lsq_9 = fitting(M=9)
Fitting Parameters: [-7.72885226e+03  3.20354672e+04 -5.42647096e+04  4.81881349e+04
 -2.38777532e+04 6.47385739e+03 -8.52906000e+02 1.74436725e+01
  9.47089325e+00 1.35011754e-02]

 

 
When M=9, the polynomial curve passes through every data point, but it overfits the noise.
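To make the overfitting concrete, here is a small check (a sketch using only the variables defined above, assuming the M=9 cell has been run): compare the fit's error on the 10 noisy training points with its error against the true curve on the dense grid.

# mean squared error on the 10 noisy training points (what leastsq minimized)
train_mse = np.mean((fit_func(p_lsq_9[0], x) - y) ** 2)
# mean squared error against the true sin curve on the dense grid
true_mse = np.mean((fit_func(p_lsq_9[0], x_points) - real_func(x_points)) ** 2)
print(train_mse, true_mse)  # train_mse is typically near zero, true_mse much larger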

Regularization

The results show overfitting; introduce a regularization term to reduce it.

$$Q(w) = \sum_{i=1}^{N} \bigl(h(x_i; w) - y_i\bigr)^2 + \lambda \lVert w \rVert^2$$

In the regression problem, the loss function is the squared loss, and the regularization can be the L2 norm of the parameter vector, or the L1 norm.

L1: regularization * abs(p)
L2: 0.5 * regularization * np.square(p)

 

regularization = 0.0001
def residuals_func_regularization(p, x, y):
    ret = fit_func(p, x) - y
    # leastsq squares each residual element, so appending sqrt(0.5*regularization*p^2)
    # adds the L2 penalty 0.5 * regularization * np.square(p) to the total loss
    ret = np.append(ret, np.sqrt(0.5 * regularization * np.square(p)))  # L2 norm as regularization term
    return ret

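For completeness, the L1 penalty mentioned above can be wired in the same way; a sketch (residuals_func_l1 is a name introduced here and is not used in the rest of this post):

def residuals_func_l1(p, x, y):
    ret = fit_func(p, x) - y
    # leastsq squares each appended element, so this contributes regularization * abs(p)
    ret = np.append(ret, np.sqrt(regularization * np.abs(p)))
    return ret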
 

# Least squares, with the regularization term added
p_init = np.random.rand(9+1)
p_lsq_regularization = leastsq(residuals_func_regularization, p_init, args=(x, y))

 

plt.plot(x_points, real_func(x_points), label='real')
plt.plot(x_points, fit_func(p_lsq_9[0], x_points), label='fitted curve')
plt.plot(x_points, fit_func(p_lsq_regularization[0], x_points), label='regularization')
plt.plot(x, y, 'bo', label='noise')
plt.legend()

 
