Steepest descent method - Python implementation

1. Basic principles

The steepest descent method starts from an initial point and repeatedly moves in the direction in which the function value changes fastest at the current point, until it reaches the optimal solution. This raises two questions: (1) in which direction should we move, and (2) how far should we go in that direction?

We know that at any point the function changes fastest along the direction of its gradient: it increases fastest along the gradient itself, and decreases fastest along the negative gradient. So we update the current position along the gradient for a maximization problem, or along the negative gradient for a minimization problem. As for how far to go: at each iteration we choose the step size that minimizes the function along the chosen direction (an exact line search), and we repeat the process until the iterates converge; the converged point is the optimal solution we are looking for.
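
In symbols, for a minimization problem the iteration described above is

x^{(k+1)} = x^{(k)} - t_k \nabla f(x^{(k)}), \quad t_k = \arg\min_{t \ge 0} f(x^{(k)} - t \nabla f(x^{(k)}))

and we stop once the gradient is numerically zero, i.e. once its norm falls below a small tolerance \epsilon.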

2. python code implementation

Let's use the steepest descent method to find the minimum of the function

f(x_1, x_2) = x_1^2 + 2x_2^2 - 2x_1 x_2 - 2x_2, \quad (x_1, x_2) \in \mathbb{R}^2

starting from the initial point (0, 0).
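
Both implementations below need the gradient, which is easy to write down by hand as a check on the code:

\nabla f(x_1, x_2) = (2x_1 - 2x_2, \; 4x_2 - 2x_1 - 2)

At (0, 0) the gradient is (0, -2), so the first exact line search minimizes f(0, 2t) = 8t^2 - 4t, giving t = 1/4 and the next iterate (0, 1/2).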

Two implementations are given below. The first iterates directly with SymPy:

import math
from sympy import *

x1 = symbols('x1')
x2 = symbols('x2')
fun = x1**2 + 2*x2**2 - 2*x1*x2 - 2*x2
grad1 = diff(fun, x1)  # partial derivative with respect to x1
grad2 = diff(fun, x2)  # partial derivative with respect to x2

MaxIter = 100          # maximum number of iterations
epsilon = 0.0001       # convergence tolerance on the gradient
iter_cnt = 0
current_step_size = 100
x1_value = 0.0
x2_value = 0.0
grad1_value = float(grad1.subs({x1: x1_value, x2: x2_value}).evalf())
grad2_value = float(grad2.subs({x1: x1_value, x2: x2_value}).evalf())
current_obj = fun.subs({x1: x1_value, x2: x2_value}).evalf()
print('iterCnt:%2d  cur_point(%3.2f, %3.2f)  cur_obj:%5.4f  grad1:%5.4f  grad2:%5.4f'
      % (iter_cnt, x1_value, x2_value, current_obj, grad1_value, grad2_value))

while abs(grad1_value) + abs(grad2_value) >= epsilon and iter_cnt < MaxIter:
    iter_cnt += 1
    t = symbols('t')
    # move along the negative gradient with a symbolic step size t
    x1_updated = x1_value - grad1_value * t
    x2_updated = x2_value - grad2_value * t
    Fun_updated = fun.subs({x1: x1_updated, x2: x2_updated})
    grad_t = diff(Fun_updated, t)
    t_value = solve(grad_t, t)[0]  # exact line search: solve d(Fun_updated)/dt == 0

    # update the current point with the optimal step size
    x1_value = float(x1_value - t_value * grad1_value)
    x2_value = float(x2_value - t_value * grad2_value)

    # re-evaluate the gradient at the new point; it drives both the
    # stopping test and the next search direction
    grad1_value = float(grad1.subs({x1: x1_value, x2: x2_value}).evalf())
    grad2_value = float(grad2.subs({x1: x1_value, x2: x2_value}).evalf())

    current_obj = fun.subs({x1: x1_value, x2: x2_value}).evalf()
    current_step_size = t_value

    print('iterCnt:%2d  cur_point(%3.2f, %3.2f)  cur_obj:%5.4f  grad1:%5.4f  grad2:%5.4f'
          % (iter_cnt, x1_value, x2_value, current_obj, grad1_value, grad2_value))
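
Because f is a quadratic, the exact line search also has a closed form: writing f(x) = (1/2) x^T Q x - b^T x with Q = [[2, -2], [-2, 4]] and b = (0, 2), the optimal step along the negative gradient g is t = (g^T g) / (g^T Q g). As a minimal NumPy-only sketch of the same iteration (the names Q, b, and steepest_descent are my own for illustration, not from the original post):

import numpy as np

# f(x) = 0.5 * x^T Q x - b^T x  for this example's quadratic
Q = np.array([[2.0, -2.0], [-2.0, 4.0]])
b = np.array([0.0, 2.0])

def steepest_descent(x, eps=1e-4, max_iter=100):
    for k in range(max_iter):
        g = Q @ x - b                   # gradient of the quadratic
        if np.linalg.norm(g, 1) < eps:  # same |g1| + |g2| stopping test as above
            break
        t = (g @ g) / (g @ Q @ g)       # closed-form exact line search
        x = x - t * g                   # move along the negative gradient
    return x, k

x_opt, iters = steepest_descent(np.array([0.0, 0.0]))
print(x_opt, iters)  # approaches (1, 1)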
The second implementation wraps the same steps in functions and also plots the iteration path with matplotlib:

import numpy as np
from sympy import *
import math
import matplotlib.pyplot as plt
import mpl_toolkits.axisartist as axisartist

x1, x2, t = symbols('x1, x2, t')

def func():
    return pow(x1, 2) + 2 * pow(x2, 2) - 2 * x1 * x2 - 2 * x2

def grad(data):
    f = func()
    grad_vec = [diff(f, x1), diff(f, x2)]  # partial derivatives: the gradient vector
    grad = []
    for item in grad_vec:
        grad.append(item.subs(x1, data[0]).subs(x2, data[1]))
    return grad

def grad_len(grad):
    vec_len = math.sqrt(pow(grad[0], 2) + pow(grad[1], 2))
    return vec_len

def zhudian(f):
    # 'zhudian' means stationary point: solve df/dt == 0 for the step size
    t_diff = diff(f)
    t_min = solve(t_diff)
    return t_min[0]

def main(X0, theta):
    f = func()
    grad_vec = grad(X0)
    grad_length = grad_len(grad_vec)  # norm of the gradient vector
    k = 0
    data_x = [X0[0]]
    data_y = [X0[1]]
    while grad_length > theta:  # termination condition for the iteration
        k += 1
        p = -np.array(grad_vec)  # search direction: the negative gradient
        # symbolic point after a step of length t along p
        X = np.array(X0) + t * p
        t_func = f.subs(x1, X[0]).subs(x2, X[1])
        t_min = zhudian(t_func)  # exact line search for the step size
        X0 = np.array(X0) + t_min * p
        grad_vec = grad(X0)
        grad_length = grad_len(grad_vec)
        print('grad_length', grad_length)
        print('point', float(X0[0]), float(X0[1]))
        data_x.append(X0[0])
        data_y.append(X0[1])

    print(k)

    # plot the iteration path
    fig = plt.figure()
    ax = axisartist.Subplot(fig, 111)
    fig.add_axes(ax)
    ax.axis["bottom"].set_axisline_style("-|>", size=1.5)
    ax.axis["left"].set_axisline_style("->", size=1.5)
    ax.axis["top"].set_visible(False)
    ax.axis["right"].set_visible(False)
    plt.title(r'$Gradient \ method - steepest \ descent \ method$')
    plt.plot(data_x, data_y,color='r',label=r'$f(x_1,x_2)=x_1^2+2 \cdot x_2^2-2 \cdot x_1 \cdot x_2-2 \cdot x_2$')
    plt.legend()
    plt.scatter(1, 1, marker=(3, 1), c='b', s=100)  # mark the minimum at (1, 1)
    plt.grid()
    plt.xlabel(r'$x_1$', fontsize=20)
    plt.ylabel(r'$x_2$', fontsize=20)
    plt.show()

if __name__ == '__main__':
    # initial iteration point and threshold
    main([0, 0], 0.00001)

Both implementations converge to the same answer: the function attains its minimum value of -1 at the point (1, 1).
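
We can confirm this analytically by setting both partial derivatives to zero; here is a quick SymPy check (a small verification sketch, not part of the original post):

from sympy import symbols, diff, solve

x1, x2 = symbols('x1 x2')
fun = x1**2 + 2*x2**2 - 2*x1*x2 - 2*x2

# stationary point: solve grad f == 0
stationary = solve([diff(fun, x1), diff(fun, x2)], [x1, x2])
print(stationary)            # {x1: 1, x2: 1}
print(fun.subs(stationary))  # -1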

