1. Basic principles
The steepest descent method starts from an initial point and repeatedly moves from the current point in the direction in which the function value changes fastest, until it reaches the optimal solution. This raises two questions: (1) in which direction should we move, and (2) how far should we move?
At any point, a function changes fastest along the direction with the largest directional derivative, which is the direction parallel to the gradient at that point. We therefore update the current position along the gradient direction (for a max problem) or along the opposite direction (for a min problem). How far should we go? At each step we move along this direction to the point where the function is optimal along that line, and we keep iterating in this way until the iterates converge; the converged point is the optimal solution we are looking for.
2. Python code implementation
Let's use the steepest descent method to find the minimum value of the function $f(x_1, x_2) = x_1^2 + 2x_2^2 - 2x_1x_2 - 2x_2$, where the initial point is (0, 0).
Two implementations are given below:
import math
from sympy import *
# Steepest descent with exact line search on
#     f(x1, x2) = x1**2 + 2*x2**2 - 2*x1*x2 - 2*x2
# starting from the point (0, 0).
x1 = symbols('x1')
x2 = symbols('x2')
fun = x1**2 + 2*x2**2 - 2*x1*x2 - 2*x2
# Symbolic partial derivatives; they are evaluated numerically at each iterate.
grad1 = diff(fun, x1)
grad2 = diff(fun, x2)
MaxIter = 100     # upper bound on the number of iterations
epsilon = 0.0001  # stop once |grad1| + |grad2| falls below this value
iter_cnt = 0
current_step_size = 100
x1_value = 0
x2_value = 0
grad1_value = float(grad1.subs({x1: x1_value, x2: x2_value}).evalf())
grad2_value = float(grad2.subs({x1: x1_value, x2: x2_value}).evalf())
current_obj = fun.subs({x1: x1_value, x2: x2_value}).evalf()
print('iterCnt:%2d cur_point(%3.2f,%3.2f) cur_obj:%5.4f grad1:%5.4f grad2:%5.4f '
      % (iter_cnt, x1_value, x2_value, current_obj, grad1_value, grad2_value))
# The step-length symbol does not change between iterations, so create it once.
t = symbols('t')
while abs(grad1_value) + abs(grad2_value) >= epsilon and iter_cnt < MaxIter:
    iter_cnt += 1
    # Exact line search: express the objective along the negative gradient
    # as a function of the step length t and find where its derivative is 0.
    x1_updated = x1_value - grad1_value * t
    x2_updated = x2_value - grad2_value * t
    Fun_updated = fun.subs({x1: x1_updated, x2: x2_updated})
    grad_t = diff(Fun_updated, t)
    t_value = solve(grad_t, t)[0]  # solve grad_t == 0
    # Step using the gradient of the point we are leaving ...
    x1_value = float(x1_value - t_value * grad1_value)
    x2_value = float(x2_value - t_value * grad2_value)
    # ... and only then re-evaluate the gradient at the NEW point, so that the
    # termination test, the printed values and the next search direction all
    # refer to the current iterate rather than the previous one.
    grad1_value = float(grad1.subs({x1: x1_value, x2: x2_value}).evalf())
    grad2_value = float(grad2.subs({x1: x1_value, x2: x2_value}).evalf())
    current_obj = fun.subs({x1: x1_value, x2: x2_value}).evalf()
    current_step_size = t_value
    print('iterCnt:%2d cur_point(%3.2f, %3.2f) cur_obj:%5.4f grad_1:%5.4f grad_2 :%5.4f'
          % (iter_cnt, x1_value, x2_value, current_obj, grad1_value, grad2_value))
import numpy as np
from sympy import *
import math
import matplotlib.pyplot as plt
import mpl_toolkits.axisartist as axisartist
# Symbols shared by the functions below: x1 and x2 are the variables of the
# objective function, t is the step length used in the line search.
x1, x2, t = symbols('x1, x2, t')
def func():
    """Return the symbolic objective x1**2 + 2*x2**2 - 2*x1*x2 - 2*x2.

    The expression is built from the module-level symbols ``x1`` and ``x2``.
    """
    square_terms = x1 ** 2 + 2 * x2 ** 2
    cross_term = 2 * x1 * x2
    linear_term = 2 * x2
    return square_terms - cross_term - linear_term
def grad(data):
    """Evaluate the gradient of ``func()`` at the point ``data``.

    Args:
        data: indexable point; ``data[0]`` is substituted for ``x1`` and
            ``data[1]`` for ``x2``.

    Returns:
        A two-element list ``[df/dx1, df/dx2]`` with the point substituted in.
    """
    objective = func()
    # Partial derivatives of the objective, i.e. the symbolic gradient vector.
    partials = (diff(objective, x1), diff(objective, x2))
    return [
        partial.subs(x1, data[0]).subs(x2, data[1])
        for partial in partials
    ]
def grad_len(grad):
    """Return the Euclidean length of a two-component gradient vector.

    Only ``grad[0]`` and ``grad[1]`` are read.
    """
    first, second = grad[0], grad[1]
    return math.sqrt(first ** 2 + second ** 2)
def zhudian(f):
    """Return the stationary points of ``f``.

    The name is presumably pinyin for "stationary point". The derivative of
    ``f`` is taken with ``diff`` and the result of ``solve`` on that
    derivative is returned unchanged.
    """
    return solve(diff(f))
def main(X0, theta):
    """Run steepest descent with exact line search from ``X0`` and plot the path.

    Args:
        X0: starting point, a two-element sequence ``[x1, x2]``.
        theta: threshold on the gradient length; iteration continues while
            the gradient length is greater than ``theta``.

    After every step the gradient length and the current coordinates are
    printed; after the loop the number of iterations is printed and the
    visited points are drawn in a matplotlib figure. Returns ``None``.
    """
    f = func()
    grad_vec = grad(X0)
    grad_length = grad_len(grad_vec)  # length (norm) of the gradient vector
    k = 0
    # Start the recorded path at the actual initial point (not a hard-coded
    # origin) so the plot is correct for any X0. Values are stored as floats
    # so matplotlib receives plain numbers.
    data_x = [float(X0[0])]
    data_y = [float(X0[1])]
    while grad_length > theta:  # termination condition of the iteration
        k += 1
        p = -np.array(grad_vec)  # search direction: negative gradient
        # Line search: write the objective along X0 + t*p as a function of t
        # and take its stationary point as the step length.
        X = np.array(X0) + t*p
        t_func = f.subs(x1, X[0]).subs(x2, X[1])
        # zhudian returns the result of solve(); use its single root
        # explicitly instead of relying on list-to-array broadcasting.
        t_min = zhudian(t_func)[0]
        X0 = np.array(X0) + t_min*p
        grad_vec = grad(X0)
        grad_length = grad_len(grad_vec)
        print('grad_length', grad_length)
        print('坐标', float(X0[0]), float(X0[1]))
        data_x.append(float(X0[0]))
        data_y.append(float(X0[1]))

    print(k)

    # Plotting
    fig = plt.figure()
    ax = axisartist.Subplot(fig, 111)
    fig.add_axes(ax)
    # Arrow heads on the bottom and left axes; hide the top and right ones.
    ax.axis["bottom"].set_axisline_style("-|>", size=1.5)
    ax.axis["left"].set_axisline_style("->", size=1.5)
    ax.axis["top"].set_visible(False)
    ax.axis["right"].set_visible(False)
    plt.title(r'$Gradient \ method - steepest \ descent \ method$')
    plt.plot(data_x, data_y, color='r',
             label=r'$f(x_1,x_2)=x_1^2+2 \cdot x_2^2-2 \cdot x_1 \cdot x_2-2 \cdot x_2$')
    plt.legend()
    # Mark the point (1, 1) on the plot.
    plt.scatter(1, 1, marker=(3, 1), c=2, s=100)
    plt.grid()
    plt.xlabel(r'$x_1$', fontsize=20)
    plt.ylabel(r'$x_2$', fontsize=20)
    plt.show()
if __name__ == '__main__':
    # Run from the initial iteration point (0, 0) with threshold 1e-5.
    main(X0=[0, 0], theta=1e-5)
The result is that the function attains its minimum value of -1 at the point (1, 1).
Reference blog:
Python implements the steepest descent method (The steepest descent method) detailed case