
Finding Extrema of Multivariate Functions: Steepest Descent, Newton's Method, Conjugate Gradient, and Lagrange Multipliers (Python Code)


Numerical optimization methods: among the classic unconstrained optimization methods are the steepest descent method, Newton's method, and the conjugate gradient method; for extrema of multivariate functions under constraints, the standard tool is the Lagrange multiplier method. These methods are explained in detail in many blog posts and papers, so the theory is not repeated here. This article mainly draws on the following posts:

- Steepest descent, Newton's method, and the conjugate gradient method
- Conditional extrema of multivariate functions: the Lagrange multiplier method
- Steepest descent and Newton's method: a Matlab implementation
- Unconstrained optimization: the steepest descent method

Building on those references, this article implements the classic unconstrained optimization methods in Python, as an exercise in programming and algorithm skills. Corrections and criticism are welcome!

1. Steepest descent method:

The steepest descent method is a specific realization of gradient descent: at each iteration it computes the step size by an exact line search, so the step length changes dynamically and the objective value decreases as much as possible along the current direction. The method is cheap per iteration and convergent, but convergence is slow; in particular, as the iterates approach the optimum, the distance covered per iteration becomes shorter and shorter.
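For intuition, consider a quadratic objective f(x) = 0.5 x^T A x - b^T x with A symmetric positive definite: the exact line-search step along the gradient g = Ax - b has the closed form t = (g^T g) / (g^T A g). The sympy script below obtains the same step by symbolically solving d f(x + t g)/dt = 0. Here is a minimal numpy sketch of the closed form (my illustration, not part of the original script; A and b are labels I chose so that 0.5 x^T A x - b^T x matches the script's objective fun = x_1**2 + 2*x_2**2 - 2*x_1*x_2 - 2*x_2):

import numpy as np

# quadratic model of the sample objective: f(x) = 0.5 x^T A x - b^T x
A = np.array([[2.0, -2.0], [-2.0, 4.0]])
b = np.array([0.0, 2.0])

x = np.array([-2.0, 2.0])          # same starting point as the sympy code below
for k in range(200):
    g = A @ x - b                  # gradient of the quadratic
    if np.abs(g).sum() < 1e-4:     # same stopping rule as the script
        break
    t = (g @ g) / (g @ A @ g)      # exact line-search step along -g
    x = x - t * g
print(k, x)                        # converges to the minimizer [1., 1.]

With exact steps, successive search directions are orthogonal, which is exactly what produces the zigzag path mentioned above.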

import matplotlib.pyplot as plt
from sympy import *
import numpy as np

'''=================== The steepest descent method ==================='''
# symbolic variables
x_1 = symbols('x_1')
x_2 = symbols('x_2')

# objective function
# fun = 2 * x_1 * x_2 + 2 * x_2 - x_1 ** 2 - 2 * x_2 ** 2
fun = x_1 ** 2 + 2 * x_2 ** 2 - 2 * x_1 * x_2 - 2 * x_2
# fun = x_1 ** 2 + x_2 ** 2

# 3-D surface plot of the objective
figure = plt.figure()
axes = plt.axes(projection='3d')
X = np.arange(-10, 10, 0.25)  # ranges of the two variables
Y = np.arange(-10, 10, 0.25)
X, Y = np.meshgrid(X, Y)
# Z = 2 * X * Y + 2 * Y - X ** 2 - 2 * Y ** 2
Z = X ** 2 + 2 * Y ** 2 - 2 * X * Y - 2 * Y  # Z as a function of X and Y
# Z = X ** 2 + Y ** 2
axes.plot_surface(X, Y, Z, cmap='rainbow')
axes.set_xlabel('X')
axes.set_ylabel('Y')
axes.set_zlabel('Z')
axes.set_title('Gradient Descent Method')

# partial derivatives with respect to x_1 and x_2
grad_1 = diff(fun, x_1)
grad_2 = diff(fun, x_2)

# parameters
MaxIter = 200
epsilon = 0.0001

# initial point
x_1_value = -2
x_2_value = 2
iter_cnt = 0
current_step_size = 100
grad_1_value = float(grad_1.subs({x_1: x_1_value, x_2: x_2_value}).evalf())
grad_2_value = float(grad_2.subs({x_1: x_1_value, x_2: x_2_value}).evalf())
current_obj = fun.subs({x_1: x_1_value, x_2: x_2_value}).evalf()
print('itCnt: %2d cur_point (%3.2f, %3.2f) cur_Obj: %5.4f grad_1: %5.4f grad_2: %5.4f step_size: %5.4f'
      % (iter_cnt, x_1_value, x_2_value, current_obj, grad_1_value, grad_2_value, current_step_size))

while iter_cnt <= MaxIter and abs(grad_1_value) + abs(grad_2_value) >= epsilon:
    iter_cnt += 1
    # exact line search: minimize fun(x + t * grad) over t
    t = symbols('t')
    x_1_updated = x_1_value + grad_1_value * t
    x_2_updated = x_2_value + grad_2_value * t
    Fun_updated = fun.subs({x_1: x_1_updated, x_2: x_2_updated})
    grad_t = diff(Fun_updated, t)
    t_value = solve(grad_t, t)[0]  # solve grad_t == 0 (t comes out negative, i.e. downhill)
    # move to the new point
    x_1_value = float(x_1_value + t_value * grad_1_value)
    x_2_value = float(x_2_value + t_value * grad_2_value)
    # re-evaluate gradient and objective at the new point
    grad_1_value = float(grad_1.subs({x_1: x_1_value, x_2: x_2_value}).evalf())
    grad_2_value = float(grad_2.subs({x_1: x_1_value, x_2: x_2_value}).evalf())
    current_obj = fun.subs({x_1: x_1_value, x_2: x_2_value}).evalf()
    current_step_size = t_value
    # plot the new iterate
    axes.scatter(x_1_value, x_2_value, current_obj, c='r', marker='*', linewidths=3)
    plt.pause(0.5)
    print('itCnt: %2d cur_point (%3.2f, %3.2f) cur_Obj: %5.4f grad_1: %5.4f grad_2: %5.4f step_size: %5.4f'
          % (iter_cnt, x_1_value, x_2_value, current_obj, grad_1_value, grad_2_value, current_step_size))

plt.show()
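For this quadratic the iterates converge to the minimizer (1, 1) with objective value -1 (set both partial derivatives, 2*x_1 - 2*x_2 and 4*x_2 - 2*x_1 - 2, to zero). As a quick sanity check one can compare against an off-the-shelf optimizer; this is a hedged cross-check assuming scipy is installed, not part of the original script:

import numpy as np
from scipy.optimize import minimize

f = lambda v: v[0] ** 2 + 2 * v[1] ** 2 - 2 * v[0] * v[1] - 2 * v[1]
res = minimize(f, x0=np.array([-2.0, 2.0]), method='BFGS')
print(res.x, res.fun)  # approximately [1. 1.] and -1.0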
2. Newton's method:

Newton's method uses second-order information: each iteration updates x_{k+1} = x_k - H(x_k)^(-1) * grad f(x_k), where H is the Hessian. The objective below is the Rosenbrock function, whose minimum is at (1, 1).

import matplotlib.pyplot as plt
from sympy import *
import numpy as np

'''=================== Newton's method ==================='''
# symbolic variables
x_1 = symbols('x_1')
x_2 = symbols('x_2')

# objective function (Rosenbrock)
# fun = 2 * x_1 * x_2 + 2 * x_2 - x_1 ** 2 - 2 * x_2 ** 2
# fun = x_1 ** 2 + 2 * x_2 ** 2 - 2 * x_1 * x_2 - 2 * x_2
# fun = x_1 ** 2 + x_2 ** 2
fun = (1 - x_1) ** 2 + 100 * (x_2 - x_1 ** 2) ** 2

# 3-D surface plot of the objective
figure = plt.figure()
axes = plt.axes(projection='3d')
X = np.arange(-10, 10, 0.25)
Y = np.arange(-10, 10, 0.25)
X, Y = np.meshgrid(X, Y)
# Z = 2 * X * Y + 2 * Y - X ** 2 - 2 * Y ** 2
# Z = X ** 2 + 2 * Y ** 2 - 2 * X * Y - 2 * Y
# Z = X ** 2 + Y ** 2
Z = (1 - X) ** 2 + 100 * (Y - X ** 2) ** 2
axes.plot_surface(X, Y, Z, cmap='rainbow')
axes.set_xlabel('X')
axes.set_ylabel('Y')
axes.set_zlabel('Z')
axes.set_title('Newton Method')

# first-order derivatives
grad_1 = diff(fun, x_1)
grad_2 = diff(fun, x_2)
# second-order derivatives
grad_12 = diff(grad_1, x_2)
grad_11 = diff(fun, x_1, 2)
grad_22 = diff(fun, x_2, 2)

# parameters
MaxIter = 200
epsilon = 0.0001

# initial point
x_1_value = -10
x_2_value = 10
iter_cnt = 0
grad_1_value = float(grad_1.subs({x_1: x_1_value, x_2: x_2_value}).evalf())
grad_2_value = float(grad_2.subs({x_1: x_1_value, x_2: x_2_value}).evalf())
grad_12_value = float(grad_12.subs({x_1: x_1_value, x_2: x_2_value}).evalf())
grad_11_value = float(grad_11.subs({x_1: x_1_value, x_2: x_2_value}).evalf())
grad_22_value = float(grad_22.subs({x_1: x_1_value, x_2: x_2_value}).evalf())
current_obj = fun.subs({x_1: x_1_value, x_2: x_2_value}).evalf()
print('itCnt: %2d cur_point (%3.2f, %3.2f) cur_Obj: %5.4f grad_1: %5.4f grad_2: %5.4f'
      % (iter_cnt, x_1_value, x_2_value, current_obj, grad_1_value, grad_2_value))

while iter_cnt <= MaxIter and abs(grad_1_value) + abs(grad_2_value) >= epsilon:
    iter_cnt += 1
    # gradient vector
    G = np.vstack((grad_1_value, grad_2_value))
    # Hessian matrix
    H = np.vstack(([grad_11_value, grad_12_value], [grad_12_value, grad_22_value]))
    # Newton update
    z_value = np.vstack((x_1_value, x_2_value)) - np.linalg.inv(H) @ G
    x_1_value = float(z_value[0, 0])
    x_2_value = float(z_value[1, 0])
    # re-evaluate objective and derivatives at the new point
    current_obj = fun.subs({x_1: x_1_value, x_2: x_2_value}).evalf()
    grad_1_value = float(grad_1.subs({x_1: x_1_value, x_2: x_2_value}).evalf())
    grad_2_value = float(grad_2.subs({x_1: x_1_value, x_2: x_2_value}).evalf())
    grad_12_value = float(grad_12.subs({x_1: x_1_value, x_2: x_2_value}).evalf())
    grad_11_value = float(grad_11.subs({x_1: x_1_value, x_2: x_2_value}).evalf())
    grad_22_value = float(grad_22.subs({x_1: x_1_value, x_2: x_2_value}).evalf())
    # plot the new iterate
    axes.scatter(x_1_value, x_2_value, current_obj, c='r', marker='*', linewidths=3)
    plt.pause(0.5)
    print('itCnt: %2d cur_point (%3.2f, %3.2f) cur_Obj: %5.4f grad_1: %5.4f grad_2: %5.4f'
          % (iter_cnt, x_1_value, x_2_value, current_obj, grad_1_value, grad_2_value))

plt.show()
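Explicitly inverting the Hessian is fine for a 2 x 2 toy problem, but numerically it is preferable to solve the linear system H d = -g for the Newton step instead. Below is a compact numpy-only sketch with hand-coded derivatives of the same Rosenbrock function, started from the classic test point (-1.2, 1); this is my illustration, not the original script, and grad/hess are helper names I introduced:

import numpy as np

def grad(x):
    # analytic gradient of (1 - x1)^2 + 100 * (x2 - x1^2)^2
    return np.array([-2 * (1 - x[0]) - 400 * x[0] * (x[1] - x[0] ** 2),
                     200 * (x[1] - x[0] ** 2)])

def hess(x):
    # analytic Hessian of the same function
    return np.array([[2 - 400 * (x[1] - 3 * x[0] ** 2), -400 * x[0]],
                     [-400 * x[0], 200.0]])

x = np.array([-1.2, 1.0])
for k in range(200):
    g = grad(x)
    if np.abs(g).sum() < 1e-4:
        break
    x = x + np.linalg.solve(hess(x), -g)  # Newton step: solve H d = -g
print(k, x)  # should converge to the minimizer (1, 1) quickly from this start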
3. Conjugate gradient method:

This implementation uses the Hessian explicitly: the step length lamda = -(d^T g) / (d^T H d) is the exact minimizer along d, and each new direction is made H-conjugate to the previous one via d = -g + ((d^T H g) / (d^T H d)) * d. On a quadratic this terminates in at most n steps; on the non-quadratic Rosenbrock function below it is applied iteratively with the Hessian recomputed at every point. (The more common nonlinear CG variants, Fletcher-Reeves and Polak-Ribiere, avoid the Hessian entirely; see the sketch after the code.)

import matplotlib.pyplot as plt
from sympy import *
import numpy as np

'''=================== Conjugate gradient method (CG) ==================='''
# symbolic variables
x_1 = symbols('x_1')
x_2 = symbols('x_2')

# objective function (Rosenbrock)
# fun = -(2 * x_1 * x_2 + 2 * x_2 - x_1 ** 2 - 2 * x_2 ** 2)
# fun = x_1 ** 2 + 2 * x_2 ** 2 - 2 * x_1 * x_2 - 2 * x_2
# fun = x_1 ** 2 + x_2 ** 2
fun = (1 - x_1) ** 2 + 100 * (x_2 - x_1 ** 2) ** 2

# 3-D surface plot of the objective
figure = plt.figure()
axes = plt.axes(projection='3d')
X = np.arange(-10, 10, 0.25)
Y = np.arange(-10, 10, 0.25)
X, Y = np.meshgrid(X, Y)
# Z = -(2 * X * Y + 2 * Y - X ** 2 - 2 * Y ** 2)
# Z = X ** 2 + 2 * Y ** 2 - 2 * X * Y - 2 * Y
# Z = X ** 2 + Y ** 2
Z = (1 - X) ** 2 + 100 * (Y - X ** 2) ** 2
axes.plot_surface(X, Y, Z, cmap='rainbow')
axes.set_xlabel('X')
axes.set_ylabel('Y')
axes.set_zlabel('Z')
axes.set_title('Conjugate gradient method')

# first-order derivatives
grad_1 = diff(fun, x_1)
grad_2 = diff(fun, x_2)
# second-order derivatives
grad_12 = diff(grad_1, x_2)
grad_11 = diff(fun, x_1, 2)
grad_22 = diff(fun, x_2, 2)

# parameters
MaxIter = 200
epsilon = 0.0001

# initial point
x_1_value = -2
x_2_value = 2
iter_cnt = 0
grad_1_value = float(grad_1.subs({x_1: x_1_value, x_2: x_2_value}).evalf())
grad_2_value = float(grad_2.subs({x_1: x_1_value, x_2: x_2_value}).evalf())
grad_12_value = float(grad_12.subs({x_1: x_1_value, x_2: x_2_value}).evalf())
grad_11_value = float(grad_11.subs({x_1: x_1_value, x_2: x_2_value}).evalf())
grad_22_value = float(grad_22.subs({x_1: x_1_value, x_2: x_2_value}).evalf())
current_obj = fun.subs({x_1: x_1_value, x_2: x_2_value}).evalf()
print('itCnt: %2d cur_point (%3.2f, %3.2f) cur_Obj: %5.4f grad_1: %5.4f grad_2: %5.4f'
      % (iter_cnt, x_1_value, x_2_value, current_obj, grad_1_value, grad_2_value))

while iter_cnt <= MaxIter and abs(grad_1_value) + abs(grad_2_value) >= epsilon:
    current_obj = fun.subs({x_1: x_1_value, x_2: x_2_value}).evalf()
    # plot the current iterate
    axes.scatter(x_1_value, x_2_value, current_obj, c='r', marker='*', linewidths=3)
    plt.pause(0.5)
    iter_cnt += 1
    # gradient vector
    G = np.vstack((grad_1_value, grad_2_value))
    # Hessian matrix
    H = np.vstack(([grad_11_value, grad_12_value], [grad_12_value, grad_22_value]))
    # search direction: steepest descent on the first step, then H-conjugate directions
    if iter_cnt == 1:
        d = -G
    else:
        d = -G + ((d.T @ H @ G) / (d.T @ H @ d)) * d
    # exact step length along d
    lamda = -(d.T @ G) / (d.T @ H @ d)
    # update the iterate
    z_value = np.vstack((x_1_value, x_2_value)) + lamda * d
    x_1_value = float(z_value[0, 0])
    x_2_value = float(z_value[1, 0])
    # re-evaluate objective and derivatives at the new point
    current_obj = fun.subs({x_1: x_1_value, x_2: x_2_value}).evalf()
    grad_1_value = float(grad_1.subs({x_1: x_1_value, x_2: x_2_value}).evalf())
    grad_2_value = float(grad_2.subs({x_1: x_1_value, x_2: x_2_value}).evalf())
    grad_12_value = float(grad_12.subs({x_1: x_1_value, x_2: x_2_value}).evalf())
    grad_11_value = float(grad_11.subs({x_1: x_1_value, x_2: x_2_value}).evalf())
    grad_22_value = float(grad_22.subs({x_1: x_1_value, x_2: x_2_value}).evalf())
    print('itCnt: %2d cur_point (%3.2f, %3.2f) cur_Obj: %5.4f grad_1: %5.4f grad_2: %5.4f'
          % (iter_cnt, x_1_value, x_2_value, current_obj, grad_1_value, grad_2_value))

plt.show()
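The defining property of CG is finite termination on quadratics: with exact steps it reaches the minimizer of an n-dimensional quadratic in at most n iterations. Here is a minimal hedged sketch of linear CG on the section-1 quadratic (0.5 x^T A x - b^T x with the A and b below, labels I chose); it is my illustration, not the original script, and it converges in exactly two steps:

import numpy as np

# minimize 0.5 x^T A x - b^T x, the quadratic from section 1
A = np.array([[2.0, -2.0], [-2.0, 4.0]])
b = np.array([0.0, 2.0])

x = np.array([-2.0, 2.0])
r = b - A @ x            # residual = negative gradient
d = r.copy()
for k in range(2):       # at most n = 2 CG steps for a 2-D quadratic
    alpha = (r @ r) / (d @ A @ d)     # exact step length
    x = x + alpha * d
    r_new = r - alpha * (A @ d)
    beta = (r_new @ r_new) / (r @ r)  # Fletcher-Reeves ratio
    d = r_new + beta * d
    r = r_new
print(x)                 # [1., 1.] up to rounding error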
4. Lagrange multiplier method:

For an equality-constrained problem, minimize f(x1, x2) subject to g(x1, x2) = 0, form the Lagrangian L = f - k * g and set all partial derivatives of L to zero; the stationary points of L are the candidate constrained extrema.

from sympy import *

x1, x2, k = symbols('x1,x2,k')
f = 60 - 10 * x1 - 4 * x2 + x1 ** 2 + x2 ** 2 - x1 * x2
g = x1 + x2 - 8

# build the Lagrangian
L = f - k * g
# stationarity (first-order / KKT) conditions
dx1 = diff(L, x1)  # partial derivative w.r.t. x1
dx2 = diff(L, x2)  # partial derivative w.r.t. x2
dk = diff(L, k)    # partial derivative w.r.t. k (recovers the constraint)
# solve the resulting system for all variables
m = solve([dx1, dx2, dk], [x1, x2, k])
print('Variable values at the extremum: {}'.format(m))  # {x1: 5, x2: 3, k: -3}

# substitute the solution back into f
x1_v, x2_v = m[x1], m[x2]
f_min = 60 - 10 * x1_v - 4 * x2_v + x1_v ** 2 + x2_v ** 2 - x1_v * x2_v
print('Constrained minimum of f:', f_min)
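The solution x1 = 5, x2 = 3 satisfies the constraint (5 + 3 = 8) and gives f = 17. A hedged cross-check with scipy's constrained solver (assumes scipy is installed; not part of the original post):

import numpy as np
from scipy.optimize import minimize

f = lambda v: 60 - 10 * v[0] - 4 * v[1] + v[0] ** 2 + v[1] ** 2 - v[0] * v[1]
con = {'type': 'eq', 'fun': lambda v: v[0] + v[1] - 8}
res = minimize(f, x0=np.array([0.0, 0.0]), method='SLSQP', constraints=[con])
print(res.x, res.fun)  # approximately [5. 3.] and 17.0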
