Nonlinear Least Squares: Gauss-Newton and Levenberg-Marquardt

This post goes straight to the implementation, mainly following Dr. Gao Xiang's 《SLAM十四讲》 (14 Lectures on Visual SLAM).

The following data are used throughout. The model function is y = a*e^(b*t), the residual is r = a*e^(b*t) - y, and the per-point cost is fx = 0.5*r^2.

    double t[8] = {1, 2, 3, 4, 5, 6, 7, 8}; //independent variable
    double y[8] = {8.3, 11.0, 14.7, 19.7, 26.7, 35.2, 44.4, 55.9};  //observations
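
Summed over the eight samples, fitting amounts to the least-squares problem

    min over (a, b):  fx(a, b) = 0.5 * sum_i ( a*e^(b*t_i) - y_i )^2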

Take the partial derivatives of the model function with respect to a and b.
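
For f(t; a, b) = a*e^(b*t) these are

    ∂f/∂a = e^(b*t)
    ∂f/∂b = a*t*e^(b*t)

implemented directly below: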

//partial derivative of the model function with respect to a
double Jacobian_a(double ti, double a, double b)
{
    return exp(b*ti);
}

//partial derivative of the model function with respect to b
double Jacobian_b(double ti, double a, double b)
{
    return a*ti*exp(b*ti);
}

Gauss-Newton Method
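
Each Gauss-Newton iteration linearizes the residual around the current estimate, r(x+Δx) ≈ r(x) + J*Δx, and minimizes the resulting quadratic, which leads to the normal equations

    (J^T*J) * Δx = -J^T*r

This is exactly the linear system 'JTJ*x1 = B' assembled and solved in the code below.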

#include <iostream>
#include <vector>
#include <cmath>
#include <Eigen/Dense>

using namespace Eigen;
using namespace std;

int main() {
    std::cout << "Guassian-Newton iteration method" << std::endl;

    //initialize parameters
    int N = 8;  //number of data points
    double delta = 0.05; //step-size threshold: stop iterating once the step is smaller than this
    double t[8] = {1, 2, 3, 4, 5, 6, 7, 8}; //independent variable
    double y[8] = {8.3, 11.0, 14.7, 19.7, 26.7, 35.2, 44.4, 55.9};  //observations

    int iterMax = 5; //maximum number of iterations
    double a = 6., b = 0.3; //initial values of a and b
    double fx=0;
    for (int k = 0; k < iterMax; ++k)
    {
        std::cout <<" ============================" << std::endl;
        std::cout << k<<" iter !" << std::endl;

        //compute fx and the residuals
        fx=0;
        VectorXd r(8);
        for(int i=0; i<N; i++){
            double ri = a*exp(b*t[i]) - y[i];
            fx +=0.5*ri*ri;
            r(i) = ri;
        }
        cout<<"r = \n"<<r<<endl<<endl;
        cout<<"fx = "<<fx<<endl<<endl;

        //build the Jacobian matrix
        MatrixXd JacobMat(8,2);
        for(int i=0; i<N; i++){
            JacobMat(i,0) = Jacobian_a(t[i], a, b);
            JacobMat(i,1) = Jacobian_b(t[i], a, b);
        }
        cout<<"JacobMat\n"<<JacobMat<<endl;

        Matrix2d JTJ = JacobMat.transpose()*JacobMat;
        cout<<"\nJTJ\n"<<JTJ<<endl;

        Vector2d B = -JacobMat.transpose()*r;
        cout<<"\nB\n"<<B<<endl;

        //build and solve the linear system 'JTJ*x1 = B'
        Vector2d x1;
        x1 = JTJ.colPivHouseholderQr().solve(B);
        //x1 = JTJ.llt().solve(B);
        //x1 = JTJ.ldlt().solve(B);
        cout<<"\nx1\n"<<x1<<endl;

        double step_norm = x1.norm();   //step size
        cout<<"\nstep_size is "<<step_norm<<endl;

        if(step_norm<delta){
            std::cout << k+1 << " iterations: step size below threshold, converged, stop!" << std::endl;
            break;
        } else{
            a += x1(0);
            b += x1(1);
            cout<<"\nupdate 'a' and 'b'\n\ta is "<<a<<"; b is "<<b<<endl;

            //recompute fx (and r, for logging) with the updated parameters
            fx=0;
            for(int i=0; i<N; i++){
                double ri = a*exp(b*t[i]) - y[i];
                fx +=0.5*ri*ri;
                r(i) = ri;
            }
            cout<<"\nupdate fx = "<<fx<<endl<<endl;

            if(k==iterMax-1)
                std::cout << "reached the maximum number of iterations iterMax!" << std::endl;
        }
    }

    //evaluate the final cost with the optimized parameters
    fx=0;
    for(int i=0; i<N; i++){
        double ri = a*exp(b*t[i]) - y[i];
        fx +=0.5*ri*ri;
    }
    cout<<"\nLastly 'a' and 'b'\n\ta is "<<a<<"; b is "<<b<<endl;
    cout<<"fx is "<<fx<<endl;

    return 0;
}

Running the code, it converges after three iterations; the results before and after optimization are:

//initial values
a is 6.;    b is 0.3;   fx is 63.6547
//after optimization
a is 7.0016;    b is 0.262038;  fx is 3.00657

Levenberg-Marquardt Method
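
Levenberg-Marquardt damps the Gauss-Newton system with a factor lamda and accepts a step only when the actual cost reduction agrees with the reduction predicted by the quadratic model. Each inner iteration solves

    (J^T*J + lamda*I) * Δx = -J^T*r

and evaluates the gain ratio

    rho = (fx - fx_update) / L_bias,   where   L_bias = 0.5 * Δx^T * (lamda*Δx - J^T*r)

These correspond to A, B, x2, L_bias and rho in the code below (since B = -J^T*r, lamda*x2 + B equals lamda*Δx - J^T*r).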

#include <iostream>
#include <vector>
#include <cmath>
#include <Eigen/Core>
#include <Eigen/Dense>

using namespace Eigen;
using namespace std;

int main() {
    std::cout << "Levenberg-Marquardt iteration method" << std::endl;
    int N = 8;
    double delta_step = 0.05; //step-size threshold for declaring convergence
    double rho_delta = 0.25; //threshold on rho: update only when the actual cost reduction is close enough to the predicted reduction (rho above this value)
    double miu = 0.1;  //trust-region radius for the step
    double lamda = 1.; //damping factor (the 'Lagrange multiplier')
    double pk_norm = 0, pk_norm_last = 0.01; //norm of the computed parameter update
    double t[8] = {1, 2, 3, 4, 5, 6, 7, 8};
    double y[8] = {8.3, 11.0, 14.7, 19.7, 26.7, 35.2, 44.4, 55.9};

    //initial values of parameters a and b
    double a=6., b=0.3;
    double fx=0, fx_update=0;
    int iterMax = 5;
    double rho=0.;
    for (int k = 0; k < iterMax; ++k)
    {
        std::cout <<" ============================" << std::endl;
        std::cout << k<<" iter !" << std::endl;

        int while_cnt=0;
        Vector2d x2;
        while(1) {
            //compute fx and the residuals
            fx=0;
            VectorXd r(8);
            for(int i=0; i<N; i++){
                double ri = a*exp(b*t[i]) - y[i];
                fx +=0.5*ri*ri;
                r(i) = ri;
            }
            cout<<"r = \n"<<r<<endl<<endl;
            cout<<"fx = "<<fx<<endl<<endl;

            //build the Jacobian matrix
            MatrixXd Jacobian(8,2);
            for(int i=0; i<N; i++){
                double Jai = Jacobian_a(t[i], a, b);
                double Jbi = Jacobian_b(t[i], a, b);
                Jacobian(i,0) = Jai;
                Jacobian(i,1) = Jbi;
            }
            cout<<Jacobian<<endl;

            Matrix2d JTJ = Jacobian.transpose()*Jacobian;
            Matrix2d A = JTJ + lamda*Matrix2d::Identity();
            Vector2d B = -Jacobian.transpose()*r;

            //build and solve the linear system 'A*x2 = B'
            x2 = A.colPivHouseholderQr().solve(B);
            pk_norm = x2.norm();
            cout<<"\nstep_size is "<<pk_norm<<endl;

            //compute rho: actual cost reduction vs. the reduction predicted by the model
            fx_update=0;
            for(int i=0; i<N; i++){
                double r0 = (a+x2(0)) * exp((b+x2(1))*t[i]) - y[i];
                fx_update +=0.5*r0*r0;
            }
            //1. denominator of rho: predicted reduction from the second-order (quadratic) model
            double L_bias = 0.5*x2.transpose()*(lamda*x2+B);
            //2. alternative: first-order predicted reduction
            //L_bias = -0.5*r.transpose()*Jacobian*x2;
            rho = (fx-fx_update)/L_bias;

            cout<<"fx delta is "<<(fx-fx_update)<<endl;
            cout<<"L_bias is "<<L_bias<<endl;
            cout<<"rho is "<<rho<<endl;

            //update the trust-region radius miu
            if(rho>0.75){
                miu*=2.;
            }else if(rho<0.25){
                miu*=0.5;
            }

            std::cout<<"\nwhile_cnt is "<<while_cnt<<"  "<<x2<<"  "<miu<<"\n";
            //rho = actual reduction of the cost / predicted reduction of the approximate model
            // when rho exceeds rho_delta the approximation is considered trustworthy,
            // and the step must also lie inside the trust region
            if ((rho>rho_delta) && (pk_norm<miu))
                break;

            while_cnt++;
            if(while_cnt>10){
                std::cout<<"陷入步长小于设定的阈值,重新选择步长阈值\n";
                exit(0);
            }
        }

        //update lamda, capped at 1
        if(pk_norm>pk_norm_last)
            lamda *= 0.1;
        else
            lamda = lamda*2 > 1 ? 1. : lamda*2;
        pk_norm_last = pk_norm;

        cout<<"\nfx = "<<fx<<endl;
        cout<<"\nupdate fx = "<<fx_update<<endl;

        a += x2(0);
        b += x2(1);

        //check for convergence
        if(pk_norm<delta_step){
            cout<<"步长很小,收敛并退出"<<endl;
            cout<<"lamda is "<<lamda<<endl;
            break;
        }
        if(k==iterMax-1)
            std::cout << "reached the maximum number of iterations iterMax!" << std::endl;
    }

    //evaluate the final cost with the optimized parameters
    fx=0;
    for(int i=0; i<N; i++){
        double ri = a*exp(b*t[i]) - y[i];
        fx +=0.5*ri*ri;
    }
    cout<<"\nLastly 'a' and 'b'\n\ta is "<<a<<"; b is "<<b<<endl;
    cout<<"fx is "<<fx<<endl;

    return 0;
}

It likewise converges after three iterations:

//initial values
a is 6.;    b is 0.3;   fx is 63.6547
//after optimization
a is 7.00008;   b is 0.262078;  fx is 3.00654

Summary
The code here draws on many references combined with my own understanding; if you have different ideas, feel free to reach out: [email protected]

The parameter miu ought to be coupled to lamda so that together they adjust the step and keep it inside the trust region; 《SLAM十四讲》 does not cover this, but reference 3 explains it.
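
For comparison, reference 3 couples the damping factor directly to the gain ratio rho instead of keeping a separate trust-region radius. Below is a minimal sketch of that update strategy; the function name updateDamping and the variables mu and nu are mine, not from the code above (mu plays the role of lamda, and nu should start at 2):

#include <algorithm> // std::max
#include <cmath>     // std::pow

//Nielsen's damping update from reference 3: shrink mu smoothly when the step
//is accepted (rho > 0), grow it progressively when the step is rejected.
void updateDamping(double rho, double &mu, double &nu)
{
    if (rho > 0) {
        mu *= std::max(1.0/3.0, 1.0 - std::pow(2.0*rho - 1.0, 3.0));
        nu = 2.0;
    } else {
        mu *= nu;
        nu *= 2.0;
    }
}

With this scheme a single damping parameter replaces the separate miu/lamda bookkeeping used in the code above.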

The Gauss-Newton method approximates the Hessian with Jacobian^T*Jacobian, which saves computation, but Jacobian^T*Jacobian is only positive semi-definite, so the solve may not produce a usable result, giving steps that are too large or a local approximation that is not accurate enough.
In Levenberg-Marquardt, a small lamda means the quadratic model fits well in the current region and the method behaves like Gauss-Newton; a large lamda pushes it toward first-order gradient descent. To some extent this avoids the singularity and ill-conditioning of the coefficient matrix of the linear system and yields better solutions.
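
Concretely, for fx = 0.5 * sum_i r_i^2 the exact Hessian is

    H = J^T*J + sum_i r_i * ∇²r_i

Gauss-Newton keeps only the first term, which is a good approximation when the residuals r_i are small or the model is close to linear; far from the optimum the dropped term can matter, which is another reason the pure Gauss-Newton step can misbehave.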

References
1. 高翔, 《SLAM十四讲》 (14 Lectures on Visual SLAM).
2. Alfonso Croeze, "Solving Nonlinear Least Squares Problems with the Gauss-Newton and Levenberg-Marquardt Method".
3. K. Madsen, "Methods for Non-Linear Least Squares Problems".
4. Reposted from: https://blog.csdn.net/qq_31806429/article/details/82667779
