Ceres简介及示例(3)Powell’s Function(鲍威尔方程)

1、问题描述

考虑一个稍微复杂些的例子:求鲍威尔函数的最小值。定义一个参数块 $x = \left[x_1, x_2, x_3, x_4\right]$,同时定义
$$\begin{split} f_1(x) &= x_1 + 10x_2 \\ f_2(x) &= \sqrt{5}\,(x_3 - x_4) \\ f_3(x) &= (x_2 - 2x_3)^2 \\ f_4(x) &= \sqrt{10}\,(x_1 - x_4)^2 \\ F(x) &= \left[f_1(x),\ f_2(x),\ f_3(x),\ f_4(x)\right] \end{split}$$

$F(x)$ 是由上面四个残差组成的向量函数,希望寻找到一组 $x$,使得 $\frac{1}{2}\|F(x)\|^2$ 最小。

2、目标函数定义

第一步是为目标函数中的每一项定义用于计算残差的仿函数,以计算 $f_4(x_1, x_4)$ 的代码为例

// Residual functor for the fourth Powell term, f4 = sqrt(10) * (x1 - x4)^2.
struct F4 {
  // Reads x1[0] and x4[0], writes f4 into residual[0], and always returns true.
  // T is a template parameter so the same code works for plain doubles and for
  // the types the autodiff wrapper substitutes.
  template <typename T>
  bool operator()(const T* const x1, const T* const x4, T* residual) const {
    const T diff = x1[0] - x4[0];
    residual[0] = sqrt(10.0) * diff * diff;
    return true;
  }
};

类似地定义 F1、F2 和 F3,分别计算 $f_1(x_1, x_2)$、$f_2(x_3, x_4)$ 和 $f_3(x_2, x_3)$。

3、使用自动梯度法加入到Problem中

完成1,2步之后,当前Problem可以通过以下方式构建

double x1 =  3.0; double x2 = -1.0; double x3 =  0.0; double x4 = 1.0;

Problem problem;

// Add residual terms to the problem using the autodiff
// wrapper to get the derivatives automatically.
problem.AddResidualBlock(
  new AutoDiffCostFunction<F1, 1, 1, 1>(new F1), nullptr, &x1, &x2);
problem.AddResidualBlock(
  new AutoDiffCostFunction<F2, 1, 1, 1>(new F2), nullptr, &x3, &x4);
problem.AddResidualBlock(
  new AutoDiffCostFunction<F3, 1, 1, 1>(new F3), nullptr, &x2, &x3);
problem.AddResidualBlock(
  new AutoDiffCostFunction<F4, 1, 1, 1>(new F4), nullptr, &x1, &x4);

注意,每个 ResidualBlock 只依赖于对应残差函数所用到的两个参数,而不是全部四个参数。以模板参数 <F2, 1, 1, 1> 为例:第一个 1 是输出残差的维度,其后的 1 依次是各输入参数块的维度,即第二个 1 是 &x3 所指数据的维度,第三个 1 是 &x4 所指数据的维度。

另外,我们可以将4个参数数据合并成一个向量,缩小模板参数的个数,后面给出直接使用四个参数-向量的方法。

4、完整示例代码

#include <cmath>
#include <iostream>
#include <vector>

#include "ceres/ceres.h"
#include "gflags/gflags.h"
#include "glog/logging.h"

using ceres::AutoDiffCostFunction;
using ceres::CostFunction;
using ceres::Problem;
using ceres::Solver;
using ceres::Solve;

// Residual functor for the first Powell term, f1 = x1 + 10 * x2.
struct F1 {
  // Reads x1[0] and x2[0], writes f1 into residual[0], and always returns true.
  template <typename T>
  bool operator()(const T* const x1, const T* const x2, T* residual) const {
    const T& first = *x1;
    const T& second = *x2;
    *residual = first + 10.0 * second;
    return true;
  }
};

// Residual functor for the second Powell term, f2 = sqrt(5) * (x3 - x4).
struct F2 {
  // Reads x3[0] and x4[0], writes f2 into residual[0], and always returns true.
  template <typename T>
  bool operator()(const T* const x3, const T* const x4, T* residual) const {
    const T diff = x3[0] - x4[0];
    residual[0] = sqrt(5.0) * diff;
    return true;
  }
};

// Residual functor for the third Powell term, f3 = (x2 - 2 * x3)^2.
struct F3 {
  // Reads x2[0] and x3[0], writes f3 into residual[0], and always returns true.
  template <typename T>
  bool operator()(const T* const x2, const T* const x3, T* residual) const {
    // Evaluate the inner difference once and square it by multiplication.
    const T inner = x2[0] - 2.0 * x3[0];
    residual[0] = inner * inner;
    return true;
  }
};

// Residual functor for the fourth Powell term, f4 = sqrt(10) * (x1 - x4)^2.
struct F4 {
  // Reads x1[0] and x4[0], writes f4 into residual[0], and always returns true.
  template <typename T>
  bool operator()(const T* const x1, const T* const x4, T* residual) const {
    const T diff = x1[0] - x4[0];
    residual[0] = sqrt(10.0) * diff * diff;
    return true;
  }
};

// DEFINE_string(minimizer, "trust_region",
//               "Minimizer type to use, choices are: line_search & trust_region");

// Builds the Powell problem from the F1..F4 functors (one residual block per
// term), runs the Ceres solver on it, and prints the four parameters before
// and after optimization together with the solver's full report.
// Always returns 0.
int main(int argc, char** argv) {
  // Command-line flag parsing and glog initialization are disabled in this
  // version, so argc/argv are intentionally unused.
  static_cast<void>(argc);
  static_cast<void>(argv);
//   CERES_GFLAGS_NAMESPACE::ParseCommandLineFlags(&argc, &argv, true);
//   google::InitGoogleLogging(argv[0]);

  double x1 =  3.0;
  double x2 = -1.0;
  double x3 =  0.0;
  double x4 =  1.0;

  Problem problem;
  // Add residual terms to the problem using the autodiff wrapper to get the
  // derivatives automatically. The parameters, x1 through x4, are modified
  // in place. nullptr means no loss function is applied to the term.
  problem.AddResidualBlock(new AutoDiffCostFunction<F1, 1, 1, 1>(new F1),
                           nullptr, &x1, &x2);
  problem.AddResidualBlock(new AutoDiffCostFunction<F2, 1, 1, 1>(new F2),
                           nullptr, &x3, &x4);
  problem.AddResidualBlock(new AutoDiffCostFunction<F3, 1, 1, 1>(new F3),
                           nullptr, &x2, &x3);
  problem.AddResidualBlock(new AutoDiffCostFunction<F4, 1, 1, 1>(new F4),
                           nullptr, &x1, &x4);

  Solver::Options options;
//   LOG_IF(FATAL, !ceres::StringToMinimizerType(FLAGS_minimizer,
//                                               &options.minimizer_type))
//       << "Invalid minimizer: " << FLAGS_minimizer
//       << ", valid options are: trust_region and line_search.";

  options.max_num_iterations = 100;
  options.linear_solver_type = ceres::DENSE_QR;
  options.minimizer_progress_to_stdout = true;

  std::cout << "Initial x1 = " << x1
            << ", x2 = " << x2
            << ", x3 = " << x3
            << ", x4 = " << x4
            << "\n";

  // Run the solver!
  Solver::Summary summary;
  Solve(options, &problem, &summary);

  std::cout << summary.FullReport() << "\n";
  std::cout << "Final x1 = " << x1
            << ", x2 = " << x2
            << ", x3 = " << x3
            << ", x4 = " << x4
            << "\n";
  return 0;
}

运行结果如下。容易看出,该问题的最优解为 x1=0, x2=0, x3=0, x4=0,此时目标函数值为 0;从下面的输出可以看到,求解器最终收敛到该解附近,代价值约为 1.1e-15。

Initial x1 = 3, x2 = -1, x3 = 0, x4 = 1
iter      cost      cost_change  |gradient|   |step|    tr_ratio  tr_radius  ls_iter  iter_time  total_time
   0  1.075000e+02    0.00e+00    1.55e+02   0.00e+00   0.00e+00  1.00e+04        0    3.57e-04    7.32e-04
   1  5.036190e+00    1.02e+02    2.00e+01   2.16e+00   9.53e-01  3.00e+04        1    8.20e-04    1.67e-03
   2  3.148168e-01    4.72e+00    2.50e+00   6.23e-01   9.37e-01  9.00e+04        1    3.91e-04    2.13e-03
   3  1.967760e-02    2.95e-01    3.13e-01   3.08e-01   9.37e-01  2.70e+05        1    3.88e-04    2.59e-03
   4  1.229900e-03    1.84e-02    3.91e-02   1.54e-01   9.37e-01  8.10e+05        1    3.98e-04    3.08e-03
   5  7.687123e-05    1.15e-03    4.89e-03   7.69e-02   9.37e-01  2.43e+06        1    3.89e-04    3.54e-03
   6  4.804625e-06    7.21e-05    6.11e-04   3.85e-02   9.37e-01  7.29e+06        1    3.86e-04    4.00e-03
   7  3.003028e-07    4.50e-06    7.64e-05   1.92e-02   9.37e-01  2.19e+07        1    3.85e-04    4.46e-03
   8  1.877006e-08    2.82e-07    9.54e-06   9.62e-03   9.37e-01  6.56e+07        1    3.88e-04    4.92e-03
   9  1.173223e-09    1.76e-08    1.19e-06   4.81e-03   9.37e-01  1.97e+08        1    3.85e-04    5.37e-03
  10  7.333425e-11    1.10e-09    1.49e-07   2.40e-03   9.37e-01  5.90e+08        1    4.02e-04    5.86e-03
  11  4.584044e-12    6.88e-11    1.86e-08   1.20e-03   9.37e-01  1.77e+09        1    3.88e-04    6.32e-03
  12  2.865573e-13    4.30e-12    2.33e-09   6.02e-04   9.37e-01  5.31e+09        1    3.93e-04    6.79e-03
  13  1.791438e-14    2.69e-13    2.91e-10   3.01e-04   9.37e-01  1.59e+10        1    3.88e-04    7.25e-03
  14  1.120029e-15    1.68e-14    3.64e-11   1.51e-04   9.37e-01  4.78e+10        1    3.91e-04    7.72e-03

Solver Summary (v 1.14.0-eigen-(3.3.7)-no_lapack-eigensparse-openmp-no_tbb-no_custom_blas)

                                     Original                  Reduced
Parameter blocks                            4                        4
Parameters                                  4                        4
Residual blocks                             4                        4
Residuals                                   4                        4

Minimizer                        TRUST_REGION

Dense linear algebra library            EIGEN
Trust region strategy     LEVENBERG_MARQUARDT

                                        Given                     Used
Linear solver                        DENSE_QR                 DENSE_QR
Threads                                     1                        1
Linear solver ordering              AUTOMATIC                        4

Cost:
Initial                          1.075000e+02
Final                            1.120029e-15
Change                           1.075000e+02

Minimizer iterations                       15
Successful steps                           15
Unsuccessful steps                          0

Time (in seconds):
Preprocessor                         0.000375

  Residual only evaluation           0.000170 (14)
  Jacobian & residual evaluation     0.001752 (15)
  Linear solver                      0.002837 (14)
Minimizer                            0.007452

Postprocessor                        0.000061
Total                                0.007888

Termination:                      CONVERGENCE (Gradient tolerance reached. Gradient max norm: 3.642190e-11 <= 1.000000e-10)

Final x1 = 0.000146222, x2 = -1.46222e-05, x3 = 2.40957e-05, x4 = 2.40957e-05

5、优化 缩减参数-合并问题

前面使用 4 个优化函数,每个函数输出一个残差项,接收两个输入参数,且每个输入参数的维度都是 1。这里直接将 4 个优化函数合并成一个,直接输出 4 个残差项,输入参数只有一个,但其维度为 4。

5.1、仿函数

第一种方式,我们将4个输入参数用数组描述即 double x[] = {x1,x2,x3,x4} ,此时代码中参数可以传递 数组的指针x 且明确通过模板指出其维度。

完整代码如下:

// Merged residual functor for Powell's function: one parameter block of four
// values in, four residuals out.
//   residual[0] = x[0] + 10 * x[1]
//   residual[1] = sqrt(5) * (x[2] - x[3])
//   residual[2] = (x[1] - 2 * x[2])^2
//   residual[3] = sqrt(10) * (x[0] - x[3])^2
struct F {
  // Reads x[0..3], writes residual[0..3], and always returns true.
  template <typename T>
  bool operator()(const T* const x, T* residual) const {
    // Square by multiplication instead of pow(..., 2): this matches the
    // separate F3/F4 functors and avoids a general-purpose pow call.
    const T d23 = x[1] - 2.0 * x[2];
    const T d14 = x[0] - x[3];

    residual[0] = x[0] + 10.0 * x[1];
    residual[1] = sqrt(5.0) * (x[2] - x[3]);
    residual[2] = d23 * d23;
    residual[3] = sqrt(10.0) * d14 * d14;
    return true;
  }
};

// Solves the merged Powell problem: a single residual block with 4 residuals
// over one parameter block of size 4 (the array x). Prints the parameters
// before and after optimization together with the solver's full report.
// Always returns 0.
int main()
{
    // Starting point; the solver updates this array in place.
    double x[] = {3.0, -1.0, 0.0, 1.0};

    Problem problem;
    // <F, 4, 4>: 4 residuals, one parameter block of size 4.
    // nullptr means no loss function is applied.
    problem.AddResidualBlock(new AutoDiffCostFunction<F, 4, 4>(new F),
                             nullptr,
                             x);

    Solver::Options options;
    options.max_num_iterations = 100;
    options.linear_solver_type = ceres::DENSE_QR;
    options.minimizer_progress_to_stdout = true;

    std::cout << "Initial x1 = " << x[0]
        << ", x2 = " << x[1]
        << ", x3 = " << x[2]
        << ", x4 = " << x[3]
        << "\n";

    // Run the solver!
    Solver::Summary summary;
    Solve(options, &problem, &summary);

    std::cout << summary.FullReport() << "\n";
    std::cout << "Final x1 = " << x[0]
        << ", x2 = " << x[1]
        << ", x3 = " << x[2]
        << ", x4 = " << x[3]
        << "\n";
    return 0;
}

5.2、编译期模板类SizedCostFunction

如果在编译时就知道参数块的大小和残差向量的大小(这是常见的情况),则可以使用 SizedCostFunction:这些大小作为模板参数指定,用户只需要实现 CostFunction::Evaluate() 函数,在其中计算残差,并在 jacobians 非空时填写对应的雅可比矩阵。

修改的代码如下。

/*  修改部分1  */
//struct F {
    
    
//    template <typename T> bool operator()(const T* const x, T* residual) const
//    {
    
    
//        residual[0] = x[0] + 10.0 * x[1];
//        residual[1] = sqrt(5.0) * (x[2] - x[3]);
//        residual[2] = pow(x[1] - 2.0 * x[2], 2);
//        residual[3] = sqrt(10.0) * pow(x[0] - x[3], 2);
//        return true;
//    }
//};
// Analytic cost function for the merged Powell problem.
// SizedCostFunction<4, 4> declares 4 residuals and ONE parameter block of
// size 4, so Evaluate() receives a single block pointer in x[0].
class F : public ceres::SizedCostFunction<4, 4>
{
public:
    // Computes the four Powell residuals and, when requested, their Jacobian.
    //
    // x         : array with one pointer per parameter block. There is exactly
    //             one block here, so only x[0] is valid; the four values are
    //             x[0][0] .. x[0][3].
    // residual  : output array of 4 residuals.
    // jacobians : may be null, and jacobians[0] may be null, when derivatives
    //             are not requested. Otherwise jacobians[0] is a row-major
    //             4x4 array with
    //             jacobians[0][r * 4 + c] = d residual[r] / d x[0][c].
    // Always returns true.
    bool Evaluate(double const* const* x,
                  double* residual,
                  double** jacobians) const override
    {
        const double* p = x[0];
        const double sqrt5 = std::sqrt(5.0);
        const double sqrt10 = std::sqrt(10.0);
        const double d23 = p[1] - 2.0 * p[2];
        const double d14 = p[0] - p[3];

        residual[0] = p[0] + 10.0 * p[1];
        residual[1] = sqrt5 * (p[2] - p[3]);
        residual[2] = d23 * d23;
        residual[3] = sqrt10 * d14 * d14;

        if (jacobians != nullptr && jacobians[0] != nullptr) {
            double* jac = jacobians[0];
            // Most entries are zero; clear everything, then set the rest.
            for (int i = 0; i < 4 * 4; ++i) {
                jac[i] = 0.0;
            }
            // Row 0: d(p0 + 10 p1)
            jac[0 * 4 + 0] = 1.0;
            jac[0 * 4 + 1] = 10.0;
            // Row 1: d(sqrt(5) (p2 - p3))
            jac[1 * 4 + 2] = sqrt5;
            jac[1 * 4 + 3] = -sqrt5;
            // Row 2: d((p1 - 2 p2)^2)
            jac[2 * 4 + 1] = 2.0 * d23;
            jac[2 * 4 + 2] = -4.0 * d23;
            // Row 3: d(sqrt(10) (p0 - p3)^2)
            jac[3 * 4 + 0] = 2.0 * sqrt10 * d14;
            jac[3 * 4 + 3] = -2.0 * sqrt10 * d14;
        }
        return true;
    }
};

    
/*  修改部分2  */
//problem.AddResidualBlock(new AutoDiffCostFunction<F, 4, 4>(new F),
//                         NULL,
//                         x);
// F is itself a cost function here, so it is passed directly without an
// autodiff wrapper. nullptr means no loss function; x is the single
// parameter block (array of 4 doubles).
problem.AddResidualBlock(new F,
                          nullptr,
                          x);

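在仿函数中,参数 x 直接是一个数组,通过下标即可访问各个分量。而 Evaluate 中的参数 x 是一个指针数组(二维数组):x[i] 指向第 i 个参数块,x[i][j] 是该参数块的第 j 个分量。若只有一个维度为 4 的参数块(如 SizedCostFunction<4,4>),四个分量是 x[0][0] ~ x[0][3];若是四个维度为 1 的参数块(如 SizedCostFunction<4,1,1,1,1>),四个分量则是 x[0][0]、x[1][0]、x[2][0]、x[3][0]。可以认为有如下区别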

// Example values (the Powell starting point) so nothing is read uninitialized.
double x1 = 3.0;
double x2 = -1.0;
double x3 = 0.0;
double x4 = 1.0;
// Functor: the parameter x is a flat array of the four values.
double Func_x[] = {x1, x2, x3, x4};
// SizedCostFunction<4, 4>::Evaluate(): the parameter x holds one pointer per
// parameter block. With a single block of size 4 there is exactly one entry,
// and the values are Class_x[0][0] .. Class_x[0][3].
// (Four separate size-1 blocks, i.e. SizedCostFunction<4, 1, 1, 1, 1>, would
// instead correspond to {&x1, &x2, &x3, &x4}.)
double* Class_x[] = {Func_x};

猜你喜欢

转载自blog.csdn.net/wanggao_1990/article/details/129547043
今日推荐