SVO稀疏图像对齐代码分析

　　SVO使用稀疏直接法计算两帧之间的初始相机位姿，即使用两帧之间稀疏的4*4 patch的光度误差为损失函数，使用G-N优化算法获得两帧之间的位姿变换，由于没有特征匹配过程效率较高。相比自己实现的稀疏直接法的代码，总是难以达到理想的效率，因此分析SVO代码如下：

　　首先是入口函数：

//输入的是参考帧和当前帧，即连续的两帧
size_t SparseAlign::run(FramePtr ref_frame, FramePtr cur_frame)
{
  reset();
  if(ref_frame->fts_.empty())
  {
    SVO_WARN_STREAM("SparseAlign: no features to track!");
    return 0;
  }

  ref_frame_ = ref_frame;
  cur_frame_ = cur_frame;

　//ref_patch_cache_：参考帧patch的缓存，即每行一个特征patch的16个像素灰度值；
  ref_patch_cache_ = cv::Mat(ref_frame_->fts_.size(), patch_area_, CV_32F);           //  create  n x 16 matrix

　//雅克比矩阵，每一个特征patch的每个像素对应一个6*1的雅克比；
  jacobian_cache_.resize(Eigen::NoChange, ref_patch_cache_.rows*patch_area_);
  visible_fts_.resize(ref_patch_cache_.rows, false); // TODO: should it be reset at each level?   // n x 1

　//帧间位姿初始化；
  SE3 T_cur_from_ref(cur_frame_->T_f_w_ * ref_frame_->T_f_w_.inverse());

　//金字塔迭代，从最高层（即分辨率最低）开始迭代，到最低层（原始图像）；
  for(level_=max_level_; level_>=min_level_; --level_)
  {
　　//每次迭代，雅克比都置0；
    jacobian_cache_.setZero();
　　//这个参数用于控制参考帧patch的缓存ref_patch_cache_是否被初始化；
    have_ref_patch_cache_ = false;
    if(verbose_)printf("\nPYRAMID LEVEL %i\n---------------\n", level_);
　　//迭代优化函数；
    optimize(T_cur_from_ref);
  }

  //优化完成之后左乘获得世界坐标系原点到当前帧的变换；
  cur_frame_->T_f_w_ = T_cur_from_ref * ref_frame_->T_f_w_;

  return n_meas_/patch_area_; //返回的是平均特征数量；
}

　　迭代优化函数：

void  SparseAlign::optimize(SE3& model)
{
  // Save the old model to rollback in case of unsuccessful update
  SE3 old_model(model);

  // perform iterative estimation
  for (iter_ = 0; iter_<n_iter_; ++iter_)
  {
    rho_ = 0;
    //G-N优化过程中的H矩阵，和g矩阵（J*b）
    H_.setZero();
    Jres_.setZero();

    // compute initial error
    n_meas_ = 0;
    //根据两帧之间的初始位姿变换，计算两帧之间patch的残差并返回；
    double new_chi2 = computeResiduals(model, true, false);

    // solve the linear system
    if(!solve())
    {
      //求解出现问题就退出迭代，具体就是出现nan；
      stop_ = true;
    }

    // 判断cost是否增长了，正常来说光度误差是下降的，如上升则说明上一次迭代是最优的，并把上轮迭代结果输出
    if((iter_ > 0 && new_chi2 > chi2_) || stop_)
    {
      model = old_model; // rollback
      break;
    }

    // 如果光度误差下降了，则更新迭代结果
    SE3 new_model;
    update(model, new_model);

    //变量交换，准备下次迭代；
    old_model = model;
    model = new_model;

    chi2_ = new_chi2;

    //没实际意义
    finishIteration();

    // stop when converged, i.e. update step too small
    if(norm_max(x_)<=eps_)
      break;
  }

}

　　残差计算函数：

double SparseAlign::computeResiduals(
    const SE3& T_cur_from_ref,
    bool linearize_system,
    bool compute_weight_scale)
{
  //当前迭代金字塔层的图像
  const cv::Mat& cur_img = cur_frame_->img_pyr_.at(level_);

  //可忽略
  if(linearize_system && display_)
    resimg_ = cv::Mat(cur_img.size(), CV_32F, cv::Scalar(0));

  //预计算参考帧特征patch的缓存，即将ref_patch_cache_开辟的存储空间填上相应的值，见下面的分析
  //可以暂时认为ref_patch_cache_中已经有值了； 
  if(have_ref_patch_cache_ == false)
    precomputeReferencePatches();

  // compute the weights on the first iteration 可忽略
  std::vector<float> errors;
  if(compute_weight_scale)
    errors.reserve(visible_fts_.size());

  //下面这段是临时变量
  const int stride = cur_img.cols;              //类似与OpenCV Mat中的步；
  const int border = patch_halfsize_+1;         //patch的边界；
  const float scale = 1.0f/(1<<level_);         //对应金字塔层，1<<level_表示2的level_次方；

  const Vector3d ref_pos(ref_frame_->pos());    //参考帧在世界坐标系中的位置，即T(R|t)中的t；
  float chi2 = 0.0;                             //光度误差cost

  size_t feature_counter = 0; // is used to compute the index of the cached jacobian 特征索引；
  std::vector<bool>::iterator visiblity_it = visible_fts_.begin();
  for(auto it=ref_frame_->fts_.begin(); it!=ref_frame_->fts_.end();
      ++it, ++feature_counter, ++visiblity_it)
  {
    // check if feature is within image； visiblity_it参数在预计算过程中初始化；
    if(!*visiblity_it)
      continue;

    // compute pixel location in cur img 计算该特征通过位姿变换和相机投影过程后，在当前帧中的像素坐标；
    const double depth = ((*it)->point->pos_ - ref_pos).norm();     //通过特征的世界坐标和参考帧的世界坐标计算深度值；
    const Vector3d xyz_ref((*it)->f*depth);                         //f为特征球面归一化之后的相机坐标系下的坐标；
    const Vector3d xyz_cur(T_cur_from_ref * xyz_ref);               //参考帧特征通过初始化的或上次迭代估计的值计算在当前帧下的特征坐标；
    const Vector2f uv_cur_pyr(cur_frame_->cam_->world2cam(xyz_cur).cast<float>() * scale);  //通过相机投影变换获得图像像素坐标，注意金字塔scale；
    const float u_cur = uv_cur_pyr[0];                              //像素坐标浮点型和整型，用于双线性插值；
    const float v_cur = uv_cur_pyr[1];
    const int u_cur_i = floorf(u_cur);
    const int v_cur_i = floorf(v_cur);

    // check if projection is within the image
    if(u_cur_i < 0 || v_cur_i < 0 || u_cur_i-border < 0 || v_cur_i-border < 0 || u_cur_i+border >= cur_img.cols || v_cur_i+border >= cur_img.rows)
      continue;
    if(u_cur_i > 10000 || v_cur_i > 10000)      continue;   // fix segment fault bug

    // compute bilateral interpolation weights for the current image
    //通过双线性插值计算像素光度值；
    const float subpix_u_cur = u_cur-u_cur_i;                           //子像素值；
    const float subpix_v_cur = v_cur-v_cur_i;

    //双线性插值参数，tl：topleft，tr：topright，bl：bottomleft，br：bottomright
    const float w_cur_tl = (1.0-subpix_u_cur) * (1.0-subpix_v_cur);     
    const float w_cur_tr = subpix_u_cur * (1.0-subpix_v_cur);
    const float w_cur_bl = (1.0-subpix_u_cur) * subpix_v_cur;
    const float w_cur_br = subpix_u_cur * subpix_v_cur;

    //指向参考帧特征patch的指针：头指针+特征数*每个特征patch的像素个数；
    float* ref_patch_cache_ptr = reinterpret_cast<float*>(ref_patch_cache_.data) + patch_area_*feature_counter;
    size_t pixel_counter = 0; // is used to compute the index of the cached jacobian
    for(int y=0; y<patch_size_; ++y)
    {
      //指向当前帧像素值的指针，4*4patch的左上角开始；
      uint8_t* cur_img_ptr = (uint8_t*) cur_img.data + (v_cur_i+y-patch_halfsize_)*stride + (u_cur_i-patch_halfsize_);

      //注意各个指针递增；
      for(int x=0; x<patch_size_; ++x, ++pixel_counter, ++cur_img_ptr, ++ref_patch_cache_ptr)
      {
        // compute residual 根据双线性插值计算当前pixel的像素值；
        const float intensity_cur = w_cur_tl*cur_img_ptr[0] + w_cur_tr*cur_img_ptr[1] + w_cur_bl*cur_img_ptr[stride] + w_cur_br*cur_img_ptr[stride+1];
        //计算残差：当前帧-参考帧
        const float res = intensity_cur - (*ref_patch_cache_ptr);

        // used to compute scale for robust cost  可忽略；
        if(compute_weight_scale)
          errors.push_back(fabsf(res));

        // robustification 可忽略
        float weight = 1.0;
        if(use_weights_) {
          //weight = weight_function_->value(res/scale_);
        }

        //差值平方累加和
        chi2 += res*res*weight;
        n_meas_++;

        //求解雅克比过程
        if(linearize_system)
        {
          // compute Jacobian, weighted Hessian and weighted "steepest descend images" (times error)
          //取出当前特征对应的雅克比矩阵，因为使用的是逆向组合算法，所以jacobian_cache_预先计算好了，存储了所有特征的雅克比
          const Vector6d J(jacobian_cache_.col(feature_counter*patch_area_ + pixel_counter));
          H_.noalias() += J*J.transpose()*weight;
          Jres_.noalias() -= J*res*weight;
          if(display_)
            resimg_.at<float>((int) v_cur+y-patch_halfsize_, (int) u_cur+x-patch_halfsize_) = res/255.0;
        }
      }
    }
  }


  //返回的是cost的平均值；
  return chi2/n_meas_;
}

　　预计算参考帧的相关信息和上面的残差计算过程差不多：

void SparseAlign::precomputeReferencePatches()
{
  //临时变量
  const int border = patch_halfsize_+1;                       //边界；
  const cv::Mat& ref_img = ref_frame_->img_pyr_.at(level_);   //参考帧图像；
  const int stride = ref_img.cols;                            //步长；
  const float scale = 1.0f/(1<<level_);                       //金字塔层尺度；

  const Vector3d ref_pos = ref_frame_->pos();                 //参考帧位置；
  const double focal_length = ref_frame_->cam_->errorMultiplier2();//焦距

  size_t feature_counter = 0;
  std::vector<bool>::iterator visiblity_it = visible_fts_.begin();
  for(auto it=ref_frame_->fts_.begin(), ite=ref_frame_->fts_.end();
      it!=ite; ++it, ++feature_counter, ++visiblity_it)
  {
    //在当前金字塔层下的像素坐标值；
    // check if reference with patch size is within image
    const float u_ref = (*it)->px[0]*scale;
    const float v_ref = (*it)->px[1]*scale;
    const int u_ref_i = floorf(u_ref);
    const int v_ref_i = floorf(v_ref);
    if((*it)->point == NULL || u_ref_i-border < 0 || v_ref_i-border < 0 || u_ref_i+border >= ref_img.cols || v_ref_i+border >= ref_img.rows)
      continue;

    //该特征是否可视在这里改变状态，初始化时为false；
    *visiblity_it = true;

    // cannot just take the 3d points coordinate because of the reprojection errors in the reference image!!!
    //通过特征点世界坐标和参考帧位置计算深度值
    const double depth(((*it)->point->pos_ - ref_pos).norm());
    const Vector3d xyz_ref((*it)->f*depth);

    // evaluate projection jacobian
    //获取2*6的投影雅克比矩阵，该雅克比没有乘以焦距，如下所示
    Matrix<double,2,6> frame_jac;
    Frame::jacobian_xyz2uv(xyz_ref, frame_jac);

    // inline static void jacobian_xyz2uv(
        //     const Eigen::Vector3d& xyz_in_f,
        //     Eigen::Matrix<double, 2, 6>& J)
        // {
        //     const double x = xyz_in_f[0];
        //     const double y = xyz_in_f[1];
        //     const double z_inv = 1. / xyz_in_f[2];
        //     const double z_inv_2 = z_inv*z_inv;

        //     J(0, 0) = -z_inv;               // -1/z
        //     J(0, 1) = 0.0;                  // 0
        //     J(0, 2) = x*z_inv_2;            // x/z^2
        //     J(0, 3) = y*J(0, 2);            // x*y/z^2
        //     J(0, 4) = -(1.0 + x*J(0, 2));   // -(1.0 + x^2/z^2)
        //     J(0, 5) = y*z_inv;              // y/z

        //     J(1, 0) = 0.0;                  // 0
        //     J(1, 1) = -z_inv;               // -1/z
        //     J(1, 2) = y*z_inv_2;            // y/z^2
        //     J(1, 3) = 1.0 + y*J(1, 2);      // 1.0 + y^2/z^2
        //     J(1, 4) = -J(0, 3);             // -x*y/z^2
        //     J(1, 5) = -x*z_inv;             // x/z
        // }

    //双线性插值参数，和computeresidual函数中一样
    // compute bilateral interpolation weights for reference image
    const float subpix_u_ref = u_ref-u_ref_i;
    const float subpix_v_ref = v_ref-v_ref_i;
    const float w_ref_tl = (1.0-subpix_u_ref) * (1.0-subpix_v_ref);
    const float w_ref_tr = subpix_u_ref * (1.0-subpix_v_ref);
    const float w_ref_bl = (1.0-subpix_u_ref) * subpix_v_ref;
    const float w_ref_br = subpix_u_ref * subpix_v_ref;

    //cache_ptr：指向ref_patch_cache_的指针，前面仅开辟了内存空间，这里通过指针填值；
    size_t pixel_counter = 0;
    float* cache_ptr = reinterpret_cast<float*>(ref_patch_cache_.data) + patch_area_*feature_counter;
    for(int y=0; y<patch_size_; ++y)
    {
      //指向参考帧像素的指针；
      uint8_t* ref_img_ptr = (uint8_t*) ref_img.data + (v_ref_i+y-patch_halfsize_)*stride + (u_ref_i-patch_halfsize_);
      for(int x=0; x<patch_size_; ++x, ++ref_img_ptr, ++cache_ptr, ++pixel_counter)
      {
        // precompute interpolated reference patch color
        //通过双线性插值，给ref_patch_cache_填值；
        *cache_ptr = w_ref_tl*ref_img_ptr[0] + w_ref_tr*ref_img_ptr[1] + w_ref_bl*ref_img_ptr[stride] + w_ref_br*ref_img_ptr[stride+1];

        // we use the inverse compositional: thereby we can take the gradient always at the same position
        // get gradient of warped image (~gradient at warped position)
        //计算像素梯度值，0.5*(u[1]-u[-1]), 0.5*(v[1]-v[-1])，其中的每个像素值都使用双线性插值获得
        float dx = 0.5f * ((w_ref_tl*ref_img_ptr[1] + w_ref_tr*ref_img_ptr[2] + w_ref_bl*ref_img_ptr[stride+1] + w_ref_br*ref_img_ptr[stride+2])
                          -(w_ref_tl*ref_img_ptr[-1] + w_ref_tr*ref_img_ptr[0] + w_ref_bl*ref_img_ptr[stride-1] + w_ref_br*ref_img_ptr[stride]));
        float dy = 0.5f * ((w_ref_tl*ref_img_ptr[stride] + w_ref_tr*ref_img_ptr[1+stride] + w_ref_bl*ref_img_ptr[stride*2] + w_ref_br*ref_img_ptr[stride*2+1])
                          -(w_ref_tl*ref_img_ptr[-stride] + w_ref_tr*ref_img_ptr[1-stride] + w_ref_bl*ref_img_ptr[0] + w_ref_br*ref_img_ptr[1]));

        // cache the jacobian
        //计算像素雅克比，即像素梯度*投影雅克比；
        jacobian_cache_.col(feature_counter*patch_area_ + pixel_counter) =
            (dx*frame_jac.row(0) + dy*frame_jac.row(1))*(focal_length / (1<<level_));
      }
    }
  }
  //该参数置真，说明ref_patch_cache_已经填值；
  have_ref_patch_cache_ = true;
}

　　solve()函数和update()函数为基本的矩阵运算函数，reset()函数为参数重置函数，没什么可说的。startIteration()和finishIteration()函数可以忽略。

　　总结来看，广泛使用指针，应该比直接取值速度更快，逆向组合算法，预先计算雅克比矩阵可以节省计算量，几处双线性插值方法对提高精度有帮助。相比SVO使用的G-N优化方法，LSD-SLAM中使用的是L-M算法，且明显增加了patch的模块计算部分。

SVO稀疏图像对齐代码分析

猜你喜欢