VO：简单的视觉里程计代码注释

走完SLAM十四讲前端之后，代码都已经注释完，但还是感觉有点迷茫，所以专门参考冯兵的博客，实现简单的视觉里程计。

收获是又重新认识到了C++基础的薄弱，决定之后的晚上要刷牛客题。不过就SLAM前端而言这部分基本可以理解代码了，这篇对VO代码进行注释。

基本过程：
1、获取图像
2、对图像进行处理
3、通过FAST算法对图像进行特征检测，通过KLT光流法跟踪图像的特征，如果跟踪的特征有所丢失，特征数小于某个阈值，那么重新特征检测。
4、通过RANSAC的5点算法来估计出两幅图像的本质矩阵。
5、通过本质矩阵进行估计R和t
6、对尺度信息进行估计，最终确定旋转矩阵和平移向量

先放效果图

Demo video

主程序main.cpp

#include <fstream>
#include <iostream>
#include <iomanip>
#include "visual_odometry.h"

int main(int argc, char *argv[])
{
	//定义相机
	PinholeCamera *cam = new PinholeCamera(1241.0, 376.0,
		718.8560, 718.8560, 607.1928, 185.2157);//为什么要用对象指针？
	//初始化对象vo
	VisualOdometry vo(cam);
	// 用于保存轨迹数据
	std::ofstream out("position.txt");
	// 创建窗体用于显示读取的图片以及显示轨迹
	char text[100];
	int font_face = cv::FONT_HERSHEY_PLAIN;
	double font_scale = 1;
	int thickness = 1;
	cv::Point text_org(10, 50);
	cv::namedWindow("Road facing camera", cv::WINDOW_AUTOSIZE);
	cv::namedWindow("Trajectory", cv::WINDOW_AUTOSIZE);
	cv::Mat traj = cv::Mat::zeros(600, 600, CV_8UC3);// 用于绘制轨迹

	double x=0.0, y=0.0,z=0.0;// 用于保存轨迹 
	for (int img_id = 0; img_id < 2000; ++img_id)
	{
		// 导入图像
		std::stringstream ss;
		ss <<  "F:/SLAM/dataset/KITTI/data_odometry_gray/00/image_1/"//单右斜线
			<< std::setw(6) << std::setfill('0') << img_id << ".png";//靠右对齐，字符长6，左边用0补充

		cv::Mat img(cv::imread(ss.str().c_str(), 0));
		assert(!img.empty());//如果没有照片就返回错误，终止程序

		// 处理帧
		vo.addImage(img, img_id);
		cv::Mat cur_t = vo.getCurrentT();
		if (cur_t.rows!=0)
		{
			x = cur_t.at<double>(0);
			y = cur_t.at<double>(1);
			z = cur_t.at<double>(2);
		}
		out << x << " " << y << " " << z << std::endl;
		//中心点
		int draw_x = int(x) + 300;
		int draw_y = int(z) + 100;
		cv::circle(traj, cv::Point(draw_x, draw_y), 1, CV_RGB(255, 0, 0), 2);

		cv::rectangle(traj, cv::Point(10, 30), cv::Point(580, 60), CV_RGB(0, 0, 0), CV_FILLED);
		sprintf(text, "Coordinates: x = %02fm y = %02fm z = %02fm", x, y, z);
		cv::putText(traj, text, text_org, font_face, font_scale, cv::Scalar::all(255), thickness, 8);

		cv::imshow("Road facing camera", img);
		cv::imshow("Trajectory", traj);

		cv::waitKey(1);
	}

	delete cam;
	out.close();
	getchar();
	return 0;
}

导入照片：

std::stringstream ss;
ss <<  "F:/SLAM/dataset/image_1/"<< std::setw(6) << std::setfill('0') << img_id<< ".png";
cv::Mat img(cv::imread(ss.str().c_str(), 0));
assert(!img.empty());//如果没有照片就返回错误，终止程序

其中setw(6)表示图片名称是6个字符长，从右开始数，多余部分用0补充，例如：000006.png

ss.str().c_str()将string类型转为char*，为了与C兼容。

相机初始化：

//定义相机
PinholeCamera *cam = new PinholeCamera(1241.0, 376.0,
         718.8560, 718.8560, 607.1928, 185.2157);//为什么要用对象指针？
//初始化对象vo
VisualOdometry vo(cam);

在VisualOdometry类中对于相机定义是：

VisualOdometry::VisualOdometry(PinholeCamera* cam):cam_(cam)
{
	focal_ = cam_->fx();
	pp_ = cv::Point2d(cam_->cx(), cam_->cy());
	frame_stage_ = STAGE_FIRST_FRAME;
}

相机之后要使用的参数主要是：焦距、相机中心位移、当前帧状态。

关键的处理帧这里只用到了addImage与getCurrentT()，下一节讨论
画面呈现

if (cur_t.rows!=0)
{
	x = cur_t.at<double>(0);
	y = cur_t.at<double>(1);
	z = cur_t.at<double>(2);
}
int draw_x = int(x) + 300;
int draw_y = int(z) + 100;
cv::circle(traj, cv::Point(draw_x, draw_y), 1, CV_RGB(255, 0, 0), 2);
cv::rectangle(traj, cv::Point(10, 30), cv::Point(580, 60), CV_RGB(0, 0, 0), CV_FILLED);
sprintf(text, "Coordinates: x = %02fm y = %02fm z = %02fm", x, y, z);
cv::putText(traj, text, text_org, font_face, font_scale, cv::Scalar::all(255), thickness, 8);
cv::imshow("Road facing camera", img);
cv::imshow("Trajectory", traj);

要想把轨迹画出来，先将x转为int类型，之后(300,100)是起始点的坐标。

cv::Mat traj = cv::Mat::zeros(600, 600, CV_8UC3);// 用于绘制轨迹，在视频当中看到黑色的画布。

扫描二维码关注公众号，回复： 4082821 查看本文章

sprintf输出x,y,z的文本以及在画面上呈现文字putText().

visual_odometry.h

#ifndef VISUAL_ODOMETRY_H_
#define VISUAL_ODOMETRY_H_

#include <vector>
#include <opencv2/opencv.hpp>
#include "pinhole_camera.h"

class VisualOdometry
{
public:
	//目前所在帧，通过限制条件更好的确定第一帧和第二帧图像用于确定初始位置
	enum FrameStage {
		STAGE_FIRST_FRAME,//第一帧
		STAGE_SECOND_FRAME,//第二帧
		STAGE_DEFAULT_FRAME//默认帧
	};
	//构造函数，析构函数
	VisualOdometry(PinholeCamera* cam);
	virtual ~VisualOdometry();

	/// 提供一个图像
	void addImage(const cv::Mat& img, int frame_id);
	/// 获取当前帧的旋转
	cv::Mat getCurrentR() { return cur_R_; }
	/// 获得当前帧的平移
	cv::Mat getCurrentT() { return cur_t_; }

protected: 
	/// 处理第一帧
	virtual bool processFirstFrame();
	/// 处理第二帧
	virtual bool processSecondFrame();
	/// 处理完开始的两个帧后处理所有帧
	virtual bool processFrame(int frame_id);
	/// 计算绝对尺度
	double getAbsoluteScale(int frame_id);
	/// 特征检测
	void featureDetection(cv::Mat image, std::vector<cv::Point2f> &px_vec);
	/// 特征跟踪
	void featureTracking(cv::Mat image_ref, cv::Mat image_cur,
		std::vector<cv::Point2f>& px_ref, std::vector<cv::Point2f>& px_cur, std::vector<double>& disparities);

protected:
	FrameStage frame_stage_;                 //!< 当前为第几帧
	PinholeCamera *cam_;                     //!< 相机
	cv::Mat new_frame_;                      //!< 当前帧
	cv::Mat last_frame_;                     //!< 最近处理帧

	cv::Mat cur_R_;//!< 计算出当前的姿态，后期将图像和姿态进行封装为帧
	cv::Mat cur_t_;//!< 当前的平移

	std::vector<cv::Point2f> px_ref_;      //!< 在参考帧中用于跟踪的特征点
	std::vector<cv::Point2f> px_cur_;      //!< 在当前帧中跟踪的特征点
	std::vector<double> disparities_;      //!< 第一帧与第二帧对应光流跟踪的特征之间的像素差值

	double focal_;//!<相机焦距
	cv::Point2d pp_; //!<相机中心点

};

#endif // VISUAL_ODOMETRY_H_

先从addImage开始

void VisualOdometry::addImage(const cv::Mat& img, int frame_id)
{
	//对添加的图像进行判断
	if (img.empty() || img.type() != CV_8UC1 || img.cols != cam_->width() || img.rows != cam_->height())
		throw std::runtime_error("Frame: provided image has not the same size as the camera model or image is not grayscale");
	// 添加新帧 
	new_frame_ = img;
	bool res = true;//结果状态
	if (frame_stage_ == STAGE_DEFAULT_FRAME)
		res = processFrame(frame_id);
	else if (frame_stage_ == STAGE_SECOND_FRAME)
		res = processSecondFrame();
	else if (frame_stage_ == STAGE_FIRST_FRAME)
		res = processFirstFrame();
	// 将当前帧设为上一处理帧
	last_frame_ = new_frame_;

}

根据当前帧的状态分别放进三个函数里，第一帧、第二帧、正常帧

正常帧processFrame

//之后的帧R、t要累加
bool VisualOdometry::processFrame(int frame_id)
{
	double scale = 1.00;//初始尺度为1
	featureTracking(last_frame_, new_frame_, px_ref_, px_cur_, disparities_); //通过光流跟踪确定第二帧中的相关特征
	cv::Mat E, R, t, mask;
	E = cv::findEssentialMat(px_cur_, px_ref_, focal_, pp_, cv::RANSAC, 0.999, 1.0, mask);
	cv::recoverPose(E, px_cur_, px_ref_, R, t, focal_, pp_, mask);
	scale = getAbsoluteScale(frame_id);//得到当前帧的实际尺度
	if (scale > 0.1) //如果尺度小于0.1可能计算出的Rt存在一定的问题,则不做处理，保留上一帧的值
	{
		cur_t_ = cur_t_ + scale*(cur_R_*t);
		cur_R_ = R*cur_R_;
	}
	// 如果跟踪特征点数小于给定阈值，进行重新特征检测
	if (px_ref_.size() < kMinNumFeature)
	{
		featureDetection(new_frame_, px_ref_);
		featureTracking(last_frame_, new_frame_, px_ref_, px_cur_, disparities_);
	}
	//当前特征点变为参考帧特征点
	px_ref_ = px_cur_;
	return true;
}

第一帧processFirstFrame()

// 第一帧只有特征检测，计算出来的特征为参考帧特征点
bool VisualOdometry::processFirstFrame()
{
	featureDetection(new_frame_, px_ref_);
	// 修改状态，表明第一帧已经处理完成
	frame_stage_ = STAGE_SECOND_FRAME;
	return true;
}

第二帧processSecondFrame()

// 第二帧算出第一次R，t，特征跟踪、基础矩阵、恢复位姿
bool VisualOdometry::processSecondFrame()
{
	featureTracking(last_frame_, new_frame_, px_ref_, px_cur_, disparities_); //通过光流跟踪确定第二帧中的相关特征
	cv::Mat E, R, t, mask;
	E = cv::findEssentialMat(px_cur_, px_ref_, focal_, pp_, cv::RANSAC, 0.999, 1.0, mask);
	cv::recoverPose(E, px_cur_, px_ref_, R, t, focal_, pp_, mask);
	cur_R_ = R.clone();
	cur_t_ = t.clone();
	// 设置状态处理默认帧
	frame_stage_ = STAGE_DEFAULT_FRAME;
	//当前特征点变为参考帧特征点
	px_ref_ = px_cur_;
	return true;
}

作者开源的代码可以跑通，注释为：


#include "vo_features.h"

using namespace cv;
using namespace std;
//define宏定义，全局变量
#define MAX_FRAME 1000
#define MIN_NUM_FEAT 2000
//获得绝对尺度？？？
double getAbsoluteScale(int frame_id, int sequence_id, double z_cal)	{
  
  string line;
  int i = 0;
  ifstream myfile ("/home/avisingh/Datasets/KITTI_VO/00.txt");
  double x =0, y=0, z = 0;
  double x_prev, y_prev, z_prev;
  if (myfile.is_open())
  {
    while (( getline (myfile,line) ) && (i<=frame_id))
    {
      z_prev = z;
      x_prev = x;
      y_prev = y;
      std::istringstream in(line);
      //cout << line << '\n';
      for (int j=0; j<12; j++)  {
        in >> z ;
        if (j==7) y=z;
        if (j==3)  x=z;
      }
      
      i++;
    }
    myfile.close();
  }

  else {
    cout << "Unable to open file";
    return 0;
  }

  return sqrt((x-x_prev)*(x-x_prev) + (y-y_prev)*(y-y_prev) + (z-z_prev)*(z-z_prev)) ;

}


int main( int argc, char** argv )	

  Mat img_1, img_2;
  Mat R_f, t_f;
  //生成输出的文件
  ofstream myfile;
  myfile.open ("results1_1.txt");
  //后面会采用帧数数据计算两帧之间距离作为scale
  double scale = 1.00;
  //先创建数组，sprintf()将照片输出到指定指针,前两张图像
  char filename1[200];
  char filename2[200];
  sprintf(filename1, "/home/avisingh/Datasets/KITTI_VO/00/image_2/%06d.png", 0);
  sprintf(filename2, "/home/avisingh/Datasets/KITTI_VO/00/image_2/%06d.png", 1);
  //读数据集前两帧
  Mat img_1_c = imread(filename1);
  Mat img_2_c = imread(filename2);

  if ( !img_1_c.data || !img_2_c.data ) { 
    std::cout<< " --(!) Error reading images " << std::endl; return -1;
  }
  // 转为灰度图像
  cvtColor(img_1_c, img_1, COLOR_BGR2GRAY);
  cvtColor(img_2_c, img_2, COLOR_BGR2GRAY);
  // 特征检测，追踪
  vector<Point2f> points1, points2;        //vectors to store the coordinates of the feature points
  featureDetection(img_1, points1);        //detect features in img_1
  vector<uchar> status;
  featureTracking(img_1,img_2,points1,points2, status); //track those features to img_2
  //相机参数
  double focal = 718.8560;
  cv::Point2d pp(607.1928, 185.2157);
  //找E，恢复第二帧位姿T[R|t]
  Mat E, R, t, mask;
  E = findEssentialMat(points2, points1, focal, pp, RANSAC, 0.999, 1.0, mask);
  recoverPose(E, points2, points1, R, t, focal, pp, mask);
  R_f = R.clone();
  t_f = t.clone();
  // 创建窗体用于显示读取的图片以及显示轨迹
  char text[100];
  int fontFace = FONT_HERSHEY_PLAIN;
  double fontScale = 1;
  int thickness = 1;
  cv::Point textOrg(10, 50);
  namedWindow( "Road facing camera", WINDOW_AUTOSIZE );// 创建窗口
  namedWindow( "Trajectory", WINDOW_AUTOSIZE );
  Mat traj = Mat::zeros(600, 600, CV_8UC3);//创建黑背景
  //以第二帧作为循环的第一帧图像
  Mat prevImage = img_2;
  Mat currImage;
  vector<Point2f> prevFeatures = points2;
  vector<Point2f> currFeatures;
  char filename[100];
  for(int numFrame=2; numFrame < MAX_FRAME; numFrame++)	{
  	sprintf(filename, "/home/avisingh/Datasets/KITTI_VO/00/image_2/%06d.png", numFrame);
    //cout << numFrame << endl;
  	Mat currImage_c = imread(filename);
  	cvtColor(currImage_c, currImage, COLOR_BGR2GRAY);
  	vector<uchar> status;
  	featureTracking(prevImage, currImage, prevFeatures, currFeatures, status);
  	E = findEssentialMat(currFeatures, prevFeatures, focal, pp, RANSAC, 0.999, 1.0, mask);
  	recoverPose(E, currFeatures, prevFeatures, R, t, focal, pp, mask);
	//特征点化为二维像素坐标(xy)，有什么用吗？？？可以直接删掉
    Mat prevPts(2,prevFeatures.size(), CV_64F), currPts(2,currFeatures.size(), CV_64F);
    for(int i=0;i<prevFeatures.size();i++)	{   
  		prevPts.at<double>(0,i) = prevFeatures.at(i).x;
  		prevPts.at<double>(1,i) = prevFeatures.at(i).y;

  		currPts.at<double>(0,i) = currFeatures.at(i).x;
  		currPts.at<double>(1,i) = currFeatures.at(i).y;
    }
	//获得绝对尺度，最后一个参数是z
  	scale = getAbsoluteScale(numFrame, 0, t.at<double>(2));
	//z坐标大于x y且尺度大于0.1？？？
    if ((scale>0.1)&&(t.at<double>(2) > t.at<double>(0)) && (t.at<double>(2) > t.at<double>(1))) {
      t_f = t_f + scale*(R_f*t);
      R_f = R*R_f; 
    }
    else {
     cout << "scale below 0.1, or incorrect translation" << endl;
    }
    
   //输出轨迹数据，形式如00.txt
   myfile << t_f.at<double>(0) << " " << t_f.at<double>(1) << " " << t_f.at<double>(2) << endl;

  // 如果被跟踪的特征数量低于特定阈值，则触发重新检测
 	  if (prevFeatures.size() < MIN_NUM_FEAT)	{
      cout << "Number of tracked features reduced to " << prevFeatures.size() << endl;
      cout << "trigerring redection" << endl;
 	  featureDetection(prevImage, prevFeatures);
      featureTracking(prevImage,currImage,prevFeatures,currFeatures, status);
 	  }
    prevImage = currImage.clone();
    prevFeatures = currFeatures;
	//绘制轨迹，为什么y的是第三个z，汽车相机朝向正前方
	//Mat.at<>()访问元素
    int x = int(t_f.at<double>(0)) + 300;
    int y = int(t_f.at<double>(2)) + 100;
	//画圆(图像，圆心坐标，半径,红色，线条粗细)
    circle(traj, Point(x, y) ,1, CV_RGB(255,0,0), 2);
	//矩形（图像，对角的顶点，黑色，填充），放文字前先用矩形覆盖住。
    rectangle( traj, Point(10, 30), Point(550, 50), CV_RGB(0,0,0), CV_FILLED);
	//坐标实时变化
    sprintf(text, "Coordinates: x = %02fm y = %02fm z = %02fm", t_f.at<double>(0), t_f.at<double>(1), t_f.at<double>(2));
	//放文字（图像你，文字内容，位置，字体类型，颜色白，厚度）
    putText(traj, text, textOrg, fontFace, fontScale, Scalar::all(255), thickness, 8);

    imshow( "Road facing camera", currImage_c );
    imshow( "Trajectory", traj );

    waitKey(1);

  }
  //计时结束，转化为以秒为单位
  clock_t end = clock();
  double elapsed_secs = double(end - begin) / CLOCKS_PER_SEC;
  cout << "Total time taken: " << elapsed_secs << "s" << endl;

  //cout << R_f << endl;
  //cout << t_f << endl;

  return 0;
}

先要匹配上对应特征点，之后恢复[R|t] 关键代码为：

E = findEssentialMat(currFeatures, prevFeatures, focal, pp, RANSAC, 0.999, 1.0, mask);
recoverPose(E, currFeatures, prevFeatures, R, t, focal, pp, mask);

为了下面的式子成立，需要提前通过前两帧求得[R|t],得到 R_f = R.clone(); t_f = t.clone();

t_f = t_f + scale*(R_f*t);

R_f = R*R_f;

特征检测用的Fast角点，跟踪使用KLT光流法，但是这里的代码显得很不友好，还是用之前SLAM十四讲里的清晰些。

接下来要抓紧把后端以及见图部分搞定，deadline是这个月末，已经拖了很久了，像师兄讲的你得先快速的过一遍然后再找感兴趣的点进行深究。现在只是前端内容还远远不够。