ILSは、私が作成した新しいアルゴリズムです。
しかし、その理論と、そこで採用しているすべてのトリックは、既存のアイデアに基づいています。
RANSACは、古くからある機械学習(特に線形モデルの学習)アルゴリズムであり、
かなりうまく動作することが実証されています。
ILSは、次のように提示されています。
- 仮定:データ \(D\) には、良いサンプルと悪いサンプルの両方が含まれているとします。ここで「悪い」とは、与えられた精度の範囲内で線形方程式を満たさない(satisfy しない)サンプルであることを意味し、それ以外のサンプルは良いサンプルでなければなりません。この仮定は任意(オプション)とします。(**)
- 仮定:悪いサンプルは偏りのない分布(ガウス分布など)に従うとします。これは、余分な分散が観測(サンプリング)を変化させないことを意味します。この仮定は必須(necessary)とします。
- 仮定:目標とする線形方程式 \(Ax = b\) が実数体上に存在するとします。ここで \(x\) は、非線形空間から変換された任意のベクトルであって構いません。すなわち、\(y \leftarrow f(x)\) を満たす潜在変数 \(y\) が存在して最終的な観測値として与えられる一方で、\(x \leftarrow f^{-1}(y)\) のような関係は存在しない可能性があります。そのような非線形空間は、線形空間へ戻すための一意な逆写像(inverse map)を持たないことがあるためです。このため、ニューラルネットワークのような学習済みモデルは、\(y\) に基づく \(x\) の予測や推論を誤ることがあり、そのような \(x\) は目の前の線形方程式を満たしません。
- 反復プロセス:I. 最小二乗法を用いて \(A\) と \(b\) を求めます。ベクトル \(\vec{x}\) にバイアス 1 を加え、方程式を \(\left[\begin{matrix} A & -b \end{matrix}\right] \left[\begin{matrix} \vec{x} \\ 1 \end{matrix}\right] = 0\) の形に変換します。これは、\(x\) の次元に 1 を加えた大きさの行列の逆行列を求めるだけで簡単に解けます。\(x\) は隠れ変数からサンプリングされたものであり、その次元は小さいため、次元の呪いはまったく生じません。II. 各サンプル \(x\) を残差の絶対値 \(||Ax - b||\) に応じて評価し、残差の大きい上位 N 個を見つけます。N は、サンプル数に応じて 3 のような小さな数に設定するだけで構いません。III. 平均残差(または最大残差)が精度の許容範囲内にある場合は、このラウンドで求めた \(A\) と \(b\) を良い解として反復を停止し、終了します。それ以外の場合は、先へ進みます。IV. 残差が上位 N 個のサンプルを \(X\) から取り除きます。V. ステップ I に戻り、もう一度繰り返します。
2枚の画像を行方向に揃えて連結(CONCATENATE)するために使用する簡単なアプリケーションを、ここに示します。なお、このコードは商用利用を目的としたものではありません。この条件にご注意ください!
#include "OpenCVconfig.h"
#include "opencv2/calib3d/calib3d.hpp"
#include "opencv2/imgproc.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/highgui.hpp"
#include "opencv2/core.hpp"
#include "opencv2/core/utility.hpp"
#include "opencv2/ximgproc/edge_filter.hpp"
#include <stdio.h>
#include <stdlib.h>
#include <cmath>
#include <iostream>
#include <vector>
using namespace cv;
using namespace cv::ximgproc;
using namespace std;
// Build-time configuration. Every value below can be overridden by defining
// the macro before this point (e.g. on the compiler command line).

// When defined, AlignViews writes intermediate visualisations
// (good_matches.jpg, tracker1.jpg, tracker2.jpg, tracker3.jpg), prints the
// number of surviving point pairs and blocks on getchar() after each stage.
// It is forced on here unless the build already defined it.
// NOTE(review): identifiers containing a double underscore are reserved for
// the implementation in C++.
#ifndef __DEBUG__
#define __DEBUG__
#endif
// NOTE(review): not referenced by any code visible in this file — presumably
// gates extra dumps elsewhere; confirm before removing.
#ifndef __DUMP_ENABLED__
#define __DUMP_ENABLED__
#endif
// Number of ORB features requested per image (first argument of
// cv::ORB::create in AlignViews).
#ifndef NUM_OF_FEATURES
#define NUM_OF_FEATURES 1000
#endif
// Intended upper bound on the number of refinement rounds.
// NOTE(review): the refinement loop visible in AlignViews does not enforce
// this limit.
#ifndef MAX_ITER
#define MAX_ITER 300
#endif
// Convergence threshold: the refinement loop in AlignViews keeps iterating
// while the average absolute residual exceeds this value. The residual is a
// row coordinate offset by the principal point and divided by fy, and
// AlignViews sets fy to 1.
#ifndef RESIDUAL_LEVEL
#define RESIDUAL_LEVEL 0.5
#endif
// Upper bound on how many largest-residual point pairs are dropped per
// refinement round.
#ifndef WORST_N_CASE
#define WORST_N_CASE 3
#endif
/**
 * Draws the displacement of matched point pairs onto an image, in place.
 *
 * For every pair an arrow is drawn from src[i] to dst[i], and a circle is
 * drawn around src[i] whose radius is half of |angle|, where
 * angle = atan(dy / (1 + dx)) expressed in degrees and (dx, dy) = dst[i] - src[i].
 * Pairs with a negative angle and pairs with a non-negative angle use
 * different colours.
 *
 * @param output image that is drawn on.
 * @param src    start points of the displacements.
 * @param dst    end points of the displacements; must have the same size as src.
 *
 * If the two point lists differ in size, an error is printed to stdout and
 * the process is terminated with exit(-1).
 */
void VisualizeOpticalFlow(
    cv::Mat & output,
    const std::vector<cv::Point2f> & src,
    const std::vector<cv::Point2f> & dst) {
    if (src.size() != dst.size()) {
        std::cout << "Error: source point number dose not match that of destination" << std::endl;
        exit(-1);
    }
    for (size_t i = 0; i < dst.size(); ++i) {
        const float dy = dst[i].y - src[i].y;
        const float dx = dst[i].x - src[i].x;
        const double denom = 1.0 + dx;
        // dx == -1 together with dy == 0 would evaluate 0/0 and yield NaN,
        // which cannot be converted to a valid circle radius; such a pair is
        // drawn with angle 0 instead.
        float angle = 0;
        if (denom != 0.0 || dy != 0.0f) {
            angle = std::atan(dy / denom) / 3.1415926 * 180;
        }
        if (angle < 0) {
            cv::circle(output, src[i], static_cast<int>(-angle / 2), cv::Scalar(0, 100, 200), 1);
            cv::arrowedLine(output, src[i], dst[i], cv::Scalar(100, 200, 0), 1, 8, 0);
        }
        else {
            cv::circle(output, src[i], static_cast<int>(angle / 2), cv::Scalar(0, 200, 100), 1);
            cv::arrowedLine(output, src[i], dst[i], cv::Scalar(200, 100, 0), 1, 8, 0);
        }
    }
}
int AlignViews(
cv::Mat & im1_aligned,
cv::Mat & im2_aligned,
const cv::Mat & im1,
const cv::Mat & im2)
{
// since we only apply transformation to the right view,
// thus the first view is kept as a copy of the input.
im1.copyTo(im1_aligned);
cv::Mat M;
float fx, fy, ppx, ppy;
ppx = im1.size().width / 2.0;
ppy = im1.size().height / 2.0;
fx = 1;
fy = 1;
M.create(3, 3, CV_32F);
M.at<float>(0, 0) = fx;
M.at<float>(0, 1) = 0;
M.at<float>(0, 2) = ppx;
M.at<float>(1, 0) = 0;
M.at<float>(1, 1) = fy;
M.at<float>(1, 2) = ppy;
M.at<float>(2, 0) = 0;
M.at<float>(2, 1) = 0;
M.at<float>(2, 2) = 1;
std::vector<cv::Point2f> pts1, pts2;
cv::Mat tracker_im1, tracker_im2;
cv::Mat mask;
int total;
int num_good;
std::vector<cv::Point2f> pts1_good, pts2_good;
cv::Mat map11, map12, map21, map22;
int i, iter_;
int max_iter = MAX_ITER;
int ransac_used;
float min_[2], max_[2], miu_[3], sigma_[3];
float sum_[3], sum_of_square[3];
int nSigma = 3;
cv::Ptr<cv::ORB> orb_ = cv::ORB::create(NUM_OF_FEATURES, 1.2, 16, 31, 0, 2, 0, 31, 20);
Mat d_descriptorsL, d_descriptorsR;
vector<KeyPoint> keyPoints_1, keyPoints_2;
Ptr<DescriptorMatcher> d_matcher = DescriptorMatcher::create(cv::NORM_L2);
std::vector<DMatch> matches;
std::vector<DMatch> good_matches;
pts1.resize(0);
pts2.resize(0);
orb_->detectAndCompute(im1, Mat(), keyPoints_1, d_descriptorsL);
orb_->detectAndCompute(im2, Mat(), keyPoints_2, d_descriptorsR);
d_matcher->match(d_descriptorsL, d_descriptorsR, matches);
int sz = matches.size();
double max_dist = 0;
float select_ratio = 0.5;
for (int i = 0; i < sz; i++) {
if (matches[i].distance > max_dist)
max_dist = matches[i].distance;
}
std::vector<KeyPoint> sel_kp1, sel_kp2;
sel_kp1.resize(0);
sel_kp2.resize(0);
cv::Point2f pt1, pt2;
for (int i = 0; i < sz; i++) {
if (matches[i].distance < select_ratio*max_dist) {
pt1 = keyPoints_1[matches[i].queryIdx].pt;
pt2 = keyPoints_2[matches[i].trainIdx].pt;
if (abs(pt1.y - pt2.y) < im1.size().height * 0.02 &&
abs(pt1.x - pt2.x) < im1.size().width * 0.04 &&
pt1.x - pt2.x < 0) {
pts1.push_back(pt1);
pts2.push_back(pt2);
good_matches.push_back(cv::DMatch(sel_kp1.size(), sel_kp2.size(), 0.1));
sel_kp1.push_back(keyPoints_1[matches[i].queryIdx]);
sel_kp2.push_back(keyPoints_2[matches[i].trainIdx]);
}
}
}
#ifdef __DEBUG__
Mat ShowGoodMatches;
drawMatches(im1, sel_kp1, im2, sel_kp2, good_matches, ShowGoodMatches);
cv::imwrite("good_matches.jpg", ShowGoodMatches);
im1.copyTo(tracker_im1);
VisualizeOpticalFlow(tracker_im1, pts1, pts2);
cv::imwrite("tracker1.jpg", tracker_im1);
std::cout << "number of points: " << pts1.size() << std::endl;
getchar();
#endif
pts1_good.resize(pts1.size());
pts2_good.resize(pts2.size());
// do some analysis to these selected points
min_[0] = min_[1] = 1e5;
max_[0] = max_[1] = -1e5;
sum_[0] = sum_[1] = sum_[2] = 0;
sum_of_square[0] = sum_of_square[1] = sum_of_square[2] = 0;
float dx, dy;
for (i = 0; i < pts1.size(); ++i) {
dx = pts2[i].x - pts1[i].x;
dy = pts2[i].y - pts1[i].y;
sum_[0] += dx;
sum_[1] += dy;
sum_[2] += atan(dy / (dx + 1));
if (dx < min_[0]) min_[0] = dx;
if (dx > max_[0]) max_[0] = dx;
if (dy < min_[1]) min_[1] = dy;
if (dy > max_[1]) max_[1] = dy;
}
miu_[0] = sum_[0] / pts1.size();
miu_[1] = sum_[1] / pts1.size();
miu_[2] = sum_[2] / pts1.size();
for (i = 0; i < pts1.size(); ++i) {
dx = pts2[i].x - pts1[i].x;
dy = pts2[i].y - pts1[i].y;
sum_of_square[0] += (dx - miu_[0]) * (dx - miu_[0]);
sum_of_square[1] += (dy - miu_[1]) * (dy - miu_[1]);
sum_of_square[2] += (atan(dy / (dx + 1)) - miu_[2]) * (atan(dy / (dx + 1)) - miu_[2]);
}
sigma_[0] = sqrt(sum_of_square[0] / (pts1.size() - 1));
sigma_[1] = sqrt(sum_of_square[1] / (pts1.size() - 1));
sigma_[2] = sqrt(sum_of_square[2] / (pts1.size() - 1));
pts1_good.resize(pts1.size());
pts2_good.resize(pts2.size());
// filter out the point pairs using some constraints like 3-sigma principle
for (i = 0, num_good = 0; i < pts1.size(); ++i) {
dx = pts2[i].x - pts1[i].x;
dy = pts2[i].y - pts1[i].y;
if (abs(dx - miu_[0]) < nSigma * sigma_[0] &&
abs(dy - miu_[1]) < nSigma * sigma_[1] &&
abs(atan(dy / (dx + 1)) - miu_[2]) < nSigma * sigma_[2]) {
pts1_good[num_good] = pts1[i];
pts2_good[num_good] = pts2[i];
++num_good;
}
}
pts1_good.resize(num_good);
pts2_good.resize(num_good);
std::swap(pts1, pts1_good);
std::swap(pts2, pts2_good);
#ifdef __DEBUG__
// visualize which bunch of points are removed out of candidates
im1.copyTo(tracker_im1);
VisualizeOpticalFlow(tracker_im1, pts1, pts2);
cv::imwrite("tracker2.jpg", tracker_im1);
std::cout << "number of points: " << pts1.size() << std::endl;
getchar();
#endif
// solve row-aligning transform matrix with dynamic training samples
int j;
float u, v, u1, v1;
float a21, a22, a23, a31, a32;
int cntr;
float max_res[WORST_N_CASE], avg_res, this_res;
std::vector<unsigned char> mask_accepted;
int worst_pairs[WORST_N_CASE], total_pairs;
Mat mat_x, vec_y, vec_beta, vec_betaT, mat_xT, mat_xTx, mat_xTx_inv, vec_res;
mat_x.create(pts1.size(), 5, CV_32F);
vec_y.create(pts1.size(), 1, CV_32F);
vec_beta.create(5, 1, CV_32F);
float * ptr_x = (float*)mat_x.data;
float * ptr_y = (float*)vec_y.data;
mask_accepted.resize(pts1.size());
ransac_used = pts1.size();
avg_res = 1e5;
if (ransac_used < WORST_N_CASE + 10) {
im1.copyTo(im1_aligned);
im2.copyTo(im2_aligned);
return 0; // failed with auto alignment
}
while (avg_res > RESIDUAL_LEVEL) {
mat_x.resize(ransac_used);
vec_y.resize(ransac_used);
for (i = 0; i < ransac_used; ++i) {
v1 = (pts1[i].y - ppy) / fy;
u = (pts2[i].x - ppx) / fx;
v = (pts2[i].y - ppy) / fy;
ptr_x[i * 5] = u;
ptr_x[i * 5 + 1] = v;
ptr_x[i * 5 + 2] = 1;
ptr_x[i * 5 + 3] = -u * v1;
ptr_x[i * 5 + 4] = -v * v1;
ptr_y[i] = v1;
}
cv::transpose(mat_x, mat_xT);
mat_xTx = mat_xT * mat_x;
if (cv::invert(mat_xTx, mat_xTx_inv) == 0) {
// this approach failed with bad selecting point pairs
im1.copyTo(im1_aligned);
im2.copyTo(im2_aligned);
return 0; // failed with auto alignment
}
vec_beta = mat_xT * vec_y;
vec_beta = mat_xTx_inv * vec_beta;
// apply the solved model to all samples to find out the worst N cases
vec_res = mat_x * vec_beta;
vec_res = cv::abs(vec_res - vec_y);
avg_res = 0;
max_res[0] = 0;
cntr = 0;
for (i = 0; i < ransac_used; ++i) {
this_res = vec_res.at<float>(i);
avg_res += this_res;
if (this_res >= max_res[0]) {
for (j = WORST_N_CASE - 1; j > 0; --j) {
max_res[j] = max_res[j - 1];
worst_pairs[j] = worst_pairs[j - 1];
}
max_res[0] = this_res;
worst_pairs[0] = i;
++cntr;
}
}
// update average residual error for this round
avg_res = avg_res / ransac_used;
// filter out these worst N cases
for (i = 0; i < ransac_used; ++i) {
mask_accepted[i] = 1;
}
cntr = cntr < WORST_N_CASE ? cntr : WORST_N_CASE;
for (i = 0; i < cntr; ++i) {
mask_accepted[worst_pairs[i]] = 0;
}
total_pairs = ransac_used;
ransac_used = 0;
for (i = 0; i < total_pairs; ++i) {
if (mask_accepted[i]) {
pts1_good[ransac_used] = pts1[i];
pts2_good[ransac_used] = pts2[i];
++ransac_used;
}
}
pts1_good.resize(ransac_used);
pts2_good.resize(ransac_used);
std::swap(pts1, pts1_good);
std::swap(pts2, pts2_good);
// check if the points are enough
if (ransac_used < 5) {
im1.copyTo(im1_aligned);
im2.copyTo(im2_aligned);
return 0; // faild with no enough points
}
}
#ifdef __DEBUG__
// visualize which bunch of points are removed out of candidates
im1.copyTo(tracker_im1);
VisualizeOpticalFlow(tracker_im1, pts1, pts2);
cv::imwrite("tracker3.jpg", tracker_im1);
std::cout << "number of points: " << pts1.size() << std::endl;
getchar();
#endif
a21 = vec_beta.at<float>(0);
a22 = vec_beta.at<float>(1);
a23 = vec_beta.at<float>(2);
a31 = vec_beta.at<float>(3);
a32 = vec_beta.at<float>(4);
cv::Mat mat_row_align;
mat_row_align.create(3, 3, CV_32F);
mat_row_align.at<float>(0, 0) = 1;
mat_row_align.at<float>(0, 1) = 0;
mat_row_align.at<float>(0, 2) = 0;
mat_row_align.at<float>(1, 0) = a21;
mat_row_align.at<float>(1, 1) = a22;
mat_row_align.at<float>(1, 2) = a23;
mat_row_align.at<float>(2, 0) = a31;
mat_row_align.at<float>(2, 1) = a32;
mat_row_align.at<float>(2, 2) = 1;
// Use Shear Transformation to restore the image in good shape
// find the center points on 4 borders transformed using row-aligning matrix
cv::Point2f centers[4];
float a11, a12, a13;
float h, w;
h = im2.size().height;
w = im2.size().width;
centers[0].x = (w - 1) / 2.0;
centers[0].y = 0;
centers[1].x = w - 1;
centers[1].y = (h - 1) / 2.0;
centers[2].x = (w - 1) / 2.0;
centers[2].y = h - 1;
centers[3].x = 0;
centers[3].y = (h - 1) / 2.0;
// apply the row-align transform
for (i = 0; i < 4; ++i) {
u = (centers[i].x - ppx) / fx;
v = (centers[i].y - ppy) / fy;
centers[i].x = (u / (a31 * u + a31 * v + 1)) * fx + ppx;
centers[i].y = ((a21 * u + a22 * v + a23) / (a31 * u + a31 * v + 1)) * fy + ppy;
}
cv::Point2f u_, v_;
u_ = centers[1] - centers[3];
v_ = centers[0] - centers[2];
a11 = (h * h * u_.y * u_.y + w * w * v_.y * v_.y) / (h * w * (u_.y * v_.x - u_.x * v_.y));
a12 = (h * h * u_.x * u_.y + w * w * v_.x * v_.y) / (h * w * (u_.x * v_.y - u_.y * v_.x));
// update all the transform the points of right view
float min_disp = 1e5;
for (i = 0; i < ransac_used; ++i) {
u = (pts2[i].x - ppx) / fx;
v = (pts2[i].y - ppy) / fy;
u1 = (pts1[i].x - ppx) / fx;
u = u / (a31 * u + a31 * v + 1);
v = (a21 * u + a22 * v + a23) / (a31 * u + a31 * v + 1);
u = a11 * u + a12 * v;
if (u - u1 < min_disp) min_disp = u - u1;
}
min_disp = fmin(0, min_disp);
a13 = -min_disp;
// code here to get shear matrix for the right view
cv::Mat mat_shear;
mat_shear.create(3, 3, CV_32F);
mat_shear.at<float>(0, 0) = a11;
mat_shear.at<float>(0, 1) = a12;
mat_shear.at<float>(0, 2) = a13;
mat_shear.at<float>(1, 0) = 0;
mat_shear.at<float>(1, 1) = 1;
mat_shear.at<float>(1, 2) = 0;
mat_shear.at<float>(2, 0) = 0;
mat_shear.at<float>(2, 1) = 0;
mat_shear.at<float>(2, 2) = 1;
cv::Mat mat_final_transform;
mat_final_transform = mat_shear * mat_row_align;
cv::Mat D = cv::Mat::zeros(cv::Size(5, 1), CV_32F);
cv::Mat map_x, map_y;
cv::initUndistortRectifyMap(M, D, mat_final_transform, M, im2.size(), CV_32F, map_x, map_y);
cv::remap(im2, im2_aligned, map_x, map_y, CV_INTER_LINEAR);
return 1;
}