这部分作用可以用一句话概括为:使用直接法最小化图像块重投影残差来获取位姿。
已知:假设的相邻帧位姿 (先初始化为上一帧或单位矩阵),k-1帧观测量:特征点2D投影位置及其深度。
目标:利用直接法最小化图像块重投影误差来获取当前帧位姿 ,其中,红色的为位姿,即优化变量。
直接法基本思想:空间中一空间点在各个视角下成像的灰度短时间内不变。
直接法具体过程如下:
1)准备工作
假设相邻帧之间的位姿 已知,一般初始化为上一相邻时刻的位姿或者假设为单位矩阵。通过之前多帧之间的特征检测以及深度估计,我们已经知道第k-1帧中特征点位置以及它们的深度。
2)重投影
知道 中的某个特征在图像平面的位置(u,v),以及它的深度d,能够将该特征投影到三维空间 ,该三维空间的坐标系是定义在摄像机坐标系的。所以,我们要将它投影到当前帧IkIk中,需要位姿转换 ,得到该点在当前帧坐标系中的三维坐标 。最后通过摄像机内参数,投影到IkIk的图像平面(u′,v′),完成重投影。
3)迭代优化更新位姿
按理来说对于空间中同一个点,被极短时间内的相邻两帧拍到,它的亮度值应该没啥变化。但由于位姿是假设的一个值,所以重投影的点不准确,导致投影前后的亮度值是不相等的。不断优化位姿使得这个残差最小,就能得到优化后的位姿。
将上述过程公式化如下:通过不断优化位姿最小化残差损失函数。
公式中第一步为根据图像位置和深度逆投影到三维空间,第二步将三维坐标点旋转平移到当前帧坐标系下,第三步再将三维坐标点投影回当前帧图像坐标。
实现当中,SVO自己实现了高斯——牛顿法的迭代下降,并且比较取巧地使用了一种反向的求导方式:即雅可比在k-1帧图像上进行估计,而不在k帧上估计。这样做法的好处是在迭代过程中只需计算一次雅可比,其余部分只需更新残差即可(即G-N等式右侧的)。这能够节省一定程度的计算量。另一个好处是,我们能够保证k-1帧的像素具有梯度,但没法保证在k帧上的投影也具有梯度,所以这样做至少能保证像素点的梯度是明显的。
实现当中另一个需要注意的地方是金字塔的处理。这一步估计是从金字塔的顶层开始,把上一层的结果作为下一层估计的初始值,最后迭代到底层的。顶层的分辨率最小,所以这是一个由粗到精的过程(Coarse-to-Fine),使得在运动较大时也能有较好的结果。
下面就是代码部分了。
这部分功能的代码都放在sparse_img_align.cpp和sparse_img_align.h两个文件中。主要的函数也就两个:
1)precomputeReferencePatches:遍历所有特征点
遍历参考帧中所有的特征点,并计算相应的雅克比,缓存下来
void SparseImgAlign::precomputeReferencePatches()
{
const int border = patch_halfsize_+1;
const cv::Mat& ref_img = ref_frame_->img_pyr_.at(level_);
const int stride = ref_img.cols;
const float scale = 1.0f/(1<<level_);
const Vector3d ref_pos = ref_frame_->pos();
const double focal_length = ref_frame_->cam_->errorMultiplier2();
size_t feature_counter = 0;
std::vector<bool>::iterator visiblity_it = visible_fts_.begin();
//遍历参考帧中的每一个feature
for(auto it=ref_frame_->fts_.begin(), ite=ref_frame_->fts_.end();
it!=ite; ++it, ++feature_counter, ++visiblity_it)
{
// 首先判断是否在图像中
const float u_ref = (*it)->px[0]*scale;
const float v_ref = (*it)->px[1]*scale;
const int u_ref_i = floorf(u_ref);
const int v_ref_i = floorf(v_ref);
if((*it)->point == NULL || u_ref_i-border < 0 || v_ref_i-border < 0 || u_ref_i+border >= ref_img.cols || v_ref_i+border >= ref_img.rows)
continue;
*visiblity_it = true;
// cannot just take the 3d points coordinate because of the reprojection errors in the reference image!!!
const double depth(((*it)->point->pos_ - ref_pos).norm());
const Vector3d xyz_ref((*it)->f*depth);
// evaluate projection jacobian
Matrix<double,2,6> frame_jac;
Frame::jacobian_xyz2uv(xyz_ref, frame_jac);
// compute bilateral interpolation weights for reference image
const float subpix_u_ref = u_ref-u_ref_i;
const float subpix_v_ref = v_ref-v_ref_i;
const float w_ref_tl = (1.0-subpix_u_ref) * (1.0-subpix_v_ref);
const float w_ref_tr = subpix_u_ref * (1.0-subpix_v_ref);
const float w_ref_bl = (1.0-subpix_u_ref) * subpix_v_ref;
const float w_ref_br = subpix_u_ref * subpix_v_ref;
size_t pixel_counter = 0;
float* cache_ptr = reinterpret_cast<float*>(ref_patch_cache_.data) + patch_area_*feature_counter;
for(int y=0; y<patch_size_; ++y)
{
uint8_t* ref_img_ptr = (uint8_t*) ref_img.data + (v_ref_i+y-patch_halfsize_)*stride + (u_ref_i-patch_halfsize_);
for(int x=0; x<patch_size_; ++x, ++ref_img_ptr, ++cache_ptr, ++pixel_counter)
{
// precompute interpolated reference patch color
*cache_ptr = w_ref_tl*ref_img_ptr[0] + w_ref_tr*ref_img_ptr[1] + w_ref_bl*ref_img_ptr[stride] + w_ref_br*ref_img_ptr[stride+1];
// we use the inverse compositional: thereby we can take the gradient always at the same position
// get gradient of warped image (~gradient at warped position)
float dx = 0.5f * ((w_ref_tl*ref_img_ptr[1] + w_ref_tr*ref_img_ptr[2] + w_ref_bl*ref_img_ptr[stride+1] + w_ref_br*ref_img_ptr[stride+2])
-(w_ref_tl*ref_img_ptr[-1] + w_ref_tr*ref_img_ptr[0] + w_ref_bl*ref_img_ptr[stride-1] + w_ref_br*ref_img_ptr[stride]));
float dy = 0.5f * ((w_ref_tl*ref_img_ptr[stride] + w_ref_tr*ref_img_ptr[1+stride] + w_ref_bl*ref_img_ptr[stride*2] + w_ref_br*ref_img_ptr[stride*2+1])
-(w_ref_tl*ref_img_ptr[-stride] + w_ref_tr*ref_img_ptr[1-stride] + w_ref_bl*ref_img_ptr[0] + w_ref_br*ref_img_ptr[1]));
// cache the jacobian
jacobian_cache_.col(feature_counter*patch_area_ + pixel_counter) =
(dx*frame_jac.row(0) + dy*frame_jac.row(1))*(focal_length / (1<<level_));
}
}
}
have_ref_patch_cache_ = true;
}
2)computeResiduals:计算残差
计算出残差就可以供主流程去调用,并执行优化了。
double SparseImgAlign::computeResiduals(
const SE3& T_cur_from_ref,
bool linearize_system,
bool compute_weight_scale)
{
// Warp the (cur)rent image such that it aligns with the (ref)erence image
const cv::Mat& cur_img = cur_frame_->img_pyr_.at(level_);
if(linearize_system && display_)
resimg_ = cv::Mat(cur_img.size(), CV_32F, cv::Scalar(0));
if(have_ref_patch_cache_ == false)
precomputeReferencePatches();
// compute the weights on the first iteration
std::vector<float> errors;
if(compute_weight_scale)
errors.reserve(visible_fts_.size());
const int stride = cur_img.cols;
const int border = patch_halfsize_+1;
const float scale = 1.0f/(1<<level_);
const Vector3d ref_pos(ref_frame_->pos());
float chi2 = 0.0;
size_t feature_counter = 0; // is used to compute the index of the cached jacobian
std::vector<bool>::iterator visiblity_it = visible_fts_.begin();
for(auto it=ref_frame_->fts_.begin(); it!=ref_frame_->fts_.end();
++it, ++feature_counter, ++visiblity_it)
{
// check if feature is within image
if(!*visiblity_it)
continue;
// compute pixel location in cur img
const double depth = ((*it)->point->pos_ - ref_pos).norm();
const Vector3d xyz_ref((*it)->f*depth);
const Vector3d xyz_cur(T_cur_from_ref * xyz_ref);
const Vector2f uv_cur_pyr(cur_frame_->cam_->world2cam(xyz_cur).cast<float>() * scale);
const float u_cur = uv_cur_pyr[0];
const float v_cur = uv_cur_pyr[1];
const int u_cur_i = floorf(u_cur);
const int v_cur_i = floorf(v_cur);
// check if projection is within the image
if(u_cur_i < 0 || v_cur_i < 0 || u_cur_i-border < 0 || v_cur_i-border < 0 || u_cur_i+border >= cur_img.cols || v_cur_i+border >= cur_img.rows)
continue;
// compute bilateral interpolation weights for the current image
const float subpix_u_cur = u_cur-u_cur_i;
const float subpix_v_cur = v_cur-v_cur_i;
const float w_cur_tl = (1.0-subpix_u_cur) * (1.0-subpix_v_cur);
const float w_cur_tr = subpix_u_cur * (1.0-subpix_v_cur);
const float w_cur_bl = (1.0-subpix_u_cur) * subpix_v_cur;
const float w_cur_br = subpix_u_cur * subpix_v_cur;
float* ref_patch_cache_ptr = reinterpret_cast<float*>(ref_patch_cache_.data) + patch_area_*feature_counter;
size_t pixel_counter = 0; // is used to compute the index of the cached jacobian
for(int y=0; y<patch_size_; ++y)
{
uint8_t* cur_img_ptr = (uint8_t*) cur_img.data + (v_cur_i+y-patch_halfsize_)*stride + (u_cur_i-patch_halfsize_);
for(int x=0; x<patch_size_; ++x, ++pixel_counter, ++cur_img_ptr, ++ref_patch_cache_ptr)
{
// compute residual
const float intensity_cur = w_cur_tl*cur_img_ptr[0] + w_cur_tr*cur_img_ptr[1] + w_cur_bl*cur_img_ptr[stride] + w_cur_br*cur_img_ptr[stride+1];
const float res = intensity_cur - (*ref_patch_cache_ptr);
// used to compute scale for robust cost
if(compute_weight_scale)
errors.push_back(fabsf(res));
// robustification
float weight = 1.0;
if(use_weights_) {
weight = weight_function_->value(res/scale_);
}
chi2 += res*res*weight;
n_meas_++;
if(linearize_system)
{
// compute Jacobian, weighted Hessian and weighted "steepest descend images" (times error)
const Vector6d J(jacobian_cache_.col(feature_counter*patch_area_ + pixel_counter));
H_.noalias() += J*J.transpose()*weight;
Jres_.noalias() -= J*res*weight;
if(display_)
resimg_.at<float>((int) v_cur+y-patch_halfsize_, (int) u_cur+x-patch_halfsize_) = res/255.0;
}
}
}
}
// compute the weights on the first iteration
if(compute_weight_scale && iter_ == 0)
scale_ = scale_estimator_->compute(errors);
return chi2/n_meas_;
}
评论(0)
您还未登录,请登录后发表或查看评论