AKAZE和ORB平面跟蹤

2018-10-13 11:11 更新

介紹

在本教程中,我們將使用它們比較AKAZE和ORB本地功能,以查找視頻幀和跟蹤對(duì)象移動(dòng)之間的匹配。

算法如下:

  • 檢測(cè)并描述第一幀上的關(guān)鍵點(diǎn),手動(dòng)設(shè)置對(duì)象邊界
  • 對(duì)于每一個(gè)下一幀:
  1. 檢測(cè)并描述關(guān)鍵點(diǎn)
  2. 使用bruteforce匹配器匹配它們
  3. 使用RANSAC估計(jì)單變圖
  4. 過(guò)濾來(lái)自所有比賽的內(nèi)置值
  5. 將單變圖應(yīng)用于邊界框以找到對(duì)象
  6. 繪制邊界框和內(nèi)聯(lián),計(jì)算不定式比值作為評(píng)估指標(biāo)

AKAZE和ORB平面跟蹤

數(shù)據(jù)

要進(jìn)行跟蹤,我們需要第一幀上的視頻和對(duì)象位置。

您可以從這里下載我們的示例視頻和數(shù)據(jù)。

要運(yùn)行代碼,您必須指定輸入(攝像機(jī)ID或video_file)。然后,使用鼠標(biāo)選擇邊框,然后按任意鍵開始跟蹤

./planar_tracking blais.mp4

源代碼

#include <opencv2/features2d.hpp>
#include <opencv2/videoio.hpp>
#include <opencv2/opencv.hpp>
#include <opencv2/highgui.hpp>      //for imshow
#include <vector>
#include <iostream>
#include <iomanip>
#include "stats.h" // Stats structure definition
#include "utils.h" // Drawing and printing functions
using namespace std;
using namespace cv;
const double akaze_thresh = 3e-4; // AKAZE detection threshold set to locate about 1000 keypoints
const double ransac_thresh = 2.5f; // RANSAC inlier threshold
const double nn_match_ratio = 0.8f; // Nearest-neighbour matching ratio
const int bb_min_inliers = 100; // Minimal number of inliers to draw bounding box
const int stats_update_period = 10; // On-screen statistics are updated every 10 frames
namespace example {
class Tracker
{
public:
    Tracker(Ptr<Feature2D> _detector, Ptr<DescriptorMatcher> _matcher) :
        detector(_detector),
        matcher(_matcher)
    {}
    void setFirstFrame(const Mat frame, vector<Point2f> bb, string title, Stats& stats);
    Mat process(const Mat frame, Stats& stats);
    Ptr<Feature2D> getDetector() {
        return detector;
    }
protected:
    Ptr<Feature2D> detector;
    Ptr<DescriptorMatcher> matcher;
    Mat first_frame, first_desc;
    vector<KeyPoint> first_kp;
    vector<Point2f> object_bb;
};
void Tracker::setFirstFrame(const Mat frame, vector<Point2f> bb, string title, Stats& stats)
{
    cv::Point *ptMask = new cv::Point[bb.size()];
    const Point* ptContain = { &ptMask[0] };
    int iSize = static_cast<int>(bb.size());
    for (size_t i=0; i<bb.size(); i++) {
        ptMask[i].x = static_cast<int>(bb[i].x);
        ptMask[i].y = static_cast<int>(bb[i].y);
    }
    first_frame = frame.clone();
    cv::Mat matMask = cv::Mat::zeros(frame.size(), CV_8UC1);
    cv::fillPoly(matMask, &ptContain, &iSize, 1, cv::Scalar::all(255));
    detector->detectAndCompute(first_frame, matMask, first_kp, first_desc);
    stats.keypoints = (int)first_kp.size();
    drawBoundingBox(first_frame, bb);
    putText(first_frame, title, Point(0, 60), FONT_HERSHEY_PLAIN, 5, Scalar::all(0), 4);
    object_bb = bb;
    delete[] ptMask;
}
Mat Tracker::process(const Mat frame, Stats& stats)
{
    TickMeter tm;
    vector<KeyPoint> kp;
    Mat desc;
    tm.start();
    detector->detectAndCompute(frame, noArray(), kp, desc);
    stats.keypoints = (int)kp.size();
    vector< vector<DMatch> > matches;
    vector<KeyPoint> matched1, matched2;
    matcher->knnMatch(first_desc, desc, matches, 2);
    for(unsigned i = 0; i < matches.size(); i++) {
        if(matches[i][0].distance < nn_match_ratio * matches[i][1].distance) {
            matched1.push_back(first_kp[matches[i][0].queryIdx]);
            matched2.push_back(      kp[matches[i][0].trainIdx]);
        }
    }
    stats.matches = (int)matched1.size();
    Mat inlier_mask, homography;
    vector<KeyPoint> inliers1, inliers2;
    vector<DMatch> inlier_matches;
    if(matched1.size() >= 4) {
        homography = findHomography(Points(matched1), Points(matched2),
                                    RANSAC, ransac_thresh, inlier_mask);
    }
    tm.stop();
    stats.fps = 1. / tm.getTimeSec();
    if(matched1.size() < 4 || homography.empty()) {
        Mat res;
        hconcat(first_frame, frame, res);
        stats.inliers = 0;
        stats.ratio = 0;
        return res;
    }
    for(unsigned i = 0; i < matched1.size(); i++) {
        if(inlier_mask.at<uchar>(i)) {
            int new_i = static_cast<int>(inliers1.size());
            inliers1.push_back(matched1[i]);
            inliers2.push_back(matched2[i]);
            inlier_matches.push_back(DMatch(new_i, new_i, 0));
        }
    }
    stats.inliers = (int)inliers1.size();
    stats.ratio = stats.inliers * 1.0 / stats.matches;
    vector<Point2f> new_bb;
    perspectiveTransform(object_bb, new_bb, homography);
    Mat frame_with_bb = frame.clone();
    if(stats.inliers >= bb_min_inliers) {
        drawBoundingBox(frame_with_bb, new_bb);
    }
    Mat res;
    drawMatches(first_frame, inliers1, frame_with_bb, inliers2,
                inlier_matches, res,
                Scalar(255, 0, 0), Scalar(255, 0, 0));
    return res;
}
}
int main(int argc, char **argv)
{
    CommandLineParser parser(argc, argv, "{@input_path |0|input path can be a camera id, like 0,1,2 or a video filename}");
    parser.printMessage();
    string input_path = parser.get<string>(0);
    string video_name = input_path;
    VideoCapture video_in;
    if ( ( isdigit(input_path[0]) && input_path.size() == 1 ) )
    {
    int camera_no = input_path[0] - '0';
        video_in.open( camera_no );
    }
    else {
        video_in.open(video_name);
    }
    if(!video_in.isOpened()) {
        cerr << "Couldn't open " << video_name << endl;
        return 1;
    }
    Stats stats, akaze_stats, orb_stats;
    Ptr<AKAZE> akaze = AKAZE::create();
    akaze->setThreshold(akaze_thresh);
    Ptr<ORB> orb = ORB::create();
    Ptr<DescriptorMatcher> matcher = DescriptorMatcher::create("BruteForce-Hamming");
    example::Tracker akaze_tracker(akaze, matcher);
    example::Tracker orb_tracker(orb, matcher);
    Mat frame;
    namedWindow(video_name, WINDOW_NORMAL);
    cout << "\nPress any key to stop the video and select a bounding box" << endl;
    while ( waitKey(1) < 1 )
    {
        video_in >> frame;
        cv::resizeWindow(video_name, frame.size());
        imshow(video_name, frame);
    }
    vector<Point2f> bb;
    cv::Rect uBox = cv::selectROI(video_name, frame);
    bb.push_back(cv::Point2f(static_cast<float>(uBox.x), static_cast<float>(uBox.y)));
    bb.push_back(cv::Point2f(static_cast<float>(uBox.x+uBox.width), static_cast<float>(uBox.y)));
    bb.push_back(cv::Point2f(static_cast<float>(uBox.x+uBox.width), static_cast<float>(uBox.y+uBox.height)));
    bb.push_back(cv::Point2f(static_cast<float>(uBox.x), static_cast<float>(uBox.y+uBox.height)));
    akaze_tracker.setFirstFrame(frame, bb, "AKAZE", stats);
    orb_tracker.setFirstFrame(frame, bb, "ORB", stats);
    Stats akaze_draw_stats, orb_draw_stats;
    Mat akaze_res, orb_res, res_frame;
    int i = 0;
    for(;;) {
        i++;
        bool update_stats = (i % stats_update_period == 0);
        video_in >> frame;
        // stop the program if no more images
        if(frame.empty()) break;
        akaze_res = akaze_tracker.process(frame, stats);
        akaze_stats += stats;
        if(update_stats) {
            akaze_draw_stats = stats;
        }
        orb->setMaxFeatures(stats.keypoints);
        orb_res = orb_tracker.process(frame, stats);
        orb_stats += stats;
        if(update_stats) {
            orb_draw_stats = stats;
        }
        drawStatistics(akaze_res, akaze_draw_stats);
        drawStatistics(orb_res, orb_draw_stats);
        vconcat(akaze_res, orb_res, res_frame);
        cv::imshow(video_name, res_frame);
        if(waitKey(1)==27) break; //quit on ESC button
    }
    akaze_stats /= i - 1;
    orb_stats /= i - 1;
    printStatistics("AKAZE", akaze_stats);
    printStatistics("ORB", orb_stats);
    return 0;
}

說(shuō)明

跟蹤類

該類實(shí)現(xiàn)了使用給定的特征檢測(cè)器和描述符匹配器描述的算法。

  • 設(shè)置第一幀
void Tracker::setFirstFrame(const Mat frame, vector<Point2f> bb, string title, Stats& stats)
{
    first_frame = frame.clone();
    (*detector)(first_frame, noArray(), first_kp, first_desc);
    stats.keypoints = (int)first_kp.size();
    drawBoundingBox(first_frame, bb);
    putText(first_frame, title, Point(0, 60), FONT_HERSHEY_PLAIN, 5, Scalar::all(0), 4);
    object_bb = bb;
}

我們從第一幀計(jì)算并存儲(chǔ)關(guān)鍵點(diǎn)和描述符,并準(zhǔn)備輸出。

我們需要保存檢測(cè)到的關(guān)鍵點(diǎn)的數(shù)量,以確保兩個(gè)檢測(cè)器的位置大致相同。

  • 處理框架

  1、找到關(guān)鍵點(diǎn)并計(jì)算描述符

(*detector)(frame, noArray(), kp, desc);

要找到幀之間的匹配,我們必須先找到關(guān)鍵點(diǎn)。

在本教程中,檢測(cè)器設(shè)置為在每個(gè)幀上找到約1000個(gè)關(guān)鍵點(diǎn)。

  2、使用2-nn匹配器查找通信

matcher->knnMatch(first_desc, desc, matches, 2);
for(unsigned i = 0; i < matches.size(); i++) {
    if(matches[i][0].distance < nn_match_ratio * matches[i][1].distance) {
        matched1.push_back(first_kp[matches[i][0].queryIdx]);
        matched2.push_back(      kp[matches[i][0].trainIdx]);
    }
}

如果最接近的匹配是nn_match_ratio比第二個(gè)最接近的匹配,那么它是一個(gè)匹配。

  3、使用RANSAC估計(jì)單變圖

homography = findHomography(Points(matched1), Points(matched2),
                            RANSAC, ransac_thresh, inlier_mask);

如果有至少4場(chǎng)比賽,我們可以使用隨機(jī)抽樣共識(shí)來(lái)估計(jì)圖像變換。

  4、保存內(nèi)在的

for(unsigned i = 0; i < matched1.size(); i++) {
    if(inlier_mask.at<uchar>(i)) {
        int new_i = static_cast<int>(inliers1.size());
        inliers1.push_back(matched1[i]);
        inliers2.push_back(matched2[i]);
        inlier_matches.push_back(DMatch(new_i, new_i, 0));
    }
}

因?yàn)?em>findHomography可以計(jì)算內(nèi)部值,所以我們只需要保存所選的點(diǎn)和匹配項(xiàng)。

  5、項(xiàng)目對(duì)象邊界框

perspectiveTransform(object_bb,new_bb,singography); 

如果有合理數(shù)量的內(nèi)部值,我們可以使用估計(jì)轉(zhuǎn)換來(lái)定位對(duì)象。

結(jié)果

AKAZE統(tǒng)計(jì):

Matches      626
Inliers      410
Inlier ratio 0.58
Keypoints    1117

ORB統(tǒng)計(jì):

Matches      504
Inliers      319
Inlier ratio 0.56
Keypoints    1112


以上內(nèi)容是否對(duì)您有幫助:
在線筆記
App下載
App下載

掃描二維碼

下載編程獅App

公眾號(hào)
微信公眾號(hào)

編程獅公眾號(hào)