在本教程中,您將學(xué)習(xí)如何:
主成分分析(PCA)是提取數(shù)據(jù)集最重要特征的統(tǒng)計(jì)程序。
考慮到您有一組2D點(diǎn),如上圖所示。每個(gè)維度對(duì)應(yīng)于您感興趣的功能。這里有些人可能會(huì)認(rèn)為這些點(diǎn)是按隨機(jī)順序設(shè)置的。但是,如果你有一個(gè)更好的外觀,你會(huì)看到有一個(gè)線性模式(由藍(lán)線表示),很難解雇。PCA的關(guān)鍵點(diǎn)是減少維度??s小維數(shù)是減少給定數(shù)據(jù)集維數(shù)的過程。例如,在上述情況下,可以將點(diǎn)的集合近似為單行,因此,將給定點(diǎn)的維數(shù)從2D減小到1D。
此外,您還可以看到,這些點(diǎn)在藍(lán)線上最多變化,而不是沿著“特征1”或“特征2”軸變化。這意味著如果您知道點(diǎn)沿藍(lán)色線的位置,您將獲得有關(guān)該點(diǎn)的更多信息,而不僅僅知道它在特征1軸或特征2軸上的位置。
因此,PCA允許我們找到我們的數(shù)據(jù)變化最大的方向。事實(shí)上,在圖中的點(diǎn)集上運(yùn)行PCA的結(jié)果由稱為特征向量的2個(gè)向量組成,它們是數(shù)據(jù)集的主要組成部分。
每個(gè)特征向量的大小被編碼在相應(yīng)的特征值中,并且指示數(shù)據(jù)沿主要分量變化多少。特征向量的開始是數(shù)據(jù)集中所有點(diǎn)的中心。將PCA應(yīng)用于N維數(shù)據(jù)集產(chǎn)生N N維特征向量,N個(gè)特征值和1個(gè)N維中心點(diǎn)。足夠的理論,讓我們看看我們?nèi)绾伟堰@些想法變成代碼。
我們的目標(biāo)是將給定的維度p的數(shù)據(jù)集x轉(zhuǎn)換為較小維度L的可選數(shù)據(jù)集y。。同樣,我們正在尋找矩陣Y,其中Y是矩陣X的Karhunen-Loève transform(KLT):
組織數(shù)據(jù)集
假設(shè)您擁有包含一組p變量觀測(cè)數(shù)據(jù)的數(shù)據(jù),并且您希望減少數(shù)據(jù),以便只能使用變量L,L < p。描述每個(gè)觀察值。進(jìn)一步假設(shè)數(shù)據(jù)被排列為n個(gè)數(shù)據(jù)向量的集合 x1...xn ,其中每個(gè)xi表示變量p的單個(gè)分組觀察值。
寫x1…xn為行向量,其中每個(gè)P列。
把行向量為一個(gè)矩陣X的尺寸N×P.
計(jì)算經(jīng)驗(yàn)平均值
求出每個(gè)維度j的平均經(jīng)驗(yàn)值j=1,,p....
將計(jì)算出平均值為實(shí)證均值向量U,大小為 p×1.
計(jì)算與平均值的偏差
平均減法是找到最小化近似數(shù)據(jù)的均方誤差的主成分基礎(chǔ)的解決方案的組成部分。因此,我們以數(shù)據(jù)為中心進(jìn)行如下:
Store平均減去數(shù)據(jù)在N×P矩陣B
其中h是一個(gè)N×1列向量在所有的 1s中:
找到協(xié)方差矩陣
發(fā)現(xiàn)P×P經(jīng)驗(yàn)協(xié)方差矩陣C從矩陣B外產(chǎn)品本身:
其中*是共軛轉(zhuǎn)置運(yùn)算符。注意,如果B完全由實(shí)數(shù)組成,在許多應(yīng)用中是這種情況,則“共軛轉(zhuǎn)置”與常規(guī)轉(zhuǎn)置相同。
找到協(xié)方差矩陣的特征向量和特征值
其中D是C的特征值的對(duì)角矩陣。
在這里,λJ是協(xié)方差矩陣C的第j個(gè)特征值
本教程代碼如下所示。您也可以從這里下載。
#include <iostream>
#include <opencv2/opencv.hpp>
using namespace std;
using namespace cv;
// Function declarations
void drawAxis(Mat&, Point, Point, Scalar, const float);
double getOrientation(const vector<Point> &, Mat&);
void drawAxis(Mat& img, Point p, Point q, Scalar colour, const float scale = 0.2)
{
double angle;
double hypotenuse;
angle = atan2( (double) p.y - q.y, (double) p.x - q.x ); // angle in radians
hypotenuse = sqrt( (double) (p.y - q.y) * (p.y - q.y) + (p.x - q.x) * (p.x - q.x));
// double degrees = angle * 180 / CV_PI; // convert radians to degrees (0-180 range)
// cout << "Degrees: " << abs(degrees - 180) << endl; // angle in 0-360 degrees range
// Here we lengthen the arrow by a factor of scale
q.x = (int) (p.x - scale * hypotenuse * cos(angle));
q.y = (int) (p.y - scale * hypotenuse * sin(angle));
line(img, p, q, colour, 1, CV_AA);
// create the arrow hooks
p.x = (int) (q.x + 9 * cos(angle + CV_PI / 4));
p.y = (int) (q.y + 9 * sin(angle + CV_PI / 4));
line(img, p, q, colour, 1, CV_AA);
p.x = (int) (q.x + 9 * cos(angle - CV_PI / 4));
p.y = (int) (q.y + 9 * sin(angle - CV_PI / 4));
line(img, p, q, colour, 1, CV_AA);
}
double getOrientation(const vector<Point> &pts, Mat &img)
{
//Construct a buffer used by the pca analysis
int sz = static_cast<int>(pts.size());
Mat data_pts = Mat(sz, 2, CV_64FC1);
for (int i = 0; i < data_pts.rows; ++i)
{
data_pts.at<double>(i, 0) = pts[i].x;
data_pts.at<double>(i, 1) = pts[i].y;
}
//Perform PCA analysis
PCA pca_analysis(data_pts, Mat(), CV_PCA_DATA_AS_ROW);
//Store the center of the object
Point cntr = Point(static_cast<int>(pca_analysis.mean.at<double>(0, 0)),
static_cast<int>(pca_analysis.mean.at<double>(0, 1)));
//Store the eigenvalues and eigenvectors
vector<Point2d> eigen_vecs(2);
vector<double> eigen_val(2);
for (int i = 0; i < 2; ++i)
{
eigen_vecs[i] = Point2d(pca_analysis.eigenvectors.at<double>(i, 0),
pca_analysis.eigenvectors.at<double>(i, 1));
eigen_val[i] = pca_analysis.eigenvalues.at<double>(i);
}
// Draw the principal components
circle(img, cntr, 3, Scalar(255, 0, 255), 2);
Point p1 = cntr + 0.02 * Point(static_cast<int>(eigen_vecs[0].x * eigen_val[0]), static_cast<int>(eigen_vecs[0].y * eigen_val[0]));
Point p2 = cntr - 0.02 * Point(static_cast<int>(eigen_vecs[1].x * eigen_val[1]), static_cast<int>(eigen_vecs[1].y * eigen_val[1]));
drawAxis(img, cntr, p1, Scalar(0, 255, 0), 1);
drawAxis(img, cntr, p2, Scalar(255, 255, 0), 5);
double angle = atan2(eigen_vecs[0].y, eigen_vecs[0].x); // orientation in radians
return angle;
}
int main(int argc, char** argv)
{
// Load image
String imageName("../data/pca_test1.jpg"); // by default
if (argc > 1)
{
imageName = argv[1];
}
Mat src = imread( imageName );
// Check if image is loaded successfully
if(!src.data || src.empty())
{
cout << "Problem loading image!!!" << endl;
return EXIT_FAILURE;
}
imshow("src", src);
// Convert image to grayscale
Mat gray;
cvtColor(src, gray, COLOR_BGR2GRAY);
// Convert image to binary
Mat bw;
threshold(gray, bw, 50, 255, CV_THRESH_BINARY | CV_THRESH_OTSU);
// Find all the contours in the thresholded image
vector<Vec4i> hierarchy;
vector<vector<Point> > contours;
findContours(bw, contours, hierarchy, CV_RETR_LIST, CV_CHAIN_APPROX_NONE);
for (size_t i = 0; i < contours.size(); ++i)
{
// Calculate the area of each contour
double area = contourArea(contours[i]);
// Ignore contours that are too small or too large
if (area < 1e2 || 1e5 < area) continue;
// Draw each contour only for visualisation purposes
drawContours(src, contours, static_cast<int>(i), Scalar(0, 0, 255), 2, 8, hierarchy, 0);
// Find the orientation of each shape
getOrientation(contours[i], src);
}
imshow("output", src);
waitKey(0);
return 0;
}
另一個(gè)例子使用PCA降維的同時(shí)保持方差量可以在opencv_source_code / samples / cpp / pca.cpp發(fā)現(xiàn)
// Load image
String imageName("../data/pca_test1.jpg"); // by default
if (argc > 1)
{
imageName = argv[1];
}
Mat src = imread( imageName );
// Check if image is loaded successfully
if(!src.data || src.empty())
{
cout << "Problem loading image!!!" << endl;
return EXIT_FAILURE;
}
imshow("src", src);
// Convert image to grayscale
Mat gray;
cvtColor(src, gray, COLOR_BGR2GRAY);
// Convert image to binary
Mat bw;
threshold(gray, bw, 50, 255, CV_THRESH_BINARY | CV_THRESH_OTSU);
然后通過大小查找和過濾輪廓,并獲得剩余輪廓的方向。
// Find all the contours in the thresholded image
vector<Vec4i> hierarchy;
vector<vector<Point> > contours;
findContours(bw, contours, hierarchy, CV_RETR_LIST, CV_CHAIN_APPROX_NONE);
for (size_t i = 0; i < contours.size(); ++i)
{
// Calculate the area of each contour
double area = contourArea(contours[i]);
// Ignore contours that are too small or too large
if (area < 1e2 || 1e5 < area) continue;
// Draw each contour only for visualisation purposes
drawContours(src, contours, static_cast<int>(i), Scalar(0, 0, 255), 2, 8, hierarchy, 0);
// Find the orientation of each shape
getOrientation(contours[i], src);
}
通過調(diào)用getOrientation()函數(shù)提取方向,該函數(shù)執(zhí)行所有PCA過程。
//Construct a buffer used by the pca analysis
int sz = static_cast<int>(pts.size());
Mat data_pts = Mat(sz, 2, CV_64FC1);
for (int i = 0; i < data_pts.rows; ++i)
{
data_pts.at<double>(i, 0) = pts[i].x;
data_pts.at<double>(i, 1) = pts[i].y;
}
//Perform PCA analysis
PCA pca_analysis(data_pts, Mat(), CV_PCA_DATA_AS_ROW);
//Store the center of the object
Point cntr = Point(static_cast<int>(pca_analysis.mean.at<double>(0, 0)),
static_cast<int>(pca_analysis.mean.at<double>(0, 1)));
//Store the eigenvalues and eigenvectors
vector<Point2d> eigen_vecs(2);
vector<double> eigen_val(2);
for (int i = 0; i < 2; ++i)
{
eigen_vecs[i] = Point2d(pca_analysis.eigenvectors.at<double>(i, 0),
pca_analysis.eigenvectors.at<double>(i, 1));
eigen_val[i] = pca_analysis.eigenvalues.at<double>(i);
}
首先,數(shù)據(jù)需要排列成大小為n×2的矩陣,其中n是我們擁有的數(shù)據(jù)點(diǎn)數(shù)。那么我們可以執(zhí)行PCA分析。計(jì)算平均值(即質(zhì)心)存儲(chǔ)在cntr變量中,特征向量和特征值存儲(chǔ)在相應(yīng)的std :: vector中。
最終結(jié)果通過drawAxis()函數(shù)進(jìn)行可視化,其中主要成分以行表示,每個(gè)特征向量乘以其特征值并轉(zhuǎn)換為平均位置。
// Draw the principal components
circle(img, cntr, 3, Scalar(255, 0, 255), 2);
Point p1 = cntr + 0.02 * Point(static_cast<int>(eigen_vecs[0].x * eigen_val[0]), static_cast<int>(eigen_vecs[0].y * eigen_val[0]));
Point p2 = cntr - 0.02 * Point(static_cast<int>(eigen_vecs[1].x * eigen_val[1]), static_cast<int>(eigen_vecs[1].y * eigen_val[1]));
drawAxis(img, cntr, p1, Scalar(0, 255, 0), 1);
drawAxis(img, cntr, p2, Scalar(255, 255, 0), 5);
double angle = atan2(eigen_vecs[0].y, eigen_vecs[0].x); // orientation in radians
double angle;
double hypotenuse;
angle = atan2( (double) p.y - q.y, (double) p.x - q.x ); // angle in radians
hypotenuse = sqrt( (double) (p.y - q.y) * (p.y - q.y) + (p.x - q.x) * (p.x - q.x));
// double degrees = angle * 180 / CV_PI; // convert radians to degrees (0-180 range)
// cout << "Degrees: " << abs(degrees - 180) << endl; // angle in 0-360 degrees range
// Here we lengthen the arrow by a factor of scale
q.x = (int) (p.x - scale * hypotenuse * cos(angle));
q.y = (int) (p.y - scale * hypotenuse * sin(angle));
line(img, p, q, colour, 1, CV_AA);
// create the arrow hooks
p.x = (int) (q.x + 9 * cos(angle + CV_PI / 4));
p.y = (int) (q.y + 9 * sin(angle + CV_PI / 4));
line(img, p, q, colour, 1, CV_AA);
p.x = (int) (q.x + 9 * cos(angle - CV_PI / 4));
p.y = (int) (q.y + 9 * sin(angle - CV_PI / 4));
line(img, p, q, colour, 1, CV_AA);
代碼打開圖像,找到感興趣的被檢測(cè)對(duì)象的方位,然后通過繪制所檢測(cè)到的對(duì)象的輪廓,中心點(diǎn)和關(guān)于提取的取向的x軸,y軸來顯示結(jié)果。
更多建議: