git.blender.org/blender.git
Diffstat (limited to 'intern/libmv/libmv/tracking/brute_region_tracker.cc')
-rw-r--r--  intern/libmv/libmv/tracking/brute_region_tracker.cc | 195
1 file changed, 117 insertions(+), 78 deletions(-)
diff --git a/intern/libmv/libmv/tracking/brute_region_tracker.cc b/intern/libmv/libmv/tracking/brute_region_tracker.cc
index 85aecb7f633..7007fb9440b 100644
--- a/intern/libmv/libmv/tracking/brute_region_tracker.cc
+++ b/intern/libmv/libmv/tracking/brute_region_tracker.cc
@@ -25,31 +25,30 @@
#endif
#include "libmv/base/aligned_malloc.h"
-#include "libmv/image/image.h"
#include "libmv/image/convolve.h"
#include "libmv/image/correlation.h"
+#include "libmv/image/image.h"
#include "libmv/image/sample.h"
#include "libmv/logging/logging.h"
namespace libmv {
namespace {
-bool RegionIsInBounds(const FloatImage &image1,
- double x, double y,
+bool RegionIsInBounds(const FloatImage& image1,
+ double x,
+ double y,
int half_window_size) {
// Check the minimum coordinates.
int min_x = floor(x) - half_window_size - 1;
int min_y = floor(y) - half_window_size - 1;
- if (min_x < 0.0 ||
- min_y < 0.0) {
+ if (min_x < 0.0 || min_y < 0.0) {
return false;
}
// Check the maximum coordinates.
int max_x = ceil(x) + half_window_size + 1;
int max_y = ceil(y) + half_window_size + 1;
- if (max_x > image1.cols() ||
- max_y > image1.rows()) {
+ if (max_x > image1.cols() || max_y > image1.rows()) {
return false;
}
@@ -69,14 +68,15 @@ bool RegionIsInBounds(const FloatImage &image1,
// and "b", since the SSE load instructionst will pull in memory past the end
// of the arrays if their size is not a multiple of 16.
inline static __m128i SumOfAbsoluteDifferencesContiguousSSE(
- const unsigned char *a, // aligned
- const unsigned char *b, // not aligned
+ const unsigned char* a, // aligned
+ const unsigned char* b, // not aligned
unsigned int size,
__m128i sad) {
// Do the bulk of the work as 16-way integer operations.
for (unsigned int j = 0; j < size / 16; j++) {
- sad = _mm_add_epi32(sad, _mm_sad_epu8( _mm_load_si128 ((__m128i*)(a + 16 * j)),
- _mm_loadu_si128((__m128i*)(b + 16 * j))));
+ sad = _mm_add_epi32(sad,
+ _mm_sad_epu8(_mm_load_si128((__m128i*)(a + 16 * j)),
+ _mm_loadu_si128((__m128i*)(b + 16 * j))));
}
// Handle the trailing end.
// TODO(keir): Benchmark to verify that the below SSE is a win compared to a
@@ -90,32 +90,63 @@ inline static __m128i SumOfAbsoluteDifferencesContiguousSSE(
unsigned int remainder = size % 16u;
if (remainder) {
unsigned int j = size / 16;
- __m128i a_trail = _mm_load_si128 ((__m128i*)(a + 16 * j));
+ __m128i a_trail = _mm_load_si128((__m128i*)(a + 16 * j));
__m128i b_trail = _mm_loadu_si128((__m128i*)(b + 16 * j));
__m128i mask;
switch (remainder) {
-#define X 0xff
- case 1: mask = _mm_setr_epi8(X, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); break;
- case 2: mask = _mm_setr_epi8(X, X, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); break;
- case 3: mask = _mm_setr_epi8(X, X, X, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); break;
- case 4: mask = _mm_setr_epi8(X, X, X, X, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); break;
- case 5: mask = _mm_setr_epi8(X, X, X, X, X, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); break;
- case 6: mask = _mm_setr_epi8(X, X, X, X, X, X, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); break;
- case 7: mask = _mm_setr_epi8(X, X, X, X, X, X, X, 0, 0, 0, 0, 0, 0, 0, 0, 0); break;
- case 8: mask = _mm_setr_epi8(X, X, X, X, X, X, X, X, 0, 0, 0, 0, 0, 0, 0, 0); break;
- case 9: mask = _mm_setr_epi8(X, X, X, X, X, X, X, X, X, 0, 0, 0, 0, 0, 0, 0); break;
- case 10: mask = _mm_setr_epi8(X, X, X, X, X, X, X, X, X, X, 0, 0, 0, 0, 0, 0); break;
- case 11: mask = _mm_setr_epi8(X, X, X, X, X, X, X, X, X, X, X, 0, 0, 0, 0, 0); break;
- case 12: mask = _mm_setr_epi8(X, X, X, X, X, X, X, X, X, X, X, X, 0, 0, 0, 0); break;
- case 13: mask = _mm_setr_epi8(X, X, X, X, X, X, X, X, X, X, X, X, X, 0, 0, 0); break;
- case 14: mask = _mm_setr_epi8(X, X, X, X, X, X, X, X, X, X, X, X, X, X, 0, 0); break;
- case 15: mask = _mm_setr_epi8(X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, 0); break;
+# define X 0xff
+ case 1:
+ mask = _mm_setr_epi8(X, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ break;
+ case 2:
+ mask = _mm_setr_epi8(X, X, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ break;
+ case 3:
+ mask = _mm_setr_epi8(X, X, X, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ break;
+ case 4:
+ mask = _mm_setr_epi8(X, X, X, X, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ break;
+ case 5:
+ mask = _mm_setr_epi8(X, X, X, X, X, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ break;
+ case 6:
+ mask = _mm_setr_epi8(X, X, X, X, X, X, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ break;
+ case 7:
+ mask = _mm_setr_epi8(X, X, X, X, X, X, X, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ break;
+ case 8:
+ mask = _mm_setr_epi8(X, X, X, X, X, X, X, X, 0, 0, 0, 0, 0, 0, 0, 0);
+ break;
+ case 9:
+ mask = _mm_setr_epi8(X, X, X, X, X, X, X, X, X, 0, 0, 0, 0, 0, 0, 0);
+ break;
+ case 10:
+ mask = _mm_setr_epi8(X, X, X, X, X, X, X, X, X, X, 0, 0, 0, 0, 0, 0);
+ break;
+ case 11:
+ mask = _mm_setr_epi8(X, X, X, X, X, X, X, X, X, X, X, 0, 0, 0, 0, 0);
+ break;
+ case 12:
+ mask = _mm_setr_epi8(X, X, X, X, X, X, X, X, X, X, X, X, 0, 0, 0, 0);
+ break;
+ case 13:
+ mask = _mm_setr_epi8(X, X, X, X, X, X, X, X, X, X, X, X, X, 0, 0, 0);
+ break;
+ case 14:
+ mask = _mm_setr_epi8(X, X, X, X, X, X, X, X, X, X, X, X, X, X, 0, 0);
+ break;
+ case 15:
+ mask = _mm_setr_epi8(X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, 0);
+ break;
// To silence compiler warning.
default: mask = _mm_setzero_si128(); break;
-#undef X
+# undef X
}
- sad = _mm_add_epi32(sad, _mm_sad_epu8(_mm_and_si128(mask, a_trail),
- _mm_and_si128(mask, b_trail)));
+ sad = _mm_add_epi32(sad,
+ _mm_sad_epu8(_mm_and_si128(mask, a_trail),
+ _mm_and_si128(mask, b_trail)));
}
return sad;
}
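
The masked trailing-chunk handling above is easiest to verify against a plain
scalar reference. A minimal sketch of the same computation (the helper name
here is illustrative, not part of this patch):

#include <cstdlib>

static int SumOfAbsoluteDifferencesScalar(const unsigned char* a,
                                          const unsigned char* b,
                                          unsigned int size) {
  // Accumulate |a[i] - b[i]| byte by byte; the SSE path computes the same
  // sum 16 bytes at a time with _mm_sad_epu8, zeroing the out-of-range lanes
  // of the final partial chunk via the mask so they contribute nothing.
  int sad = 0;
  for (unsigned int i = 0; i < size; ++i) {
    sad += abs(static_cast<int>(a[i]) - static_cast<int>(b[i]));
  }
  return sad;
}
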
@@ -124,13 +155,12 @@ inline static __m128i SumOfAbsoluteDifferencesContiguousSSE(
// Computes the sum of absolute differences between pattern and image. Pattern
// must be 16-byte aligned, and the stride must be a multiple of 16. The image
// pointer does not have to be aligned.
-int SumOfAbsoluteDifferencesContiguousImage(
- const unsigned char *pattern,
- unsigned int pattern_width,
- unsigned int pattern_height,
- unsigned int pattern_stride,
- const unsigned char *image,
- unsigned int image_stride) {
+int SumOfAbsoluteDifferencesContiguousImage(const unsigned char* pattern,
+ unsigned int pattern_width,
+ unsigned int pattern_height,
+ unsigned int pattern_stride,
+ const unsigned char* image,
+ unsigned int image_stride) {
#ifdef __SSE2__
// TODO(keir): Add interleaved accumulation, where accumulation is done into
// two or more SSE registers that then get combined at the end. This reduces
@@ -145,8 +175,7 @@ int SumOfAbsoluteDifferencesContiguousImage(
sad);
}
return _mm_cvtsi128_si32(
- _mm_add_epi32(sad,
- _mm_shuffle_epi32(sad, _MM_SHUFFLE(3, 0, 1, 2))));
+ _mm_add_epi32(sad, _mm_shuffle_epi32(sad, _MM_SHUFFLE(3, 0, 1, 2))));
#else
int sad = 0;
for (int r = 0; r < pattern_height; ++r) {
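
The shuffle/add pair that closes the SSE path above merits a note:
_mm_sad_epu8 leaves its two partial sums in 32-bit lanes 0 and 2 of the
accumulator, and _MM_SHUFFLE(3, 0, 1, 2) rotates lane 2 down into lane 0, so
one add plus _mm_cvtsi128_si32 yields the total. As a standalone sketch
(assuming SSE2; the helper name is illustrative):

#include <emmintrin.h>

static int HorizontalSumOfSad(__m128i sad) {
  // Lanes 1 and 3 stay zero throughout accumulation; rotating lane 2 into
  // lane 0 and adding leaves the combined sum in the low 32 bits.
  __m128i rotated = _mm_shuffle_epi32(sad, _MM_SHUFFLE(3, 0, 1, 2));
  return _mm_cvtsi128_si32(_mm_add_epi32(sad, rotated));
}
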
@@ -161,12 +190,13 @@ int SumOfAbsoluteDifferencesContiguousImage(
// Sample a region of size width, height centered at x,y in image, converting
// from float to byte in the process. Samples from the first channel. Puts
// result into *pattern.
-void SampleRectangularPattern(const FloatImage &image,
- double x, double y,
+void SampleRectangularPattern(const FloatImage& image,
+ double x,
+ double y,
int width,
int height,
int pattern_stride,
- unsigned char *pattern) {
+ unsigned char* pattern) {
// There are two cases for width and height: even or odd. If it's odd, then
// the bounds [-width / 2, width / 2] work as expected. However, for even,
// this results in one extra access past the end. So use < instead of <= in
@@ -175,7 +205,7 @@ void SampleRectangularPattern(const FloatImage &image,
int end_height = (height / 2) + (height % 2);
for (int r = -height / 2; r < end_height; ++r) {
for (int c = -width / 2; c < end_width; ++c) {
- pattern[pattern_stride * (r + height / 2) + c + width / 2] =
+ pattern[pattern_stride * (r + height / 2) + c + width / 2] =
SampleLinear(image, y + r, x + c, 0) * 255.0;
}
}
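
The even/odd bound logic can be checked numerically. A standalone sketch (not
part of the patch):

#include <cstdio>

int main() {
  // Odd widths are symmetric around zero; even widths get one fewer sample
  // on the high side, which is why the loops use < rather than <=.
  for (int width = 4; width <= 5; ++width) {
    int end_width = (width / 2) + (width % 2);
    std::printf("width=%d: c in [%d, %d)\n", width, -width / 2, end_width);
  }
  // Prints:
  //   width=4: c in [-2, 2)  -> samples -2, -1, 0, 1
  //   width=5: c in [-2, 3)  -> samples -2, -1, 0, 1, 2
  return 0;
}
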
@@ -195,30 +225,30 @@ inline int PadToAlignment(int x, int alignment) {
// is returned in *pattern_stride.
//
// NOTE: Caller must free *pattern with aligned_malloc() from above.
-void SampleSquarePattern(const FloatImage &image,
- double x, double y,
+void SampleSquarePattern(const FloatImage& image,
+ double x,
+ double y,
int half_width,
- unsigned char **pattern,
- int *pattern_stride) {
+ unsigned char** pattern,
+ int* pattern_stride) {
int width = 2 * half_width + 1;
// Allocate an aligned block with padding on the end so each row of the
// pattern starts on a 16-byte boundary.
*pattern_stride = PadToAlignment(width, 16);
int pattern_size_bytes = *pattern_stride * width;
- *pattern = static_cast<unsigned char *>(
- aligned_malloc(pattern_size_bytes, 16));
- SampleRectangularPattern(image, x, y, width, width,
- *pattern_stride,
- *pattern);
+ *pattern =
+ static_cast<unsigned char*>(aligned_malloc(pattern_size_bytes, 16));
+ SampleRectangularPattern(
+ image, x, y, width, width, *pattern_stride, *pattern);
}
// NOTE: Caller must free *image with aligned_malloc() from above.
-void FloatArrayToByteArrayWithPadding(const FloatImage &float_image,
- unsigned char **image,
- int *image_stride) {
+void FloatArrayToByteArrayWithPadding(const FloatImage& float_image,
+ unsigned char** image,
+ int* image_stride) {
// Allocate enough so that accessing 16 elements past the end is fine.
*image_stride = float_image.Width() + 16;
- *image = static_cast<unsigned char *>(
+ *image = static_cast<unsigned char*>(
aligned_malloc(*image_stride * float_image.Height(), 16));
for (int i = 0; i < float_image.Height(); ++i) {
for (int j = 0; j < float_image.Width(); ++j) {
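
Both helpers above lean on PadToAlignment, whose body falls outside these
hunks. It only needs to round a row width up to the next multiple of the
alignment; a plausible minimal implementation (a sketch, not the code from
this file) is:

inline int PadToAlignment(int x, int alignment) {
  // Round x up to the next multiple of alignment, so consecutive rows of
  // x padded bytes all start on an aligned boundary.
  if (x % alignment != 0) {
    x += alignment - x % alignment;
  }
  return x;
}

With the stride padded to a multiple of 16 and the block coming from
aligned_malloc(..., 16), every row of the sampled pattern satisfies the
alignment contract of SumOfAbsoluteDifferencesContiguousSSE's "a" pointer.
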
@@ -235,10 +265,12 @@ void FloatArrayToByteArrayWithPadding(const FloatImage &float_image,
// values for every hypothesis looks like.
//
// TODO(keir): Priority queue for multiple hypothesis.
-bool BruteRegionTracker::Track(const FloatImage &image1,
- const FloatImage &image2,
- double x1, double y1,
- double *x2, double *y2) const {
+bool BruteRegionTracker::Track(const FloatImage& image1,
+ const FloatImage& image2,
+ double x1,
+ double y1,
+ double* x2,
+ double* y2) const {
if (!RegionIsInBounds(image1, x1, y1, half_window_size)) {
LG << "Fell out of image1's window with x1=" << x1 << ", y1=" << y1
<< ", hw=" << half_window_size << ".";
@@ -252,28 +284,28 @@ bool BruteRegionTracker::Track(const FloatImage &image1,
BlurredImageAndDerivativesChannels(image2, 0.9, &image_and_gradient2);
// Sample the pattern to get it aligned to an image grid.
- unsigned char *pattern;
+ unsigned char* pattern;
int pattern_stride;
- SampleSquarePattern(image_and_gradient1, x1, y1, half_window_size,
- &pattern,
- &pattern_stride);
+ SampleSquarePattern(
+ image_and_gradient1, x1, y1, half_window_size, &pattern, &pattern_stride);
// Convert the search area directly to bytes without sampling.
- unsigned char *search_area;
+ unsigned char* search_area;
int search_area_stride;
- FloatArrayToByteArrayWithPadding(image_and_gradient2, &search_area,
- &search_area_stride);
+ FloatArrayToByteArrayWithPadding(
+ image_and_gradient2, &search_area, &search_area_stride);
// Try all possible locations inside the search area. Yes, everywhere.
int best_i = -1, best_j = -1, best_sad = INT_MAX;
for (int i = 0; i < image2.Height() - pattern_width; ++i) {
for (int j = 0; j < image2.Width() - pattern_width; ++j) {
- int sad = SumOfAbsoluteDifferencesContiguousImage(pattern,
- pattern_width,
- pattern_width,
- pattern_stride,
- search_area + search_area_stride * i + j,
- search_area_stride);
+ int sad = SumOfAbsoluteDifferencesContiguousImage(
+ pattern,
+ pattern_width,
+ pattern_width,
+ pattern_stride,
+ search_area + search_area_stride * i + j,
+ search_area_stride);
if (sad < best_sad) {
best_i = i;
best_j = j;
@@ -309,16 +341,23 @@ bool BruteRegionTracker::Track(const FloatImage &image1,
}
Array3Df image_and_gradient1_sampled, image_and_gradient2_sampled;
- SamplePattern(image_and_gradient1, x1, y1, half_window_size, 3,
+ SamplePattern(image_and_gradient1,
+ x1,
+ y1,
+ half_window_size,
+ 3,
&image_and_gradient1_sampled);
- SamplePattern(image_and_gradient2, *x2, *y2, half_window_size, 3,
+ SamplePattern(image_and_gradient2,
+ *x2,
+ *y2,
+ half_window_size,
+ 3,
&image_and_gradient2_sampled);
// Compute the Pearson product-moment correlation coefficient to check
// for sanity.
double correlation = PearsonProductMomentCorrelation(
- image_and_gradient1_sampled,
- image_and_gradient2_sampled);
+ image_and_gradient1_sampled, image_and_gradient2_sampled);
LG << "Final correlation: " << correlation;