Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBrecht Van Lommel <brechtvanlommel@gmail.com>2018-05-31 17:36:20 +0300
committerBrecht Van Lommel <brechtvanlommel@gmail.com>2018-06-01 13:00:11 +0300
commit01c75c3765eb305b1a99b794c1d40ad224b071c6 (patch)
treea4ee258072c82a9c3b8b581e8a346a8c108f2647
parent719e782f2c790aab7a822ad9e01a4fa8c93b5620 (diff)
Math: optimizations for 4x4x matrix inverse, multiplications.
In some heavy rigs matrix inverse can be 10% of computation time. This reduces it to 2% by using Eigen's optimized 4x4 matrix inverse and SSE matrix multiplication.
-rw-r--r--build_files/cmake/Modules/GTestTesting.cmake1
-rw-r--r--intern/eigen/CMakeLists.txt2
-rw-r--r--intern/eigen/eigen_capi.h1
-rw-r--r--intern/eigen/intern/matrix.cc56
-rw-r--r--intern/eigen/intern/matrix.h40
-rw-r--r--intern/eigen/intern/svd.cc3
-rw-r--r--source/blender/blenlib/intern/math_matrix.c91
7 files changed, 128 insertions, 66 deletions
diff --git a/build_files/cmake/Modules/GTestTesting.cmake b/build_files/cmake/Modules/GTestTesting.cmake
index ba1334d750e..dd80013cb91 100644
--- a/build_files/cmake/Modules/GTestTesting.cmake
+++ b/build_files/cmake/Modules/GTestTesting.cmake
@@ -32,6 +32,7 @@ macro(BLENDER_SRC_GTEST_EX NAME SRC EXTRA_LIBS DO_ADD_TEST)
${EXTRA_LIBS}
${PLATFORM_LINKLIBS}
bf_testing_main
+ bf_intern_eigen
bf_intern_guardedalloc
extern_gtest
extern_gmock
diff --git a/intern/eigen/CMakeLists.txt b/intern/eigen/CMakeLists.txt
index 5811b71de94..b2639359318 100644
--- a/intern/eigen/CMakeLists.txt
+++ b/intern/eigen/CMakeLists.txt
@@ -36,10 +36,12 @@ set(SRC
intern/eigenvalues.cc
intern/linear_solver.cc
+ intern/matrix.cc
intern/svd.cc
intern/eigenvalues.h
intern/linear_solver.h
+ intern/matrix.h
intern/svd.h
)
diff --git a/intern/eigen/eigen_capi.h b/intern/eigen/eigen_capi.h
index be42e340274..7f3267ff508 100644
--- a/intern/eigen/eigen_capi.h
+++ b/intern/eigen/eigen_capi.h
@@ -29,6 +29,7 @@
#include "intern/eigenvalues.h"
#include "intern/linear_solver.h"
+#include "intern/matrix.h"
#include "intern/svd.h"
#endif /* __EIGEN_C_API_H__ */
diff --git a/intern/eigen/intern/matrix.cc b/intern/eigen/intern/matrix.cc
new file mode 100644
index 00000000000..cc9e24d2b3c
--- /dev/null
+++ b/intern/eigen/intern/matrix.cc
@@ -0,0 +1,56 @@
+/*
+ * ***** BEGIN GPL LICENSE BLOCK *****
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * The Original Code is Copyright (C) 2015 Blender Foundation.
+ * All rights reserved.
+ *
+ * Contributor(s): Blender Foundation,
+ * Bastien Montagne
+ *
+ * ***** END GPL LICENSE BLOCK *****
+ */
+
+#ifndef __EIGEN3_MATRIX_C_API_CC__
+#define __EIGEN3_MATRIX_C_API_CC__
+
+/* Eigen gives annoying huge amount of warnings here, silence them! */
+#if defined(__GNUC__) && !defined(__clang__)
+# pragma GCC diagnostic ignored "-Wlogical-op"
+#endif
+
+#ifdef __EIGEN3_MATRIX_C_API_CC__ /* quiet warning */
+#endif
+
+#include <Eigen/Core>
+#include <Eigen/Dense>
+
+#include "matrix.h"
+
+using Eigen::Map;
+using Eigen::Matrix4f;
+
+bool EIG_invert_m4_m4(float inverse[4][4], const float matrix[4][4])
+{
+ Map<Matrix4f> M = Map<Matrix4f>((float*)matrix);
+ Matrix4f R;
+ bool invertible = true;
+ M.computeInverseWithCheck(R, invertible, 0.0f);
+ memcpy(inverse, R.data(), sizeof(float)*4*4);
+ return invertible;
+}
+
+#endif /* __EIGEN3_MATRIX_C_API_CC__ */
diff --git a/intern/eigen/intern/matrix.h b/intern/eigen/intern/matrix.h
new file mode 100644
index 00000000000..fd0f60fd81c
--- /dev/null
+++ b/intern/eigen/intern/matrix.h
@@ -0,0 +1,40 @@
+/*
+ * ***** BEGIN GPL LICENSE BLOCK *****
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * The Original Code is Copyright (C) 2015 Blender Foundation.
+ * All rights reserved.
+ *
+ * Contributor(s): Blender Foundation,
+ * Bastien Montagne
+ *
+ * ***** END GPL LICENSE BLOCK *****
+ */
+
+#ifndef __EIGEN3_MATRIX_C_API_H__
+#define __EIGEN3_MATRIX_C_API_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+bool EIG_invert_m4_m4(float inverse[4][4], const float matrix[4][4]);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __EIGEN3_MATRIX_C_API_H__ */
diff --git a/intern/eigen/intern/svd.cc b/intern/eigen/intern/svd.cc
index 7c331d25aa7..e035f29f5ac 100644
--- a/intern/eigen/intern/svd.cc
+++ b/intern/eigen/intern/svd.cc
@@ -37,6 +37,7 @@
#include <Eigen/Core>
#include <Eigen/SVD>
+#include <Eigen/Dense>
#include "svd.h"
@@ -51,6 +52,8 @@ using Eigen::MatrixXf;
using Eigen::VectorXf;
using Eigen::Map;
+using Eigen::Matrix4f;
+
void EIG_svd_square_matrix(const int size, const float *matrix, float *r_U, float *r_S, float *r_V)
{
/* Since our matrix is squared, we can use thinU/V. */
diff --git a/source/blender/blenlib/intern/math_matrix.c b/source/blender/blenlib/intern/math_matrix.c
index 3559500bf63..0272fa6f52b 100644
--- a/source/blender/blenlib/intern/math_matrix.c
+++ b/source/blender/blenlib/intern/math_matrix.c
@@ -33,6 +33,8 @@
#include "BLI_strict_flags.h"
+#include "eigen_capi.h"
+
/********************************* Init **************************************/
void zero_m2(float m[2][2])
@@ -192,6 +194,25 @@ void mul_m4_m4m4_uniq(float R[4][4], const float A[4][4], const float B[4][4])
BLI_assert(R != A && R != B);
/* matrix product: R[j][k] = A[j][i] . B[i][k] */
+#ifdef __SSE2__
+ __m128 A0 = _mm_loadu_ps(A[0]);
+ __m128 A1 = _mm_loadu_ps(A[1]);
+ __m128 A2 = _mm_loadu_ps(A[2]);
+ __m128 A3 = _mm_loadu_ps(A[3]);
+
+ for (int i = 0; i < 4; i++) {
+ __m128 B0 = _mm_set1_ps(B[i][0]);
+ __m128 B1 = _mm_set1_ps(B[i][1]);
+ __m128 B2 = _mm_set1_ps(B[i][2]);
+ __m128 B3 = _mm_set1_ps(B[i][3]);
+
+ __m128 sum = _mm_add_ps(
+ _mm_add_ps(_mm_mul_ps(B0, A0), _mm_mul_ps(B1, A1)),
+ _mm_add_ps(_mm_mul_ps(B2, A2), _mm_mul_ps(B3, A3)));
+
+ _mm_storeu_ps(R[i], sum);
+ }
+#else
R[0][0] = B[0][0] * A[0][0] + B[0][1] * A[1][0] + B[0][2] * A[2][0] + B[0][3] * A[3][0];
R[0][1] = B[0][0] * A[0][1] + B[0][1] * A[1][1] + B[0][2] * A[2][1] + B[0][3] * A[3][1];
R[0][2] = B[0][0] * A[0][2] + B[0][1] * A[1][2] + B[0][2] * A[2][2] + B[0][3] * A[3][2];
@@ -211,6 +232,7 @@ void mul_m4_m4m4_uniq(float R[4][4], const float A[4][4], const float B[4][4])
R[3][1] = B[3][0] * A[0][1] + B[3][1] * A[1][1] + B[3][2] * A[2][1] + B[3][3] * A[3][1];
R[3][2] = B[3][0] * A[0][2] + B[3][1] * A[1][2] + B[3][2] * A[2][2] + B[3][3] * A[3][2];
R[3][3] = B[3][0] * A[0][3] + B[3][1] * A[1][3] + B[3][2] * A[2][3] + B[3][3] * A[3][3];
+#endif
}
void mul_m4_m4_pre(float R[4][4], const float A[4][4])
@@ -875,74 +897,11 @@ bool invert_m4(float m[4][4])
return success;
}
-/*
- * invertmat -
- * computes the inverse of mat and puts it in inverse. Returns
- * true on success (i.e. can always find a pivot) and false on failure.
- * Uses Gaussian Elimination with partial (maximal column) pivoting.
- *
- * Mark Segal - 1992
- */
-
bool invert_m4_m4(float inverse[4][4], const float mat[4][4])
{
- int i, j, k;
- double temp;
- float tempmat[4][4];
- float max;
- int maxj;
-
- BLI_assert(inverse != mat);
-
- /* Set inverse to identity */
- for (i = 0; i < 4; i++)
- for (j = 0; j < 4; j++)
- inverse[i][j] = 0;
- for (i = 0; i < 4; i++)
- inverse[i][i] = 1;
-
- /* Copy original matrix so we don't mess it up */
- for (i = 0; i < 4; i++)
- for (j = 0; j < 4; j++)
- tempmat[i][j] = mat[i][j];
-
- for (i = 0; i < 4; i++) {
- /* Look for row with max pivot */
- max = fabsf(tempmat[i][i]);
- maxj = i;
- for (j = i + 1; j < 4; j++) {
- if (fabsf(tempmat[j][i]) > max) {
- max = fabsf(tempmat[j][i]);
- maxj = j;
- }
- }
- /* Swap rows if necessary */
- if (maxj != i) {
- for (k = 0; k < 4; k++) {
- SWAP(float, tempmat[i][k], tempmat[maxj][k]);
- SWAP(float, inverse[i][k], inverse[maxj][k]);
- }
- }
-
- if (UNLIKELY(tempmat[i][i] == 0.0f)) {
- return false; /* No non-zero pivot */
- }
- temp = (double)tempmat[i][i];
- for (k = 0; k < 4; k++) {
- tempmat[i][k] = (float)((double)tempmat[i][k] / temp);
- inverse[i][k] = (float)((double)inverse[i][k] / temp);
- }
- for (j = 0; j < 4; j++) {
- if (j != i) {
- temp = tempmat[j][i];
- for (k = 0; k < 4; k++) {
- tempmat[j][k] -= (float)((double)tempmat[i][k] * temp);
- inverse[j][k] -= (float)((double)inverse[i][k] * temp);
- }
- }
- }
- }
- return true;
+ /* Use optimized matrix inverse from Eigen, since performance
+ * impact of this function is significant in complex rigs. */
+ return EIG_invert_m4_m4(inverse, mat);
}
/****************************** Linear Algebra *******************************/