Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'intern/cycles/util/util_math_matrix.h')
-rw-r--r--intern/cycles/util/util_math_matrix.h56
1 files changed, 28 insertions, 28 deletions
diff --git a/intern/cycles/util/util_math_matrix.h b/intern/cycles/util/util_math_matrix.h
index c7511f8306e..b31dbe4fc67 100644
--- a/intern/cycles/util/util_math_matrix.h
+++ b/intern/cycles/util/util_math_matrix.h
@@ -223,20 +223,20 @@ ccl_device void math_matrix_jacobi_eigendecomposition(float *A, ccl_global float
{
const float singular_epsilon = 1e-9f;
- for (int row = 0; row < n; row++) {
- for (int col = 0; col < n; col++) {
+ for(int row = 0; row < n; row++) {
+ for(int col = 0; col < n; col++) {
MATS(V, n, row, col, v_stride) = (col == row) ? 1.0f : 0.0f;
}
}
- for (int sweep = 0; sweep < 8; sweep++) {
+ for(int sweep = 0; sweep < 8; sweep++) {
float off_diagonal = 0.0f;
- for (int row = 1; row < n; row++) {
- for (int col = 0; col < row; col++) {
+ for(int row = 1; row < n; row++) {
+ for(int col = 0; col < row; col++) {
off_diagonal += fabsf(MAT(A, n, row, col));
}
}
- if (off_diagonal < 1e-7f) {
+ if(off_diagonal < 1e-7f) {
/* The matrix has nearly reached diagonal form.
* Since the eigenvalues are only used to determine truncation, their exact values aren't required - a relative error of a few ULPs won't matter at all. */
break;
@@ -253,7 +253,7 @@ ccl_device void math_matrix_jacobi_eigendecomposition(float *A, ccl_global float
float abs_element = fabsf(element);
/* If we're in a later sweep and the element already is very small, just set it to zero and skip the rotation. */
- if (sweep > 3 && abs_element <= singular_epsilon*fabsf(MAT(A, n, row, row)) && abs_element <= singular_epsilon*fabsf(MAT(A, n, col, col))) {
+ if(sweep > 3 && abs_element <= singular_epsilon*fabsf(MAT(A, n, row, row)) && abs_element <= singular_epsilon*fabsf(MAT(A, n, col, col))) {
MAT(A, n, row, col) = 0.0f;
continue;
}
@@ -272,10 +272,10 @@ ccl_device void math_matrix_jacobi_eigendecomposition(float *A, ccl_global float
* Then, we compute sin(phi) and cos(phi) themselves. */
float singular_diff = MAT(A, n, row, row) - MAT(A, n, col, col);
float ratio;
- if (abs_element > singular_epsilon*fabsf(singular_diff)) {
+ if(abs_element > singular_epsilon*fabsf(singular_diff)) {
float cot_2phi = 0.5f*singular_diff / element;
ratio = 1.0f / (fabsf(cot_2phi) + sqrtf(1.0f + cot_2phi*cot_2phi));
- if (cot_2phi < 0.0f) ratio = -ratio; /* Copy sign. */
+ if(cot_2phi < 0.0f) ratio = -ratio; /* Copy sign. */
}
else {
ratio = element / singular_diff;
@@ -315,21 +315,21 @@ ccl_device void math_matrix_jacobi_eigendecomposition(float *A, ccl_global float
}
/* Sort eigenvalues and the associated eigenvectors. */
- for (int i = 0; i < n - 1; i++) {
+ for(int i = 0; i < n - 1; i++) {
float v = MAT(A, n, i, i);
int k = i;
- for (int j = i; j < n; j++) {
- if (MAT(A, n, j, j) >= v) {
+ for(int j = i; j < n; j++) {
+ if(MAT(A, n, j, j) >= v) {
v = MAT(A, n, j, j);
k = j;
}
}
- if (k != i) {
+ if(k != i) {
/* Swap eigenvalues. */
MAT(A, n, k, k) = MAT(A, n, i, i);
MAT(A, n, i, i) = v;
/* Swap eigenvectors. */
- for (int j = 0; j < n; j++) {
+ for(int j = 0; j < n; j++) {
float v = MATS(V, n, i, j, v_stride);
MATS(V, n, i, j, v_stride) = MATS(V, n, k, j, v_stride);
MATS(V, n, k, j, v_stride) = v;
@@ -339,59 +339,59 @@ ccl_device void math_matrix_jacobi_eigendecomposition(float *A, ccl_global float
}
#ifdef __KERNEL_SSE3__
-ccl_device_inline void math_vector_zero_sse(__m128 *A, int n)
+ccl_device_inline void math_vector_zero_sse(float4 *A, int n)
{
for(int i = 0; i < n; i++) {
- A[i] = _mm_setzero_ps();
+ A[i] = make_float4(0.0f);
}
}
-ccl_device_inline void math_matrix_zero_sse(__m128 *A, int n)
+ccl_device_inline void math_matrix_zero_sse(float4 *A, int n)
{
for(int row = 0; row < n; row++) {
for(int col = 0; col <= row; col++) {
- MAT(A, n, row, col) = _mm_setzero_ps();
+ MAT(A, n, row, col) = make_float4(0.0f);
}
}
}
/* Add Gramian matrix of v to A.
* The Gramian matrix of v is v^T*v, so element (i,j) is v[i]*v[j]. */
-ccl_device_inline void math_matrix_add_gramian_sse(__m128 *A, int n, const __m128 *ccl_restrict v, __m128 weight)
+ccl_device_inline void math_matrix_add_gramian_sse(float4 *A, int n, const float4 *ccl_restrict v, float4 weight)
{
for(int row = 0; row < n; row++) {
for(int col = 0; col <= row; col++) {
- MAT(A, n, row, col) = _mm_add_ps(MAT(A, n, row, col), _mm_mul_ps(_mm_mul_ps(v[row], v[col]), weight));
+ MAT(A, n, row, col) = MAT(A, n, row, col) + v[row] * v[col] * weight;
}
}
}
-ccl_device_inline void math_vector_add_sse(__m128 *V, int n, const __m128 *ccl_restrict a)
+ccl_device_inline void math_vector_add_sse(float4 *V, int n, const float4 *ccl_restrict a)
{
for(int i = 0; i < n; i++) {
- V[i] = _mm_add_ps(V[i], a[i]);
+ V[i] += a[i];
}
}
-ccl_device_inline void math_vector_mul_sse(__m128 *V, int n, const __m128 *ccl_restrict a)
+ccl_device_inline void math_vector_mul_sse(float4 *V, int n, const float4 *ccl_restrict a)
{
for(int i = 0; i < n; i++) {
- V[i] = _mm_mul_ps(V[i], a[i]);
+ V[i] *= a[i];
}
}
-ccl_device_inline void math_vector_max_sse(__m128 *a, const __m128 *ccl_restrict b, int n)
+ccl_device_inline void math_vector_max_sse(float4 *a, const float4 *ccl_restrict b, int n)
{
for(int i = 0; i < n; i++) {
- a[i] = _mm_max_ps(a[i], b[i]);
+ a[i] = max(a[i], b[i]);
}
}
-ccl_device_inline void math_matrix_hsum(float *A, int n, const __m128 *ccl_restrict B)
+ccl_device_inline void math_matrix_hsum(float *A, int n, const float4 *ccl_restrict B)
{
for(int row = 0; row < n; row++) {
for(int col = 0; col <= row; col++) {
- MAT(A, n, row, col) = _mm_hsum_ss(MAT(B, n, row, col));
+ MAT(A, n, row, col) = reduce_add(MAT(B, n, row, col))[0];
}
}
}