git.blender.org/blender.git
author     Brecht Van Lommel <brechtvanlommel@gmail.com>  2017-07-19 02:54:56 +0300
committer  Brecht Van Lommel <brechtvanlommel@gmail.com>  2017-08-07 15:01:24 +0300
commit     ee77c1e917dd5e9d8eaca6212376a11aef1a877d (patch)
tree       913bb920bd8ac48c392f63b2cebd0801430e3218  /intern/cycles/util/util_math_matrix.h
parent     a24fbf3323101cd35332161b12a04e687b5583e4 (diff)
Code refactor: use float4 instead of intrinsics for CPU denoise filtering.
Differential Revision: https://developer.blender.org/D2764
Diffstat (limited to 'intern/cycles/util/util_math_matrix.h')
-rw-r--r--  intern/cycles/util/util_math_matrix.h | 28 ++++++++++++++--------------
1 file changed, 14 insertions(+), 14 deletions(-)
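
Note: the refactor relies on Cycles' float4 type wrapping the SSE register and overloading the arithmetic operators, so the rewritten helpers compile to the same instructions as the intrinsic calls they replace. The standalone sketch below illustrates that pattern only; it is not the actual util_types_float4.h / util_math_float4.h headers, and the field and operator set shown here are assumptions.

/* Standalone sketch of the refactor pattern: a minimal float4 wrapper with
 * overloaded operators, so SIMD code reads like scalar math while still
 * compiling to the same SSE instructions. Illustration only; the layout and
 * helper names are assumptions, not Cycles' util_types_float4.h. Compiles
 * with any x86-64 C++ compiler (SSE is baseline there). */
#include <xmmintrin.h>  /* SSE */
#include <cstdio>

struct float4 {
	__m128 m;  /* one SSE register holding four floats */
};

/* Broadcast a scalar into all four lanes, as make_float4(0.0f) does in the
 * diff below. */
static inline float4 make_float4(float f)
{
	float4 r;
	r.m = _mm_set1_ps(f);
	return r;
}

static inline float4 operator+(const float4 a, const float4 b)
{
	float4 r;
	r.m = _mm_add_ps(a.m, b.m);
	return r;
}

static inline float4 operator*(const float4 a, const float4 b)
{
	float4 r;
	r.m = _mm_mul_ps(a.m, b.m);
	return r;
}

static inline float4 &operator+=(float4 &a, const float4 b)
{
	a = a + b;
	return a;
}

int main()
{
	float4 v = make_float4(2.0f);
	float4 w = make_float4(0.5f);
	float4 acc = make_float4(0.0f);

	/* Before the refactor this accumulation would be written as
	 *   acc = _mm_add_ps(acc, _mm_mul_ps(_mm_mul_ps(v, v), w));
	 * afterwards it is plain arithmetic, as in math_matrix_add_gramian_sse: */
	acc += v * v * w;

	float out[4];
	_mm_storeu_ps(out, acc.m);
	printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]);  /* 2 2 2 2 */
	return 0;
}

The point of the change is visible in main(): the Gramian-style update reads as plain arithmetic, while the generated code is still one mulps/addps sequence per float4.
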
diff --git a/intern/cycles/util/util_math_matrix.h b/intern/cycles/util/util_math_matrix.h
index c7511f8306e..7269d391956 100644
--- a/intern/cycles/util/util_math_matrix.h
+++ b/intern/cycles/util/util_math_matrix.h
@@ -339,59 +339,59 @@ ccl_device void math_matrix_jacobi_eigendecomposition(float *A, ccl_global float
 }
 
 #ifdef __KERNEL_SSE3__
-ccl_device_inline void math_vector_zero_sse(__m128 *A, int n)
+ccl_device_inline void math_vector_zero_sse(float4 *A, int n)
 {
 	for(int i = 0; i < n; i++) {
-		A[i] = _mm_setzero_ps();
+		A[i] = make_float4(0.0f);
 	}
 }
 
-ccl_device_inline void math_matrix_zero_sse(__m128 *A, int n)
+ccl_device_inline void math_matrix_zero_sse(float4 *A, int n)
 {
 	for(int row = 0; row < n; row++) {
 		for(int col = 0; col <= row; col++) {
-			MAT(A, n, row, col) = _mm_setzero_ps();
+			MAT(A, n, row, col) = make_float4(0.0f);
 		}
 	}
 }
 
 /* Add Gramian matrix of v to A.
  * The Gramian matrix of v is v^T*v, so element (i,j) is v[i]*v[j]. */
-ccl_device_inline void math_matrix_add_gramian_sse(__m128 *A, int n, const __m128 *ccl_restrict v, __m128 weight)
+ccl_device_inline void math_matrix_add_gramian_sse(float4 *A, int n, const float4 *ccl_restrict v, float4 weight)
 {
 	for(int row = 0; row < n; row++) {
 		for(int col = 0; col <= row; col++) {
-			MAT(A, n, row, col) = _mm_add_ps(MAT(A, n, row, col), _mm_mul_ps(_mm_mul_ps(v[row], v[col]), weight));
+			MAT(A, n, row, col) = MAT(A, n, row, col) + v[row] * v[col] * weight;
 		}
 	}
 }
 
-ccl_device_inline void math_vector_add_sse(__m128 *V, int n, const __m128 *ccl_restrict a)
+ccl_device_inline void math_vector_add_sse(float4 *V, int n, const float4 *ccl_restrict a)
 {
 	for(int i = 0; i < n; i++) {
-		V[i] = _mm_add_ps(V[i], a[i]);
+		V[i] += a[i];
 	}
 }
 
-ccl_device_inline void math_vector_mul_sse(__m128 *V, int n, const __m128 *ccl_restrict a)
+ccl_device_inline void math_vector_mul_sse(float4 *V, int n, const float4 *ccl_restrict a)
 {
 	for(int i = 0; i < n; i++) {
-		V[i] = _mm_mul_ps(V[i], a[i]);
+		V[i] *= a[i];
 	}
 }
 
-ccl_device_inline void math_vector_max_sse(__m128 *a, const __m128 *ccl_restrict b, int n)
+ccl_device_inline void math_vector_max_sse(float4 *a, const float4 *ccl_restrict b, int n)
 {
 	for(int i = 0; i < n; i++) {
-		a[i] = _mm_max_ps(a[i], b[i]);
+		a[i] = max(a[i], b[i]);
 	}
 }
 
-ccl_device_inline void math_matrix_hsum(float *A, int n, const __m128 *ccl_restrict B)
+ccl_device_inline void math_matrix_hsum(float *A, int n, const float4 *ccl_restrict B)
 {
 	for(int row = 0; row < n; row++) {
 		for(int col = 0; col <= row; col++) {
-			MAT(A, n, row, col) = _mm_hsum_ss(MAT(B, n, row, col));
+			MAT(A, n, row, col) = reduce_add(MAT(B, n, row, col))[0];
 		}
 	}
 }
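
Note: the last hunk swaps the hand-rolled horizontal sum _mm_hsum_ss() for reduce_add(...)[0], i.e. the four lanes of the float4 are summed and the scalar is read from lane 0. A rough standalone sketch of what that expression computes, assuming reduce_add() returns a float4 with the lane sum broadcast (the real helper lives in Cycles' util_math_float4.h and may be implemented differently):

/* Sketch of the horizontal sum used in the last hunk. The implementation
 * below is an assumption for illustration (two SSE3 haddps passes, sum
 * broadcast to all lanes), not a copy of Cycles' util_math_float4.h. */
#include <pmmintrin.h>  /* SSE3: _mm_hadd_ps */
#include <cstdio>

struct float4 {
	__m128 m;

	/* Lane access, so reduce_add(x)[0] works as in the diff. */
	float operator[](int i) const
	{
		float tmp[4];
		_mm_storeu_ps(tmp, m);
		return tmp[i];
	}
};

static inline float4 reduce_add(const float4 a)
{
	/* haddps(a, a) = (a0+a1, a2+a3, a0+a1, a2+a3); a second pass puts the
	 * full sum a0+a1+a2+a3 into every lane. */
	float4 r;
	__m128 h = _mm_hadd_ps(a.m, a.m);
	r.m = _mm_hadd_ps(h, h);
	return r;
}

int main()
{
	float4 v;
	v.m = _mm_setr_ps(1.0f, 2.0f, 3.0f, 4.0f);
	printf("%g\n", reduce_add(v)[0]);  /* prints 10 */
	return 0;
}

The two _mm_hadd_ps passes need SSE3, which is consistent with the __KERNEL_SSE3__ guard around these helpers; build the sketch with -msse3 or any -march setting that implies it.
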