diff options
author | Brecht Van Lommel <brechtvanlommel@gmail.com> | 2017-08-02 03:23:03 +0300 |
---|---|---|
committer | Brecht Van Lommel <brechtvanlommel@gmail.com> | 2017-08-07 15:01:24 +0300 |
commit | a24fbf3323101cd35332161b12a04e687b5583e4 (patch) | |
tree | 8df25d33db76f5a583589a1337cdb6eec0cec571 /intern/cycles/kernel/filter | |
parent | a8cc0d707e82ac781f44bf6cd7ed1e8974d8ed39 (diff) |
Code refactor: add, remove, optimize various SSE functions.
* Remove some unnecessary SSE emulation defines.
* Use full precision float division so we can enable it.
* Add sqrt(), sqr(), fabs(), shuffle variations, mask().
* Optimize reduce_add(), select().
Differential Revision: https://developer.blender.org/D2764
Diffstat (limited to 'intern/cycles/kernel/filter')
-rw-r--r-- | intern/cycles/kernel/filter/filter_features_sse.h | 12 |
1 files changed, 6 insertions, 6 deletions
```diff
diff --git a/intern/cycles/kernel/filter/filter_features_sse.h b/intern/cycles/kernel/filter/filter_features_sse.h
index 3185330994c..27e220923a0 100644
--- a/intern/cycles/kernel/filter/filter_features_sse.h
+++ b/intern/cycles/kernel/filter/filter_features_sse.h
@@ -92,13 +92,13 @@ ccl_device_inline void filter_get_feature_scales_sse(__m128 x, __m128 y,
 ccl_device_inline void filter_calculate_scale_sse(__m128 *scale)
 {
-	scale[0] = _mm_rcp_ps(_mm_max_ps(_mm_hmax_ps(scale[0]), _mm_set1_ps(0.01f)));
-	scale[1] = _mm_rcp_ps(_mm_max_ps(_mm_hmax_ps(scale[1]), _mm_set1_ps(0.01f)));
-	scale[2] = _mm_rcp_ps(_mm_max_ps(_mm_hmax_ps(scale[2]), _mm_set1_ps(0.01f)));
-	scale[6] = _mm_rcp_ps(_mm_max_ps(_mm_hmax_ps(scale[4]), _mm_set1_ps(0.01f)));
+	scale[0] = _mm_div_ps(_mm_set1_ps(1.0f), _mm_max_ps(_mm_hmax_ps(scale[0]), _mm_set1_ps(0.01f)));
+	scale[1] = _mm_div_ps(_mm_set1_ps(1.0f), _mm_max_ps(_mm_hmax_ps(scale[1]), _mm_set1_ps(0.01f)));
+	scale[2] = _mm_div_ps(_mm_set1_ps(1.0f), _mm_max_ps(_mm_hmax_ps(scale[2]), _mm_set1_ps(0.01f)));
+	scale[6] = _mm_div_ps(_mm_set1_ps(1.0f), _mm_max_ps(_mm_hmax_ps(scale[4]), _mm_set1_ps(0.01f)));
 
-	scale[7] = scale[8] = scale[9] = _mm_rcp_ps(_mm_max_ps(_mm_hmax_ps(_mm_sqrt_ps(scale[5])), _mm_set1_ps(0.01f)));
-	scale[3] = scale[4] = scale[5] = _mm_rcp_ps(_mm_max_ps(_mm_hmax_ps(_mm_sqrt_ps(scale[3])), _mm_set1_ps(0.01f)));
+	scale[7] = scale[8] = scale[9] = _mm_div_ps(_mm_set1_ps(1.0f), _mm_max_ps(_mm_hmax_ps(_mm_sqrt_ps(scale[5])), _mm_set1_ps(0.01f)));
+	scale[3] = scale[4] = scale[5] = _mm_div_ps(_mm_set1_ps(1.0f), _mm_max_ps(_mm_hmax_ps(_mm_sqrt_ps(scale[3])), _mm_set1_ps(0.01f)));
 }
```