Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJoseph Eagar <joeedh@gmail.com>2022-11-12 01:18:46 +0300
committerJoseph Eagar <joeedh@gmail.com>2022-11-12 01:18:46 +0300
commitc29795452cc71cb9f5a571a4aff0f593a2d7acaf (patch)
tree53a46bb77f3102c545f7e55d3344e310b3bf6116 /intern/cycles/util/transform_inverse.h
parent9980fd0b8e1f3a07060316f28469f55a3f2fc0cd (diff)
parent03ccf37162d365f3fdc8d8cd0cd6e9ff314fec6e (diff)
Merge branch 'master' into temp-sculpt-roll-mappingtemp-sculpt-roll-mapping
Diffstat (limited to 'intern/cycles/util/transform_inverse.h')
-rw-r--r--intern/cycles/util/transform_inverse.h27
1 files changed, 17 insertions, 10 deletions
diff --git a/intern/cycles/util/transform_inverse.h b/intern/cycles/util/transform_inverse.h
index bb410a6daef..2faac576d82 100644
--- a/intern/cycles/util/transform_inverse.h
+++ b/intern/cycles/util/transform_inverse.h
@@ -9,26 +9,33 @@ CCL_NAMESPACE_BEGIN
* Normally we don't use SSE41/AVX outside the kernel, but for this it's
* important to match exactly for ray tracing precision. */
-ccl_device_forceinline float3 transform_inverse_cross(const float3 a, const float3 b)
+ccl_device_forceinline float3 transform_inverse_cross(const float3 a_, const float3 b_)
{
#if defined(__AVX2__) && defined(__KERNEL_SSE2__)
- const ssef sse_a = (const __m128 &)a;
- const ssef sse_b = (const __m128 &)b;
- const ssef r = shuffle<1, 2, 0, 3>(
- ssef(_mm_fmsub_ps(sse_a, shuffle<1, 2, 0, 3>(sse_b), shuffle<1, 2, 0, 3>(sse_a) * sse_b)));
+ const __m128 a = (const __m128 &)a_;
+ const __m128 b = (const __m128 &)b_;
+ const __m128 a_shuffle = _mm_castsi128_ps(
+ _mm_shuffle_epi32(_mm_castps_si128(a), _MM_SHUFFLE(3, 0, 2, 1)));
+ const __m128 b_shuffle = _mm_castsi128_ps(
+ _mm_shuffle_epi32(_mm_castps_si128(b), _MM_SHUFFLE(3, 0, 2, 1)));
+ const __m128 r = _mm_castsi128_ps(
+ _mm_shuffle_epi32(_mm_castps_si128(_mm_fmsub_ps(a, b_shuffle, _mm_mul_ps(a_shuffle, b))),
+ _MM_SHUFFLE(3, 0, 2, 1)));
return (const float3 &)r;
#endif
- return cross(a, b);
+ return cross(a_, b_);
}
-ccl_device_forceinline float transform_inverse_dot(const float3 a, const float3 b)
+ccl_device_forceinline float transform_inverse_dot(const float3 a_, const float3 b_)
{
-#ifdef __SSE4_1__
- return _mm_cvtss_f32(_mm_dp_ps((const __m128 &)a, (const __m128 &)b, 0x7F));
+#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE41__)
+ const __m128 a = (const __m128 &)a_;
+ const __m128 b = (const __m128 &)b_;
+ return _mm_cvtss_f32(_mm_dp_ps(a, b, 0x7F));
#endif
- return dot(a, b);
+ return dot(a_, b_);
}
ccl_device_forceinline Transform transform_inverse_impl(const Transform tfm)