Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/intern
diff options
context:
space:
mode:
authorSergey Sharybin <sergey.vfx@gmail.com>2016-10-26 23:14:41 +0300
committerSergey Sharybin <sergey.vfx@gmail.com>2016-10-26 23:14:41 +0300
commit7e380ad4c0236e6e572023e85694acec3da28e6e (patch)
tree2659ad5a4b56d385fd856300f1b40a5435a27fad /intern
parentde22e55291029126d1964de1c616d6b70a62e6bf (diff)
Cycles: Another attempt to fix crashes on AVX2 processors
Basically, don't use rcp() in areas which seem to be critical after a second look. Also disabled some multiplication operators; not sure yet why they might be a problem. Tomorrow I will be setting up a full test with all the cases which were buggy on our farm to see if this fix is complete.
Diffstat (limited to 'intern')
-rw-r--r--intern/cycles/kernel/geom/geom_object.h3
-rw-r--r--intern/cycles/util/util_math.h22
-rw-r--r--intern/cycles/util/util_transform.h5
3 files changed, 16 insertions, 14 deletions
diff --git a/intern/cycles/kernel/geom/geom_object.h b/intern/cycles/kernel/geom/geom_object.h
index cb2de3a7e87..32900f7f27a 100644
--- a/intern/cycles/kernel/geom/geom_object.h
+++ b/intern/cycles/kernel/geom/geom_object.h
@@ -397,7 +397,8 @@ ccl_device_inline float3 bvh_clamp_direction(float3 dir)
ccl_device_inline float3 bvh_inverse_direction(float3 dir)
{
-#ifdef __KERNEL_SSE__
+ /* TODO(sergey): Currently disabled, gives speedup but causes precision issues. */
+#if defined(__KERNEL_SSE__) && 0
return rcp(dir);
#else
return 1.0f / dir;
diff --git a/intern/cycles/util/util_math.h b/intern/cycles/util/util_math.h
index 57cad39d1eb..f0c7492d88a 100644
--- a/intern/cycles/util/util_math.h
+++ b/intern/cycles/util/util_math.h
@@ -453,8 +453,9 @@ ccl_device_inline float3 operator*(const float3& a, const float f)
ccl_device_inline float3 operator*(const float f, const float3& a)
{
-#ifdef __KERNEL_SSE__
- return float3(_mm_mul_ps(a.m128, _mm_set1_ps(f)));
+ /* TODO(sergey): Currently disabled, gives speedup but causes precision issues. */
+#if defined(__KERNEL_SSE__) && 0
+ return float3(_mm_mul_ps(_mm_set1_ps(f), a.m128));
#else
return make_float3(a.x*f, a.y*f, a.z*f);
#endif
@@ -462,13 +463,13 @@ ccl_device_inline float3 operator*(const float f, const float3& a)
ccl_device_inline float3 operator/(const float f, const float3& a)
{
- /* TODO(sergey): Currently disabled, gives speedup but makes intersection tets non-watertight. */
-// #ifdef __KERNEL_SSE__
-// __m128 rc = _mm_rcp_ps(a.m128);
-// return float3(_mm_mul_ps(_mm_set1_ps(f),rc));
-// #else
+ /* TODO(sergey): Currently disabled, gives speedup but causes precision issues. */
+#if defined(__KERNEL_SSE__) && 0
+ __m128 rc = _mm_rcp_ps(a.m128);
+ return float3(_mm_mul_ps(_mm_set1_ps(f),rc));
+#else
return make_float3(f / a.x, f / a.y, f / a.z);
-// #endif
+#endif
}
ccl_device_inline float3 operator/(const float3& a, const float f)
@@ -479,7 +480,8 @@ ccl_device_inline float3 operator/(const float3& a, const float f)
ccl_device_inline float3 operator/(const float3& a, const float3& b)
{
-#ifdef __KERNEL_SSE__
+ /* TODO(sergey): Currently disabled, gives speedup but causes precision issues. */
+#if defined(__KERNEL_SSE__) && 0
__m128 rc = _mm_rcp_ps(b.m128);
return float3(_mm_mul_ps(a, rc));
#else
@@ -799,7 +801,7 @@ ccl_device_inline float4 operator*(const float4& a, const float4& b)
ccl_device_inline float4 operator*(const float4& a, float f)
{
-#ifdef __KERNEL_SSE__
+#if defined(__KERNEL_SSE__)
return a * make_float4(f);
#else
return make_float4(a.x*f, a.y*f, a.z*f, a.w*f);
diff --git a/intern/cycles/util/util_transform.h b/intern/cycles/util/util_transform.h
index ea5eb3b25b0..a0695f20488 100644
--- a/intern/cycles/util/util_transform.h
+++ b/intern/cycles/util/util_transform.h
@@ -74,7 +74,7 @@ ccl_device_inline float3 transform_perspective(const Transform *t, const float3
ccl_device_inline float3 transform_point(const Transform *t, const float3 a)
{
/* TODO(sergey): Disabled for now, causes crashes in certain cases. */
-#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE2__) && 0
+#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE2__)
ssef x, y, z, w, aa;
aa = a.m128;
@@ -103,8 +103,7 @@ ccl_device_inline float3 transform_point(const Transform *t, const float3 a)
ccl_device_inline float3 transform_direction(const Transform *t, const float3 a)
{
- /* TODO(sergey): Disabled for now, causes crashes in certain cases. */
-#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE2__) && 0
+#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE2__)
ssef x, y, z, w, aa;
aa = a.m128;
x = _mm_loadu_ps(&t->x.x);