diff options
Diffstat (limited to 'intern/cycles/util')
-rw-r--r-- | intern/cycles/util/math_intersect.h | 92 |
1 files changed, 85 insertions, 7 deletions
diff --git a/intern/cycles/util/math_intersect.h b/intern/cycles/util/math_intersect.h
index cc07cbe7745..aa28682f8c1 100644
--- a/intern/cycles/util/math_intersect.h
+++ b/intern/cycles/util/math_intersect.h
@@ -105,6 +105,51 @@ ccl_device bool ray_disk_intersect(float3 ray_P,
   return false;
 }
 
+/* Custom rcp, cross and dot implementations that match Embree bit for bit. */
+ccl_device_forceinline float ray_triangle_rcp(const float x)
+{
+#ifdef __KERNEL_NEON__
+  /* Move scalar to vector register and do rcp. */
+  __m128 a;
+  a[0] = x;
+  float32x4_t reciprocal = vrecpeq_f32(a);
+  reciprocal = vmulq_f32(vrecpsq_f32(a, reciprocal), reciprocal);
+  reciprocal = vmulq_f32(vrecpsq_f32(a, reciprocal), reciprocal);
+  return reciprocal[0];
+#elif defined(__KERNEL_SSE__)
+  const __m128 a = _mm_set_ss(x);
+  const __m128 r = _mm_rcp_ss(a);
+  /* Refine the estimate with one Newton-Raphson step: r * (2 - r * a). */
+#  ifdef __KERNEL_AVX2__
+  return _mm_cvtss_f32(_mm_mul_ss(r, _mm_fnmadd_ss(r, a, _mm_set_ss(2.0f))));
+#  else
+  return _mm_cvtss_f32(_mm_mul_ss(r, _mm_sub_ss(_mm_set_ss(2.0f), _mm_mul_ss(r, a))));
+#  endif
+#else
+  return 1.0f / x;
+#endif
+}
+
+ccl_device_inline float ray_triangle_dot(const float3 a, const float3 b)
+{
+#if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__)
+  return madd(ssef(a.x), ssef(b.x), madd(ssef(a.y), ssef(b.y), ssef(a.z) * ssef(b.z)))[0];
+#else
+  return a.x * b.x + a.y * b.y + a.z * b.z;
+#endif
+}
+
+ccl_device_inline float3 ray_triangle_cross(const float3 a, const float3 b)
+{
+#if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__)
+  return make_float3(msub(ssef(a.y), ssef(b.z), ssef(a.z) * ssef(b.y))[0],
+                     msub(ssef(a.z), ssef(b.x), ssef(a.x) * ssef(b.z))[0],
+                     msub(ssef(a.x), ssef(b.y), ssef(a.y) * ssef(b.x))[0]);
+#else
+  return make_float3(a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x);
+#endif
+}
+
 ccl_device_forceinline bool ray_triangle_intersect(const float3 ray_P,
                                                    const float3 ray_D,
                                                    const float ray_tmin,
@@ -130,9 +175,9 @@ ccl_device_forceinline bool ray_triangle_intersect(const float3 ray_P,
   const float3 e2 = v1 - v2;
 
   /* Perform edge tests. */
-  const float U = dot(cross(e0, v2 + v0), ray_D);
-  const float V = dot(cross(e1, v0 + v1), ray_D);
-  const float W = dot(cross(e2, v1 + v2), ray_D);
+  const float U = ray_triangle_dot(ray_triangle_cross(e0, v2 + v0), ray_D);
+  const float V = ray_triangle_dot(ray_triangle_cross(e1, v0 + v1), ray_D);
+  const float W = ray_triangle_dot(ray_triangle_cross(e2, v1 + v2), ray_D);
 
   const float UVW = U + V + W;
   const float eps = FLT_EPSILON * fabsf(UVW);
@@ -144,7 +189,7 @@ ccl_device_forceinline bool ray_triangle_intersect(const float3 ray_P,
   }
 
   /* Calculate geometry normal and denominator. */
-  const float3 Ng1 = cross(e1, e0);
+  const float3 Ng1 = ray_triangle_cross(e1, e0);
   const float3 Ng = Ng1 + Ng1;
   const float den = dot(Ng, ray_D);
   /* Avoid division by 0. */
@@ -159,13 +204,46 @@ ccl_device_forceinline bool ray_triangle_intersect(const float3 ray_P,
     return false;
   }
 
-  const float rcp_UVW = (fabsf(UVW) < 1e-18f) ? 0.0f : 1.0f / UVW;
-  *isect_u = min(U * rcp_UVW, 1.0f);
-  *isect_v = min(V * rcp_UVW, 1.0f);
+  const float rcp_uvw = (fabsf(UVW) < 1e-18f) ? 0.0f : ray_triangle_rcp(UVW);
+  *isect_u = min(U * rcp_uvw, 1.0f);
+  *isect_v = min(V * rcp_uvw, 1.0f);
   *isect_t = t;
   return true;
 }
 
+ccl_device_forceinline bool ray_triangle_intersect_self(const float3 ray_P,
+                                                        const float3 ray_D,
+                                                        const float3 tri_a,
+                                                        const float3 tri_b,
+                                                        const float3 tri_c)
+{
+  /* Matches logic in ray_triangle_intersect, self intersection test to validate
+   * if a ray is going to hit self or might incorrectly hit a neighboring triangle. */
+
+  /* Calculate vertices relative to ray origin. */
+  const float3 v0 = tri_a - ray_P;
+  const float3 v1 = tri_b - ray_P;
+  const float3 v2 = tri_c - ray_P;
+
+  /* Calculate triangle edges. */
+  const float3 e0 = v2 - v0;
+  const float3 e1 = v0 - v1;
+  const float3 e2 = v1 - v2;
+
+  /* Perform edge tests. */
+  const float U = ray_triangle_dot(ray_triangle_cross(v2 + v0, e0), ray_D);
+  const float V = ray_triangle_dot(ray_triangle_cross(v0 + v1, e1), ray_D);
+  const float W = ray_triangle_dot(ray_triangle_cross(v1 + v2, e2), ray_D);
+
+  const float eps = FLT_EPSILON * fabsf(U + V + W);
+  const float minUVW = min(U, min(V, W));
+  const float maxUVW = max(U, max(V, W));
+
+  /* Note the extended epsilon compared to ray_triangle_intersect, to account
+   * for intersections with neighboring triangles that have an epsilon. */
+  return (minUVW >= eps || maxUVW <= -eps);
+}
+
 /* Tests for an intersection between a ray and a quad defined by
  * its midpoint, normal and sides.
  * If ellipse is true, hits outside the ellipse that's enclosed by the