Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSergey Sharybin <sergey.vfx@gmail.com>2017-03-23 18:16:05 +0300
committerSergey Sharybin <sergey.vfx@gmail.com>2017-03-23 19:58:03 +0300
commitba8c7d2ba120f5ba7fdbaef67fe2e301ec3e5dcb (patch)
treef2cb920306275c6608c2b03a40c277b9fa584fbd /intern/cycles/kernel/geom
parenta1348dde2ed27d0a8a1d62f9e17602857b1f19f1 (diff)
Cycles: Use SSE-optimized version of triangle intersection for motion triangles
The title says it all actually. Gives up to 10% speedup on test scenes here on i7-6800K. Render times on GPU are unreliable here, but there might be some slowdown caused by watertight nature of intersections.
Diffstat (limited to 'intern/cycles/kernel/geom')
-rw-r--r--intern/cycles/kernel/geom/geom_motion_triangle_intersect.h47
1 files changed, 28 insertions, 19 deletions
diff --git a/intern/cycles/kernel/geom/geom_motion_triangle_intersect.h b/intern/cycles/kernel/geom/geom_motion_triangle_intersect.h
index 2500228281e..971a34308f1 100644
--- a/intern/cycles/kernel/geom/geom_motion_triangle_intersect.h
+++ b/intern/cycles/kernel/geom/geom_motion_triangle_intersect.h
@@ -166,14 +166,15 @@ float3 motion_triangle_refine_subsurface(KernelGlobals *kg,
* time and do a ray intersection with the resulting triangle.
*/
-ccl_device_inline bool motion_triangle_intersect(KernelGlobals *kg,
- Intersection *isect,
- float3 P,
- float3 dir,
- float time,
- uint visibility,
- int object,
- int prim_addr)
+ccl_device_inline bool motion_triangle_intersect(
+ KernelGlobals *kg,
+ const TriangleIsectPrecalc *isect_precalc,
+ Intersection *isect,
+ float3 P,
+ float time,
+ uint visibility,
+ int object,
+ int prim_addr)
{
/* Primitive index for vertex location lookup. */
int prim = kernel_tex_fetch(__prim_index, prim_addr);
@@ -185,11 +186,15 @@ ccl_device_inline bool motion_triangle_intersect(KernelGlobals *kg,
motion_triangle_vertices(kg, fobject, prim, time, verts);
/* Ray-triangle intersection, unoptimized. */
float t, u, v;
- if(ray_triangle_intersect_uv(P,
- dir,
- isect->t,
- verts[2], verts[0], verts[1],
- &u, &v, &t))
+ if(ray_triangle_intersect(isect_precalc,
+ P,
+ isect->t,
+#if defined(__KERNEL_AVX2__) && defined(__KERNEL_SSE__)
+ (ssef*)verts,
+#else
+ verts[0], verts[1], verts[2],
+#endif
+ &u, &v, &t))
{
#ifdef __VISIBILITY_FLAG__
/* Visibility flag test. we do it here under the assumption
@@ -217,9 +222,9 @@ ccl_device_inline bool motion_triangle_intersect(KernelGlobals *kg,
#ifdef __SUBSURFACE__
ccl_device_inline void motion_triangle_intersect_subsurface(
KernelGlobals *kg,
+ const TriangleIsectPrecalc *isect_precalc,
SubsurfaceIntersection *ss_isect,
float3 P,
- float3 dir,
float time,
int object,
int prim_addr,
@@ -237,11 +242,15 @@ ccl_device_inline void motion_triangle_intersect_subsurface(
motion_triangle_vertices(kg, fobject, prim, time, verts);
/* Ray-triangle intersection, unoptimized. */
float t, u, v;
- if(ray_triangle_intersect_uv(P,
- dir,
- tmax,
- verts[2], verts[0], verts[1],
- &u, &v, &t))
+ if(ray_triangle_intersect(isect_precalc,
+ P,
+ tmax,
+#if defined(__KERNEL_AVX2__) && defined(__KERNEL_SSE__)
+ (ssef*)verts,
+#else
+ verts[0], verts[1], verts[2],
+#endif
+ &u, &v, &t))
{
for(int i = min(max_hits, ss_isect->num_hits) - 1; i >= 0; --i) {
if(ss_isect->hits[i].t == t) {