From 5c6a14f4e520e2eca73ff3a2c4b6186ee4dab40a Mon Sep 17 00:00:00 2001 From: Sergey Sharybin Date: Mon, 19 Sep 2016 15:29:37 +0200 Subject: Cycles: More tweaks to make specialized BVH traversal matching --- intern/cycles/kernel/bvh/qbvh_shadow_all.h | 18 ++++++++---------- intern/cycles/kernel/bvh/qbvh_volume.h | 3 ++- intern/cycles/kernel/bvh/qbvh_volume_all.h | 8 +++++--- 3 files changed, 15 insertions(+), 14 deletions(-) (limited to 'intern') diff --git a/intern/cycles/kernel/bvh/qbvh_shadow_all.h b/intern/cycles/kernel/bvh/qbvh_shadow_all.h index 03c678bd986..3136c495b38 100644 --- a/intern/cycles/kernel/bvh/qbvh_shadow_all.h +++ b/intern/cycles/kernel/bvh/qbvh_shadow_all.h @@ -40,6 +40,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, uint *num_hits) { /* TODO(sergey): + * - Test if pushing distance on the stack helps. * - Likely and unlikely for if() statements. * - Test restrict attribute for pointers. */ @@ -77,7 +78,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, int num_hits_in_instance = 0; #endif - ssef tnear(0.0f), tfar(tmax); + ssef tnear(0.0f), tfar(isect_t); #if BVH_FEATURE(BVH_HAIR) sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z)); #endif @@ -125,12 +126,12 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, #ifdef __KERNEL_AVX2__ P_idir4, #endif -# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__) +#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__) org4, -# endif -# if BVH_FEATURE(BVH_HAIR) +#endif +#if BVH_FEATURE(BVH_HAIR) dir4, -# endif +#endif idir4, near_x, near_y, near_z, far_x, far_y, far_z, @@ -430,21 +431,18 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, /* Instance pop. */ if(num_hits_in_instance) { float t_fac; - # if BVH_FEATURE(BVH_MOTION) bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm); # else bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac); # endif - - /* scale isect->t to adjust for instancing */ + /* Scale isect->t to adjust for instancing. */ for(int i = 0; i < num_hits_in_instance; i++) { (isect_array-i-1)->t *= t_fac; } } else { float ignore_t = FLT_MAX; - # if BVH_FEATURE(BVH_MOTION) bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &ignore_t, &ob_itfm); # else @@ -458,7 +456,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; } if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; } if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; } - tfar = ssef(tmax); + tfar = ssef(isect_t); # if BVH_FEATURE(BVH_HAIR) dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z)); # endif diff --git a/intern/cycles/kernel/bvh/qbvh_volume.h b/intern/cycles/kernel/bvh/qbvh_volume.h index 847a11d8ad4..aff9f559ab8 100644 --- a/intern/cycles/kernel/bvh/qbvh_volume.h +++ b/intern/cycles/kernel/bvh/qbvh_volume.h @@ -103,8 +103,9 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, do { /* Traverse internal nodes. */ while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) { -#ifdef __VISIBILITY_FLAG__ float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr+0); + +#ifdef __VISIBILITY_FLAG__ if((__float_as_uint(inodes.x) & visibility) == 0) { /* Pop. */ node_addr = traversal_stack[stack_ptr].addr; diff --git a/intern/cycles/kernel/bvh/qbvh_volume_all.h b/intern/cycles/kernel/bvh/qbvh_volume_all.h index 0fe4454b01e..26da0145020 100644 --- a/intern/cycles/kernel/bvh/qbvh_volume_all.h +++ b/intern/cycles/kernel/bvh/qbvh_volume_all.h @@ -107,8 +107,9 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, do { /* Traverse internal nodes. */ while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) { -#ifdef __VISIBILITY_FLAG__ float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr+0); + +#ifdef __VISIBILITY_FLAG__ if((__float_as_uint(inodes.x) & visibility) == 0) { /* Pop. */ node_addr = traversal_stack[stack_ptr].addr; @@ -419,6 +420,9 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, # endif } + isect_t = tmax; + isect_array->t = isect_t; + if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; } if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; } if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; } @@ -436,8 +440,6 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, # endif triangle_intersect_precalc(dir, &isect_precalc); - isect_t = tmax; - isect_array->t = isect_t; object = OBJECT_NONE; node_addr = traversal_stack[stack_ptr].addr; -- cgit v1.2.3