diff options
author | Sergey Sharybin <sergey.vfx@gmail.com> | 2016-02-25 17:12:11 +0300 |
---|---|---|
committer | Sergey Sharybin <sergey.vfx@gmail.com> | 2016-03-25 15:42:13 +0300 |
commit | 0e47e0cc9e9b19a30717042d97cb3b8fb50132ff (patch) | |
tree | e735d1d52faf96df09e292e5338f4a4af1499de3 /intern/cycles/kernel | |
parent | 712a2579944fb9ee00fe3c6c34f07f2800361396 (diff) |
Cycles: Use dedicated BVH for subsurface ray casting
This commit makes subsurface ray casting completely ignore all BVH nodes
and primitives which do not belong to the current object. Traversal now
starts at the object's own BVH node (looked up via __object_node) instead
of the scene BVH root, which makes the traversal code much simpler and
reduces the number of intersection tests.
Reviewers: brecht, juicyfruit, dingto, lukasstockner97
Differential Revision: https://developer.blender.org/D1823
Diffstat (limited to 'intern/cycles/kernel')
-rw-r--r-- | intern/cycles/kernel/geom/geom_bvh.h | 75 | ||||
-rw-r--r-- | intern/cycles/kernel/geom/geom_bvh_subsurface.h | 183 | ||||
-rw-r--r-- | intern/cycles/kernel/geom/geom_qbvh_subsurface.h | 180 |
3 files changed, 115 insertions, 323 deletions
diff --git a/intern/cycles/kernel/geom/geom_bvh.h b/intern/cycles/kernel/geom/geom_bvh.h index d9f4076769e..ad983995cc9 100644 --- a/intern/cycles/kernel/geom/geom_bvh.h +++ b/intern/cycles/kernel/geom/geom_bvh.h @@ -91,27 +91,9 @@ CCL_NAMESPACE_BEGIN #include "geom_bvh_subsurface.h" #endif -#if defined(__SUBSURFACE__) && defined(__INSTANCING__) -#define BVH_FUNCTION_NAME bvh_intersect_subsurface_instancing -#define BVH_FUNCTION_FEATURES BVH_INSTANCING -#include "geom_bvh_subsurface.h" -#endif - -#if defined(__SUBSURFACE__) && defined(__HAIR__) -#define BVH_FUNCTION_NAME bvh_intersect_subsurface_hair -#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR -#include "geom_bvh_subsurface.h" -#endif - #if defined(__SUBSURFACE__) && defined(__OBJECT_MOTION__) #define BVH_FUNCTION_NAME bvh_intersect_subsurface_motion -#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION -#include "geom_bvh_subsurface.h" -#endif - -#if defined(__SUBSURFACE__) && defined(__HAIR__) && defined(__OBJECT_MOTION__) -#define BVH_FUNCTION_NAME bvh_intersect_subsurface_hair_motion -#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_MOTION +#define BVH_FUNCTION_FEATURES BVH_MOTION #include "geom_bvh_subsurface.h" #endif @@ -269,17 +251,6 @@ ccl_device_intersect void scene_intersect_subsurface(KernelGlobals *kg, { #ifdef __OBJECT_MOTION__ if(kernel_data.bvh.have_motion) { -#ifdef __HAIR__ - if(kernel_data.bvh.have_curves) { - return bvh_intersect_subsurface_hair_motion(kg, - ray, - ss_isect, - subsurface_object, - lcg_state, - max_hits); - } -#endif /* __HAIR__ */ - return bvh_intersect_subsurface_motion(kg, ray, ss_isect, @@ -288,56 +259,12 @@ ccl_device_intersect void scene_intersect_subsurface(KernelGlobals *kg, max_hits); } #endif /* __OBJECT_MOTION__ */ - -#ifdef __HAIR__ - if(kernel_data.bvh.have_curves) { - return bvh_intersect_subsurface_hair(kg, - ray, - ss_isect, - subsurface_object, - lcg_state, - max_hits); - } -#endif /* __HAIR__ */ - -#ifdef __KERNEL_CPU__ - -#ifdef 
__INSTANCING__ - if(kernel_data.bvh.have_instancing) { - return bvh_intersect_subsurface_instancing(kg, - ray, - ss_isect, - subsurface_object, - lcg_state, - max_hits); - } -#endif /* __INSTANCING__ */ - - return bvh_intersect_subsurface(kg, - ray, - ss_isect, - subsurface_object, - lcg_state, - max_hits); -#else /* __KERNEL_CPU__ */ - -#ifdef __INSTANCING__ - return bvh_intersect_subsurface_instancing(kg, - ray, - ss_isect, - subsurface_object, - lcg_state, - max_hits); -#else return bvh_intersect_subsurface(kg, ray, ss_isect, subsurface_object, lcg_state, max_hits); -#endif /* __INSTANCING__ */ - -#endif /* __KERNEL_CPU__ */ } #endif diff --git a/intern/cycles/kernel/geom/geom_bvh_subsurface.h b/intern/cycles/kernel/geom/geom_bvh_subsurface.h index b9f1a46afb6..43809201761 100644 --- a/intern/cycles/kernel/geom/geom_bvh_subsurface.h +++ b/intern/cycles/kernel/geom/geom_bvh_subsurface.h @@ -25,7 +25,6 @@ * various features can be enabled/disabled. This way we can compile optimized * versions for each case without new features slowing things down. 
* - * BVH_INSTANCING: object instancing * BVH_MOTION: motion blur rendering * */ @@ -41,17 +40,16 @@ ccl_device void BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg, * - test if pushing distance on the stack helps (for non shadow rays) * - separate version for shadow rays * - likely and unlikely for if() statements - * - SSE for hair * - test restrict attribute for pointers */ - + /* traversal stack in CUDA thread-local memory */ int traversalStack[BVH_STACK_SIZE]; traversalStack[0] = ENTRYPOINT_SENTINEL; /* traversal variables in registers */ int stackPtr = 0; - int nodeAddr = kernel_data.bvh.root; + int nodeAddr = kernel_tex_fetch(__object_node, subsurface_object); /* ray parameters in registers */ float3 P = ray->P; @@ -62,14 +60,28 @@ ccl_device void BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg, ss_isect->num_hits = 0; + const int object_flag = kernel_tex_fetch(__object_flag, subsurface_object); + if(!(object_flag & SD_TRANSFORM_APPLIED)) { #if BVH_FEATURE(BVH_MOTION) - Transform ob_itfm; + Transform ob_itfm; + bvh_instance_motion_push(kg, + subsurface_object, + ray, + &P, + &dir, + &idir, + &isect_t, + &ob_itfm); +#else + bvh_instance_push(kg, subsurface_object, ray, &P, &dir, &idir, &isect_t); #endif + object = subsurface_object; + } #if defined(__KERNEL_SSE2__) const shuffle_swap_t shuf_identity = shuffle_swap_identity(); const shuffle_swap_t shuf_swap = shuffle_swap_swap(); - + const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000)); ssef Psplat[3], idirsplat[3]; shuffle_swap_t shufflexyz[3]; @@ -190,133 +202,56 @@ ccl_device void BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg, float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)*BVH_NODE_LEAF_SIZE); int primAddr = __float_as_int(leaf.x); -#if BVH_FEATURE(BVH_INSTANCING) - if(primAddr >= 0) { -#endif - const int primAddr2 = __float_as_int(leaf.y); - const uint type = __float_as_int(leaf.w); - - /* pop */ - nodeAddr = traversalStack[stackPtr]; - --stackPtr; - - /* primitive intersection */ - 
switch(type & PRIMITIVE_ALL) { - case PRIMITIVE_TRIANGLE: { - /* intersect ray against primitive */ - for(; primAddr < primAddr2; primAddr++) { - kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); - /* only primitives from the same object */ - uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object; - if(tri_object != subsurface_object) - continue; - triangle_intersect_subsurface(kg, - &isect_precalc, - ss_isect, - P, - object, - primAddr, - isect_t, - lcg_state, - max_hits); - } - break; + const int primAddr2 = __float_as_int(leaf.y); + const uint type = __float_as_int(leaf.w); + + /* pop */ + nodeAddr = traversalStack[stackPtr]; + --stackPtr; + + /* primitive intersection */ + switch(type & PRIMITIVE_ALL) { + case PRIMITIVE_TRIANGLE: { + /* intersect ray against primitive */ + for(; primAddr < primAddr2; primAddr++) { + kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); + triangle_intersect_subsurface(kg, + &isect_precalc, + ss_isect, + P, + object, + primAddr, + isect_t, + lcg_state, + max_hits); } + break; + } #if BVH_FEATURE(BVH_MOTION) - case PRIMITIVE_MOTION_TRIANGLE: { - /* intersect ray against primitive */ - for(; primAddr < primAddr2; primAddr++) { - kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); - /* only primitives from the same object */ - uint tri_object = (object == OBJECT_NONE)? 
kernel_tex_fetch(__prim_object, primAddr): object; - if(tri_object != subsurface_object) - continue; - motion_triangle_intersect_subsurface(kg, - ss_isect, - P, - dir, - ray->time, - object, - primAddr, - isect_t, - lcg_state, - max_hits); - } - break; - } -#endif - default: { - break; + case PRIMITIVE_MOTION_TRIANGLE: { + /* intersect ray against primitive */ + for(; primAddr < primAddr2; primAddr++) { + kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); + motion_triangle_intersect_subsurface(kg, + ss_isect, + P, + dir, + ray->time, + object, + primAddr, + isect_t, + lcg_state, + max_hits); } + break; } - } -#if BVH_FEATURE(BVH_INSTANCING) - else { - /* instance push */ - if(subsurface_object == kernel_tex_fetch(__prim_object, -primAddr-1)) { - object = subsurface_object; - -#if BVH_FEATURE(BVH_MOTION) - bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_itfm); -#else - bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect_t); -#endif - triangle_intersect_precalc(dir, &isect_precalc); - -#if defined(__KERNEL_SSE2__) - Psplat[0] = ssef(P.x); - Psplat[1] = ssef(P.y); - Psplat[2] = ssef(P.z); - - tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t); - - gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); #endif - - ++stackPtr; - kernel_assert(stackPtr < BVH_STACK_SIZE); - traversalStack[stackPtr] = ENTRYPOINT_SENTINEL; - - nodeAddr = kernel_tex_fetch(__object_node, object); - } - else { - /* pop */ - nodeAddr = traversalStack[stackPtr]; - --stackPtr; + default: { + break; } } } -#endif /* FEATURE(BVH_INSTANCING) */ } while(nodeAddr != ENTRYPOINT_SENTINEL); - -#if BVH_FEATURE(BVH_INSTANCING) - if(stackPtr >= 0) { - kernel_assert(object != OBJECT_NONE); - - /* instance pop */ -#if BVH_FEATURE(BVH_MOTION) - bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_itfm); -#else - bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect_t); -#endif - - triangle_intersect_precalc(dir, 
&isect_precalc); - -#if defined(__KERNEL_SSE2__) - Psplat[0] = ssef(P.x); - Psplat[1] = ssef(P.y); - Psplat[2] = ssef(P.z); - - tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t); - - gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); -#endif - - object = OBJECT_NONE; - nodeAddr = traversalStack[stackPtr]; - --stackPtr; - } -#endif /* FEATURE(BVH_INSTANCING) */ } while(nodeAddr != ENTRYPOINT_SENTINEL); } diff --git a/intern/cycles/kernel/geom/geom_qbvh_subsurface.h b/intern/cycles/kernel/geom/geom_qbvh_subsurface.h index 98e1d27b79e..84512a8783c 100644 --- a/intern/cycles/kernel/geom/geom_qbvh_subsurface.h +++ b/intern/cycles/kernel/geom/geom_qbvh_subsurface.h @@ -21,7 +21,6 @@ * various features can be enabled/disabled. This way we can compile optimized * versions for each case without new features slowing things down. * - * BVH_INSTANCING: object instancing * BVH_MOTION: motion blur rendering * */ @@ -47,7 +46,7 @@ ccl_device void BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, /* Traversal variables in registers. */ int stackPtr = 0; - int nodeAddr = kernel_data.bvh.root; + int nodeAddr = kernel_tex_fetch(__object_node, subsurface_object); /* Ray parameters in registers. 
*/ float3 P = ray->P; @@ -58,9 +57,23 @@ ccl_device void BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, ss_isect->num_hits = 0; + const int object_flag = kernel_tex_fetch(__object_flag, subsurface_object); + if(!(object_flag & SD_TRANSFORM_APPLIED)) { #if BVH_FEATURE(BVH_MOTION) - Transform ob_itfm; + Transform ob_itfm; + bvh_instance_motion_push(kg, + subsurface_object, + ray, + &P, + &dir, + &idir, + &isect_t, + &ob_itfm); +#else + bvh_instance_push(kg, subsurface_object, ray, &P, &dir, &idir, &isect_t); #endif + object = subsurface_object; + } #ifndef __KERNEL_SSE41__ if(!isfinite(P.x)) { @@ -206,137 +219,54 @@ ccl_device void BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)*BVH_QNODE_LEAF_SIZE); int primAddr = __float_as_int(leaf.x); -#if BVH_FEATURE(BVH_INSTANCING) - if(primAddr >= 0) { -#endif - int primAddr2 = __float_as_int(leaf.y); - const uint type = __float_as_int(leaf.w); + int primAddr2 = __float_as_int(leaf.y); + const uint type = __float_as_int(leaf.w); - /* Pop. */ - nodeAddr = traversalStack[stackPtr].addr; - --stackPtr; + /* Pop. */ + nodeAddr = traversalStack[stackPtr].addr; + --stackPtr; - /* Primitive intersection. */ - switch(type & PRIMITIVE_ALL) { - case PRIMITIVE_TRIANGLE: { - /* Intersect ray against primitive, */ - for(; primAddr < primAddr2; primAddr++) { - kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); - /* Only primitives from the same object. */ - uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object; - if(tri_object != subsurface_object) { - continue; - } - triangle_intersect_subsurface(kg, - &isect_precalc, - ss_isect, - P, - object, - primAddr, - isect_t, - lcg_state, - max_hits); - } - break; + /* Primitive intersection. 
*/ + switch(type & PRIMITIVE_ALL) { + case PRIMITIVE_TRIANGLE: { + /* Intersect ray against primitive, */ + for(; primAddr < primAddr2; primAddr++) { + kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); + triangle_intersect_subsurface(kg, + &isect_precalc, + ss_isect, + P, + object, + primAddr, + isect_t, + lcg_state, + max_hits); } + break; + } #if BVH_FEATURE(BVH_MOTION) - case PRIMITIVE_MOTION_TRIANGLE: { - /* Intersect ray against primitive. */ - for(; primAddr < primAddr2; primAddr++) { - kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); - /* Only primitives from the same object. */ - uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object; - if(tri_object != subsurface_object) { - continue; - } - motion_triangle_intersect_subsurface(kg, - ss_isect, - P, - dir, - ray->time, - object, - primAddr, - isect_t, - lcg_state, - max_hits); - } - break; + case PRIMITIVE_MOTION_TRIANGLE: { + /* Intersect ray against primitive. */ + for(; primAddr < primAddr2; primAddr++) { + kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); + motion_triangle_intersect_subsurface(kg, + ss_isect, + P, + dir, + ray->time, + object, + primAddr, + isect_t, + lcg_state, + max_hits); } -#endif - default: - break; + break; } - } -#if BVH_FEATURE(BVH_INSTANCING) - else { - /* Instance push. 
*/ - if(subsurface_object == kernel_tex_fetch(__prim_object, -primAddr-1)) { - object = subsurface_object; - -#if BVH_FEATURE(BVH_MOTION) - bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_itfm); -#else - bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect_t); -#endif - - if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; } - if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; } - if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; } - tfar = ssef(isect_t); - idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z)); -#ifdef __KERNEL_AVX2__ - P_idir = P*idir; - P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z); -#else - org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z)); #endif - triangle_intersect_precalc(dir, &isect_precalc); - - ++stackPtr; - kernel_assert(stackPtr < BVH_QSTACK_SIZE); - traversalStack[stackPtr].addr = ENTRYPOINT_SENTINEL; - - nodeAddr = kernel_tex_fetch(__object_node, object); - } - else { - /* Pop. */ - nodeAddr = traversalStack[stackPtr].addr; - --stackPtr; - } - + default: + break; } } -#endif /* FEATURE(BVH_INSTANCING) */ } while(nodeAddr != ENTRYPOINT_SENTINEL); - -#if BVH_FEATURE(BVH_INSTANCING) - if(stackPtr >= 0) { - kernel_assert(object != OBJECT_NONE); - - /* Instance pop. 
*/ -#if BVH_FEATURE(BVH_MOTION) - bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_itfm); -#else - bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect_t); -#endif - - if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; } - if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; } - if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; } - tfar = ssef(isect_t); - idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z)); -#ifdef __KERNEL_AVX2__ - P_idir = P*idir; - P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z); -#else - org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z)); -#endif - triangle_intersect_precalc(dir, &isect_precalc); - - object = OBJECT_NONE; - nodeAddr = traversalStack[stackPtr].addr; - --stackPtr; - } -#endif /* FEATURE(BVH_INSTANCING) */ } while(nodeAddr != ENTRYPOINT_SENTINEL); } |