From 4355603790712032e89fa4da6d8ce7f3ede62b4f Mon Sep 17 00:00:00 2001 From: Sergey Sharybin Date: Mon, 11 Jul 2016 12:28:45 +0200 Subject: Cycles: Move BVH kernel files to own directory BVH traversal is not really that much a geometry and we've got quite some traversals now. Makes sense to keep them separate in the name of source structure clarity. --- intern/cycles/kernel/CMakeLists.txt | 40 +- intern/cycles/kernel/bvh/bvh.h | 424 +++++++++++++++ intern/cycles/kernel/bvh/bvh_nodes.h | 656 +++++++++++++++++++++++ intern/cycles/kernel/bvh/bvh_shadow.h | 386 +++++++++++++ intern/cycles/kernel/bvh/bvh_subsurface.h | 266 +++++++++ intern/cycles/kernel/bvh/bvh_traversal.h | 428 +++++++++++++++ intern/cycles/kernel/bvh/bvh_volume.h | 324 +++++++++++ intern/cycles/kernel/bvh/bvh_volume_all.h | 397 ++++++++++++++ intern/cycles/kernel/bvh/qbvh_nodes.h | 433 +++++++++++++++ intern/cycles/kernel/bvh/qbvh_shadow.h | 449 ++++++++++++++++ intern/cycles/kernel/bvh/qbvh_subsurface.h | 299 +++++++++++ intern/cycles/kernel/bvh/qbvh_traversal.h | 465 ++++++++++++++++ intern/cycles/kernel/bvh/qbvh_volume.h | 374 +++++++++++++ intern/cycles/kernel/bvh/qbvh_volume_all.h | 446 +++++++++++++++ intern/cycles/kernel/geom/geom.h | 9 - intern/cycles/kernel/geom/geom_bvh.h | 417 -------------- intern/cycles/kernel/geom/geom_bvh_nodes.h | 656 ----------------------- intern/cycles/kernel/geom/geom_bvh_shadow.h | 386 ------------- intern/cycles/kernel/geom/geom_bvh_subsurface.h | 266 --------- intern/cycles/kernel/geom/geom_bvh_traversal.h | 428 --------------- intern/cycles/kernel/geom/geom_bvh_volume.h | 324 ----------- intern/cycles/kernel/geom/geom_bvh_volume_all.h | 397 -------------- intern/cycles/kernel/geom/geom_qbvh.h | 433 --------------- intern/cycles/kernel/geom/geom_qbvh_shadow.h | 449 ---------------- intern/cycles/kernel/geom/geom_qbvh_subsurface.h | 299 ----------- intern/cycles/kernel/geom/geom_qbvh_traversal.h | 465 ---------------- intern/cycles/kernel/geom/geom_qbvh_volume.h 
| 374 ------------- intern/cycles/kernel/geom/geom_qbvh_volume_all.h | 446 --------------- intern/cycles/kernel/kernel_path.h | 1 + intern/cycles/kernel/kernels/opencl/kernel.cl | 1 + intern/cycles/kernel/osl/osl_services.cpp | 1 + intern/cycles/kernel/split/kernel_split_common.h | 1 + 32 files changed, 5377 insertions(+), 5363 deletions(-) create mode 100644 intern/cycles/kernel/bvh/bvh.h create mode 100644 intern/cycles/kernel/bvh/bvh_nodes.h create mode 100644 intern/cycles/kernel/bvh/bvh_shadow.h create mode 100644 intern/cycles/kernel/bvh/bvh_subsurface.h create mode 100644 intern/cycles/kernel/bvh/bvh_traversal.h create mode 100644 intern/cycles/kernel/bvh/bvh_volume.h create mode 100644 intern/cycles/kernel/bvh/bvh_volume_all.h create mode 100644 intern/cycles/kernel/bvh/qbvh_nodes.h create mode 100644 intern/cycles/kernel/bvh/qbvh_shadow.h create mode 100644 intern/cycles/kernel/bvh/qbvh_subsurface.h create mode 100644 intern/cycles/kernel/bvh/qbvh_traversal.h create mode 100644 intern/cycles/kernel/bvh/qbvh_volume.h create mode 100644 intern/cycles/kernel/bvh/qbvh_volume_all.h delete mode 100644 intern/cycles/kernel/geom/geom_bvh.h delete mode 100644 intern/cycles/kernel/geom/geom_bvh_nodes.h delete mode 100644 intern/cycles/kernel/geom/geom_bvh_shadow.h delete mode 100644 intern/cycles/kernel/geom/geom_bvh_subsurface.h delete mode 100644 intern/cycles/kernel/geom/geom_bvh_traversal.h delete mode 100644 intern/cycles/kernel/geom/geom_bvh_volume.h delete mode 100644 intern/cycles/kernel/geom/geom_bvh_volume_all.h delete mode 100644 intern/cycles/kernel/geom/geom_qbvh.h delete mode 100644 intern/cycles/kernel/geom/geom_qbvh_shadow.h delete mode 100644 intern/cycles/kernel/geom/geom_qbvh_subsurface.h delete mode 100644 intern/cycles/kernel/geom/geom_qbvh_traversal.h delete mode 100644 intern/cycles/kernel/geom/geom_qbvh_volume.h delete mode 100644 intern/cycles/kernel/geom/geom_qbvh_volume_all.h (limited to 'intern') diff --git 
a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index 3c2f7747f34..3f0917bb992 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -28,6 +28,22 @@ set(SRC kernels/cuda/kernel.cu ) +set(SRC_BVH_HEADERS + bvh/bvh.h + bvh/bvh_nodes.h + bvh/bvh_shadow.h + bvh/bvh_subsurface.h + bvh/bvh_traversal.h + bvh/bvh_volume.h + bvh/bvh_volume_all.h + bvh/qbvh_nodes.h + bvh/qbvh_shadow.h + bvh/qbvh_subsurface.h + bvh/qbvh_traversal.h + bvh/qbvh_volume.h + bvh/qbvh_volume_all.h +) + set(SRC_HEADERS kernel_accumulate.h kernel_bake.h @@ -140,24 +156,11 @@ set(SRC_SVM_HEADERS set(SRC_GEOM_HEADERS geom/geom.h geom/geom_attribute.h - geom/geom_bvh.h - geom/geom_bvh_nodes.h - geom/geom_bvh_shadow.h - geom/geom_bvh_subsurface.h - geom/geom_bvh_traversal.h - geom/geom_bvh_volume.h - geom/geom_bvh_volume_all.h geom/geom_curve.h geom/geom_motion_curve.h geom/geom_motion_triangle.h geom/geom_object.h geom/geom_primitive.h - geom/geom_qbvh.h - geom/geom_qbvh_shadow.h - geom/geom_qbvh_subsurface.h - geom/geom_qbvh_traversal.h - geom/geom_qbvh_volume.h - geom/geom_qbvh_volume_all.h geom/geom_triangle.h geom/geom_triangle_intersect.h geom/geom_volume.h @@ -213,7 +216,14 @@ if(WITH_CYCLES_CUDA_BINARIES) endif() # build for each arch - set(cuda_sources kernels/cuda/kernel.cu ${SRC_HEADERS} ${SRC_SVM_HEADERS} ${SRC_GEOM_HEADERS} ${SRC_CLOSURE_HEADERS} ${SRC_UTIL_HEADERS}) + set(cuda_sources kernels/cuda/kernel.cu + ${SRC_HEADERS} + ${SRC_BVH_HEADERS} + ${SRC_SVM_HEADERS} + ${SRC_GEOM_HEADERS} + ${SRC_CLOSURE_HEADERS} + ${SRC_UTIL_HEADERS} + ) set(cuda_cubins) macro(CYCLES_CUDA_KERNEL_ADD arch experimental) @@ -313,6 +323,7 @@ add_library(cycles_kernel ${SRC} ${SRC_HEADERS} ${SRC_KERNELS_CPU_HEADERS} + ${SRC_BVH_HEADERS} ${SRC_CLOSURE_HEADERS} ${SRC_SVM_HEADERS} ${SRC_GEOM_HEADERS} @@ -347,6 +358,7 @@ delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_next_iteratio delayed_install(${CMAKE_CURRENT_SOURCE_DIR} 
"kernels/opencl/kernel_sum_all_radiance.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/cuda/kernel.cu" ${CYCLES_INSTALL_PATH}/kernel/kernels/cuda) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_HEADERS}" ${CYCLES_INSTALL_PATH}/kernel) +delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_BVH_HEADERS}" ${CYCLES_INSTALL_PATH}/kernel/bvh) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_CLOSURE_HEADERS}" ${CYCLES_INSTALL_PATH}/kernel/closure) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_SVM_HEADERS}" ${CYCLES_INSTALL_PATH}/kernel/svm) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_GEOM_HEADERS}" ${CYCLES_INSTALL_PATH}/kernel/geom) diff --git a/intern/cycles/kernel/bvh/bvh.h b/intern/cycles/kernel/bvh/bvh.h new file mode 100644 index 00000000000..b1802596c5a --- /dev/null +++ b/intern/cycles/kernel/bvh/bvh.h @@ -0,0 +1,424 @@ +/* + * Adapted from code Copyright 2009-2010 NVIDIA Corporation + * Modifications Copyright 2011, Blender Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* BVH + * + * Bounding volume hierarchy for ray tracing. We compile different variations + * of the same BVH traversal function for faster rendering when some types of + * primitives are not needed, using #includes to work around the lack of + * C++ templates in OpenCL. 
+ * + * Originally based on "Understanding the Efficiency of Ray Traversal on GPUs", + * the code has been extended and modified to support more primitives and work + * with CPU/CUDA/OpenCL. */ + +CCL_NAMESPACE_BEGIN + +/* Don't inline intersect functions on GPU, this is faster */ +#ifdef __KERNEL_GPU__ +# define ccl_device_intersect ccl_device_noinline +#else +# define ccl_device_intersect ccl_device_inline +#endif + +/* bottom-most stack entry, indicating the end of traversal */ +#define ENTRYPOINT_SENTINEL 0x76543210 + +/* 64 object BVH + 64 mesh BVH + 64 object node splitting */ +#define BVH_STACK_SIZE 192 +#define BVH_QSTACK_SIZE 384 + +/* BVH intersection function variations */ + +#define BVH_INSTANCING 1 +#define BVH_MOTION 2 +#define BVH_HAIR 4 +#define BVH_HAIR_MINIMUM_WIDTH 8 + +#define BVH_NAME_JOIN(x,y) x ## _ ## y +#define BVH_NAME_EVAL(x,y) BVH_NAME_JOIN(x,y) +#define BVH_FUNCTION_FULL_NAME(prefix) BVH_NAME_EVAL(prefix, BVH_FUNCTION_NAME) + +#define BVH_FEATURE(f) (((BVH_FUNCTION_FEATURES) & (f)) != 0) + +/* Debugging helpers */ +#ifdef __KERNEL_DEBUG__ +# define BVH_DEBUG_INIT() \ + do { \ + isect->num_traversal_steps = 0; \ + isect->num_traversed_instances = 0; \ + } while(0) +# define BVH_DEBUG_NEXT_STEP() \ + do { \ + ++isect->num_traversal_steps; \ + } while(0) +# define BVH_DEBUG_NEXT_INSTANCE() \ + do { \ + ++isect->num_traversed_instances; \ + } while(0) +#else /* __KERNEL_DEBUG__ */ +# define BVH_DEBUG_INIT() +# define BVH_DEBUG_NEXT_STEP() +# define BVH_DEBUG_NEXT_INSTANCE() +#endif /* __KERNEL_DEBUG__ */ + + +/* Common QBVH functions. 
*/ +#ifdef __QBVH__ +# include "qbvh_nodes.h" +#endif + +/* Regular BVH traversal */ + +#include "bvh_nodes.h" + +#define BVH_FUNCTION_NAME bvh_intersect +#define BVH_FUNCTION_FEATURES 0 +#include "bvh_traversal.h" + +#if defined(__INSTANCING__) +# define BVH_FUNCTION_NAME bvh_intersect_instancing +# define BVH_FUNCTION_FEATURES BVH_INSTANCING +# include "bvh_traversal.h" +#endif + +#if defined(__HAIR__) +# define BVH_FUNCTION_NAME bvh_intersect_hair +# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_HAIR_MINIMUM_WIDTH +# include "bvh_traversal.h" +#endif + +#if defined(__OBJECT_MOTION__) +# define BVH_FUNCTION_NAME bvh_intersect_motion +# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION +# include "bvh_traversal.h" +#endif + +#if defined(__HAIR__) && defined(__OBJECT_MOTION__) +# define BVH_FUNCTION_NAME bvh_intersect_hair_motion +# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_HAIR_MINIMUM_WIDTH|BVH_MOTION +# include "bvh_traversal.h" +#endif + +/* Subsurface scattering BVH traversal */ + +#if defined(__SUBSURFACE__) +# define BVH_FUNCTION_NAME bvh_intersect_subsurface +# define BVH_FUNCTION_FEATURES BVH_HAIR +# include "bvh_subsurface.h" +#endif + +#if defined(__SUBSURFACE__) && defined(__OBJECT_MOTION__) +# define BVH_FUNCTION_NAME bvh_intersect_subsurface_motion +# define BVH_FUNCTION_FEATURES BVH_MOTION|BVH_HAIR +# include "bvh_subsurface.h" +#endif + +/* Volume BVH traversal */ + +#if defined(__VOLUME__) +# define BVH_FUNCTION_NAME bvh_intersect_volume +# define BVH_FUNCTION_FEATURES BVH_HAIR +# include "bvh_volume.h" +#endif + +#if defined(__VOLUME__) && defined(__INSTANCING__) +# define BVH_FUNCTION_NAME bvh_intersect_volume_instancing +# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR +# include "bvh_volume.h" +#endif + +#if defined(__VOLUME__) && defined(__OBJECT_MOTION__) +# define BVH_FUNCTION_NAME bvh_intersect_volume_motion +# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION|BVH_HAIR +# include "bvh_volume.h" 
+#endif + +/* Record all intersections - Shadow BVH traversal */ + +#if defined(__SHADOW_RECORD_ALL__) +# define BVH_FUNCTION_NAME bvh_intersect_shadow_all +# define BVH_FUNCTION_FEATURES 0 +# include "bvh_shadow.h" +#endif + +#if defined(__SHADOW_RECORD_ALL__) && defined(__INSTANCING__) +# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_instancing +# define BVH_FUNCTION_FEATURES BVH_INSTANCING +# include "bvh_shadow.h" +#endif + +#if defined(__SHADOW_RECORD_ALL__) && defined(__HAIR__) +# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_hair +# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR +# include "bvh_shadow.h" +#endif + +#if defined(__SHADOW_RECORD_ALL__) && defined(__OBJECT_MOTION__) +# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_motion +# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION +# include "bvh_shadow.h" +#endif + +#if defined(__SHADOW_RECORD_ALL__) && defined(__HAIR__) && defined(__OBJECT_MOTION__) +# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_hair_motion +# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_MOTION +# include "bvh_shadow.h" +#endif + +/* Record all intersections - Volume BVH traversal */ + +#if defined(__VOLUME_RECORD_ALL__) +# define BVH_FUNCTION_NAME bvh_intersect_volume_all +# define BVH_FUNCTION_FEATURES BVH_HAIR +# include "bvh_volume_all.h" +#endif + +#if defined(__VOLUME_RECORD_ALL__) && defined(__INSTANCING__) +# define BVH_FUNCTION_NAME bvh_intersect_volume_all_instancing +# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR +# include "bvh_volume_all.h" +#endif + +#if defined(__VOLUME_RECORD_ALL__) && defined(__OBJECT_MOTION__) +# define BVH_FUNCTION_NAME bvh_intersect_volume_all_motion +# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION|BVH_HAIR +# include "bvh_volume_all.h" +#endif + +#undef BVH_FEATURE +#undef BVH_NAME_JOIN +#undef BVH_NAME_EVAL +#undef BVH_FUNCTION_FULL_NAME + +ccl_device_intersect bool scene_intersect(KernelGlobals *kg, + const Ray *ray, + const uint 
visibility, + Intersection *isect, + uint *lcg_state, + float difl, + float extmax) +{ +#ifdef __OBJECT_MOTION__ + if(kernel_data.bvh.have_motion) { +# ifdef __HAIR__ + if(kernel_data.bvh.have_curves) + return bvh_intersect_hair_motion(kg, ray, isect, visibility, lcg_state, difl, extmax); +# endif /* __HAIR__ */ + + return bvh_intersect_motion(kg, ray, isect, visibility); + } +#endif /* __OBJECT_MOTION__ */ + +#ifdef __HAIR__ + if(kernel_data.bvh.have_curves) + return bvh_intersect_hair(kg, ray, isect, visibility, lcg_state, difl, extmax); +#endif /* __HAIR__ */ + +#ifdef __KERNEL_CPU__ + +# ifdef __INSTANCING__ + if(kernel_data.bvh.have_instancing) + return bvh_intersect_instancing(kg, ray, isect, visibility); +# endif /* __INSTANCING__ */ + + return bvh_intersect(kg, ray, isect, visibility); +#else /* __KERNEL_CPU__ */ + +# ifdef __INSTANCING__ + return bvh_intersect_instancing(kg, ray, isect, visibility); +# else + return bvh_intersect(kg, ray, isect, visibility); +# endif /* __INSTANCING__ */ + +#endif /* __KERNEL_CPU__ */ +} + +#ifdef __SUBSURFACE__ +ccl_device_intersect void scene_intersect_subsurface(KernelGlobals *kg, + const Ray *ray, + SubsurfaceIntersection *ss_isect, + int subsurface_object, + uint *lcg_state, + int max_hits) +{ +#ifdef __OBJECT_MOTION__ + if(kernel_data.bvh.have_motion) { + return bvh_intersect_subsurface_motion(kg, + ray, + ss_isect, + subsurface_object, + lcg_state, + max_hits); + } +#endif /* __OBJECT_MOTION__ */ + return bvh_intersect_subsurface(kg, + ray, + ss_isect, + subsurface_object, + lcg_state, + max_hits); +} +#endif + +#ifdef __SHADOW_RECORD_ALL__ +ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals *kg, const Ray *ray, Intersection *isect, uint max_hits, uint *num_hits) +{ +# ifdef __OBJECT_MOTION__ + if(kernel_data.bvh.have_motion) { +# ifdef __HAIR__ + if(kernel_data.bvh.have_curves) + return bvh_intersect_shadow_all_hair_motion(kg, ray, isect, max_hits, num_hits); +# endif /* __HAIR__ */ + + return 
bvh_intersect_shadow_all_motion(kg, ray, isect, max_hits, num_hits); + } +# endif /* __OBJECT_MOTION__ */ + +# ifdef __HAIR__ + if(kernel_data.bvh.have_curves) + return bvh_intersect_shadow_all_hair(kg, ray, isect, max_hits, num_hits); +# endif /* __HAIR__ */ + +# ifdef __INSTANCING__ + if(kernel_data.bvh.have_instancing) + return bvh_intersect_shadow_all_instancing(kg, ray, isect, max_hits, num_hits); +# endif /* __INSTANCING__ */ + + return bvh_intersect_shadow_all(kg, ray, isect, max_hits, num_hits); +} +#endif /* __SHADOW_RECORD_ALL__ */ + +#ifdef __VOLUME__ +ccl_device_intersect bool scene_intersect_volume(KernelGlobals *kg, + const Ray *ray, + Intersection *isect, + const uint visibility) +{ +# ifdef __OBJECT_MOTION__ + if(kernel_data.bvh.have_motion) { + return bvh_intersect_volume_motion(kg, ray, isect, visibility); + } +# endif /* __OBJECT_MOTION__ */ +# ifdef __KERNEL_CPU__ +# ifdef __INSTANCING__ + if(kernel_data.bvh.have_instancing) + return bvh_intersect_volume_instancing(kg, ray, isect, visibility); +# endif /* __INSTANCING__ */ + return bvh_intersect_volume(kg, ray, isect, visibility); +# else /* __KERNEL_CPU__ */ +# ifdef __INSTANCING__ + return bvh_intersect_volume_instancing(kg, ray, isect, visibility); +# else + return bvh_intersect_volume(kg, ray, isect, visibility); +# endif /* __INSTANCING__ */ +# endif /* __KERNEL_CPU__ */ +} +#endif /* __VOLUME__ */ + +#ifdef __VOLUME_RECORD_ALL__ +ccl_device_intersect uint scene_intersect_volume_all(KernelGlobals *kg, + const Ray *ray, + Intersection *isect, + const uint max_hits, + const uint visibility) +{ +# ifdef __OBJECT_MOTION__ + if(kernel_data.bvh.have_motion) { + return bvh_intersect_volume_all_motion(kg, ray, isect, max_hits, visibility); + } +# endif /* __OBJECT_MOTION__ */ +# ifdef __INSTANCING__ + if(kernel_data.bvh.have_instancing) + return bvh_intersect_volume_all_instancing(kg, ray, isect, max_hits, visibility); +# endif /* __INSTANCING__ */ + return bvh_intersect_volume_all(kg, ray, isect, 
max_hits, visibility); +} +#endif /* __VOLUME_RECORD_ALL__ */ + + +/* Ray offset to avoid self intersection. + * + * This function should be used to compute a modified ray start position for + * rays leaving from a surface. */ + +ccl_device_inline float3 ray_offset(float3 P, float3 Ng) +{ +#ifdef __INTERSECTION_REFINE__ + const float epsilon_f = 1e-5f; + /* ideally this should match epsilon_f, but instancing and motion blur + * precision makes it problematic */ + const float epsilon_test = 1.0f; + const int epsilon_i = 32; + + float3 res; + + /* x component */ + if(fabsf(P.x) < epsilon_test) { + res.x = P.x + Ng.x*epsilon_f; + } + else { + uint ix = __float_as_uint(P.x); + ix += ((ix ^ __float_as_uint(Ng.x)) >> 31)? -epsilon_i: epsilon_i; + res.x = __uint_as_float(ix); + } + + /* y component */ + if(fabsf(P.y) < epsilon_test) { + res.y = P.y + Ng.y*epsilon_f; + } + else { + uint iy = __float_as_uint(P.y); + iy += ((iy ^ __float_as_uint(Ng.y)) >> 31)? -epsilon_i: epsilon_i; + res.y = __uint_as_float(iy); + } + + /* z component */ + if(fabsf(P.z) < epsilon_test) { + res.z = P.z + Ng.z*epsilon_f; + } + else { + uint iz = __float_as_uint(P.z); + iz += ((iz ^ __float_as_uint(Ng.z)) >> 31)? -epsilon_i: epsilon_i; + res.z = __uint_as_float(iz); + } + + return res; +#else + const float epsilon_f = 1e-4f; + return P + epsilon_f*Ng; +#endif +} + +#if defined(__SHADOW_RECORD_ALL__) || defined (__VOLUME_RECORD_ALL__) +/* ToDo: Move to another file? 
*/ +ccl_device int intersections_compare(const void *a, const void *b) +{ + const Intersection *isect_a = (const Intersection*)a; + const Intersection *isect_b = (const Intersection*)b; + + if(isect_a->t < isect_b->t) + return -1; + else if(isect_a->t > isect_b->t) + return 1; + else + return 0; +} +#endif + +CCL_NAMESPACE_END + diff --git a/intern/cycles/kernel/bvh/bvh_nodes.h b/intern/cycles/kernel/bvh/bvh_nodes.h new file mode 100644 index 00000000000..5b0d8785d0e --- /dev/null +++ b/intern/cycles/kernel/bvh/bvh_nodes.h @@ -0,0 +1,656 @@ +/* + * Copyright 2011-2016, Blender Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// TODO(sergey): Look into avoid use of full Transform and use 3x3 matrix and +// 3-vector which might be faster. 
+ccl_device_inline Transform bvh_unaligned_node_fetch_space(KernelGlobals *kg, + int nodeAddr, + int child) +{ + Transform space; + const int child_addr = nodeAddr + child * 3; + space.x = kernel_tex_fetch(__bvh_nodes, child_addr+1); + space.y = kernel_tex_fetch(__bvh_nodes, child_addr+2); + space.z = kernel_tex_fetch(__bvh_nodes, child_addr+3); + space.w = make_float4(0.0f, 0.0f, 0.0f, 1.0f); + return space; +} + +#if !defined(__KERNEL_SSE2__) +ccl_device_inline int bvh_aligned_node_intersect(KernelGlobals *kg, + const float3 P, + const float3 idir, + const float t, + const int nodeAddr, + const uint visibility, + float dist[2]) +{ + + /* fetch node data */ + float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0); + float4 node0 = kernel_tex_fetch(__bvh_nodes, nodeAddr+1); + float4 node1 = kernel_tex_fetch(__bvh_nodes, nodeAddr+2); + float4 node2 = kernel_tex_fetch(__bvh_nodes, nodeAddr+3); + + /* intersect ray against child nodes */ + float c0lox = (node0.x - P.x) * idir.x; + float c0hix = (node0.z - P.x) * idir.x; + float c0loy = (node1.x - P.y) * idir.y; + float c0hiy = (node1.z - P.y) * idir.y; + float c0loz = (node2.x - P.z) * idir.z; + float c0hiz = (node2.z - P.z) * idir.z; + float c0min = max4(min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz), 0.0f); + float c0max = min4(max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz), t); + + float c1lox = (node0.y - P.x) * idir.x; + float c1hix = (node0.w - P.x) * idir.x; + float c1loy = (node1.y - P.y) * idir.y; + float c1hiy = (node1.w - P.y) * idir.y; + float c1loz = (node2.y - P.z) * idir.z; + float c1hiz = (node2.w - P.z) * idir.z; + float c1min = max4(min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz), 0.0f); + float c1max = min4(max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz), t); + + dist[0] = c0min; + dist[1] = c1min; + +#ifdef __VISIBILITY_FLAG__ + /* this visibility test gives a 5% performance hit, how to solve? 
*/ + return (((c0max >= c0min) && (__float_as_uint(cnodes.x) & visibility))? 1: 0) | + (((c1max >= c1min) && (__float_as_uint(cnodes.y) & visibility))? 2: 0); +#else + return ((c0max >= c0min)? 1: 0) | + ((c1max >= c1min)? 2: 0); +#endif +} + +ccl_device_inline int bvh_aligned_node_intersect_robust(KernelGlobals *kg, + const float3 P, + const float3 idir, + const float t, + const float difl, + const float extmax, + const int nodeAddr, + const uint visibility, + float dist[2]) +{ + + /* fetch node data */ + float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0); + float4 node0 = kernel_tex_fetch(__bvh_nodes, nodeAddr+1); + float4 node1 = kernel_tex_fetch(__bvh_nodes, nodeAddr+2); + float4 node2 = kernel_tex_fetch(__bvh_nodes, nodeAddr+3); + + /* intersect ray against child nodes */ + float c0lox = (node0.x - P.x) * idir.x; + float c0hix = (node0.z - P.x) * idir.x; + float c0loy = (node1.x - P.y) * idir.y; + float c0hiy = (node1.z - P.y) * idir.y; + float c0loz = (node2.x - P.z) * idir.z; + float c0hiz = (node2.z - P.z) * idir.z; + float c0min = max4(min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz), 0.0f); + float c0max = min4(max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz), t); + + float c1lox = (node0.y - P.x) * idir.x; + float c1hix = (node0.w - P.x) * idir.x; + float c1loy = (node1.y - P.y) * idir.y; + float c1hiy = (node1.w - P.y) * idir.y; + float c1loz = (node2.y - P.z) * idir.z; + float c1hiz = (node2.w - P.z) * idir.z; + float c1min = max4(min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz), 0.0f); + float c1max = min4(max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz), t); + + if(difl != 0.0f) { + float hdiff = 1.0f + difl; + float ldiff = 1.0f - difl; + if(__float_as_int(cnodes.z) & PATH_RAY_CURVE) { + c0min = max(ldiff * c0min, c0min - extmax); + c0max = min(hdiff * c0max, c0max + extmax); + } + if(__float_as_int(cnodes.w) & PATH_RAY_CURVE) { + c1min = max(ldiff * c1min, c1min - extmax); + c1max = min(hdiff * c1max, c1max + 
extmax); + } + } + + dist[0] = c0min; + dist[1] = c1min; + +#ifdef __VISIBILITY_FLAG__ + /* this visibility test gives a 5% performance hit, how to solve? */ + return (((c0max >= c0min) && (__float_as_uint(cnodes.x) & visibility))? 1: 0) | + (((c1max >= c1min) && (__float_as_uint(cnodes.y) & visibility))? 2: 0); +#else + return ((c0max >= c0min)? 1: 0) | + ((c1max >= c1min)? 2: 0); +#endif +} + +ccl_device_inline bool bvh_unaligned_node_intersect_child( + KernelGlobals *kg, + const float3 P, + const float3 dir, + const float t, + int nodeAddr, + int child, + float dist[2]) +{ + Transform space = bvh_unaligned_node_fetch_space(kg, nodeAddr, child); + float3 aligned_dir = transform_direction(&space, dir); + float3 aligned_P = transform_point(&space, P); + float3 nrdir = -bvh_inverse_direction(aligned_dir); + float3 tLowerXYZ = aligned_P * nrdir; + float3 tUpperXYZ = tLowerXYZ - nrdir; + const float tNearX = min(tLowerXYZ.x, tUpperXYZ.x); + const float tNearY = min(tLowerXYZ.y, tUpperXYZ.y); + const float tNearZ = min(tLowerXYZ.z, tUpperXYZ.z); + const float tFarX = max(tLowerXYZ.x, tUpperXYZ.x); + const float tFarY = max(tLowerXYZ.y, tUpperXYZ.y); + const float tFarZ = max(tLowerXYZ.z, tUpperXYZ.z); + const float tNear = max4(0.0f, tNearX, tNearY, tNearZ); + const float tFar = min4(t, tFarX, tFarY, tFarZ); + *dist = tNear; + return tNear <= tFar; +} + +ccl_device_inline bool bvh_unaligned_node_intersect_child_robust( + KernelGlobals *kg, + const float3 P, + const float3 dir, + const float t, + const float difl, + int nodeAddr, + int child, + float dist[2]) +{ + Transform space = bvh_unaligned_node_fetch_space(kg, nodeAddr, child); + float3 aligned_dir = transform_direction(&space, dir); + float3 aligned_P = transform_point(&space, P); + float3 nrdir = -bvh_inverse_direction(aligned_dir); + float3 tLowerXYZ = aligned_P * nrdir; + float3 tUpperXYZ = tLowerXYZ - nrdir; + const float tNearX = min(tLowerXYZ.x, tUpperXYZ.x); + const float tNearY = min(tLowerXYZ.y, 
tUpperXYZ.y); + const float tNearZ = min(tLowerXYZ.z, tUpperXYZ.z); + const float tFarX = max(tLowerXYZ.x, tUpperXYZ.x); + const float tFarY = max(tLowerXYZ.y, tUpperXYZ.y); + const float tFarZ = max(tLowerXYZ.z, tUpperXYZ.z); + const float tNear = max4(0.0f, tNearX, tNearY, tNearZ); + const float tFar = min4(t, tFarX, tFarY, tFarZ); + *dist = tNear; + if(difl != 0.0f) { + /* TODO(sergey): Same as for QBVH, needs a proper use. */ + const float round_down = 1.0f - difl; + const float round_up = 1.0f + difl; + return round_down*tNear <= round_up*tFar; + } + else { + return tNear <= tFar; + } +} + +ccl_device_inline int bvh_unaligned_node_intersect(KernelGlobals *kg, + const float3 P, + const float3 dir, + const float3 idir, + const float t, + const int nodeAddr, + const uint visibility, + float dist[2]) +{ + int mask = 0; + float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0); + if(bvh_unaligned_node_intersect_child(kg, P, dir, t, nodeAddr, 0, &dist[0])) { +#ifdef __VISIBILITY_FLAG__ + if((__float_as_uint(cnodes.x) & visibility)) +#endif + { + mask |= 1; + } + } + if(bvh_unaligned_node_intersect_child(kg, P, dir, t, nodeAddr, 1, &dist[1])) { +#ifdef __VISIBILITY_FLAG__ + if((__float_as_uint(cnodes.y) & visibility)) +#endif + { + mask |= 2; + } + } + return mask; +} + +ccl_device_inline int bvh_unaligned_node_intersect_robust(KernelGlobals *kg, + const float3 P, + const float3 dir, + const float3 idir, + const float t, + const float difl, + const float extmax, + const int nodeAddr, + const uint visibility, + float dist[2]) +{ + int mask = 0; + float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0); + if(bvh_unaligned_node_intersect_child_robust(kg, P, dir, t, difl, nodeAddr, 0, &dist[0])) { +#ifdef __VISIBILITY_FLAG__ + if((__float_as_uint(cnodes.x) & visibility)) +#endif + { + mask |= 1; + } + } + if(bvh_unaligned_node_intersect_child_robust(kg, P, dir, t, difl, nodeAddr, 1, &dist[1])) { +#ifdef __VISIBILITY_FLAG__ + if((__float_as_uint(cnodes.y) & 
visibility)) +#endif + { + mask |= 2; + } + } + return mask; +} + /* Node test dispatcher for the variant that takes plain float3 and float ray data. Fetches the node header at nodeAddr; when PATH_RAY_NODE_UNALIGNED is set in node.x the unaligned test is used, otherwise the aligned one (which is not passed dir). Returns the result of the selected test unchanged; dist[] is written only by that callee. */ +ccl_device_inline int bvh_node_intersect(KernelGlobals *kg, + const float3 P, + const float3 dir, + const float3 idir, + const float t, + const int nodeAddr, + const uint visibility, + float dist[2]) +{ + float4 node = kernel_tex_fetch(__bvh_nodes, nodeAddr); + if(__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) { + return bvh_unaligned_node_intersect(kg, + P, + dir, + idir, + t, + nodeAddr, + visibility, + dist); + } + else { + return bvh_aligned_node_intersect(kg, + P, + idir, + t, + nodeAddr, + visibility, + dist); + } +} + /* Same dispatch as bvh_node_intersect, but calls the _robust tests and additionally forwards difl and extmax to them. */ +ccl_device_inline int bvh_node_intersect_robust(KernelGlobals *kg, + const float3 P, + const float3 dir, + const float3 idir, + const float t, + const float difl, + const float extmax, + const int nodeAddr, + const uint visibility, + float dist[2]) +{ + float4 node = kernel_tex_fetch(__bvh_nodes, nodeAddr); + if(__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) { + return bvh_unaligned_node_intersect_robust(kg, + P, + dir, + idir, + t, + difl, + extmax, + nodeAddr, + visibility, + dist); + } + else { + return bvh_aligned_node_intersect_robust(kg, + P, + idir, + t, + difl, + extmax, + nodeAddr, + visibility, + dist); + } +} +#else /* !defined(__KERNEL_SSE2__) */ + /* SSE slab test of one ray against both children of an axis-aligned node. Bit 0 and bit 1 of the result are set when child 0 and child 1 pass tmin <= tmax; with __VISIBILITY_FLAG__ a bit is kept only if cnodes.x (child 0) or cnodes.y (child 1) shares a bit with visibility. dist[0] and dist[1] receive lanes 0 and 1 of tminmax, i.e. c0min and c1min per the lane layout noted in the body. P and dir are accepted but not read here. */ +int ccl_device_inline bvh_aligned_node_intersect( + KernelGlobals *kg, + const float3& P, + const float3& dir, + const ssef& tsplat, + const ssef Psplat[3], + const ssef idirsplat[3], + const shuffle_swap_t shufflexyz[3], + const int nodeAddr, + const uint visibility, + float dist[2]) +{ + /* Intersect two child bounding boxes, SSE3 version adapted from Embree */ + /* pn carries only sign bits; the XOR with it further down negates the lanes it covers (presumably the two max lanes). */ const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000)); + + /* fetch node data */ + const ssef *bvh_nodes = (ssef*)kg->__bvh_nodes.data + nodeAddr; + + /* intersect ray against child nodes */ + const ssef tminmaxx = (shuffle_swap(bvh_nodes[1], shufflexyz[0]) - Psplat[0]) * idirsplat[0]; + const ssef tminmaxy = (shuffle_swap(bvh_nodes[2], 
shufflexyz[1]) - Psplat[1]) * idirsplat[1]; + const ssef tminmaxz = (shuffle_swap(bvh_nodes[3], shufflexyz[2]) - Psplat[2]) * idirsplat[2]; + + /* calculate { c0min, c1min, -c0max, -c1max} */ + ssef minmax = max(max(tminmaxx, tminmaxy), max(tminmaxz, tsplat)); + const ssef tminmax = minmax ^ pn; + /* compare each min lane with the matching max lane */ const sseb lrhit = tminmax <= shuffle<2, 3, 0, 1>(tminmax); + + dist[0] = tminmax[0]; + dist[1] = tminmax[1]; + + int mask = movemask(lrhit); + +# ifdef __VISIBILITY_FLAG__ + /* this visibility test gives a 5% performance hit, how to solve? */ + float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0); + int cmask = (((mask & 1) && (__float_as_uint(cnodes.x) & visibility))? 1: 0) | + (((mask & 2) && (__float_as_uint(cnodes.y) & visibility))? 2: 0); + return cmask; +# else + return mask & 3; +# endif +} + /* Variant of bvh_aligned_node_intersect that, when difl != 0, widens the [min, max] interval of every child whose node flag has PATH_RAY_CURVE set: min becomes max(ldiff * min, min - extmax) and max becomes min(hdiff * max, max + extmax), with ldiff = 1 - difl and hdiff = 1 + difl. Result bits and dist[] have the same meaning as in the non-robust test. */ +int ccl_device_inline bvh_aligned_node_intersect_robust( + KernelGlobals *kg, + const float3& P, + const float3& dir, + const ssef& tsplat, + const ssef Psplat[3], + const ssef idirsplat[3], + const shuffle_swap_t shufflexyz[3], + const float difl, + const float extmax, + const int nodeAddr, + const uint visibility, + float dist[2]) +{ + /* Intersect two child bounding boxes, SSE3 version adapted from Embree */ + const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000)); + + /* fetch node data */ + const ssef *bvh_nodes = (ssef*)kg->__bvh_nodes.data + nodeAddr; + + /* intersect ray against child nodes */ + const ssef tminmaxx = (shuffle_swap(bvh_nodes[1], shufflexyz[0]) - Psplat[0]) * idirsplat[0]; + const ssef tminmaxy = (shuffle_swap(bvh_nodes[2], shufflexyz[1]) - Psplat[1]) * idirsplat[1]; + const ssef tminmaxz = (shuffle_swap(bvh_nodes[3], shufflexyz[2]) - Psplat[2]) * idirsplat[2]; + + /* calculate { c0min, c1min, -c0max, -c1max} */ + ssef minmax = max(max(tminmaxx, tminmaxy), max(tminmaxz, tsplat)); + /* Deliberately not const: the curve widening below writes single lanes of tminmax through tminmaxview, and writing to an object defined const is undefined behaviour. */ ssef tminmax = minmax ^ pn; + + if(difl != 0.0f) { + float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0); + /* NOTE(review): lane access through a float4 view assumes ssef and float4 share size and layout - confirm. */ float4 *tminmaxview = (float4*)&tminmax; + 
float& c0min = tminmaxview->x, &c1min = tminmaxview->y; + float& c0max = tminmaxview->z, &c1max = tminmaxview->w; + float hdiff = 1.0f + difl; + float ldiff = 1.0f - difl; + if(__float_as_int(cnodes.x) & PATH_RAY_CURVE) { + c0min = max(ldiff * c0min, c0min - extmax); + c0max = min(hdiff * c0max, c0max + extmax); + } + if(__float_as_int(cnodes.y) & PATH_RAY_CURVE) { + c1min = max(ldiff * c1min, c1min - extmax); + c1max = min(hdiff * c1max, c1max + extmax); + } + } + + /* evaluated after the optional widening so the adjusted lanes take part in the test */ const sseb lrhit = tminmax <= shuffle<2, 3, 0, 1>(tminmax); + + dist[0] = tminmax[0]; + dist[1] = tminmax[1]; + + int mask = movemask(lrhit); + +# ifdef __VISIBILITY_FLAG__ + /* this visibility test gives a 5% performance hit, how to solve? */ + float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0); + int cmask = (((mask & 1) && (__float_as_uint(cnodes.x) & visibility))? 1: 0) | + (((mask & 2) && (__float_as_uint(cnodes.y) & visibility))? 2: 0); + return cmask; +# else + return mask & 3; +# endif +} + /* SSE test of one ray against both children of an unaligned node. Each child has its own Transform (fetched with index 0 and 1); P and dir are transformed with it and tested against the unit cube in that space: assuming bvh_inverse_direction returns 1 / dir, tLower is (0 - P) / dir and tUpper is (1 - P) / dir. Lanes 0 and 1 hold child 0 and child 1; lanes 2 and 3 are zero filled and never reach the returned mask. dist[] receives tNear of lanes 0 and 1. */ +int ccl_device_inline bvh_unaligned_node_intersect(KernelGlobals *kg, + const float3 P, + const float3 dir, + const ssef& tnear, + const ssef& tfar, + const int nodeAddr, + const uint visibility, + float dist[2]) +{ + Transform space0 = bvh_unaligned_node_fetch_space(kg, nodeAddr, 0); + Transform space1 = bvh_unaligned_node_fetch_space(kg, nodeAddr, 1); + + float3 aligned_dir0 = transform_direction(&space0, dir), + aligned_dir1 = transform_direction(&space1, dir); + float3 aligned_P0 = transform_point(&space0, P), + aligned_P1 = transform_point(&space1, P); + float3 nrdir0 = -bvh_inverse_direction(aligned_dir0), + nrdir1 = -bvh_inverse_direction(aligned_dir1); + + ssef tLowerX = ssef(aligned_P0.x * nrdir0.x, + aligned_P1.x * nrdir1.x, + 0.0f, 0.0f), + tLowerY = ssef(aligned_P0.y * nrdir0.y, + aligned_P1.y * nrdir1.y, + 0.0f, + 0.0f), + tLowerZ = ssef(aligned_P0.z * nrdir0.z, + aligned_P1.z * nrdir1.z, + 0.0f, + 0.0f); + + ssef tUpperX = tLowerX - ssef(nrdir0.x, nrdir1.x, 0.0f, 0.0f), + tUpperY = 
tLowerY - ssef(nrdir0.y, nrdir1.y, 0.0f, 0.0f), + tUpperZ = tLowerZ - ssef(nrdir0.z, nrdir1.z, 0.0f, 0.0f); + + /* per axis, order the two slab distances into entry and exit */ ssef tnear_x = min(tLowerX, tUpperX); + ssef tnear_y = min(tLowerY, tUpperY); + ssef tnear_z = min(tLowerZ, tUpperZ); + ssef tfar_x = max(tLowerX, tUpperX); + ssef tfar_y = max(tLowerY, tUpperY); + ssef tfar_z = max(tLowerZ, tUpperZ); + + const ssef tNear = max4(tnear_x, tnear_y, tnear_z, tnear); + const ssef tFar = min4(tfar_x, tfar_y, tfar_z, tfar); + sseb vmask = tNear <= tFar; + dist[0] = tNear.f[0]; + dist[1] = tNear.f[1]; + + int mask = (int)movemask(vmask); + +# ifdef __VISIBILITY_FLAG__ + /* this visibility test gives a 5% performance hit, how to solve? */ + float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0); + int cmask = (((mask & 1) && (__float_as_uint(cnodes.x) & visibility))? 1: 0) | + (((mask & 2) && (__float_as_uint(cnodes.y) & visibility))? 2: 0); + return cmask; +# else + return mask & 3; +# endif +} + /* Variant of bvh_unaligned_node_intersect that, when difl != 0, relaxes the hit test to (1 - difl) * tNear <= (1 + difl) * tFar. Unlike the aligned robust test it takes no extmax parameter. */ +int ccl_device_inline bvh_unaligned_node_intersect_robust(KernelGlobals *kg, + const float3 P, + const float3 dir, + const ssef& tnear, + const ssef& tfar, + const float difl, + const int nodeAddr, + const uint visibility, + float dist[2]) +{ + Transform space0 = bvh_unaligned_node_fetch_space(kg, nodeAddr, 0); + Transform space1 = bvh_unaligned_node_fetch_space(kg, nodeAddr, 1); + + float3 aligned_dir0 = transform_direction(&space0, dir), + aligned_dir1 = transform_direction(&space1, dir); + float3 aligned_P0 = transform_point(&space0, P), + aligned_P1 = transform_point(&space1, P); + float3 nrdir0 = -bvh_inverse_direction(aligned_dir0), + nrdir1 = -bvh_inverse_direction(aligned_dir1); + + ssef tLowerX = ssef(aligned_P0.x * nrdir0.x, + aligned_P1.x * nrdir1.x, + 0.0f, 0.0f), + tLowerY = ssef(aligned_P0.y * nrdir0.y, + aligned_P1.y * nrdir1.y, + 0.0f, + 0.0f), + tLowerZ = ssef(aligned_P0.z * nrdir0.z, + aligned_P1.z * nrdir1.z, + 0.0f, + 0.0f); + + ssef tUpperX = tLowerX - ssef(nrdir0.x, nrdir1.x, 0.0f, 0.0f), + tUpperY = tLowerY - 
ssef(nrdir0.y, nrdir1.y, 0.0f, 0.0f), + tUpperZ = tLowerZ - ssef(nrdir0.z, nrdir1.z, 0.0f, 0.0f); + + ssef tnear_x = min(tLowerX, tUpperX); + ssef tnear_y = min(tLowerY, tUpperY); + ssef tnear_z = min(tLowerZ, tUpperZ); + ssef tfar_x = max(tLowerX, tUpperX); + ssef tfar_y = max(tLowerY, tUpperY); + ssef tfar_z = max(tLowerZ, tUpperZ); + + const ssef tNear = max4(tnear_x, tnear_y, tnear_z, tnear); + const ssef tFar = min4(tfar_x, tfar_y, tfar_z, tfar); + sseb vmask; + if(difl != 0.0f) { + /* scale the entry distance down and the exit distance up before comparing */ const float round_down = 1.0f - difl; + const float round_up = 1.0f + difl; + vmask = round_down*tNear <= round_up*tFar; + } + else { + vmask = tNear <= tFar; + } + + /* dist[] reports the unscaled entry distances */ dist[0] = tNear.f[0]; + dist[1] = tNear.f[1]; + + int mask = (int)movemask(vmask); + +# ifdef __VISIBILITY_FLAG__ + /* this visibility test gives a 5% performance hit, how to solve? */ + float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0); + int cmask = (((mask & 1) && (__float_as_uint(cnodes.x) & visibility))? 1: 0) | + (((mask & 2) && (__float_as_uint(cnodes.y) & visibility))? 
2: 0); + return cmask; +# else + return mask & 3; +# endif +} + /* SSE node test dispatcher. Fetches the node header at nodeAddr; when PATH_RAY_NODE_UNALIGNED is set in node.x the unaligned test is called with tnear and tfar, otherwise the aligned test is called with tsplat, Psplat, idirsplat and shufflexyz. Returns the result of the selected test unchanged. */ +ccl_device_inline int bvh_node_intersect(KernelGlobals *kg, + const float3& P, + const float3& dir, + const ssef& tnear, + const ssef& tfar, + const ssef& tsplat, + const ssef Psplat[3], + const ssef idirsplat[3], + const shuffle_swap_t shufflexyz[3], + const int nodeAddr, + const uint visibility, + float dist[2]) +{ + float4 node = kernel_tex_fetch(__bvh_nodes, nodeAddr); + if(__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) { + return bvh_unaligned_node_intersect(kg, + P, + dir, + tnear, + tfar, + nodeAddr, + visibility, + dist); + } + else { + return bvh_aligned_node_intersect(kg, + P, + dir, + tsplat, + Psplat, + idirsplat, + shufflexyz, + nodeAddr, + visibility, + dist); + } +} + /* Same dispatch as the SSE bvh_node_intersect, but calls the _robust tests. difl is forwarded to both; extmax is forwarded only to the aligned test. */ +ccl_device_inline int bvh_node_intersect_robust(KernelGlobals *kg, + const float3& P, + const float3& dir, + const ssef& tnear, + const ssef& tfar, + const ssef& tsplat, + const ssef Psplat[3], + const ssef idirsplat[3], + const shuffle_swap_t shufflexyz[3], + const float difl, + const float extmax, + const int nodeAddr, + const uint visibility, + float dist[2]) +{ + float4 node = kernel_tex_fetch(__bvh_nodes, nodeAddr); + if(__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) { + return bvh_unaligned_node_intersect_robust(kg, + P, + dir, + tnear, + tfar, + difl, + nodeAddr, + visibility, + dist); + } + else { + return bvh_aligned_node_intersect_robust(kg, + P, + dir, + tsplat, + Psplat, + idirsplat, + shufflexyz, + difl, + extmax, + nodeAddr, + visibility, + dist); + } +} +#endif /* !defined(__KERNEL_SSE2__) */ diff --git a/intern/cycles/kernel/bvh/bvh_shadow.h b/intern/cycles/kernel/bvh/bvh_shadow.h new file mode 100644 index 00000000000..02147d20fee --- /dev/null +++ b/intern/cycles/kernel/bvh/bvh_shadow.h @@ -0,0 +1,386 @@ +/* + * Adapted from code Copyright 2009-2010 NVIDIA Corporation, + * and code copyright 2009-2012 Intel Corporation + * + * Modifications Copyright 2011-2013, Blender Foundation. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifdef __QBVH__ +# include "qbvh_shadow.h" +#endif + +#if BVH_FEATURE(BVH_HAIR) +# define NODE_INTERSECT bvh_node_intersect +#else +# define NODE_INTERSECT bvh_aligned_node_intersect +#endif + +/* This is a template BVH traversal function, where various features can be + * enabled/disabled. This way we can compile optimized versions for each case + * without new features slowing things down. + * + * BVH_INSTANCING: object instancing + * BVH_HAIR: hair curve rendering + * BVH_MOTION: motion blur rendering + * Shadow-ray variant: hits are stored through isect_array and counted in *num_hits. Returns true as soon as a hit has no SD_HAS_TRANSPARENT_SHADOW shader flag, or a hit is found while *num_hits == max_hits; returns false when traversal ends without either. + */ + +ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg, + const Ray *ray, + Intersection *isect_array, + const uint max_hits, + uint *num_hits) +{ + /* todo: + * - likely and unlikely for if() statements + * - test restrict attribute for pointers + */ + + /* traversal stack in CUDA thread-local memory */ + int traversalStack[BVH_STACK_SIZE]; + traversalStack[0] = ENTRYPOINT_SENTINEL; + + /* traversal variables in registers */ + int stackPtr = 0; + int nodeAddr = kernel_data.bvh.root; + + /* ray parameters in registers */ + const float tmax = ray->t; + float3 P = ray->P; + float3 dir = bvh_clamp_direction(ray->D); + float3 idir = bvh_inverse_direction(dir); + int object = OBJECT_NONE; + float isect_t = tmax; + +#if BVH_FEATURE(BVH_MOTION) + Transform ob_itfm; +#endif + +#if BVH_FEATURE(BVH_INSTANCING) + /* hits recorded since the last instance push; used at instance pop to rescale their t */ int num_hits_in_instance = 0; +#endif + + /* isect_array is used as a cursor: it is advanced after every recorded hit and the t of the slot it points at is preset to the current ray limit */ *num_hits = 0; + isect_array->t = tmax; 
+ +#if defined(__KERNEL_SSE2__) + const shuffle_swap_t shuf_identity = shuffle_swap_identity(); + const shuffle_swap_t shuf_swap = shuffle_swap_swap(); + + const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000)); + ssef Psplat[3], idirsplat[3]; +# if BVH_FEATURE(BVH_HAIR) + ssef tnear(0.0f), tfar(isect_t); +# endif + shuffle_swap_t shufflexyz[3]; + + Psplat[0] = ssef(P.x); + Psplat[1] = ssef(P.y); + Psplat[2] = ssef(P.z); + + /* near bound 0 in the first two lanes, negated far bound in the last two */ ssef tsplat(0.0f, 0.0f, -isect_t, -isect_t); + + gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); +#endif /* __KERNEL_SSE2__ */ + + IsectPrecalc isect_precalc; + triangle_intersect_precalc(dir, &isect_precalc); + + /* traversal loop */ + do { + do { + /* traverse internal nodes */ + while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) { + int nodeAddrChild1, traverse_mask; + float dist[2]; + float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0); + +#if !defined(__KERNEL_SSE2__) + traverse_mask = NODE_INTERSECT(kg, + P, +# if BVH_FEATURE(BVH_HAIR) + dir, +# endif + idir, + isect_t, + nodeAddr, + PATH_RAY_SHADOW, + dist); +#else // __KERNEL_SSE2__ + traverse_mask = NODE_INTERSECT(kg, + P, + dir, +# if BVH_FEATURE(BVH_HAIR) + tnear, + tfar, +# endif + tsplat, + Psplat, + idirsplat, + shufflexyz, + nodeAddr, + PATH_RAY_SHADOW, + dist); +#endif // __KERNEL_SSE2__ + + /* cnodes.z and cnodes.w hold the addresses of child 0 and child 1 */ nodeAddr = __float_as_int(cnodes.z); + nodeAddrChild1 = __float_as_int(cnodes.w); + + if(traverse_mask == 3) { + /* Both children were intersected, push the farther one. */ + bool closestChild1 = (dist[1] < dist[0]); + + if(closestChild1) { + int tmp = nodeAddr; + nodeAddr = nodeAddrChild1; + nodeAddrChild1 = tmp; + } + + ++stackPtr; + kernel_assert(stackPtr < BVH_STACK_SIZE); + traversalStack[stackPtr] = nodeAddrChild1; + } + else { + /* One child was intersected. For mask 1 nodeAddr already holds child 0. */ + if(traverse_mask == 2) { + nodeAddr = nodeAddrChild1; + } + else if(traverse_mask == 0) { + /* Neither child was intersected. 
*/ + nodeAddr = traversalStack[stackPtr]; + --stackPtr; + } + } + } + + /* if node is leaf, fetch triangle list */ + if(nodeAddr < 0) { + /* leaf.x and leaf.y are the first and one-past-last primitive index and leaf.w the primitive type; with BVH_INSTANCING a negative leaf.x selects the instance branch further down */ float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)); + int primAddr = __float_as_int(leaf.x); + +#if BVH_FEATURE(BVH_INSTANCING) + if(primAddr >= 0) { +#endif + const int primAddr2 = __float_as_int(leaf.y); + const uint type = __float_as_int(leaf.w); + const uint p_type = type & PRIMITIVE_ALL; + + /* pop */ + nodeAddr = traversalStack[stackPtr]; + --stackPtr; + + /* primitive intersection */ + while(primAddr < primAddr2) { + kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); + + bool hit; + + /* todo: specialized intersect functions which don't fill in + * isect unless needed and check SD_HAS_TRANSPARENT_SHADOW? + * might give a few % performance improvement */ + + switch(p_type) { + case PRIMITIVE_TRIANGLE: { + hit = triangle_intersect(kg, &isect_precalc, isect_array, P, PATH_RAY_SHADOW, object, primAddr); + break; + } +#if BVH_FEATURE(BVH_MOTION) + case PRIMITIVE_MOTION_TRIANGLE: { + hit = motion_triangle_intersect(kg, isect_array, P, dir, ray->time, PATH_RAY_SHADOW, object, primAddr); + break; + } +#endif +#if BVH_FEATURE(BVH_HAIR) + case PRIMITIVE_CURVE: + case PRIMITIVE_MOTION_CURVE: { + /* the trailing NULL, 0, 0 fill the last three arguments of the curve tests; NOTE(review): presumably lcg_state, difl and extmax - confirm against their declarations */ if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) + hit = bvh_cardinal_curve_intersect(kg, isect_array, P, dir, PATH_RAY_SHADOW, object, primAddr, ray->time, type, NULL, 0, 0); + else + hit = bvh_curve_intersect(kg, isect_array, P, dir, PATH_RAY_SHADOW, object, primAddr, ray->time, type, NULL, 0, 0); + break; + } +#endif + default: { + hit = false; + break; + } + } + + /* shadow ray early termination */ + if(hit) { + /* detect if this surface has a shader with transparent shadows */ + + /* todo: optimize so primitive visibility flag indicates if + * the primitive has a transparent shadow shader? 
*/ + int prim = kernel_tex_fetch(__prim_index, isect_array->prim); + int shader = 0; + +#ifdef __HAIR__ + if(kernel_tex_fetch(__prim_type, isect_array->prim) & PRIMITIVE_ALL_TRIANGLE) +#endif + { + shader = kernel_tex_fetch(__tri_shader, prim); + } +#ifdef __HAIR__ + else { + /* for non-triangle primitives the shader index is read from the z component of the __curves entry */ float4 str = kernel_tex_fetch(__curves, prim); + shader = __float_as_int(str.z); + } +#endif + int flag = kernel_tex_fetch(__shader_flag, (shader & SHADER_MASK)*2); + + /* if no transparent shadows, all light is blocked */ + if(!(flag & SD_HAS_TRANSPARENT_SHADOW)) { + return true; + } + /* if maximum number of hits reached, block all light */ + else if(*num_hits == max_hits) { + return true; + } + + /* move on to next entry in intersections array */ + /* NOTE(review): after the max_hits-th recorded hit the cursor points at slot index max_hits and its t is written below, so the caller array has to hold at least max_hits + 1 entries - confirm at the call site. */ isect_array++; + (*num_hits)++; +#if BVH_FEATURE(BVH_INSTANCING) + num_hits_in_instance++; +#endif + + isect_array->t = isect_t; + } + + primAddr++; + } + } +#if BVH_FEATURE(BVH_INSTANCING) + else { + /* instance push: the object index is encoded in the negative leaf.x as -primAddr-1 */ + object = kernel_tex_fetch(__prim_object, -primAddr-1); + +# if BVH_FEATURE(BVH_MOTION) + bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_itfm); +# else + bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect_t); +# endif + + /* P, dir and isect_t were handed to the push by pointer, so everything derived from them is rebuilt */ triangle_intersect_precalc(dir, &isect_precalc); + num_hits_in_instance = 0; + isect_array->t = isect_t; + +# if defined(__KERNEL_SSE2__) + Psplat[0] = ssef(P.x); + Psplat[1] = ssef(P.y); + Psplat[2] = ssef(P.z); + + tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t); +# if BVH_FEATURE(BVH_HAIR) + tfar = ssef(isect_t); +# endif + gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); +# endif + + /* the sentinel pushed here ends the inner loop once the instance subtree is exhausted */ ++stackPtr; + kernel_assert(stackPtr < BVH_STACK_SIZE); + traversalStack[stackPtr] = ENTRYPOINT_SENTINEL; + + nodeAddr = kernel_tex_fetch(__object_node, object); + } + } +#endif /* BVH_FEATURE(BVH_INSTANCING) */ + } while(nodeAddr != ENTRYPOINT_SENTINEL); + +#if BVH_FEATURE(BVH_INSTANCING) + /* stackPtr is still >= 0 only when the sentinel just popped was pushed by an instance; popping the root sentinel leaves it at -1 */ if(stackPtr >= 0) { + kernel_assert(object != OBJECT_NONE); + + 
if(num_hits_in_instance) { + float t_fac; + +# if BVH_FEATURE(BVH_MOTION) + bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm); +# else + bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac); +# endif + + triangle_intersect_precalc(dir, &isect_precalc); + + /* scale isect->t to adjust for instancing; the hits recorded inside the instance are the num_hits_in_instance slots directly below the cursor */ + for(int i = 0; i < num_hits_in_instance; i++) + (isect_array-i-1)->t *= t_fac; + } + else { + /* nothing was recorded in this instance: the t written by the pop goes to a scratch variable */ float ignore_t = FLT_MAX; + +# if BVH_FEATURE(BVH_MOTION) + bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &ignore_t, &ob_itfm); +# else + bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &ignore_t); +# endif + triangle_intersect_precalc(dir, &isect_precalc); + } + + /* the ray limit is reset to the original ray->t after leaving the instance */ isect_t = tmax; + isect_array->t = isect_t; + +# if defined(__KERNEL_SSE2__) + Psplat[0] = ssef(P.x); + Psplat[1] = ssef(P.y); + Psplat[2] = ssef(P.z); + + tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t); +# if BVH_FEATURE(BVH_HAIR) + tfar = ssef(isect_t); +# endif + gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); +# endif + + object = OBJECT_NONE; + nodeAddr = traversalStack[stackPtr]; + --stackPtr; + } +#endif /* BVH_FEATURE(BVH_INSTANCING) */ + } while(nodeAddr != ENTRYPOINT_SENTINEL); + + return false; +} + /* Entry point: calls the QBVH traversal when __QBVH__ is compiled in and kernel_data.bvh.use_qbvh is set, otherwise the BVH traversal above. All arguments are forwarded unchanged. */ +ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg, + const Ray *ray, + Intersection *isect_array, + const uint max_hits, + uint *num_hits) +{ +#ifdef __QBVH__ + if(kernel_data.bvh.use_qbvh) { + return BVH_FUNCTION_FULL_NAME(QBVH)(kg, + ray, + isect_array, + max_hits, + num_hits); + } + else +#endif + { + kernel_assert(kernel_data.bvh.use_qbvh == false); + return BVH_FUNCTION_FULL_NAME(BVH)(kg, + ray, + isect_array, + max_hits, + num_hits); + } +} + /* the template macros are undefined again at the end of the file */ +#undef BVH_FUNCTION_NAME +#undef BVH_FUNCTION_FEATURES +#undef NODE_INTERSECT diff --git a/intern/cycles/kernel/bvh/bvh_subsurface.h b/intern/cycles/kernel/bvh/bvh_subsurface.h new file mode 100644 index 00000000000..7121c5791df --- /dev/null +++ 
b/intern/cycles/kernel/bvh/bvh_subsurface.h @@ -0,0 +1,266 @@ +/* + * Adapted from code Copyright 2009-2010 NVIDIA Corporation, + * and code copyright 2009-2012 Intel Corporation + * + * Modifications Copyright 2011-2013, Blender Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifdef __QBVH__ +# include "qbvh_subsurface.h" +#endif + +#if BVH_FEATURE(BVH_HAIR) +# define NODE_INTERSECT bvh_node_intersect +#else +# define NODE_INTERSECT bvh_aligned_node_intersect +#endif + +/* This is a template BVH traversal function for subsurface scattering, where + * various features can be enabled/disabled. This way we can compile optimized + * versions for each case without new features slowing things down. 
+ * + * BVH_MOTION: motion blur rendering + * Traversal starts at the __object_node entry of subsurface_object. Only PRIMITIVE_TRIANGLE and, with BVH_MOTION, PRIMITIVE_MOTION_TRIANGLE leaves are tested; results are reported through ss_isect and the function returns nothing. + */ + +ccl_device void BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg, + const Ray *ray, + SubsurfaceIntersection *ss_isect, + int subsurface_object, + uint *lcg_state, + int max_hits) +{ + /* todo: + * - test if pushing distance on the stack helps (for non shadow rays) + * - separate version for shadow rays + * - likely and unlikely for if() statements + * - test restrict attribute for pointers + */ + + /* traversal stack in CUDA thread-local memory */ + int traversalStack[BVH_STACK_SIZE]; + traversalStack[0] = ENTRYPOINT_SENTINEL; + + /* traversal variables in registers */ + int stackPtr = 0; + int nodeAddr = kernel_tex_fetch(__object_node, subsurface_object); + + /* ray parameters in registers */ + float3 P = ray->P; + float3 dir = bvh_clamp_direction(ray->D); + float3 idir = bvh_inverse_direction(dir); + int object = OBJECT_NONE; + float isect_t = ray->t; + + ss_isect->num_hits = 0; + + /* When SD_TRANSFORM_APPLIED is not set, the instance push is done once up front (presumably moving the ray into the space of subsurface_object) and object is set to it; otherwise object stays OBJECT_NONE. */ const int object_flag = kernel_tex_fetch(__object_flag, subsurface_object); + if(!(object_flag & SD_TRANSFORM_APPLIED)) { +#if BVH_FEATURE(BVH_MOTION) + Transform ob_itfm; + bvh_instance_motion_push(kg, + subsurface_object, + ray, + &P, + &dir, + &idir, + &isect_t, + &ob_itfm); +#else + bvh_instance_push(kg, subsurface_object, ray, &P, &dir, &idir, &isect_t); +#endif + object = subsurface_object; + } + +#if defined(__KERNEL_SSE2__) + const shuffle_swap_t shuf_identity = shuffle_swap_identity(); + const shuffle_swap_t shuf_swap = shuffle_swap_swap(); + + const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000)); + ssef Psplat[3], idirsplat[3]; +# if BVH_FEATURE(BVH_HAIR) + ssef tnear(0.0f), tfar(isect_t); +# endif + shuffle_swap_t shufflexyz[3]; + + Psplat[0] = ssef(P.x); + Psplat[1] = ssef(P.y); + Psplat[2] = ssef(P.z); + + /* built once: isect_t is passed by value to the intersect calls below and never reassigned in this function */ ssef tsplat(0.0f, 0.0f, -isect_t, -isect_t); + + gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); +#endif + + IsectPrecalc isect_precalc; + triangle_intersect_precalc(dir, 
&isect_precalc); + + /* traversal loop */ + do { + do { + /* traverse internal nodes */ + while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) { + int nodeAddrChild1, traverse_mask; + float dist[2]; + float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0); + +#if !defined(__KERNEL_SSE2__) + traverse_mask = NODE_INTERSECT(kg, + P, +# if BVH_FEATURE(BVH_HAIR) + dir, +# endif + idir, + isect_t, + nodeAddr, + PATH_RAY_ALL_VISIBILITY, + dist); +#else // __KERNEL_SSE2__ + traverse_mask = NODE_INTERSECT(kg, + P, + dir, +# if BVH_FEATURE(BVH_HAIR) + tnear, + tfar, +# endif + tsplat, + Psplat, + idirsplat, + shufflexyz, + nodeAddr, + PATH_RAY_ALL_VISIBILITY, + dist); +#endif // __KERNEL_SSE2__ + + /* cnodes.z and cnodes.w hold the addresses of child 0 and child 1 */ nodeAddr = __float_as_int(cnodes.z); + nodeAddrChild1 = __float_as_int(cnodes.w); + + if(traverse_mask == 3) { + /* Both children were intersected, push the farther one. */ + bool closestChild1 = (dist[1] < dist[0]); + + if(closestChild1) { + int tmp = nodeAddr; + nodeAddr = nodeAddrChild1; + nodeAddrChild1 = tmp; + } + + ++stackPtr; + kernel_assert(stackPtr < BVH_STACK_SIZE); + traversalStack[stackPtr] = nodeAddrChild1; + } + else { + /* One child was intersected. For mask 1 nodeAddr already holds child 0. */ + if(traverse_mask == 2) { + nodeAddr = nodeAddrChild1; + } + else if(traverse_mask == 0) { + /* Neither child was intersected. 
*/ + nodeAddr = traversalStack[stackPtr]; + --stackPtr; + } + } + } + + /* if node is leaf, fetch triangle list */ + if(nodeAddr < 0) { + /* leaf.x is used as the first primitive index without a sign check; leaf.y is one past the last index and leaf.w the primitive type */ float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)); + int primAddr = __float_as_int(leaf.x); + + const int primAddr2 = __float_as_int(leaf.y); + const uint type = __float_as_int(leaf.w); + + /* pop */ + nodeAddr = traversalStack[stackPtr]; + --stackPtr; + + /* primitive intersection */ + switch(type & PRIMITIVE_ALL) { + case PRIMITIVE_TRIANGLE: { + /* intersect ray against primitive */ + for(; primAddr < primAddr2; primAddr++) { + kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); + triangle_intersect_subsurface(kg, + &isect_precalc, + ss_isect, + P, + object, + primAddr, + isect_t, + lcg_state, + max_hits); + } + break; + } +#if BVH_FEATURE(BVH_MOTION) + case PRIMITIVE_MOTION_TRIANGLE: { + /* intersect ray against primitive */ + for(; primAddr < primAddr2; primAddr++) { + kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); + motion_triangle_intersect_subsurface(kg, + ss_isect, + P, + dir, + ray->time, + object, + primAddr, + isect_t, + lcg_state, + max_hits); + } + break; + } +#endif + default: { + /* every other primitive type is skipped */ break; + } + } + } + } while(nodeAddr != ENTRYPOINT_SENTINEL); + /* nothing happens between the two loop ends in this variant, so both terminate on the same sentinel */ } while(nodeAddr != ENTRYPOINT_SENTINEL); +} + /* Entry point: calls the QBVH traversal when __QBVH__ is compiled in and kernel_data.bvh.use_qbvh is set, otherwise the BVH traversal above. Both callees return void; the return statements only forward the call. */ +ccl_device_inline void BVH_FUNCTION_NAME(KernelGlobals *kg, + const Ray *ray, + SubsurfaceIntersection *ss_isect, + int subsurface_object, + uint *lcg_state, + int max_hits) +{ +#ifdef __QBVH__ + if(kernel_data.bvh.use_qbvh) { + return BVH_FUNCTION_FULL_NAME(QBVH)(kg, + ray, + ss_isect, + subsurface_object, + lcg_state, + max_hits); + } + else +#endif + { + kernel_assert(kernel_data.bvh.use_qbvh == false); + return BVH_FUNCTION_FULL_NAME(BVH)(kg, + ray, + ss_isect, + subsurface_object, + lcg_state, + max_hits); + } +} + /* the template macros are undefined again at the end of the file */ +#undef BVH_FUNCTION_NAME +#undef BVH_FUNCTION_FEATURES +#undef NODE_INTERSECT diff --git a/intern/cycles/kernel/bvh/bvh_traversal.h 
b/intern/cycles/kernel/bvh/bvh_traversal.h new file mode 100644 index 00000000000..36c3398335c --- /dev/null +++ b/intern/cycles/kernel/bvh/bvh_traversal.h @@ -0,0 +1,428 @@ +/* + * Adapted from code Copyright 2009-2010 NVIDIA Corporation, + * and code copyright 2009-2012 Intel Corporation + * + * Modifications Copyright 2011-2013, Blender Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifdef __QBVH__ +# include "qbvh_traversal.h" +#endif + +#if BVH_FEATURE(BVH_HAIR) +# define NODE_INTERSECT bvh_node_intersect +# define NODE_INTERSECT_ROBUST bvh_node_intersect_robust +#else +# define NODE_INTERSECT bvh_aligned_node_intersect +# define NODE_INTERSECT_ROBUST bvh_aligned_node_intersect_robust +#endif + +/* This is a template BVH traversal function, where various features can be + * enabled/disabled. This way we can compile optimized versions for each case + * without new features slowing things down. 
+ * + * BVH_INSTANCING: object instancing + * BVH_HAIR: hair curve rendering + * BVH_HAIR_MINIMUM_WIDTH: hair curve rendering with minimum width + * BVH_MOTION: motion blur rendering + * Returns true when isect->prim ends up different from PRIM_NONE, and returns true immediately on the first hit when visibility == PATH_RAY_SHADOW_OPAQUE. isect->t doubles as the current ray limit during traversal. + */ + +ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg, + const Ray *ray, + Intersection *isect, + const uint visibility +#if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH) + , uint *lcg_state, + float difl, + float extmax +#endif + ) +{ + /* todo: + * - test if pushing distance on the stack helps (for non shadow rays) + * - separate version for shadow rays + * - likely and unlikely for if() statements + * - test restrict attribute for pointers + */ + + /* traversal stack in CUDA thread-local memory */ + int traversalStack[BVH_STACK_SIZE]; + traversalStack[0] = ENTRYPOINT_SENTINEL; + + /* traversal variables in registers */ + int stackPtr = 0; + int nodeAddr = kernel_data.bvh.root; + + /* ray parameters in registers */ + float3 P = ray->P; + float3 dir = bvh_clamp_direction(ray->D); + float3 idir = bvh_inverse_direction(dir); + int object = OBJECT_NONE; + +#if BVH_FEATURE(BVH_MOTION) + Transform ob_itfm; +#endif + + /* start from the no-hit state: ray limit ray->t, prim and object set to their NONE values */ isect->t = ray->t; + isect->u = 0.0f; + isect->v = 0.0f; + isect->prim = PRIM_NONE; + isect->object = OBJECT_NONE; + + BVH_DEBUG_INIT(); + +#if defined(__KERNEL_SSE2__) + const shuffle_swap_t shuf_identity = shuffle_swap_identity(); + const shuffle_swap_t shuf_swap = shuffle_swap_swap(); + + const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000)); + ssef Psplat[3], idirsplat[3]; +# if BVH_FEATURE(BVH_HAIR) + ssef tnear(0.0f), tfar(isect->t); +# endif + shuffle_swap_t shufflexyz[3]; + + Psplat[0] = ssef(P.x); + Psplat[1] = ssef(P.y); + Psplat[2] = ssef(P.z); + + /* near bound 0 in the first two lanes, negated far bound in the last two */ ssef tsplat(0.0f, 0.0f, -isect->t, -isect->t); + + gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); +#endif + + IsectPrecalc isect_precalc; + triangle_intersect_precalc(dir, &isect_precalc); + + /* traversal loop */ + do { + do { + /* traverse internal nodes */ + while(nodeAddr 
>= 0 && nodeAddr != ENTRYPOINT_SENTINEL) { + int nodeAddrChild1, traverse_mask; + float dist[2]; + float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0); + +#if !defined(__KERNEL_SSE2__) +# if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH) + if(difl != 0.0f) { + traverse_mask = NODE_INTERSECT_ROBUST(kg, + P, +# if BVH_FEATURE(BVH_HAIR) + dir, +# endif + idir, + isect->t, + difl, + extmax, + nodeAddr, + visibility, + dist); + } + else +# endif + { + traverse_mask = NODE_INTERSECT(kg, + P, +# if BVH_FEATURE(BVH_HAIR) + dir, +# endif + idir, + isect->t, + nodeAddr, + visibility, + dist); + } +#else // __KERNEL_SSE2__ +# if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH) + if(difl != 0.0f) { + traverse_mask = NODE_INTERSECT_ROBUST(kg, + P, + dir, +# if BVH_FEATURE(BVH_HAIR) + tnear, + tfar, +# endif + tsplat, + Psplat, + idirsplat, + shufflexyz, + difl, + extmax, + nodeAddr, + visibility, + dist); + } + else +# endif + { + traverse_mask = NODE_INTERSECT(kg, + P, + dir, +# if BVH_FEATURE(BVH_HAIR) + tnear, + tfar, +# endif + tsplat, + Psplat, + idirsplat, + shufflexyz, + nodeAddr, + visibility, + dist); + } +#endif // __KERNEL_SSE2__ + + /* cnodes.z and cnodes.w hold the addresses of child 0 and child 1 */ nodeAddr = __float_as_int(cnodes.z); + nodeAddrChild1 = __float_as_int(cnodes.w); + + if(traverse_mask == 3) { + /* Both children were intersected, push the farther one. */ + bool closestChild1 = (dist[1] < dist[0]); + + if(closestChild1) { + int tmp = nodeAddr; + nodeAddr = nodeAddrChild1; + nodeAddrChild1 = tmp; + } + + ++stackPtr; + kernel_assert(stackPtr < BVH_STACK_SIZE); + traversalStack[stackPtr] = nodeAddrChild1; + } + else { + /* One child was intersected. For mask 1 nodeAddr already holds child 0. */ + if(traverse_mask == 2) { + nodeAddr = nodeAddrChild1; + } + else if(traverse_mask == 0) { + /* Neither child was intersected. 
*/ + nodeAddr = traversalStack[stackPtr]; + --stackPtr; + } + } + BVH_DEBUG_NEXT_STEP(); + } + + /* if node is leaf, fetch triangle list */ + if(nodeAddr < 0) { + /* leaf.x and leaf.y are the first and one-past-last primitive index and leaf.w the primitive type; with BVH_INSTANCING a negative leaf.x selects the instance branch further down */ float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)); + int primAddr = __float_as_int(leaf.x); + +#if BVH_FEATURE(BVH_INSTANCING) + if(primAddr >= 0) { +#endif + const int primAddr2 = __float_as_int(leaf.y); + const uint type = __float_as_int(leaf.w); + + /* pop */ + nodeAddr = traversalStack[stackPtr]; + --stackPtr; + + /* primitive intersection */ + switch(type & PRIMITIVE_ALL) { + case PRIMITIVE_TRIANGLE: { + for(; primAddr < primAddr2; primAddr++) { + BVH_DEBUG_NEXT_STEP(); + kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); + if(triangle_intersect(kg, &isect_precalc, isect, P, visibility, object, primAddr)) { + /* shadow ray early termination; otherwise the SSE far bounds are refreshed from isect->t */ +#if defined(__KERNEL_SSE2__) + if(visibility == PATH_RAY_SHADOW_OPAQUE) + return true; + tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t); +# if BVH_FEATURE(BVH_HAIR) + tfar = ssef(isect->t); +# endif +#else + if(visibility == PATH_RAY_SHADOW_OPAQUE) + return true; +#endif + } + } + break; + } +#if BVH_FEATURE(BVH_MOTION) + case PRIMITIVE_MOTION_TRIANGLE: { + for(; primAddr < primAddr2; primAddr++) { + BVH_DEBUG_NEXT_STEP(); + kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); + if(motion_triangle_intersect(kg, isect, P, dir, ray->time, visibility, object, primAddr)) { + /* shadow ray early termination; otherwise the SSE far bounds are refreshed from isect->t */ +# if defined(__KERNEL_SSE2__) + if(visibility == PATH_RAY_SHADOW_OPAQUE) + return true; + tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t); +# if BVH_FEATURE(BVH_HAIR) + tfar = ssef(isect->t); +# endif +# else + if(visibility == PATH_RAY_SHADOW_OPAQUE) + return true; +# endif + } + } + break; + } +#endif /* BVH_FEATURE(BVH_MOTION) */ +#if BVH_FEATURE(BVH_HAIR) + case PRIMITIVE_CURVE: + case PRIMITIVE_MOTION_CURVE: { + for(; primAddr < primAddr2; primAddr++) { + BVH_DEBUG_NEXT_STEP(); + 
kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); + /* NOTE(review): lcg_state, difl and extmax are declared only under BVH_HAIR_MINIMUM_WIDTH while these calls sit under BVH_HAIR - presumably the two features are always enabled together; confirm. */ bool hit; + if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) + hit = bvh_cardinal_curve_intersect(kg, isect, P, dir, visibility, object, primAddr, ray->time, type, lcg_state, difl, extmax); + else + hit = bvh_curve_intersect(kg, isect, P, dir, visibility, object, primAddr, ray->time, type, lcg_state, difl, extmax); + if(hit) { + /* shadow ray early termination; otherwise the SSE far bounds are refreshed from isect->t */ +# if defined(__KERNEL_SSE2__) + if(visibility == PATH_RAY_SHADOW_OPAQUE) + return true; + tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t); +# if BVH_FEATURE(BVH_HAIR) + tfar = ssef(isect->t); +# endif +# else + if(visibility == PATH_RAY_SHADOW_OPAQUE) + return true; +# endif + } + } + break; + } +#endif /* BVH_FEATURE(BVH_HAIR) */ + } + } +#if BVH_FEATURE(BVH_INSTANCING) + else { + /* instance push: the object index is encoded in the negative leaf.x as -primAddr-1 */ + object = kernel_tex_fetch(__prim_object, -primAddr-1); + +# if BVH_FEATURE(BVH_MOTION) + bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm); +# else + bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t); +# endif + /* P, dir and isect->t were handed to the push by pointer, so everything derived from them is rebuilt */ triangle_intersect_precalc(dir, &isect_precalc); + +# if defined(__KERNEL_SSE2__) + Psplat[0] = ssef(P.x); + Psplat[1] = ssef(P.y); + Psplat[2] = ssef(P.z); + + tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t); +# if BVH_FEATURE(BVH_HAIR) + tfar = ssef(isect->t); +# endif + + gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); +# endif + + /* the sentinel pushed here ends the inner loop once the instance subtree is exhausted */ ++stackPtr; + kernel_assert(stackPtr < BVH_STACK_SIZE); + traversalStack[stackPtr] = ENTRYPOINT_SENTINEL; + + nodeAddr = kernel_tex_fetch(__object_node, object); + + BVH_DEBUG_NEXT_INSTANCE(); + } + } +#endif /* BVH_FEATURE(BVH_INSTANCING) */ + } while(nodeAddr != ENTRYPOINT_SENTINEL); + +#if BVH_FEATURE(BVH_INSTANCING) + /* stackPtr is still >= 0 only when the sentinel just popped was pushed by an instance; popping the root sentinel leaves it at -1 */ if(stackPtr >= 0) { + kernel_assert(object != OBJECT_NONE); + + /* instance pop */ +# if BVH_FEATURE(BVH_MOTION) + bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm); +# else + 
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect->t); +# endif + /* same rebuild of the derived ray data as after the push */ triangle_intersect_precalc(dir, &isect_precalc); + +# if defined(__KERNEL_SSE2__) + Psplat[0] = ssef(P.x); + Psplat[1] = ssef(P.y); + Psplat[2] = ssef(P.z); + + tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t); +# if BVH_FEATURE(BVH_HAIR) + tfar = ssef(isect->t); +# endif + + gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); +# endif + + object = OBJECT_NONE; + nodeAddr = traversalStack[stackPtr]; + --stackPtr; + } +#endif /* BVH_FEATURE(BVH_INSTANCING) */ + } while(nodeAddr != ENTRYPOINT_SENTINEL); + + return (isect->prim != PRIM_NONE); +} + /* Entry point: calls the QBVH traversal when __QBVH__ is compiled in and kernel_data.bvh.use_qbvh is set, otherwise the BVH traversal above. lcg_state, difl and extmax exist and are forwarded only when BVH_HAIR_MINIMUM_WIDTH is enabled. */ +ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg, + const Ray *ray, + Intersection *isect, + const uint visibility +#if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH) + , uint *lcg_state, + float difl, + float extmax +#endif + ) +{ +#ifdef __QBVH__ + if(kernel_data.bvh.use_qbvh) { + return BVH_FUNCTION_FULL_NAME(QBVH)(kg, + ray, + isect, + visibility +#if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH) + , lcg_state, + difl, + extmax +#endif + ); + } + else +#endif + { + kernel_assert(kernel_data.bvh.use_qbvh == false); + return BVH_FUNCTION_FULL_NAME(BVH)(kg, + ray, + isect, + visibility +#if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH) + , lcg_state, + difl, + extmax +#endif + ); + } +} + /* the template macros are undefined again at the end of the file */ +#undef BVH_FUNCTION_NAME +#undef BVH_FUNCTION_FEATURES +#undef NODE_INTERSECT +#undef NODE_INTERSECT_ROBUST diff --git a/intern/cycles/kernel/bvh/bvh_volume.h b/intern/cycles/kernel/bvh/bvh_volume.h new file mode 100644 index 00000000000..8b44c66f7aa --- /dev/null +++ b/intern/cycles/kernel/bvh/bvh_volume.h @@ -0,0 +1,324 @@ +/* + * Adapted from code Copyright 2009-2010 NVIDIA Corporation, + * and code copyright 2009-2012 Intel Corporation + * + * Modifications Copyright 2011-2014, Blender Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifdef __QBVH__ +# include "qbvh_volume.h" +#endif + +#if BVH_FEATURE(BVH_HAIR) +# define NODE_INTERSECT bvh_node_intersect +#else +# define NODE_INTERSECT bvh_aligned_node_intersect +#endif + +/* This is a template BVH traversal function for volumes, where + * various features can be enabled/disabled. This way we can compile optimized + * versions for each case without new features slowing things down. + * + * BVH_INSTANCING: object instancing + * BVH_MOTION: motion blur rendering + * + */ + +ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg, + const Ray *ray, + Intersection *isect, + const uint visibility) +{ + /* todo: + * - test if pushing distance on the stack helps (for non shadow rays) + * - separate version for shadow rays + * - likely and unlikely for if() statements + * - test restrict attribute for pointers + */ + + /* traversal stack in CUDA thread-local memory */ + int traversalStack[BVH_STACK_SIZE]; + traversalStack[0] = ENTRYPOINT_SENTINEL; + + /* traversal variables in registers */ + int stackPtr = 0; + int nodeAddr = kernel_data.bvh.root; + + /* ray parameters in registers */ + float3 P = ray->P; + float3 dir = bvh_clamp_direction(ray->D); + float3 idir = bvh_inverse_direction(dir); + int object = OBJECT_NONE; + +#if BVH_FEATURE(BVH_MOTION) + Transform ob_itfm; +#endif + + isect->t = ray->t; + isect->u = 0.0f; + isect->v = 0.0f; + isect->prim = PRIM_NONE; + isect->object = OBJECT_NONE; + +#if defined(__KERNEL_SSE2__) + const shuffle_swap_t shuf_identity = shuffle_swap_identity(); + const shuffle_swap_t shuf_swap 
= shuffle_swap_swap(); + + const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000)); + ssef Psplat[3], idirsplat[3]; +# if BVH_FEATURE(BVH_HAIR) + ssef tnear(0.0f), tfar(isect->t); +# endif + shuffle_swap_t shufflexyz[3]; + + Psplat[0] = ssef(P.x); + Psplat[1] = ssef(P.y); + Psplat[2] = ssef(P.z); + + ssef tsplat(0.0f, 0.0f, -isect->t, -isect->t); + + gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); +#endif + + IsectPrecalc isect_precalc; + triangle_intersect_precalc(dir, &isect_precalc); + + /* traversal loop */ + do { + do { + /* traverse internal nodes */ + while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) { + int nodeAddrChild1, traverse_mask; + float dist[2]; + float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0); + +#if !defined(__KERNEL_SSE2__) + traverse_mask = NODE_INTERSECT(kg, + P, +# if BVH_FEATURE(BVH_HAIR) + dir, +# endif + idir, + isect->t, + nodeAddr, + visibility, + dist); +#else // __KERNEL_SSE2__ + traverse_mask = NODE_INTERSECT(kg, + P, + dir, +# if BVH_FEATURE(BVH_HAIR) + tnear, + tfar, +# endif + tsplat, + Psplat, + idirsplat, + shufflexyz, + nodeAddr, + visibility, + dist); +#endif // __KERNEL_SSE2__ + + nodeAddr = __float_as_int(cnodes.z); + nodeAddrChild1 = __float_as_int(cnodes.w); + + if(traverse_mask == 3) { + /* Both children were intersected, push the farther one. */ + bool closestChild1 = (dist[1] < dist[0]); + + if(closestChild1) { + int tmp = nodeAddr; + nodeAddr = nodeAddrChild1; + nodeAddrChild1 = tmp; + } + + ++stackPtr; + kernel_assert(stackPtr < BVH_STACK_SIZE); + traversalStack[stackPtr] = nodeAddrChild1; + } + else { + /* One child was intersected. */ + if(traverse_mask == 2) { + nodeAddr = nodeAddrChild1; + } + else if(traverse_mask == 0) { + /* Neither child was intersected. 
*/ + nodeAddr = traversalStack[stackPtr]; + --stackPtr; + } + } + } + + /* if node is leaf, fetch triangle list */ + if(nodeAddr < 0) { + float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)); + int primAddr = __float_as_int(leaf.x); + +#if BVH_FEATURE(BVH_INSTANCING) + if(primAddr >= 0) { +#endif + const int primAddr2 = __float_as_int(leaf.y); + const uint type = __float_as_int(leaf.w); + + /* pop */ + nodeAddr = traversalStack[stackPtr]; + --stackPtr; + + /* primitive intersection */ + switch(type & PRIMITIVE_ALL) { + case PRIMITIVE_TRIANGLE: { + /* intersect ray against primitive */ + for(; primAddr < primAddr2; primAddr++) { + kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); + /* only primitives from volume object */ + uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object; + int object_flag = kernel_tex_fetch(__object_flag, tri_object); + if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { + continue; + } + triangle_intersect(kg, &isect_precalc, isect, P, visibility, object, primAddr); + } + break; + } +#if BVH_FEATURE(BVH_MOTION) + case PRIMITIVE_MOTION_TRIANGLE: { + /* intersect ray against primitive */ + for(; primAddr < primAddr2; primAddr++) { + kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); + /* only primitives from volume object */ + uint tri_object = (object == OBJECT_NONE)? 
kernel_tex_fetch(__prim_object, primAddr): object; + int object_flag = kernel_tex_fetch(__object_flag, tri_object); + if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { + continue; + } + motion_triangle_intersect(kg, isect, P, dir, ray->time, visibility, object, primAddr); + } + break; + } +#endif + default: { + break; + } + } + } +#if BVH_FEATURE(BVH_INSTANCING) + else { + /* instance push */ + object = kernel_tex_fetch(__prim_object, -primAddr-1); + int object_flag = kernel_tex_fetch(__object_flag, object); + + if(object_flag & SD_OBJECT_HAS_VOLUME) { + +# if BVH_FEATURE(BVH_MOTION) + bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm); +# else + bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t); +# endif + + triangle_intersect_precalc(dir, &isect_precalc); + +# if defined(__KERNEL_SSE2__) + Psplat[0] = ssef(P.x); + Psplat[1] = ssef(P.y); + Psplat[2] = ssef(P.z); + + tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t); +# if BVH_FEATURE(BVH_HAIR) + tfar = ssef(isect->t); +# endif + + gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); +# endif + + ++stackPtr; + kernel_assert(stackPtr < BVH_STACK_SIZE); + traversalStack[stackPtr] = ENTRYPOINT_SENTINEL; + + nodeAddr = kernel_tex_fetch(__object_node, object); + } + else { + /* pop */ + object = OBJECT_NONE; + nodeAddr = traversalStack[stackPtr]; + --stackPtr; + } + } + } +#endif /* FEATURE(BVH_INSTANCING) */ + } while(nodeAddr != ENTRYPOINT_SENTINEL); + +#if BVH_FEATURE(BVH_INSTANCING) + if(stackPtr >= 0) { + kernel_assert(object != OBJECT_NONE); + + /* instance pop */ +# if BVH_FEATURE(BVH_MOTION) + bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm); +# else + bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect->t); +# endif + + triangle_intersect_precalc(dir, &isect_precalc); + +# if defined(__KERNEL_SSE2__) + Psplat[0] = ssef(P.x); + Psplat[1] = ssef(P.y); + Psplat[2] = ssef(P.z); + + tsplat = ssef(0.0f, 0.0f, 
-isect->t, -isect->t); +# if BVH_FEATURE(BVH_HAIR) + tfar = ssef(isect->t); +# endif + + gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); +# endif + + object = OBJECT_NONE; + nodeAddr = traversalStack[stackPtr]; + --stackPtr; + } +#endif /* FEATURE(BVH_INSTANCING) */ + } while(nodeAddr != ENTRYPOINT_SENTINEL); + + return (isect->prim != PRIM_NONE); +} + +ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg, + const Ray *ray, + Intersection *isect, + const uint visibility) +{ +#ifdef __QBVH__ + if(kernel_data.bvh.use_qbvh) { + return BVH_FUNCTION_FULL_NAME(QBVH)(kg, + ray, + isect, + visibility); + } + else +#endif + { + kernel_assert(kernel_data.bvh.use_qbvh == false); + return BVH_FUNCTION_FULL_NAME(BVH)(kg, + ray, + isect, + visibility); + } +} + +#undef BVH_FUNCTION_NAME +#undef BVH_FUNCTION_FEATURES +#undef NODE_INTERSECT diff --git a/intern/cycles/kernel/bvh/bvh_volume_all.h b/intern/cycles/kernel/bvh/bvh_volume_all.h new file mode 100644 index 00000000000..445243c4e5c --- /dev/null +++ b/intern/cycles/kernel/bvh/bvh_volume_all.h @@ -0,0 +1,397 @@ +/* + * Adapted from code Copyright 2009-2010 NVIDIA Corporation, + * and code copyright 2009-2012 Intel Corporation + * + * Modifications Copyright 2011-2014, Blender Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#ifdef __QBVH__ +# include "qbvh_volume_all.h" +#endif + +#if BVH_FEATURE(BVH_HAIR) +# define NODE_INTERSECT bvh_node_intersect +#else +# define NODE_INTERSECT bvh_aligned_node_intersect +#endif + +/* This is a template BVH traversal function for volumes, where + * various features can be enabled/disabled. This way we can compile optimized + * versions for each case without new features slowing things down. + * + * BVH_INSTANCING: object instancing + * BVH_MOTION: motion blur rendering + * + */ + +ccl_device uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg, + const Ray *ray, + Intersection *isect_array, + const uint max_hits, + const uint visibility) +{ + /* todo: + * - test if pushing distance on the stack helps (for non shadow rays) + * - separate version for shadow rays + * - likely and unlikely for if() statements + * - test restrict attribute for pointers + */ + + /* traversal stack in CUDA thread-local memory */ + int traversalStack[BVH_STACK_SIZE]; + traversalStack[0] = ENTRYPOINT_SENTINEL; + + /* traversal variables in registers */ + int stackPtr = 0; + int nodeAddr = kernel_data.bvh.root; + + /* ray parameters in registers */ + const float tmax = ray->t; + float3 P = ray->P; + float3 dir = bvh_clamp_direction(ray->D); + float3 idir = bvh_inverse_direction(dir); + int object = OBJECT_NONE; + float isect_t = tmax; + +#if BVH_FEATURE(BVH_MOTION) + Transform ob_itfm; +#endif + +#if BVH_FEATURE(BVH_INSTANCING) + int num_hits_in_instance = 0; +#endif + + uint num_hits = 0; + isect_array->t = tmax; + +#if defined(__KERNEL_SSE2__) + const shuffle_swap_t shuf_identity = shuffle_swap_identity(); + const shuffle_swap_t shuf_swap = shuffle_swap_swap(); + + const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000)); + ssef Psplat[3], idirsplat[3]; +# if BVH_FEATURE(BVH_HAIR) + ssef tnear(0.0f), tfar(isect_t); +# endif + shuffle_swap_t shufflexyz[3]; + + Psplat[0] = ssef(P.x); + Psplat[1] = ssef(P.y); + Psplat[2] = ssef(P.z); + + ssef tsplat(0.0f, 0.0f, 
-isect_t, -isect_t); + + gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); +#endif + + IsectPrecalc isect_precalc; + triangle_intersect_precalc(dir, &isect_precalc); + + /* traversal loop */ + do { + do { + /* traverse internal nodes */ + while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) { + int nodeAddrChild1, traverse_mask; + float dist[2]; + float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0); + +#if !defined(__KERNEL_SSE2__) + traverse_mask = NODE_INTERSECT(kg, + P, +# if BVH_FEATURE(BVH_HAIR) + dir, +# endif + idir, + isect_t, + nodeAddr, + visibility, + dist); +#else // __KERNEL_SSE2__ + traverse_mask = NODE_INTERSECT(kg, + P, + dir, +# if BVH_FEATURE(BVH_HAIR) + tnear, + tfar, +# endif + tsplat, + Psplat, + idirsplat, + shufflexyz, + nodeAddr, + visibility, + dist); +#endif // __KERNEL_SSE2__ + + nodeAddr = __float_as_int(cnodes.z); + nodeAddrChild1 = __float_as_int(cnodes.w); + + if(traverse_mask == 3) { + /* Both children were intersected, push the farther one. */ + bool closestChild1 = (dist[1] < dist[0]); + + if(closestChild1) { + int tmp = nodeAddr; + nodeAddr = nodeAddrChild1; + nodeAddrChild1 = tmp; + } + + ++stackPtr; + kernel_assert(stackPtr < BVH_STACK_SIZE); + traversalStack[stackPtr] = nodeAddrChild1; + } + else { + /* One child was intersected. */ + if(traverse_mask == 2) { + nodeAddr = nodeAddrChild1; + } + else if(traverse_mask == 0) { + /* Neither child was intersected. 
*/ + nodeAddr = traversalStack[stackPtr]; + --stackPtr; + } + } + } + + /* if node is leaf, fetch triangle list */ + if(nodeAddr < 0) { + float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)); + int primAddr = __float_as_int(leaf.x); + +#if BVH_FEATURE(BVH_INSTANCING) + if(primAddr >= 0) { +#endif + const int primAddr2 = __float_as_int(leaf.y); + const uint type = __float_as_int(leaf.w); + bool hit; + + /* pop */ + nodeAddr = traversalStack[stackPtr]; + --stackPtr; + + /* primitive intersection */ + switch(type & PRIMITIVE_ALL) { + case PRIMITIVE_TRIANGLE: { + /* intersect ray against primitive */ + for(; primAddr < primAddr2; primAddr++) { + kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); + /* only primitives from volume object */ + uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object; + int object_flag = kernel_tex_fetch(__object_flag, tri_object); + if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { + continue; + } + hit = triangle_intersect(kg, &isect_precalc, isect_array, P, visibility, object, primAddr); + if(hit) { + /* Move on to next entry in intersections array. 
*/ + isect_array++; + num_hits++; +#if BVH_FEATURE(BVH_INSTANCING) + num_hits_in_instance++; +#endif + isect_array->t = isect_t; + if(num_hits == max_hits) { /* NOTE(review): num_hits_in_instance is only zeroed on instance push and object can be OBJECT_NONE here; confirm the rescale below cannot run for top-level hits. */ +#if BVH_FEATURE(BVH_INSTANCING) +# if BVH_FEATURE(BVH_MOTION) + float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir)); +# else + Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM); + float t_fac = 1.0f / len(transform_direction(&itfm, dir)); +# endif + for(int i = 0; i < num_hits_in_instance; i++) { + (isect_array-i-1)->t *= t_fac; + } +#endif /* BVH_FEATURE(BVH_INSTANCING) */ + return num_hits; + } + } + } + break; + } +#if BVH_FEATURE(BVH_MOTION) + case PRIMITIVE_MOTION_TRIANGLE: { + /* intersect ray against primitive */ + for(; primAddr < primAddr2; primAddr++) { + kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); + /* only primitives from volume object */ + uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object; + int object_flag = kernel_tex_fetch(__object_flag, tri_object); + if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { + continue; + } + hit = motion_triangle_intersect(kg, isect_array, P, dir, ray->time, visibility, object, primAddr); + if(hit) { + /* Move on to next entry in intersections array.
*/ + isect_array++; + num_hits++; +# if BVH_FEATURE(BVH_INSTANCING) + num_hits_in_instance++; +# endif + isect_array->t = isect_t; + if(num_hits == max_hits) { +# if BVH_FEATURE(BVH_INSTANCING) +# if BVH_FEATURE(BVH_MOTION) + float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir)); +# else + Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM); + float t_fac = 1.0f / len(transform_direction(&itfm, dir)); +# endif + for(int i = 0; i < num_hits_in_instance; i++) { + (isect_array-i-1)->t *= t_fac; + } +# endif /* BVH_FEATURE(BVH_INSTANCING) */ + return num_hits; + } + } + } + break; + } +#endif /* BVH_MOTION */ + default: { + break; + } + } + } +#if BVH_FEATURE(BVH_INSTANCING) + else { + /* instance push */ + object = kernel_tex_fetch(__prim_object, -primAddr-1); + int object_flag = kernel_tex_fetch(__object_flag, object); + + if(object_flag & SD_OBJECT_HAS_VOLUME) { + +# if BVH_FEATURE(BVH_MOTION) + bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_itfm); +# else + bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect_t); +# endif + + triangle_intersect_precalc(dir, &isect_precalc); + num_hits_in_instance = 0; + isect_array->t = isect_t; + +# if defined(__KERNEL_SSE2__) + Psplat[0] = ssef(P.x); + Psplat[1] = ssef(P.y); + Psplat[2] = ssef(P.z); + + tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t); +# if BVH_FEATURE(BVH_HAIR) + tfar = ssef(isect_t); +# endif + + gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); +# endif + + ++stackPtr; + kernel_assert(stackPtr < BVH_STACK_SIZE); + traversalStack[stackPtr] = ENTRYPOINT_SENTINEL; + + nodeAddr = kernel_tex_fetch(__object_node, object); + } + else { + /* pop */ + object = OBJECT_NONE; + nodeAddr = traversalStack[stackPtr]; + --stackPtr; + } + } + } +#endif /* FEATURE(BVH_INSTANCING) */ + } while(nodeAddr != ENTRYPOINT_SENTINEL); + +#if BVH_FEATURE(BVH_INSTANCING) + if(stackPtr >= 0) { + kernel_assert(object != OBJECT_NONE); + + 
if(num_hits_in_instance) { + float t_fac; +# if BVH_FEATURE(BVH_MOTION) + bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm); +# else + bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac); +# endif + triangle_intersect_precalc(dir, &isect_precalc); + /* Scale t of the hits recorded inside this instance by the pop factor. */ + for(int i = 0; i < num_hits_in_instance; i++) { + (isect_array-i-1)->t *= t_fac; + } + } + else { + float ignore_t = FLT_MAX; +# if BVH_FEATURE(BVH_MOTION) + bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &ignore_t, &ob_itfm); +# else + bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &ignore_t); +# endif + triangle_intersect_precalc(dir, &isect_precalc); + } + + isect_t = tmax; + isect_array->t = isect_t; + +# if defined(__KERNEL_SSE2__) + Psplat[0] = ssef(P.x); + Psplat[1] = ssef(P.y); + Psplat[2] = ssef(P.z); + + tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t); +# if BVH_FEATURE(BVH_HAIR) + tfar = ssef(isect_t); +# endif + + gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); +# endif + + object = OBJECT_NONE; + nodeAddr = traversalStack[stackPtr]; + --stackPtr; + } +#endif /* FEATURE(BVH_INSTANCING) */ + } while(nodeAddr != ENTRYPOINT_SENTINEL); + + return num_hits; +} + +ccl_device_inline uint BVH_FUNCTION_NAME(KernelGlobals *kg, + const Ray *ray, + Intersection *isect_array, + const uint max_hits, + const uint visibility) +{ +#ifdef __QBVH__ + if(kernel_data.bvh.use_qbvh) { + return BVH_FUNCTION_FULL_NAME(QBVH)(kg, + ray, + isect_array, + max_hits, + visibility); + } + else +#endif + { + kernel_assert(kernel_data.bvh.use_qbvh == false); + return BVH_FUNCTION_FULL_NAME(BVH)(kg, + ray, + isect_array, + max_hits, + visibility); + } +} + +#undef BVH_FUNCTION_NAME +#undef BVH_FUNCTION_FEATURES +#undef NODE_INTERSECT diff --git a/intern/cycles/kernel/bvh/qbvh_nodes.h b/intern/cycles/kernel/bvh/qbvh_nodes.h new file mode 100644 index 00000000000..5eda3213acb --- /dev/null +++
b/intern/cycles/kernel/bvh/qbvh_nodes.h @@ -0,0 +1,433 @@ +/* + * Copyright 2011-2014, Blender Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +struct QBVHStackItem { + int addr; + float dist; +}; + +/* TODO(sergey): Investigate if using intrinsics helps for both + * stack item swap and float comparison. + */ +ccl_device_inline void qbvh_item_swap(QBVHStackItem *__restrict a, + QBVHStackItem *__restrict b) +{ + QBVHStackItem tmp = *a; + *a = *b; + *b = tmp; +} + +ccl_device_inline void qbvh_stack_sort(QBVHStackItem *__restrict s1, + QBVHStackItem *__restrict s2, + QBVHStackItem *__restrict s3) +{ + if(s2->dist < s1->dist) { qbvh_item_swap(s2, s1); } + if(s3->dist < s2->dist) { qbvh_item_swap(s3, s2); } + if(s2->dist < s1->dist) { qbvh_item_swap(s2, s1); } +} + +ccl_device_inline void qbvh_stack_sort(QBVHStackItem *__restrict s1, + QBVHStackItem *__restrict s2, + QBVHStackItem *__restrict s3, + QBVHStackItem *__restrict s4) +{ + if(s2->dist < s1->dist) { qbvh_item_swap(s2, s1); } + if(s4->dist < s3->dist) { qbvh_item_swap(s4, s3); } + if(s3->dist < s1->dist) { qbvh_item_swap(s3, s1); } + if(s4->dist < s2->dist) { qbvh_item_swap(s4, s2); } + if(s3->dist < s2->dist) { qbvh_item_swap(s3, s2); } +} + +/* Axis-aligned nodes intersection */ + +ccl_device_inline int qbvh_aligned_node_intersect(KernelGlobals *__restrict kg, + const ssef& tnear, + const ssef& tfar, +#ifdef __KERNEL_AVX2__ + const sse3f& org_idir, +#else + const sse3f& org, +#endif
+ const sse3f& idir, + const int near_x, + const int near_y, + const int near_z, + const int far_x, + const int far_y, + const int far_z, + const int nodeAddr, + ssef *__restrict dist) +{ + const int offset = nodeAddr + 1; +#ifdef __KERNEL_AVX2__ + const ssef tnear_x = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_x), idir.x, org_idir.x); + const ssef tnear_y = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_y), idir.y, org_idir.y); + const ssef tnear_z = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_z), idir.z, org_idir.z); + const ssef tfar_x = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_x), idir.x, org_idir.x); + const ssef tfar_y = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_y), idir.y, org_idir.y); + const ssef tfar_z = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_z), idir.z, org_idir.z); +#else + const ssef tnear_x = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_x) - org.x) * idir.x; + const ssef tnear_y = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_y) - org.y) * idir.y; + const ssef tnear_z = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_z) - org.z) * idir.z; + const ssef tfar_x = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_x) - org.x) * idir.x; + const ssef tfar_y = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_y) - org.y) * idir.y; + const ssef tfar_z = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_z) - org.z) * idir.z; +#endif + +#ifdef __KERNEL_SSE41__ + const ssef tNear = maxi(maxi(tnear_x, tnear_y), maxi(tnear_z, tnear)); + const ssef tFar = mini(mini(tfar_x, tfar_y), mini(tfar_z, tfar)); + const sseb vmask = cast(tNear) > cast(tFar); + int mask = (int)movemask(vmask)^0xf; +#else + const ssef tNear = max4(tnear_x, tnear_y, tnear_z, tnear); + const ssef tFar = min4(tfar_x, tfar_y, tfar_z, tfar); + const sseb vmask = tNear <= tFar; + int mask = (int)movemask(vmask); +#endif + *dist = tNear; + return mask; +} + +ccl_device_inline int qbvh_aligned_node_intersect_robust( + KernelGlobals *__restrict kg, + 
const ssef& tnear, + const ssef& tfar, +#ifdef __KERNEL_AVX2__ + const sse3f& P_idir, +#else + const sse3f& P, +#endif + const sse3f& idir, + const int near_x, + const int near_y, + const int near_z, + const int far_x, + const int far_y, + const int far_z, + const int nodeAddr, + const float difl, + ssef *__restrict dist) +{ + const int offset = nodeAddr + 1; +#ifdef __KERNEL_AVX2__ + const ssef tnear_x = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_x), idir.x, P_idir.x); + const ssef tnear_y = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_y), idir.y, P_idir.y); + const ssef tnear_z = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_z), idir.z, P_idir.z); + const ssef tfar_x = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_x), idir.x, P_idir.x); + const ssef tfar_y = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_y), idir.y, P_idir.y); + const ssef tfar_z = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_z), idir.z, P_idir.z); +#else + const ssef tnear_x = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_x) - P.x) * idir.x; + const ssef tnear_y = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_y) - P.y) * idir.y; + const ssef tnear_z = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_z) - P.z) * idir.z; + const ssef tfar_x = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_x) - P.x) * idir.x; + const ssef tfar_y = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_y) - P.y) * idir.y; + const ssef tfar_z = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_z) - P.z) * idir.z; +#endif + + const float round_down = 1.0f - difl; + const float round_up = 1.0f + difl; + const ssef tNear = max4(tnear_x, tnear_y, tnear_z, tnear); + const ssef tFar = min4(tfar_x, tfar_y, tfar_z, tfar); + const sseb vmask = round_down*tNear <= round_up*tFar; + *dist = tNear; + return (int)movemask(vmask); +} + +/* Unaligned nodes intersection */ + +ccl_device_inline int qbvh_unaligned_node_intersect( + KernelGlobals *__restrict kg, + const ssef& tnear, + const ssef& tfar, 
+#ifdef __KERNEL_AVX2__ + const sse3f& org_idir, +#endif + const sse3f& org, + const sse3f& dir, + const sse3f& idir, + const int near_x, + const int near_y, + const int near_z, + const int far_x, + const int far_y, + const int far_z, + const int nodeAddr, + ssef *__restrict dist) +{ + const int offset = nodeAddr; + const ssef tfm_x_x = kernel_tex_fetch_ssef(__bvh_nodes, offset+1); + const ssef tfm_x_y = kernel_tex_fetch_ssef(__bvh_nodes, offset+2); + const ssef tfm_x_z = kernel_tex_fetch_ssef(__bvh_nodes, offset+3); + + const ssef tfm_y_x = kernel_tex_fetch_ssef(__bvh_nodes, offset+4); + const ssef tfm_y_y = kernel_tex_fetch_ssef(__bvh_nodes, offset+5); + const ssef tfm_y_z = kernel_tex_fetch_ssef(__bvh_nodes, offset+6); + + const ssef tfm_z_x = kernel_tex_fetch_ssef(__bvh_nodes, offset+7); + const ssef tfm_z_y = kernel_tex_fetch_ssef(__bvh_nodes, offset+8); + const ssef tfm_z_z = kernel_tex_fetch_ssef(__bvh_nodes, offset+9); + + const ssef tfm_t_x = kernel_tex_fetch_ssef(__bvh_nodes, offset+10); + const ssef tfm_t_y = kernel_tex_fetch_ssef(__bvh_nodes, offset+11); + const ssef tfm_t_z = kernel_tex_fetch_ssef(__bvh_nodes, offset+12); + + const ssef aligned_dir_x = dir.x*tfm_x_x + dir.y*tfm_x_y + dir.z*tfm_x_z, + aligned_dir_y = dir.x*tfm_y_x + dir.y*tfm_y_y + dir.z*tfm_y_z, + aligned_dir_z = dir.x*tfm_z_x + dir.y*tfm_z_y + dir.z*tfm_z_z; + + const ssef aligned_P_x = org.x*tfm_x_x + org.y*tfm_x_y + org.z*tfm_x_z + tfm_t_x, + aligned_P_y = org.x*tfm_y_x + org.y*tfm_y_y + org.z*tfm_y_z + tfm_t_y, + aligned_P_z = org.x*tfm_z_x + org.y*tfm_z_y + org.z*tfm_z_z + tfm_t_z; + + const ssef neg_one(-1.0f, -1.0f, -1.0f, -1.0f); + const ssef nrdir_x = neg_one / aligned_dir_x, + nrdir_y = neg_one / aligned_dir_y, + nrdir_z = neg_one / aligned_dir_z; + + const ssef tlower_x = aligned_P_x * nrdir_x, + tlower_y = aligned_P_y * nrdir_y, + tlower_z = aligned_P_z * nrdir_z; + + const ssef tupper_x = tlower_x - nrdir_x, + tupper_y = tlower_y - nrdir_y, + tupper_z = tlower_z - nrdir_z; 
+ +#ifdef __KERNEL_SSE41__ + const ssef tnear_x = mini(tlower_x, tupper_x); + const ssef tnear_y = mini(tlower_y, tupper_y); + const ssef tnear_z = mini(tlower_z, tupper_z); + const ssef tfar_x = maxi(tlower_x, tupper_x); + const ssef tfar_y = maxi(tlower_y, tupper_y); + const ssef tfar_z = maxi(tlower_z, tupper_z); + const ssef tNear = max4(tnear, tnear_x, tnear_y, tnear_z); + const ssef tFar = min4(tfar, tfar_x, tfar_y, tfar_z); + const sseb vmask = tNear <= tFar; + *dist = tNear; + return movemask(vmask); +#else + const ssef tnear_x = min(tlower_x, tupper_x); + const ssef tnear_y = min(tlower_y, tupper_y); + const ssef tnear_z = min(tlower_z, tupper_z); + const ssef tfar_x = max(tlower_x, tupper_x); + const ssef tfar_y = max(tlower_y, tupper_y); + const ssef tfar_z = max(tlower_z, tupper_z); + const ssef tNear = max4(tnear, tnear_x, tnear_y, tnear_z); + const ssef tFar = min4(tfar, tfar_x, tfar_y, tfar_z); + const sseb vmask = tNear <= tFar; + *dist = tNear; + return movemask(vmask); +#endif +} + +ccl_device_inline int qbvh_unaligned_node_intersect_robust( + KernelGlobals *__restrict kg, + const ssef& tnear, + const ssef& tfar, +#ifdef __KERNEL_AVX2__ + const sse3f& P_idir, +#endif + const sse3f& P, + const sse3f& dir, + const sse3f& idir, + const int near_x, + const int near_y, + const int near_z, + const int far_x, + const int far_y, + const int far_z, + const int nodeAddr, + const float difl, + ssef *__restrict dist) +{ + const int offset = nodeAddr; + const ssef tfm_x_x = kernel_tex_fetch_ssef(__bvh_nodes, offset+1); + const ssef tfm_x_y = kernel_tex_fetch_ssef(__bvh_nodes, offset+2); + const ssef tfm_x_z = kernel_tex_fetch_ssef(__bvh_nodes, offset+3); + + const ssef tfm_y_x = kernel_tex_fetch_ssef(__bvh_nodes, offset+4); + const ssef tfm_y_y = kernel_tex_fetch_ssef(__bvh_nodes, offset+5); + const ssef tfm_y_z = kernel_tex_fetch_ssef(__bvh_nodes, offset+6); + + const ssef tfm_z_x = kernel_tex_fetch_ssef(__bvh_nodes, offset+7); + const ssef tfm_z_y = 
kernel_tex_fetch_ssef(__bvh_nodes, offset+8); + const ssef tfm_z_z = kernel_tex_fetch_ssef(__bvh_nodes, offset+9); + + const ssef tfm_t_x = kernel_tex_fetch_ssef(__bvh_nodes, offset+10); + const ssef tfm_t_y = kernel_tex_fetch_ssef(__bvh_nodes, offset+11); + const ssef tfm_t_z = kernel_tex_fetch_ssef(__bvh_nodes, offset+12); + + const ssef aligned_dir_x = dir.x*tfm_x_x + dir.y*tfm_x_y + dir.z*tfm_x_z, + aligned_dir_y = dir.x*tfm_y_x + dir.y*tfm_y_y + dir.z*tfm_y_z, + aligned_dir_z = dir.x*tfm_z_x + dir.y*tfm_z_y + dir.z*tfm_z_z; + + const ssef aligned_P_x = P.x*tfm_x_x + P.y*tfm_x_y + P.z*tfm_x_z + tfm_t_x, + aligned_P_y = P.x*tfm_y_x + P.y*tfm_y_y + P.z*tfm_y_z + tfm_t_y, + aligned_P_z = P.x*tfm_z_x + P.y*tfm_z_y + P.z*tfm_z_z + tfm_t_z; + + const ssef neg_one(-1.0f, -1.0f, -1.0f, -1.0f); + const ssef nrdir_x = neg_one / aligned_dir_x, + nrdir_y = neg_one / aligned_dir_y, + nrdir_z = neg_one / aligned_dir_z; + + const ssef tlower_x = aligned_P_x * nrdir_x, + tlower_y = aligned_P_y * nrdir_y, + tlower_z = aligned_P_z * nrdir_z; + + const ssef tupper_x = tlower_x - nrdir_x, + tupper_y = tlower_y - nrdir_y, + tupper_z = tlower_z - nrdir_z; + + const float round_down = 1.0f - difl; + const float round_up = 1.0f + difl; + +#ifdef __KERNEL_SSE41__ + const ssef tnear_x = mini(tlower_x, tupper_x); + const ssef tnear_y = mini(tlower_y, tupper_y); + const ssef tnear_z = mini(tlower_z, tupper_z); + const ssef tfar_x = maxi(tlower_x, tupper_x); + const ssef tfar_y = maxi(tlower_y, tupper_y); + const ssef tfar_z = maxi(tlower_z, tupper_z); +#else + const ssef tnear_x = min(tlower_x, tupper_x); + const ssef tnear_y = min(tlower_y, tupper_y); + const ssef tnear_z = min(tlower_z, tupper_z); + const ssef tfar_x = max(tlower_x, tupper_x); + const ssef tfar_y = max(tlower_y, tupper_y); + const ssef tfar_z = max(tlower_z, tupper_z); +#endif + const ssef tNear = max4(tnear, tnear_x, tnear_y, tnear_z); + const ssef tFar = min4(tfar, tfar_x, tfar_y, tfar_z); + const sseb vmask = 
round_down*tNear <= round_up*tFar; + *dist = tNear; + return movemask(vmask); +} + +/* Intersectors wrappers. + * + * They'll check node type and call appropriate intersection code. + */ + +ccl_device_inline int qbvh_node_intersect( + KernelGlobals *__restrict kg, + const ssef& tnear, + const ssef& tfar, +#ifdef __KERNEL_AVX2__ + const sse3f& org_idir, +#endif + const sse3f& org, + const sse3f& dir, + const sse3f& idir, + const int near_x, + const int near_y, + const int near_z, + const int far_x, + const int far_y, + const int far_z, + const int nodeAddr, + ssef *__restrict dist) +{ + const int offset = nodeAddr; + const float4 node = kernel_tex_fetch(__bvh_nodes, offset); + if(__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) { + return qbvh_unaligned_node_intersect(kg, + tnear, + tfar, +#ifdef __KERNEL_AVX2__ + org_idir, +#endif + org, + dir, + idir, + near_x, near_y, near_z, + far_x, far_y, far_z, + nodeAddr, + dist); + } + else { + return qbvh_aligned_node_intersect(kg, + tnear, + tfar, +#ifdef __KERNEL_AVX2__ + org_idir, +#else + org, +#endif + idir, + near_x, near_y, near_z, + far_x, far_y, far_z, + nodeAddr, + dist); + } +} + +ccl_device_inline int qbvh_node_intersect_robust( + KernelGlobals *__restrict kg, + const ssef& tnear, + const ssef& tfar, +#ifdef __KERNEL_AVX2__ + const sse3f& P_idir, +#endif + const sse3f& P, + const sse3f& dir, + const sse3f& idir, + const int near_x, + const int near_y, + const int near_z, + const int far_x, + const int far_y, + const int far_z, + const int nodeAddr, + const float difl, + ssef *__restrict dist) +{ + const int offset = nodeAddr; + const float4 node = kernel_tex_fetch(__bvh_nodes, offset); + if(__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) { + return qbvh_unaligned_node_intersect_robust(kg, + tnear, + tfar, +#ifdef __KERNEL_AVX2__ + P_idir, +#endif + P, + dir, + idir, + near_x, near_y, near_z, + far_x, far_y, far_z, + nodeAddr, + difl, + dist); + } + else { + return qbvh_aligned_node_intersect_robust(kg, + 
tnear, + tfar, +#ifdef __KERNEL_AVX2__ + P_idir, +#else + P, +#endif + idir, + near_x, near_y, near_z, + far_x, far_y, far_z, + nodeAddr, + difl, + dist); + } +} diff --git a/intern/cycles/kernel/bvh/qbvh_shadow.h b/intern/cycles/kernel/bvh/qbvh_shadow.h new file mode 100644 index 00000000000..e5e611a0d47 --- /dev/null +++ b/intern/cycles/kernel/bvh/qbvh_shadow.h @@ -0,0 +1,449 @@ +/* + * Adapted from code Copyright 2009-2010 NVIDIA Corporation, + * and code copyright 2009-2012 Intel Corporation + * + * Modifications Copyright 2011-2014, Blender Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* This is a template BVH traversal function, where various features can be + * enabled/disabled. This way we can compile optimized versions for each case + * without new features slowing things down. + * + * BVH_INSTANCING: object instancing + * BVH_HAIR: hair curve rendering + * BVH_MOTION: motion blur rendering + * + */ + +#if BVH_FEATURE(BVH_HAIR) +# define NODE_INTERSECT qbvh_node_intersect +#else +# define NODE_INTERSECT qbvh_aligned_node_intersect +#endif + +ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, + const Ray *ray, + Intersection *isect_array, + const uint max_hits, + uint *num_hits) +{ + /* TODO(sergey): + * - Likely and unlikely for if() statements. + * - Test restrict attribute for pointers. + */ + + /* Traversal stack in CUDA thread-local memory. 
*/ + QBVHStackItem traversalStack[BVH_QSTACK_SIZE]; + traversalStack[0].addr = ENTRYPOINT_SENTINEL; + + /* Traversal variables in registers. */ + int stackPtr = 0; + int nodeAddr = kernel_data.bvh.root; + + /* Ray parameters in registers. */ + const float tmax = ray->t; + float3 P = ray->P; + float3 dir = bvh_clamp_direction(ray->D); + float3 idir = bvh_inverse_direction(dir); + int object = OBJECT_NONE; + float isect_t = tmax; + +#if BVH_FEATURE(BVH_MOTION) + Transform ob_itfm; +#endif + + *num_hits = 0; + isect_array->t = tmax; + +#ifndef __KERNEL_SSE41__ + if(!isfinite(P.x)) { + return false; + } +#endif + +#if BVH_FEATURE(BVH_INSTANCING) + int num_hits_in_instance = 0; +#endif + + ssef tnear(0.0f), tfar(tmax); +#if BVH_FEATURE(BVH_HAIR) + sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z)); +#endif + sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z)); + +#ifdef __KERNEL_AVX2__ + float3 P_idir = P*idir; + sse3f P_idir4(P_idir.x, P_idir.y, P_idir.z); +#endif +#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__) + sse3f org4(ssef(P.x), ssef(P.y), ssef(P.z)); +#endif + + /* Offsets to select the side that becomes the lower or upper bound. */ + int near_x, near_y, near_z; + int far_x, far_y, far_z; + + if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; } + if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; } + if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; } + + IsectPrecalc isect_precalc; + triangle_intersect_precalc(dir, &isect_precalc); + + /* Traversal loop. */ + do { + do { + /* Traverse internal nodes. */ + while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) { + float4 inodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0); + +#ifdef __VISIBILITY_FLAG__ + if((__float_as_uint(inodes.x) & PATH_RAY_SHADOW) == 0) { + /* Pop. 
*/ + nodeAddr = traversalStack[stackPtr].addr; + --stackPtr; + continue; + } +#endif + + ssef dist; + int traverseChild = NODE_INTERSECT(kg, + tnear, + tfar, +#ifdef __KERNEL_AVX2__ + P_idir4, +#endif +# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__) + org4, +# endif +# if BVH_FEATURE(BVH_HAIR) + dir4, +# endif + idir4, + near_x, near_y, near_z, + far_x, far_y, far_z, + nodeAddr, + &dist); + + if(traverseChild != 0) { + float4 cnodes; +#if BVH_FEATURE(BVH_HAIR) + if(__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) { + cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+13); + } + else +#endif + { + cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+7); + } + + /* One child is hit, continue with that child. */ + int r = __bscf(traverseChild); + if(traverseChild == 0) { + nodeAddr = __float_as_int(cnodes[r]); + continue; + } + + /* Two children are hit, push far child, and continue with + * closer child. + */ + int c0 = __float_as_int(cnodes[r]); + float d0 = ((float*)&dist)[r]; + r = __bscf(traverseChild); + int c1 = __float_as_int(cnodes[r]); + float d1 = ((float*)&dist)[r]; + if(traverseChild == 0) { + if(d1 < d0) { + nodeAddr = c1; + ++stackPtr; + kernel_assert(stackPtr < BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = c0; + traversalStack[stackPtr].dist = d0; + continue; + } + else { + nodeAddr = c0; + ++stackPtr; + kernel_assert(stackPtr < BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = c1; + traversalStack[stackPtr].dist = d1; + continue; + } + } + + /* Here starts the slow path for 3 or 4 hit children. We push + * all nodes onto the stack to sort them there. + */ + ++stackPtr; + kernel_assert(stackPtr < BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = c1; + traversalStack[stackPtr].dist = d1; + ++stackPtr; + kernel_assert(stackPtr < BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = c0; + traversalStack[stackPtr].dist = d0; + + /* Three children are hit, push all onto stack and sort 3 + * stack items, continue with closest child. 
+ */ + r = __bscf(traverseChild); + int c2 = __float_as_int(cnodes[r]); + float d2 = ((float*)&dist)[r]; + if(traverseChild == 0) { + ++stackPtr; + kernel_assert(stackPtr < BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = c2; + traversalStack[stackPtr].dist = d2; + qbvh_stack_sort(&traversalStack[stackPtr], + &traversalStack[stackPtr - 1], + &traversalStack[stackPtr - 2]); + nodeAddr = traversalStack[stackPtr].addr; + --stackPtr; + continue; + } + + /* Four children are hit, push all onto stack and sort 4 + * stack items, continue with closest child. + */ + r = __bscf(traverseChild); + int c3 = __float_as_int(cnodes[r]); + float d3 = ((float*)&dist)[r]; + ++stackPtr; + kernel_assert(stackPtr < BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = c3; + traversalStack[stackPtr].dist = d3; + ++stackPtr; + kernel_assert(stackPtr < BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = c2; + traversalStack[stackPtr].dist = d2; + qbvh_stack_sort(&traversalStack[stackPtr], + &traversalStack[stackPtr - 1], + &traversalStack[stackPtr - 2], + &traversalStack[stackPtr - 3]); + } + + nodeAddr = traversalStack[stackPtr].addr; + --stackPtr; + } + + /* If node is leaf, fetch triangle list. */ + if(nodeAddr < 0) { + float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)); +#ifdef __VISIBILITY_FLAG__ + if((__float_as_uint(leaf.z) & PATH_RAY_SHADOW) == 0) { + /* Pop. */ + nodeAddr = traversalStack[stackPtr].addr; + --stackPtr; + continue; + } +#endif + + int primAddr = __float_as_int(leaf.x); + +#if BVH_FEATURE(BVH_INSTANCING) + if(primAddr >= 0) { +#endif + int primAddr2 = __float_as_int(leaf.y); + const uint type = __float_as_int(leaf.w); + const uint p_type = type & PRIMITIVE_ALL; + + /* Pop. */ + nodeAddr = traversalStack[stackPtr].addr; + --stackPtr; + + /* Primitive intersection. 
*/ + while(primAddr < primAddr2) { + kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); + + bool hit; + + /* todo: specialized intersect functions which don't fill in + * isect unless needed and check SD_HAS_TRANSPARENT_SHADOW? + * might give a few % performance improvement */ + + switch(p_type) { + case PRIMITIVE_TRIANGLE: { + hit = triangle_intersect(kg, &isect_precalc, isect_array, P, PATH_RAY_SHADOW, object, primAddr); + break; + } +#if BVH_FEATURE(BVH_MOTION) + case PRIMITIVE_MOTION_TRIANGLE: { + hit = motion_triangle_intersect(kg, isect_array, P, dir, ray->time, PATH_RAY_SHADOW, object, primAddr); + break; + } +#endif +#if BVH_FEATURE(BVH_HAIR) + case PRIMITIVE_CURVE: + case PRIMITIVE_MOTION_CURVE: { + if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) + hit = bvh_cardinal_curve_intersect(kg, isect_array, P, dir, PATH_RAY_SHADOW, object, primAddr, ray->time, type, NULL, 0, 0); + else + hit = bvh_curve_intersect(kg, isect_array, P, dir, PATH_RAY_SHADOW, object, primAddr, ray->time, type, NULL, 0, 0); + break; + } +#endif + default: { + hit = false; + break; + } + } + + /* Shadow ray early termination. */ + if(hit) { + /* detect if this surface has a shader with transparent shadows */ + + /* todo: optimize so primitive visibility flag indicates if + * the primitive has a transparent shadow shader? 
*/ + int prim = kernel_tex_fetch(__prim_index, isect_array->prim); + int shader = 0; + +#ifdef __HAIR__ + if(kernel_tex_fetch(__prim_type, isect_array->prim) & PRIMITIVE_ALL_TRIANGLE) +#endif + { + shader = kernel_tex_fetch(__tri_shader, prim); + } +#ifdef __HAIR__ + else { + float4 str = kernel_tex_fetch(__curves, prim); + shader = __float_as_int(str.z); + } +#endif + int flag = kernel_tex_fetch(__shader_flag, (shader & SHADER_MASK)*2); + + /* if no transparent shadows, all light is blocked */ + if(!(flag & SD_HAS_TRANSPARENT_SHADOW)) { + return true; + } + /* if maximum number of hits reached, block all light */ + else if(*num_hits == max_hits) { + return true; + } + + /* move on to next entry in intersections array */ + isect_array++; + (*num_hits)++; +#if BVH_FEATURE(BVH_INSTANCING) + num_hits_in_instance++; +#endif + + isect_array->t = isect_t; + } + + primAddr++; + } + } +#if BVH_FEATURE(BVH_INSTANCING) + else { + /* Instance push. */ + object = kernel_tex_fetch(__prim_object, -primAddr-1); + +# if BVH_FEATURE(BVH_MOTION) + bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_itfm); +# else + bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect_t); +# endif + + num_hits_in_instance = 0; + isect_array->t = isect_t; + + if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; } + if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; } + if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; } + tfar = ssef(isect_t); +# if BVH_FEATURE(BVH_HAIR) + dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z)); +# endif + idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z)); +# ifdef __KERNEL_AVX2__ + P_idir = P*idir; + P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z); +# endif +# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__) + org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z)); +# endif + + triangle_intersect_precalc(dir, &isect_precalc); + + ++stackPtr; + kernel_assert(stackPtr < 
BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = ENTRYPOINT_SENTINEL; + + nodeAddr = kernel_tex_fetch(__object_node, object); + + } + } +#endif /* FEATURE(BVH_INSTANCING) */ + } while(nodeAddr != ENTRYPOINT_SENTINEL); + +#if BVH_FEATURE(BVH_INSTANCING) + if(stackPtr >= 0) { + kernel_assert(object != OBJECT_NONE); + + if(num_hits_in_instance) { + float t_fac; + +# if BVH_FEATURE(BVH_MOTION) + bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm); +# else + bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac); +# endif + + /* scale isect->t to adjust for instancing */ + for(int i = 0; i < num_hits_in_instance; i++) + (isect_array-i-1)->t *= t_fac; + } + else { + float ignore_t = FLT_MAX; + +# if BVH_FEATURE(BVH_MOTION) + bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &ignore_t, &ob_itfm); +# else + bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &ignore_t); +# endif + } + + isect_t = tmax; + isect_array->t = isect_t; + + if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; } + if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; } + if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; } + tfar = ssef(tmax); +# if BVH_FEATURE(BVH_HAIR) + dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z)); +# endif + idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z)); +# ifdef __KERNEL_AVX2__ + P_idir = P*idir; + P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z); +# endif +# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__) + org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z)); +# endif + + triangle_intersect_precalc(dir, &isect_precalc); + + object = OBJECT_NONE; + nodeAddr = traversalStack[stackPtr].addr; + --stackPtr; + } +#endif /* FEATURE(BVH_INSTANCING) */ + } while(nodeAddr != ENTRYPOINT_SENTINEL); + + return false; +} + +#undef NODE_INTERSECT diff --git a/intern/cycles/kernel/bvh/qbvh_subsurface.h b/intern/cycles/kernel/bvh/qbvh_subsurface.h new file 
mode 100644 index 00000000000..4adaf9c8f3d --- /dev/null +++ b/intern/cycles/kernel/bvh/qbvh_subsurface.h @@ -0,0 +1,299 @@ +/* + * Adapted from code Copyright 2009-2010 NVIDIA Corporation, + * and code copyright 2009-2012 Intel Corporation + * + * Modifications Copyright 2011-2014, Blender Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* This is a template BVH traversal function for subsurface scattering, where + * various features can be enabled/disabled. This way we can compile optimized + * versions for each case without new features slowing things down. + * + * BVH_MOTION: motion blur rendering + * + */ + +#if BVH_FEATURE(BVH_HAIR) +# define NODE_INTERSECT qbvh_node_intersect +#else +# define NODE_INTERSECT qbvh_aligned_node_intersect +#endif + +ccl_device void BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, + const Ray *ray, + SubsurfaceIntersection *ss_isect, + int subsurface_object, + uint *lcg_state, + int max_hits) +{ + /* TODO(sergey): + * - Test if pushing distance on the stack helps (for non shadow rays). + * - Separate version for shadow rays. + * - Likely and unlikely for if() statements. + * - SSE for hair. + * - Test restrict attribute for pointers. + */ + + /* Traversal stack in CUDA thread-local memory. */ + QBVHStackItem traversalStack[BVH_QSTACK_SIZE]; + traversalStack[0].addr = ENTRYPOINT_SENTINEL; + + /* Traversal variables in registers. 
*/ + int stackPtr = 0; + int nodeAddr = kernel_tex_fetch(__object_node, subsurface_object); + + /* Ray parameters in registers. */ + float3 P = ray->P; + float3 dir = bvh_clamp_direction(ray->D); + float3 idir = bvh_inverse_direction(dir); + int object = OBJECT_NONE; + float isect_t = ray->t; + + ss_isect->num_hits = 0; + + const int object_flag = kernel_tex_fetch(__object_flag, subsurface_object); + if(!(object_flag & SD_TRANSFORM_APPLIED)) { +#if BVH_FEATURE(BVH_MOTION) + Transform ob_itfm; + bvh_instance_motion_push(kg, + subsurface_object, + ray, + &P, + &dir, + &idir, + &isect_t, + &ob_itfm); +#else + bvh_instance_push(kg, subsurface_object, ray, &P, &dir, &idir, &isect_t); +#endif + object = subsurface_object; + } + +#ifndef __KERNEL_SSE41__ + if(!isfinite(P.x)) { + return; + } +#endif + + ssef tnear(0.0f), tfar(isect_t); +#if BVH_FEATURE(BVH_HAIR) + sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z)); +#endif + sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z)); + +#ifdef __KERNEL_AVX2__ + float3 P_idir = P*idir; + sse3f P_idir4(P_idir.x, P_idir.y, P_idir.z); +#endif +#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__) + sse3f org4(ssef(P.x), ssef(P.y), ssef(P.z)); +#endif + + /* Offsets to select the side that becomes the lower or upper bound. */ + int near_x, near_y, near_z; + int far_x, far_y, far_z; + + if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; } + if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; } + if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; } + + IsectPrecalc isect_precalc; + triangle_intersect_precalc(dir, &isect_precalc); + + /* Traversal loop. */ + do { + do { + /* Traverse internal nodes. 
*/ + while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) { + ssef dist; + + int traverseChild = NODE_INTERSECT(kg, + tnear, + tfar, +#ifdef __KERNEL_AVX2__ + P_idir4, +#endif +#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__) + org4, +#endif +#if BVH_FEATURE(BVH_HAIR) + dir4, +#endif + idir4, + near_x, near_y, near_z, + far_x, far_y, far_z, + nodeAddr, + &dist); + + if(traverseChild != 0) { + float4 inodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0); + float4 cnodes; +#if BVH_FEATURE(BVH_HAIR) + if(__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) { + cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+13); + } + else +#endif + { + cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+7); + } + + /* One child is hit, continue with that child. */ + int r = __bscf(traverseChild); + if(traverseChild == 0) { + nodeAddr = __float_as_int(cnodes[r]); + continue; + } + + /* Two children are hit, push far child, and continue with + * closer child. + */ + int c0 = __float_as_int(cnodes[r]); + float d0 = ((float*)&dist)[r]; + r = __bscf(traverseChild); + int c1 = __float_as_int(cnodes[r]); + float d1 = ((float*)&dist)[r]; + if(traverseChild == 0) { + if(d1 < d0) { + nodeAddr = c1; + ++stackPtr; + kernel_assert(stackPtr < BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = c0; + traversalStack[stackPtr].dist = d0; + continue; + } + else { + nodeAddr = c0; + ++stackPtr; + kernel_assert(stackPtr < BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = c1; + traversalStack[stackPtr].dist = d1; + continue; + } + } + + /* Here starts the slow path for 3 or 4 hit children. We push + * all nodes onto the stack to sort them there. 
+ */ + ++stackPtr; + kernel_assert(stackPtr < BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = c1; + traversalStack[stackPtr].dist = d1; + ++stackPtr; + kernel_assert(stackPtr < BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = c0; + traversalStack[stackPtr].dist = d0; + + /* Three children are hit, push all onto stack and sort 3 + * stack items, continue with closest child. + */ + r = __bscf(traverseChild); + int c2 = __float_as_int(cnodes[r]); + float d2 = ((float*)&dist)[r]; + if(traverseChild == 0) { + ++stackPtr; + kernel_assert(stackPtr < BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = c2; + traversalStack[stackPtr].dist = d2; + qbvh_stack_sort(&traversalStack[stackPtr], + &traversalStack[stackPtr - 1], + &traversalStack[stackPtr - 2]); + nodeAddr = traversalStack[stackPtr].addr; + --stackPtr; + continue; + } + + /* Four children are hit, push all onto stack and sort 4 + * stack items, continue with closest child. + */ + r = __bscf(traverseChild); + int c3 = __float_as_int(cnodes[r]); + float d3 = ((float*)&dist)[r]; + ++stackPtr; + kernel_assert(stackPtr < BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = c3; + traversalStack[stackPtr].dist = d3; + ++stackPtr; + kernel_assert(stackPtr < BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = c2; + traversalStack[stackPtr].dist = d2; + qbvh_stack_sort(&traversalStack[stackPtr], + &traversalStack[stackPtr - 1], + &traversalStack[stackPtr - 2], + &traversalStack[stackPtr - 3]); + } + + nodeAddr = traversalStack[stackPtr].addr; + --stackPtr; + } + + /* If node is leaf, fetch triangle list. */ + if(nodeAddr < 0) { + float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)); + int primAddr = __float_as_int(leaf.x); + + int primAddr2 = __float_as_int(leaf.y); + const uint type = __float_as_int(leaf.w); + + /* Pop. */ + nodeAddr = traversalStack[stackPtr].addr; + --stackPtr; + + /* Primitive intersection. 
*/ + switch(type & PRIMITIVE_ALL) { + case PRIMITIVE_TRIANGLE: { + /* Intersect ray against primitive, */ + for(; primAddr < primAddr2; primAddr++) { + kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); + triangle_intersect_subsurface(kg, + &isect_precalc, + ss_isect, + P, + object, + primAddr, + isect_t, + lcg_state, + max_hits); + } + break; + } +#if BVH_FEATURE(BVH_MOTION) + case PRIMITIVE_MOTION_TRIANGLE: { + /* Intersect ray against primitive. */ + for(; primAddr < primAddr2; primAddr++) { + kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); + motion_triangle_intersect_subsurface(kg, + ss_isect, + P, + dir, + ray->time, + object, + primAddr, + isect_t, + lcg_state, + max_hits); + } + break; + } +#endif + default: + break; + } + } + } while(nodeAddr != ENTRYPOINT_SENTINEL); + } while(nodeAddr != ENTRYPOINT_SENTINEL); +} + +#undef NODE_INTERSECT diff --git a/intern/cycles/kernel/bvh/qbvh_traversal.h b/intern/cycles/kernel/bvh/qbvh_traversal.h new file mode 100644 index 00000000000..24bf85f46c8 --- /dev/null +++ b/intern/cycles/kernel/bvh/qbvh_traversal.h @@ -0,0 +1,465 @@ +/* + * Adapted from code Copyright 2009-2010 NVIDIA Corporation, + * and code copyright 2009-2012 Intel Corporation + * + * Modifications Copyright 2011-2014, Blender Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* This is a template BVH traversal function, where various features can be + * enabled/disabled. 
This way we can compile optimized versions for each case + * without new features slowing things down. + * + * BVH_INSTANCING: object instancing + * BVH_HAIR: hair curve rendering + * BVH_HAIR_MINIMUM_WIDTH: hair curve rendering with minimum width + * BVH_MOTION: motion blur rendering + * + */ + +#if BVH_FEATURE(BVH_HAIR) +# define NODE_INTERSECT qbvh_node_intersect +# define NODE_INTERSECT_ROBUST qbvh_node_intersect_robust +#else +# define NODE_INTERSECT qbvh_aligned_node_intersect +# define NODE_INTERSECT_ROBUST qbvh_aligned_node_intersect_robust +#endif + +ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, + const Ray *ray, + Intersection *isect, + const uint visibility +#if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH) + ,uint *lcg_state, + float difl, + float extmax +#endif + ) +{ + /* TODO(sergey): + * - Test if pushing distance on the stack helps (for non shadow rays). + * - Separate version for shadow rays. + * - Likely and unlikely for if() statements. + * - Test restrict attribute for pointers. + */ + + /* Traversal stack in CUDA thread-local memory. */ + QBVHStackItem traversalStack[BVH_QSTACK_SIZE]; + traversalStack[0].addr = ENTRYPOINT_SENTINEL; + traversalStack[0].dist = -FLT_MAX; + + /* Traversal variables in registers. */ + int stackPtr = 0; + int nodeAddr = kernel_data.bvh.root; + float nodeDist = -FLT_MAX; + + /* Ray parameters in registers. 
*/ + float3 P = ray->P; + float3 dir = bvh_clamp_direction(ray->D); + float3 idir = bvh_inverse_direction(dir); + int object = OBJECT_NONE; + +#if BVH_FEATURE(BVH_MOTION) + Transform ob_itfm; +#endif + +#ifndef __KERNEL_SSE41__ + if(!isfinite(P.x)) { + return false; + } +#endif + + isect->t = ray->t; + isect->u = 0.0f; + isect->v = 0.0f; + isect->prim = PRIM_NONE; + isect->object = OBJECT_NONE; + + BVH_DEBUG_INIT(); + + ssef tnear(0.0f), tfar(ray->t); +#if BVH_FEATURE(BVH_HAIR) + sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z)); +#endif + sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z)); + +#ifdef __KERNEL_AVX2__ + float3 P_idir = P*idir; + sse3f P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z); +#endif +#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__) + sse3f org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z)); +#endif + + /* Offsets to select the side that becomes the lower or upper bound. */ + int near_x, near_y, near_z; + int far_x, far_y, far_z; + + if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; } + if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; } + if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; } + + IsectPrecalc isect_precalc; + triangle_intersect_precalc(dir, &isect_precalc); + + /* Traversal loop. */ + do { + do { + /* Traverse internal nodes. */ + while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) { + float4 inodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0); + + if(UNLIKELY(nodeDist > isect->t) +#ifdef __VISIBILITY_FLAG__ + || (__float_as_uint(inodes.x) & visibility) == 0) +#endif + { + /* Pop. 
*/ + nodeAddr = traversalStack[stackPtr].addr; + nodeDist = traversalStack[stackPtr].dist; + --stackPtr; + continue; + } + + int traverseChild; + ssef dist; + + BVH_DEBUG_NEXT_STEP(); + +#if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH) + if(difl != 0.0f) { + /* NOTE: We extend all the child BB instead of fetching + * and checking visibility flags for each of the, + * + * Need to test if doing opposite would be any faster. + */ + traverseChild = NODE_INTERSECT_ROBUST(kg, + tnear, + tfar, +# ifdef __KERNEL_AVX2__ + P_idir4, +# endif +# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__) + org4, +# endif +# if BVH_FEATURE(BVH_HAIR) + dir4, +# endif + idir4, + near_x, near_y, near_z, + far_x, far_y, far_z, + nodeAddr, + difl, + &dist); + } + else +#endif /* BVH_HAIR_MINIMUM_WIDTH */ + { + traverseChild = NODE_INTERSECT(kg, + tnear, + tfar, +#ifdef __KERNEL_AVX2__ + P_idir4, +#endif +#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__) + org4, +#endif +#if BVH_FEATURE(BVH_HAIR) + dir4, +#endif + idir4, + near_x, near_y, near_z, + far_x, far_y, far_z, + nodeAddr, + &dist); + } + + if(traverseChild != 0) { + float4 cnodes; + /* TODO(sergey): Investigate whether moving cnodes upwards + * gives a speedup (will be different cache pattern but will + * avoid extra check here), + */ +#if BVH_FEATURE(BVH_HAIR) + if(__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) { + cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+13); + } + else +#endif + { + cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+7); + } + + /* One child is hit, continue with that child. */ + int r = __bscf(traverseChild); + float d0 = ((float*)&dist)[r]; + if(traverseChild == 0) { + nodeAddr = __float_as_int(cnodes[r]); + nodeDist = d0; + continue; + } + + /* Two children are hit, push far child, and continue with + * closer child. 
+ */ + int c0 = __float_as_int(cnodes[r]); + r = __bscf(traverseChild); + int c1 = __float_as_int(cnodes[r]); + float d1 = ((float*)&dist)[r]; + if(traverseChild == 0) { + if(d1 < d0) { + nodeAddr = c1; + nodeDist = d1; + ++stackPtr; + kernel_assert(stackPtr < BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = c0; + traversalStack[stackPtr].dist = d0; + continue; + } + else { + nodeAddr = c0; + nodeDist = d0; + ++stackPtr; + kernel_assert(stackPtr < BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = c1; + traversalStack[stackPtr].dist = d1; + continue; + } + } + + /* Here starts the slow path for 3 or 4 hit children. We push + * all nodes onto the stack to sort them there. + */ + ++stackPtr; + kernel_assert(stackPtr < BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = c1; + traversalStack[stackPtr].dist = d1; + ++stackPtr; + kernel_assert(stackPtr < BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = c0; + traversalStack[stackPtr].dist = d0; + + /* Three children are hit, push all onto stack and sort 3 + * stack items, continue with closest child. + */ + r = __bscf(traverseChild); + int c2 = __float_as_int(cnodes[r]); + float d2 = ((float*)&dist)[r]; + if(traverseChild == 0) { + ++stackPtr; + kernel_assert(stackPtr < BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = c2; + traversalStack[stackPtr].dist = d2; + qbvh_stack_sort(&traversalStack[stackPtr], + &traversalStack[stackPtr - 1], + &traversalStack[stackPtr - 2]); + nodeAddr = traversalStack[stackPtr].addr; + nodeDist = traversalStack[stackPtr].dist; + --stackPtr; + continue; + } + + /* Four children are hit, push all onto stack and sort 4 + * stack items, continue with closest child. 
+ */ + r = __bscf(traverseChild); + int c3 = __float_as_int(cnodes[r]); + float d3 = ((float*)&dist)[r]; + ++stackPtr; + kernel_assert(stackPtr < BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = c3; + traversalStack[stackPtr].dist = d3; + ++stackPtr; + kernel_assert(stackPtr < BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = c2; + traversalStack[stackPtr].dist = d2; + qbvh_stack_sort(&traversalStack[stackPtr], + &traversalStack[stackPtr - 1], + &traversalStack[stackPtr - 2], + &traversalStack[stackPtr - 3]); + } + + nodeAddr = traversalStack[stackPtr].addr; + nodeDist = traversalStack[stackPtr].dist; + --stackPtr; + } + + /* If node is leaf, fetch triangle list. */ + if(nodeAddr < 0) { + float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)); + +#ifdef __VISIBILITY_FLAG__ + if(UNLIKELY((nodeDist > isect->t) || + ((__float_as_uint(leaf.z) & visibility) == 0))) +#else + if(UNLIKELY((nodeDist > isect->t))) +#endif + { + /* Pop. */ + nodeAddr = traversalStack[stackPtr].addr; + nodeDist = traversalStack[stackPtr].dist; + --stackPtr; + continue; + } + + int primAddr = __float_as_int(leaf.x); + +#if BVH_FEATURE(BVH_INSTANCING) + if(primAddr >= 0) { +#endif + int primAddr2 = __float_as_int(leaf.y); + const uint type = __float_as_int(leaf.w); + + /* Pop. */ + nodeAddr = traversalStack[stackPtr].addr; + nodeDist = traversalStack[stackPtr].dist; + --stackPtr; + + /* Primitive intersection. */ + switch(type & PRIMITIVE_ALL) { + case PRIMITIVE_TRIANGLE: { + for(; primAddr < primAddr2; primAddr++) { + BVH_DEBUG_NEXT_STEP(); + kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); + if(triangle_intersect(kg, &isect_precalc, isect, P, visibility, object, primAddr)) { + tfar = ssef(isect->t); + /* Shadow ray early termination. 
*/ + if(visibility == PATH_RAY_SHADOW_OPAQUE) + return true; + } + } + break; + } +#if BVH_FEATURE(BVH_MOTION) + case PRIMITIVE_MOTION_TRIANGLE: { + for(; primAddr < primAddr2; primAddr++) { + BVH_DEBUG_NEXT_STEP(); + kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); + if(motion_triangle_intersect(kg, isect, P, dir, ray->time, visibility, object, primAddr)) { + tfar = ssef(isect->t); + /* Shadow ray early termination. */ + if(visibility == PATH_RAY_SHADOW_OPAQUE) + return true; + } + } + break; + } +#endif /* BVH_FEATURE(BVH_MOTION) */ +#if BVH_FEATURE(BVH_HAIR) + case PRIMITIVE_CURVE: + case PRIMITIVE_MOTION_CURVE: { + for(; primAddr < primAddr2; primAddr++) { + BVH_DEBUG_NEXT_STEP(); + kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); + bool hit; + if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) + hit = bvh_cardinal_curve_intersect(kg, isect, P, dir, visibility, object, primAddr, ray->time, type, lcg_state, difl, extmax); + else + hit = bvh_curve_intersect(kg, isect, P, dir, visibility, object, primAddr, ray->time, type, lcg_state, difl, extmax); + if(hit) { + tfar = ssef(isect->t); + /* Shadow ray early termination. */ + if(visibility == PATH_RAY_SHADOW_OPAQUE) + return true; + } + } + break; + } +#endif /* BVH_FEATURE(BVH_HAIR) */ + } + } +#if BVH_FEATURE(BVH_INSTANCING) + else { + /* Instance push. 
*/ + object = kernel_tex_fetch(__prim_object, -primAddr-1); + +# if BVH_FEATURE(BVH_MOTION) + qbvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect->t, &nodeDist, &ob_itfm); +# else + qbvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t, &nodeDist); +# endif + + if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; } + if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; } + if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; } + tfar = ssef(isect->t); +# if BVH_FEATURE(BVH_HAIR) + dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z)); +# endif + idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z)); +# ifdef __KERNEL_AVX2__ + P_idir = P*idir; + P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z); +# endif +# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__) + org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z)); +# endif + + triangle_intersect_precalc(dir, &isect_precalc); + + ++stackPtr; + kernel_assert(stackPtr < BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = ENTRYPOINT_SENTINEL; + traversalStack[stackPtr].dist = -FLT_MAX; + + nodeAddr = kernel_tex_fetch(__object_node, object); + + BVH_DEBUG_NEXT_INSTANCE(); + } + } +#endif /* FEATURE(BVH_INSTANCING) */ + } while(nodeAddr != ENTRYPOINT_SENTINEL); + +#if BVH_FEATURE(BVH_INSTANCING) + if(stackPtr >= 0) { + kernel_assert(object != OBJECT_NONE); + + /* Instance pop. 
*/ +# if BVH_FEATURE(BVH_MOTION) + bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm); +# else + bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect->t); +# endif + + if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; } + if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; } + if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; } + tfar = ssef(isect->t); +# if BVH_FEATURE(BVH_HAIR) + dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z)); +# endif + idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z)); +# ifdef __KERNEL_AVX2__ + P_idir = P*idir; + P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z); +# endif +# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__) + org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z)); +# endif + + triangle_intersect_precalc(dir, &isect_precalc); + + object = OBJECT_NONE; + nodeAddr = traversalStack[stackPtr].addr; + nodeDist = traversalStack[stackPtr].dist; + --stackPtr; + } +#endif /* FEATURE(BVH_INSTANCING) */ + } while(nodeAddr != ENTRYPOINT_SENTINEL); + + return (isect->prim != PRIM_NONE); +} + +#undef NODE_INTERSECT +#undef NODE_INTERSECT_ROBUST diff --git a/intern/cycles/kernel/bvh/qbvh_volume.h b/intern/cycles/kernel/bvh/qbvh_volume.h new file mode 100644 index 00000000000..da21ede9e12 --- /dev/null +++ b/intern/cycles/kernel/bvh/qbvh_volume.h @@ -0,0 +1,374 @@ +/* + * Adapted from code Copyright 2009-2010 NVIDIA Corporation, + * and code copyright 2009-2012 Intel Corporation + * + * Modifications Copyright 2011-2014, Blender Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* This is a template BVH traversal function for volumes, where + * various features can be enabled/disabled. This way we can compile optimized + * versions for each case without new features slowing things down. + * + * BVH_INSTANCING: object instancing + * BVH_MOTION: motion blur rendering + * + */ + +#if BVH_FEATURE(BVH_HAIR) +# define NODE_INTERSECT qbvh_node_intersect +#else +# define NODE_INTERSECT qbvh_aligned_node_intersect +#endif + +ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, + const Ray *ray, + Intersection *isect, + const uint visibility) +{ + /* TODO(sergey): + * - Test if pushing distance on the stack helps. + * - Likely and unlikely for if() statements. + * - Test restrict attribute for pointers. + */ + + /* Traversal stack in CUDA thread-local memory. */ + QBVHStackItem traversalStack[BVH_QSTACK_SIZE]; + traversalStack[0].addr = ENTRYPOINT_SENTINEL; + + /* Traversal variables in registers. */ + int stackPtr = 0; + int nodeAddr = kernel_data.bvh.root; + + /* Ray parameters in registers. 
*/ + float3 P = ray->P; + float3 dir = bvh_clamp_direction(ray->D); + float3 idir = bvh_inverse_direction(dir); + int object = OBJECT_NONE; + +#if BVH_FEATURE(BVH_MOTION) + Transform ob_itfm; +#endif + +#ifndef __KERNEL_SSE41__ + if(!isfinite(P.x)) { + return false; + } +#endif + + isect->t = ray->t; + isect->u = 0.0f; + isect->v = 0.0f; + isect->prim = PRIM_NONE; + isect->object = OBJECT_NONE; + + ssef tnear(0.0f), tfar(ray->t); +#if BVH_FEATURE(BVH_HAIR) + sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z)); +#endif + sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z)); + +#ifdef __KERNEL_AVX2__ + float3 P_idir = P*idir; + sse3f P_idir4(P_idir.x, P_idir.y, P_idir.z); +#endif +#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__) + sse3f org4(ssef(P.x), ssef(P.y), ssef(P.z)); +#endif + + /* Offsets to select the side that becomes the lower or upper bound. */ + int near_x, near_y, near_z; + int far_x, far_y, far_z; + + if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; } + if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; } + if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; } + + IsectPrecalc isect_precalc; + triangle_intersect_precalc(dir, &isect_precalc); + + /* Traversal loop. */ + do { + do { + /* Traverse internal nodes. */ + while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) { +#ifdef __VISIBILITY_FLAG__ + float4 inodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0); + if((__float_as_uint(inodes.x) & visibility) == 0) { + /* Pop. 
*/ + nodeAddr = traversalStack[stackPtr].addr; + --stackPtr; + continue; + } +#endif + + ssef dist; + int traverseChild = NODE_INTERSECT(kg, + tnear, + tfar, +#ifdef __KERNEL_AVX2__ + P_idir4, +#endif +#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__) + org4, +#endif +#if BVH_FEATURE(BVH_HAIR) + dir4, +#endif + idir4, + near_x, near_y, near_z, + far_x, far_y, far_z, + nodeAddr, + &dist); + + if(traverseChild != 0) { + float4 cnodes; +#if BVH_FEATURE(BVH_HAIR) + if(__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) { + cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+13); + } + else +#endif + { + cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+7); + } + + /* One child is hit, continue with that child. */ + int r = __bscf(traverseChild); + if(traverseChild == 0) { + nodeAddr = __float_as_int(cnodes[r]); + continue; + } + + /* Two children are hit, push far child, and continue with + * closer child. + */ + int c0 = __float_as_int(cnodes[r]); + float d0 = ((float*)&dist)[r]; + r = __bscf(traverseChild); + int c1 = __float_as_int(cnodes[r]); + float d1 = ((float*)&dist)[r]; + if(traverseChild == 0) { + if(d1 < d0) { + nodeAddr = c1; + ++stackPtr; + kernel_assert(stackPtr < BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = c0; + traversalStack[stackPtr].dist = d0; + continue; + } + else { + nodeAddr = c0; + ++stackPtr; + kernel_assert(stackPtr < BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = c1; + traversalStack[stackPtr].dist = d1; + continue; + } + } + + /* Here starts the slow path for 3 or 4 hit children. We push + * all nodes onto the stack to sort them there. + */ + ++stackPtr; + kernel_assert(stackPtr < BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = c1; + traversalStack[stackPtr].dist = d1; + ++stackPtr; + kernel_assert(stackPtr < BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = c0; + traversalStack[stackPtr].dist = d0; + + /* Three children are hit, push all onto stack and sort 3 + * stack items, continue with closest child. 
+ */ + r = __bscf(traverseChild); + int c2 = __float_as_int(cnodes[r]); + float d2 = ((float*)&dist)[r]; + if(traverseChild == 0) { + ++stackPtr; + kernel_assert(stackPtr < BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = c2; + traversalStack[stackPtr].dist = d2; + qbvh_stack_sort(&traversalStack[stackPtr], + &traversalStack[stackPtr - 1], + &traversalStack[stackPtr - 2]); + nodeAddr = traversalStack[stackPtr].addr; + --stackPtr; + continue; + } + + /* Four children are hit, push all onto stack and sort 4 + * stack items, continue with closest child. + */ + r = __bscf(traverseChild); + int c3 = __float_as_int(cnodes[r]); + float d3 = ((float*)&dist)[r]; + ++stackPtr; + kernel_assert(stackPtr < BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = c3; + traversalStack[stackPtr].dist = d3; + ++stackPtr; + kernel_assert(stackPtr < BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = c2; + traversalStack[stackPtr].dist = d2; + qbvh_stack_sort(&traversalStack[stackPtr], + &traversalStack[stackPtr - 1], + &traversalStack[stackPtr - 2], + &traversalStack[stackPtr - 3]); + } + + nodeAddr = traversalStack[stackPtr].addr; + --stackPtr; + } + + /* If node is leaf, fetch triangle list. */ + if(nodeAddr < 0) { + float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)); + int primAddr = __float_as_int(leaf.x); + +#if BVH_FEATURE(BVH_INSTANCING) + if(primAddr >= 0) { +#endif + int primAddr2 = __float_as_int(leaf.y); + const uint type = __float_as_int(leaf.w); + const uint p_type = type & PRIMITIVE_ALL; + + /* Pop. */ + nodeAddr = traversalStack[stackPtr].addr; + --stackPtr; + + /* Primitive intersection. */ + switch(p_type) { + case PRIMITIVE_TRIANGLE: { + for(; primAddr < primAddr2; primAddr++) { + kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); + /* Only primitives from volume object. */ + uint tri_object = (object == OBJECT_NONE)? 
kernel_tex_fetch(__prim_object, primAddr): object; + int object_flag = kernel_tex_fetch(__object_flag, tri_object); + if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { + continue; + } + /* Intersect ray against primitive. */ + triangle_intersect(kg, &isect_precalc, isect, P, visibility, object, primAddr); + } + break; + } +#if BVH_FEATURE(BVH_MOTION) + case PRIMITIVE_MOTION_TRIANGLE: { + for(; primAddr < primAddr2; primAddr++) { + kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); + /* Only primitives from volume object. */ + uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object; + int object_flag = kernel_tex_fetch(__object_flag, tri_object); + if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { + continue; + } + /* Intersect ray against primitive. */ + motion_triangle_intersect(kg, isect, P, dir, ray->time, visibility, object, primAddr); + } + break; + } +#endif + } + } +#if BVH_FEATURE(BVH_INSTANCING) + else { + /* Instance push. */ + object = kernel_tex_fetch(__prim_object, -primAddr-1); + int object_flag = kernel_tex_fetch(__object_flag, object); + + if(object_flag & SD_OBJECT_HAS_VOLUME) { + +# if BVH_FEATURE(BVH_MOTION) + bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm); +# else + bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t); +# endif + + if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; } + if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; } + if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; } + tfar = ssef(isect->t); +# if BVH_FEATURE(BVH_HAIR) + dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z)); +# endif + idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z)); +# ifdef __KERNEL_AVX2__ + P_idir = P*idir; + P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z); +# endif +# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__) + org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z)); +# endif + + 
triangle_intersect_precalc(dir, &isect_precalc); + + ++stackPtr; + kernel_assert(stackPtr < BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = ENTRYPOINT_SENTINEL; + + nodeAddr = kernel_tex_fetch(__object_node, object); + } + else { + /* Pop. */ + object = OBJECT_NONE; + nodeAddr = traversalStack[stackPtr].addr; + --stackPtr; + } + } + } +#endif /* FEATURE(BVH_INSTANCING) */ + } while(nodeAddr != ENTRYPOINT_SENTINEL); + +#if BVH_FEATURE(BVH_INSTANCING) + if(stackPtr >= 0) { + kernel_assert(object != OBJECT_NONE); + + /* Instance pop. */ +# if BVH_FEATURE(BVH_MOTION) + bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm); +# else + bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect->t); +# endif + + if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; } + if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; } + if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; } + tfar = ssef(isect->t); +# if BVH_FEATURE(BVH_HAIR) + dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z)); +# endif + idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z)); +# ifdef __KERNEL_AVX2__ + P_idir = P*idir; + P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z); +# endif +# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__) + org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z)); +# endif + + triangle_intersect_precalc(dir, &isect_precalc); + + object = OBJECT_NONE; + nodeAddr = traversalStack[stackPtr].addr; + --stackPtr; + } +#endif /* FEATURE(BVH_INSTANCING) */ + } while(nodeAddr != ENTRYPOINT_SENTINEL); + + return (isect->prim != PRIM_NONE); +} + +#undef NODE_INTERSECT diff --git a/intern/cycles/kernel/bvh/qbvh_volume_all.h b/intern/cycles/kernel/bvh/qbvh_volume_all.h new file mode 100644 index 00000000000..8a31775fae3 --- /dev/null +++ b/intern/cycles/kernel/bvh/qbvh_volume_all.h @@ -0,0 +1,446 @@ +/* + * Adapted from code Copyright 2009-2010 NVIDIA Corporation, + * and code copyright 2009-2012 
Intel Corporation + * + * Modifications Copyright 2011-2014, Blender Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* This is a template BVH traversal function for volumes, where + * various features can be enabled/disabled. This way we can compile optimized + * versions for each case without new features slowing things down. + * + * BVH_INSTANCING: object instancing + * BVH_MOTION: motion blur rendering + * + */ + +#if BVH_FEATURE(BVH_HAIR) +# define NODE_INTERSECT qbvh_node_intersect +#else +# define NODE_INTERSECT qbvh_aligned_node_intersect +#endif + +ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, + const Ray *ray, + Intersection *isect_array, + const uint max_hits, + const uint visibility) +{ + /* TODO(sergey): + * - Test if pushing distance on the stack helps. + * - Likely and unlikely for if() statements. + * - Test restrict attribute for pointers. + */ + + /* Traversal stack in CUDA thread-local memory. */ + QBVHStackItem traversalStack[BVH_QSTACK_SIZE]; + traversalStack[0].addr = ENTRYPOINT_SENTINEL; + + /* Traversal variables in registers. */ + int stackPtr = 0; + int nodeAddr = kernel_data.bvh.root; + + /* Ray parameters in registers. 
*/ + const float tmax = ray->t; + float3 P = ray->P; + float3 dir = bvh_clamp_direction(ray->D); + float3 idir = bvh_inverse_direction(dir); + int object = OBJECT_NONE; + float isect_t = tmax; + +#if BVH_FEATURE(BVH_MOTION) + Transform ob_itfm; +#endif + + uint num_hits = 0; + isect_array->t = tmax; + +#ifndef __KERNEL_SSE41__ + if(!isfinite(P.x)) { + return false; + } +#endif + +#if BVH_FEATURE(BVH_INSTANCING) + int num_hits_in_instance = 0; +#endif + + ssef tnear(0.0f), tfar(isect_t); +#if BVH_FEATURE(BVH_HAIR) + sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z)); +#endif + sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z)); + +#ifdef __KERNEL_AVX2__ + float3 P_idir = P*idir; + sse3f P_idir4(P_idir.x, P_idir.y, P_idir.z); +#endif +#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__) + sse3f org4(ssef(P.x), ssef(P.y), ssef(P.z)); +#endif + + /* Offsets to select the side that becomes the lower or upper bound. */ + int near_x, near_y, near_z; + int far_x, far_y, far_z; + + if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; } + if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; } + if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; } + + IsectPrecalc isect_precalc; + triangle_intersect_precalc(dir, &isect_precalc); + + /* Traversal loop. */ + do { + do { + /* Traverse internal nodes. */ + while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) { +#ifdef __VISIBILITY_FLAG__ + float4 inodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0); + if((__float_as_uint(inodes.x) & visibility) == 0) { + /* Pop. 
*/ + nodeAddr = traversalStack[stackPtr].addr; + --stackPtr; + continue; + } +#endif + + ssef dist; + int traverseChild = NODE_INTERSECT(kg, + tnear, + tfar, +#ifdef __KERNEL_AVX2__ + P_idir4, +#endif +#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__) + org4, +#endif +#if BVH_FEATURE(BVH_HAIR) + dir4, +#endif + idir4, + near_x, near_y, near_z, + far_x, far_y, far_z, + nodeAddr, + &dist); + + if(traverseChild != 0) { + float4 cnodes; +#if BVH_FEATURE(BVH_HAIR) + if(__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) { + cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+13); + } + else +#endif + { + cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+7); + } + + /* One child is hit, continue with that child. */ + int r = __bscf(traverseChild); + if(traverseChild == 0) { + nodeAddr = __float_as_int(cnodes[r]); + continue; + } + + /* Two children are hit, push far child, and continue with + * closer child. + */ + int c0 = __float_as_int(cnodes[r]); + float d0 = ((float*)&dist)[r]; + r = __bscf(traverseChild); + int c1 = __float_as_int(cnodes[r]); + float d1 = ((float*)&dist)[r]; + if(traverseChild == 0) { + if(d1 < d0) { + nodeAddr = c1; + ++stackPtr; + kernel_assert(stackPtr < BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = c0; + traversalStack[stackPtr].dist = d0; + continue; + } + else { + nodeAddr = c0; + ++stackPtr; + kernel_assert(stackPtr < BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = c1; + traversalStack[stackPtr].dist = d1; + continue; + } + } + + /* Here starts the slow path for 3 or 4 hit children. We push + * all nodes onto the stack to sort them there. + */ + ++stackPtr; + kernel_assert(stackPtr < BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = c1; + traversalStack[stackPtr].dist = d1; + ++stackPtr; + kernel_assert(stackPtr < BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = c0; + traversalStack[stackPtr].dist = d0; + + /* Three children are hit, push all onto stack and sort 3 + * stack items, continue with closest child. 
+ */ + r = __bscf(traverseChild); + int c2 = __float_as_int(cnodes[r]); + float d2 = ((float*)&dist)[r]; + if(traverseChild == 0) { + ++stackPtr; + kernel_assert(stackPtr < BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = c2; + traversalStack[stackPtr].dist = d2; + qbvh_stack_sort(&traversalStack[stackPtr], + &traversalStack[stackPtr - 1], + &traversalStack[stackPtr - 2]); + nodeAddr = traversalStack[stackPtr].addr; + --stackPtr; + continue; + } + + /* Four children are hit, push all onto stack and sort 4 + * stack items, continue with closest child. + */ + r = __bscf(traverseChild); + int c3 = __float_as_int(cnodes[r]); + float d3 = ((float*)&dist)[r]; + ++stackPtr; + kernel_assert(stackPtr < BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = c3; + traversalStack[stackPtr].dist = d3; + ++stackPtr; + kernel_assert(stackPtr < BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = c2; + traversalStack[stackPtr].dist = d2; + qbvh_stack_sort(&traversalStack[stackPtr], + &traversalStack[stackPtr - 1], + &traversalStack[stackPtr - 2], + &traversalStack[stackPtr - 3]); + } + + nodeAddr = traversalStack[stackPtr].addr; + --stackPtr; + } + + /* If node is leaf, fetch triangle list. */ + if(nodeAddr < 0) { + float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)); + int primAddr = __float_as_int(leaf.x); + +#if BVH_FEATURE(BVH_INSTANCING) + if(primAddr >= 0) { +#endif + int primAddr2 = __float_as_int(leaf.y); + const uint type = __float_as_int(leaf.w); + const uint p_type = type & PRIMITIVE_ALL; + bool hit; + + /* Pop. */ + nodeAddr = traversalStack[stackPtr].addr; + --stackPtr; + + /* Primitive intersection. */ + switch(p_type) { + case PRIMITIVE_TRIANGLE: { + for(; primAddr < primAddr2; primAddr++) { + kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); + /* Only primitives from volume object. */ + uint tri_object = (object == OBJECT_NONE)? 
kernel_tex_fetch(__prim_object, primAddr): object; + int object_flag = kernel_tex_fetch(__object_flag, tri_object); + if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { + continue; + } + /* Intersect ray against primitive. */ + hit = triangle_intersect(kg, &isect_precalc, isect_array, P, visibility, object, primAddr); + if(hit) { + /* Move on to next entry in intersections array. */ + isect_array++; + num_hits++; +#if BVH_FEATURE(BVH_INSTANCING) + num_hits_in_instance++; +#endif + isect_array->t = isect_t; + if(num_hits == max_hits) { +#if BVH_FEATURE(BVH_INSTANCING) +# if BVH_FEATURE(BVH_MOTION) + float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir)); +# else + Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM); + float t_fac = 1.0f / len(transform_direction(&itfm, dir)); +# endif + for(int i = 0; i < num_hits_in_instance; i++) { + (isect_array-i-1)->t *= t_fac; + } +#endif /* BVH_FEATURE(BVH_INSTANCING) */ + return num_hits; + } + } + } + break; + } +#if BVH_FEATURE(BVH_MOTION) + case PRIMITIVE_MOTION_TRIANGLE: { + for(; primAddr < primAddr2; primAddr++) { + kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); + /* Only primitives from volume object. */ + uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object; + int object_flag = kernel_tex_fetch(__object_flag, tri_object); + if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { + continue; + } + /* Intersect ray against primitive. */ + hit = motion_triangle_intersect(kg, isect_array, P, dir, ray->time, visibility, object, primAddr); + if(hit) { + /* Move on to next entry in intersections array. 
*/ + isect_array++; + num_hits++; +# if BVH_FEATURE(BVH_INSTANCING) + num_hits_in_instance++; +# endif + isect_array->t = isect_t; + if(num_hits == max_hits) { +# if BVH_FEATURE(BVH_INSTANCING) +# if BVH_FEATURE(BVH_MOTION) + float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir)); +# else + Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM); + float t_fac = 1.0f / len(transform_direction(&itfm, dir)); +# endif + for(int i = 0; i < num_hits_in_instance; i++) { + (isect_array-i-1)->t *= t_fac; + } +# endif /* BVH_FEATURE(BVH_INSTANCING) */ + return num_hits; + } + } + } + break; + } +#endif + } + } +#if BVH_FEATURE(BVH_INSTANCING) + else { + /* Instance push. */ + object = kernel_tex_fetch(__prim_object, -primAddr-1); + int object_flag = kernel_tex_fetch(__object_flag, object); + + if(object_flag & SD_OBJECT_HAS_VOLUME) { + +# if BVH_FEATURE(BVH_MOTION) + bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_itfm); +# else + bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect_t); +# endif + + if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; } + if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; } + if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; } + tfar = ssef(isect_t); + idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z)); +# if BVH_FEATURE(BVH_HAIR) + dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z)); +# endif +# ifdef __KERNEL_AVX2__ + P_idir = P*idir; + P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z); +# endif +# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__) + org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z)); +# endif + + triangle_intersect_precalc(dir, &isect_precalc); + num_hits_in_instance = 0; + isect_array->t = isect_t; + + ++stackPtr; + kernel_assert(stackPtr < BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = ENTRYPOINT_SENTINEL; + + nodeAddr = kernel_tex_fetch(__object_node, object); + } + else { + /* Pop. 
*/ + object = OBJECT_NONE; + nodeAddr = traversalStack[stackPtr].addr; + --stackPtr; + } + } + } +#endif /* FEATURE(BVH_INSTANCING) */ + } while(nodeAddr != ENTRYPOINT_SENTINEL); + +#if BVH_FEATURE(BVH_INSTANCING) + if(stackPtr >= 0) { + kernel_assert(object != OBJECT_NONE); + + /* Instance pop. */ + if(num_hits_in_instance) { + float t_fac; +# if BVH_FEATURE(BVH_MOTION) + bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm); +# else + bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac); +# endif + triangle_intersect_precalc(dir, &isect_precalc); + /* Scale isect->t to adjust for instancing. */ + for(int i = 0; i < num_hits_in_instance; i++) { + (isect_array-i-1)->t *= t_fac; + } + } + else { + float ignore_t = FLT_MAX; +# if BVH_FEATURE(BVH_MOTION) + bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &ignore_t, &ob_itfm); +# else + bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &ignore_t); +# endif + triangle_intersect_precalc(dir, &isect_precalc); + } + + if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; } + if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; } + if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; } + tfar = ssef(isect_t); +# if BVH_FEATURE(BVH_HAIR) + dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z)); +# endif + idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z)); +# ifdef __KERNEL_AVX2__ + P_idir = P*idir; + P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z); +# endif +# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__) + org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z)); +# endif + + triangle_intersect_precalc(dir, &isect_precalc); + isect_t = tmax; + isect_array->t = isect_t; + + object = OBJECT_NONE; + nodeAddr = traversalStack[stackPtr].addr; + --stackPtr; + } +#endif /* FEATURE(BVH_INSTANCING) */ + } while(nodeAddr != ENTRYPOINT_SENTINEL); + + return num_hits; +} + +#undef NODE_INTERSECT diff --git 
a/intern/cycles/kernel/geom/geom.h b/intern/cycles/kernel/geom/geom.h index 33e91d1ee44..d2c7edb11ea 100644 --- a/intern/cycles/kernel/geom/geom.h +++ b/intern/cycles/kernel/geom/geom.h @@ -15,14 +15,6 @@ * limitations under the License. */ -/* bottom-most stack entry, indicating the end of traversal */ -#define ENTRYPOINT_SENTINEL 0x76543210 - -/* 64 object BVH + 64 mesh BVH + 64 object node splitting */ -#define BVH_STACK_SIZE 192 -#define BVH_QSTACK_SIZE 384 -#define TRI_NODE_SIZE 3 - #include "geom_attribute.h" #include "geom_object.h" #include "geom_triangle.h" @@ -32,5 +24,4 @@ #include "geom_curve.h" #include "geom_volume.h" #include "geom_primitive.h" -#include "geom_bvh.h" diff --git a/intern/cycles/kernel/geom/geom_bvh.h b/intern/cycles/kernel/geom/geom_bvh.h deleted file mode 100644 index f8d563f0afa..00000000000 --- a/intern/cycles/kernel/geom/geom_bvh.h +++ /dev/null @@ -1,417 +0,0 @@ -/* - * Adapted from code Copyright 2009-2010 NVIDIA Corporation - * Modifications Copyright 2011, Blender Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* BVH - * - * Bounding volume hierarchy for ray tracing. We compile different variations - * of the same BVH traversal function for faster rendering when some types of - * primitives are not needed, using #includes to work around the lack of - * C++ templates in OpenCL. 
- * - * Originally based on "Understanding the Efficiency of Ray Traversal on GPUs", - * the code has been extended and modified to support more primitives and work - * with CPU/CUDA/OpenCL. */ - -CCL_NAMESPACE_BEGIN - -/* Don't inline intersect functions on GPU, this is faster */ -#ifdef __KERNEL_GPU__ -# define ccl_device_intersect ccl_device_noinline -#else -# define ccl_device_intersect ccl_device_inline -#endif - -/* BVH intersection function variations */ - -#define BVH_INSTANCING 1 -#define BVH_MOTION 2 -#define BVH_HAIR 4 -#define BVH_HAIR_MINIMUM_WIDTH 8 - -#define BVH_NAME_JOIN(x,y) x ## _ ## y -#define BVH_NAME_EVAL(x,y) BVH_NAME_JOIN(x,y) -#define BVH_FUNCTION_FULL_NAME(prefix) BVH_NAME_EVAL(prefix, BVH_FUNCTION_NAME) - -#define BVH_FEATURE(f) (((BVH_FUNCTION_FEATURES) & (f)) != 0) - -/* Debugging heleprs */ -#ifdef __KERNEL_DEBUG__ -# define BVH_DEBUG_INIT() \ - do { \ - isect->num_traversal_steps = 0; \ - isect->num_traversed_instances = 0; \ - } while(0) -# define BVH_DEBUG_NEXT_STEP() \ - do { \ - ++isect->num_traversal_steps; \ - } while(0) -# define BVH_DEBUG_NEXT_INSTANCE() \ - do { \ - ++isect->num_traversed_instances; \ - } while(0) -#else /* __KERNEL_DEBUG__ */ -# define BVH_DEBUG_INIT() -# define BVH_DEBUG_NEXT_STEP() -# define BVH_DEBUG_NEXT_INSTANCE() -#endif /* __KERNEL_DEBUG__ */ - - -/* Common QBVH functions. 
*/ -#ifdef __QBVH__ -# include "geom_qbvh.h" -#endif - -/* Regular BVH traversal */ - -#include "geom_bvh_nodes.h" - -#define BVH_FUNCTION_NAME bvh_intersect -#define BVH_FUNCTION_FEATURES 0 -#include "geom_bvh_traversal.h" - -#if defined(__INSTANCING__) -# define BVH_FUNCTION_NAME bvh_intersect_instancing -# define BVH_FUNCTION_FEATURES BVH_INSTANCING -# include "geom_bvh_traversal.h" -#endif - -#if defined(__HAIR__) -# define BVH_FUNCTION_NAME bvh_intersect_hair -# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_HAIR_MINIMUM_WIDTH -# include "geom_bvh_traversal.h" -#endif - -#if defined(__OBJECT_MOTION__) -# define BVH_FUNCTION_NAME bvh_intersect_motion -# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION -# include "geom_bvh_traversal.h" -#endif - -#if defined(__HAIR__) && defined(__OBJECT_MOTION__) -# define BVH_FUNCTION_NAME bvh_intersect_hair_motion -# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_HAIR_MINIMUM_WIDTH|BVH_MOTION -# include "geom_bvh_traversal.h" -#endif - -/* Subsurface scattering BVH traversal */ - -#if defined(__SUBSURFACE__) -# define BVH_FUNCTION_NAME bvh_intersect_subsurface -# define BVH_FUNCTION_FEATURES BVH_HAIR -# include "geom_bvh_subsurface.h" -#endif - -#if defined(__SUBSURFACE__) && defined(__OBJECT_MOTION__) -# define BVH_FUNCTION_NAME bvh_intersect_subsurface_motion -# define BVH_FUNCTION_FEATURES BVH_MOTION|BVH_HAIR -# include "geom_bvh_subsurface.h" -#endif - -/* Volume BVH traversal */ - -#if defined(__VOLUME__) -# define BVH_FUNCTION_NAME bvh_intersect_volume -# define BVH_FUNCTION_FEATURES BVH_HAIR -# include "geom_bvh_volume.h" -#endif - -#if defined(__VOLUME__) && defined(__INSTANCING__) -# define BVH_FUNCTION_NAME bvh_intersect_volume_instancing -# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR -# include "geom_bvh_volume.h" -#endif - -#if defined(__VOLUME__) && defined(__OBJECT_MOTION__) -# define BVH_FUNCTION_NAME bvh_intersect_volume_motion -# define BVH_FUNCTION_FEATURES 
BVH_INSTANCING|BVH_MOTION|BVH_HAIR -# include "geom_bvh_volume.h" -#endif - -/* Record all intersections - Shadow BVH traversal */ - -#if defined(__SHADOW_RECORD_ALL__) -# define BVH_FUNCTION_NAME bvh_intersect_shadow_all -# define BVH_FUNCTION_FEATURES 0 -# include "geom_bvh_shadow.h" -#endif - -#if defined(__SHADOW_RECORD_ALL__) && defined(__INSTANCING__) -# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_instancing -# define BVH_FUNCTION_FEATURES BVH_INSTANCING -# include "geom_bvh_shadow.h" -#endif - -#if defined(__SHADOW_RECORD_ALL__) && defined(__HAIR__) -# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_hair -# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR -# include "geom_bvh_shadow.h" -#endif - -#if defined(__SHADOW_RECORD_ALL__) && defined(__OBJECT_MOTION__) -# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_motion -# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION -# include "geom_bvh_shadow.h" -#endif - -#if defined(__SHADOW_RECORD_ALL__) && defined(__HAIR__) && defined(__OBJECT_MOTION__) -# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_hair_motion -# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_MOTION -# include "geom_bvh_shadow.h" -#endif - -/* Record all intersections - Volume BVH traversal */ - -#if defined(__VOLUME_RECORD_ALL__) -# define BVH_FUNCTION_NAME bvh_intersect_volume_all -# define BVH_FUNCTION_FEATURES BVH_HAIR -# include "geom_bvh_volume_all.h" -#endif - -#if defined(__VOLUME_RECORD_ALL__) && defined(__INSTANCING__) -# define BVH_FUNCTION_NAME bvh_intersect_volume_all_instancing -# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR -# include "geom_bvh_volume_all.h" -#endif - -#if defined(__VOLUME_RECORD_ALL__) && defined(__OBJECT_MOTION__) -# define BVH_FUNCTION_NAME bvh_intersect_volume_all_motion -# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION|BVH_HAIR -# include "geom_bvh_volume_all.h" -#endif - -#undef BVH_FEATURE -#undef BVH_NAME_JOIN -#undef BVH_NAME_EVAL -#undef 
BVH_FUNCTION_FULL_NAME - -ccl_device_intersect bool scene_intersect(KernelGlobals *kg, - const Ray *ray, - const uint visibility, - Intersection *isect, - uint *lcg_state, - float difl, - float extmax) -{ -#ifdef __OBJECT_MOTION__ - if(kernel_data.bvh.have_motion) { -# ifdef __HAIR__ - if(kernel_data.bvh.have_curves) - return bvh_intersect_hair_motion(kg, ray, isect, visibility, lcg_state, difl, extmax); -# endif /* __HAIR__ */ - - return bvh_intersect_motion(kg, ray, isect, visibility); - } -#endif /* __OBJECT_MOTION__ */ - -#ifdef __HAIR__ - if(kernel_data.bvh.have_curves) - return bvh_intersect_hair(kg, ray, isect, visibility, lcg_state, difl, extmax); -#endif /* __HAIR__ */ - -#ifdef __KERNEL_CPU__ - -# ifdef __INSTANCING__ - if(kernel_data.bvh.have_instancing) - return bvh_intersect_instancing(kg, ray, isect, visibility); -# endif /* __INSTANCING__ */ - - return bvh_intersect(kg, ray, isect, visibility); -#else /* __KERNEL_CPU__ */ - -# ifdef __INSTANCING__ - return bvh_intersect_instancing(kg, ray, isect, visibility); -# else - return bvh_intersect(kg, ray, isect, visibility); -# endif /* __INSTANCING__ */ - -#endif /* __KERNEL_CPU__ */ -} - -#ifdef __SUBSURFACE__ -ccl_device_intersect void scene_intersect_subsurface(KernelGlobals *kg, - const Ray *ray, - SubsurfaceIntersection *ss_isect, - int subsurface_object, - uint *lcg_state, - int max_hits) -{ -#ifdef __OBJECT_MOTION__ - if(kernel_data.bvh.have_motion) { - return bvh_intersect_subsurface_motion(kg, - ray, - ss_isect, - subsurface_object, - lcg_state, - max_hits); - } -#endif /* __OBJECT_MOTION__ */ - return bvh_intersect_subsurface(kg, - ray, - ss_isect, - subsurface_object, - lcg_state, - max_hits); -} -#endif - -#ifdef __SHADOW_RECORD_ALL__ -ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals *kg, const Ray *ray, Intersection *isect, uint max_hits, uint *num_hits) -{ -# ifdef __OBJECT_MOTION__ - if(kernel_data.bvh.have_motion) { -# ifdef __HAIR__ - if(kernel_data.bvh.have_curves) - 
return bvh_intersect_shadow_all_hair_motion(kg, ray, isect, max_hits, num_hits); -# endif /* __HAIR__ */ - - return bvh_intersect_shadow_all_motion(kg, ray, isect, max_hits, num_hits); - } -# endif /* __OBJECT_MOTION__ */ - -# ifdef __HAIR__ - if(kernel_data.bvh.have_curves) - return bvh_intersect_shadow_all_hair(kg, ray, isect, max_hits, num_hits); -# endif /* __HAIR__ */ - -# ifdef __INSTANCING__ - if(kernel_data.bvh.have_instancing) - return bvh_intersect_shadow_all_instancing(kg, ray, isect, max_hits, num_hits); -# endif /* __INSTANCING__ */ - - return bvh_intersect_shadow_all(kg, ray, isect, max_hits, num_hits); -} -#endif /* __SHADOW_RECORD_ALL__ */ - -#ifdef __VOLUME__ -ccl_device_intersect bool scene_intersect_volume(KernelGlobals *kg, - const Ray *ray, - Intersection *isect, - const uint visibility) -{ -# ifdef __OBJECT_MOTION__ - if(kernel_data.bvh.have_motion) { - return bvh_intersect_volume_motion(kg, ray, isect, visibility); - } -# endif /* __OBJECT_MOTION__ */ -# ifdef __KERNEL_CPU__ -# ifdef __INSTANCING__ - if(kernel_data.bvh.have_instancing) - return bvh_intersect_volume_instancing(kg, ray, isect, visibility); -# endif /* __INSTANCING__ */ - return bvh_intersect_volume(kg, ray, isect, visibility); -# else /* __KERNEL_CPU__ */ -# ifdef __INSTANCING__ - return bvh_intersect_volume_instancing(kg, ray, isect, visibility); -# else - return bvh_intersect_volume(kg, ray, isect, visibility); -# endif /* __INSTANCING__ */ -# endif /* __KERNEL_CPU__ */ -} -#endif /* __VOLUME__ */ - -#ifdef __VOLUME_RECORD_ALL__ -ccl_device_intersect uint scene_intersect_volume_all(KernelGlobals *kg, - const Ray *ray, - Intersection *isect, - const uint max_hits, - const uint visibility) -{ -# ifdef __OBJECT_MOTION__ - if(kernel_data.bvh.have_motion) { - return bvh_intersect_volume_all_motion(kg, ray, isect, max_hits, visibility); - } -# endif /* __OBJECT_MOTION__ */ -# ifdef __INSTANCING__ - if(kernel_data.bvh.have_instancing) - return bvh_intersect_volume_all_instancing(kg, 
ray, isect, max_hits, visibility); -# endif /* __INSTANCING__ */ - return bvh_intersect_volume_all(kg, ray, isect, max_hits, visibility); -} -#endif /* __VOLUME_RECORD_ALL__ */ - - -/* Ray offset to avoid self intersection. - * - * This function should be used to compute a modified ray start position for - * rays leaving from a surface. */ - -ccl_device_inline float3 ray_offset(float3 P, float3 Ng) -{ -#ifdef __INTERSECTION_REFINE__ - const float epsilon_f = 1e-5f; - /* ideally this should match epsilon_f, but instancing and motion blur - * precision makes it problematic */ - const float epsilon_test = 1.0f; - const int epsilon_i = 32; - - float3 res; - - /* x component */ - if(fabsf(P.x) < epsilon_test) { - res.x = P.x + Ng.x*epsilon_f; - } - else { - uint ix = __float_as_uint(P.x); - ix += ((ix ^ __float_as_uint(Ng.x)) >> 31)? -epsilon_i: epsilon_i; - res.x = __uint_as_float(ix); - } - - /* y component */ - if(fabsf(P.y) < epsilon_test) { - res.y = P.y + Ng.y*epsilon_f; - } - else { - uint iy = __float_as_uint(P.y); - iy += ((iy ^ __float_as_uint(Ng.y)) >> 31)? -epsilon_i: epsilon_i; - res.y = __uint_as_float(iy); - } - - /* z component */ - if(fabsf(P.z) < epsilon_test) { - res.z = P.z + Ng.z*epsilon_f; - } - else { - uint iz = __float_as_uint(P.z); - iz += ((iz ^ __float_as_uint(Ng.z)) >> 31)? -epsilon_i: epsilon_i; - res.z = __uint_as_float(iz); - } - - return res; -#else - const float epsilon_f = 1e-4f; - return P + epsilon_f*Ng; -#endif -} - -#if defined(__SHADOW_RECORD_ALL__) || defined (__VOLUME_RECORD_ALL__) -/* ToDo: Move to another file? 
*/ -ccl_device int intersections_compare(const void *a, const void *b) -{ - const Intersection *isect_a = (const Intersection*)a; - const Intersection *isect_b = (const Intersection*)b; - - if(isect_a->t < isect_b->t) - return -1; - else if(isect_a->t > isect_b->t) - return 1; - else - return 0; -} -#endif - -CCL_NAMESPACE_END - diff --git a/intern/cycles/kernel/geom/geom_bvh_nodes.h b/intern/cycles/kernel/geom/geom_bvh_nodes.h deleted file mode 100644 index 5b0d8785d0e..00000000000 --- a/intern/cycles/kernel/geom/geom_bvh_nodes.h +++ /dev/null @@ -1,656 +0,0 @@ -/* - * Copyright 2011-2016, Blender Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// TODO(sergey): Look into avoid use of full Transform and use 3x3 matrix and -// 3-vector which might be faster. 
-ccl_device_inline Transform bvh_unaligned_node_fetch_space(KernelGlobals *kg, - int nodeAddr, - int child) -{ - Transform space; - const int child_addr = nodeAddr + child * 3; - space.x = kernel_tex_fetch(__bvh_nodes, child_addr+1); - space.y = kernel_tex_fetch(__bvh_nodes, child_addr+2); - space.z = kernel_tex_fetch(__bvh_nodes, child_addr+3); - space.w = make_float4(0.0f, 0.0f, 0.0f, 1.0f); - return space; -} - -#if !defined(__KERNEL_SSE2__) -ccl_device_inline int bvh_aligned_node_intersect(KernelGlobals *kg, - const float3 P, - const float3 idir, - const float t, - const int nodeAddr, - const uint visibility, - float dist[2]) -{ - - /* fetch node data */ - float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0); - float4 node0 = kernel_tex_fetch(__bvh_nodes, nodeAddr+1); - float4 node1 = kernel_tex_fetch(__bvh_nodes, nodeAddr+2); - float4 node2 = kernel_tex_fetch(__bvh_nodes, nodeAddr+3); - - /* intersect ray against child nodes */ - float c0lox = (node0.x - P.x) * idir.x; - float c0hix = (node0.z - P.x) * idir.x; - float c0loy = (node1.x - P.y) * idir.y; - float c0hiy = (node1.z - P.y) * idir.y; - float c0loz = (node2.x - P.z) * idir.z; - float c0hiz = (node2.z - P.z) * idir.z; - float c0min = max4(min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz), 0.0f); - float c0max = min4(max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz), t); - - float c1lox = (node0.y - P.x) * idir.x; - float c1hix = (node0.w - P.x) * idir.x; - float c1loy = (node1.y - P.y) * idir.y; - float c1hiy = (node1.w - P.y) * idir.y; - float c1loz = (node2.y - P.z) * idir.z; - float c1hiz = (node2.w - P.z) * idir.z; - float c1min = max4(min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz), 0.0f); - float c1max = min4(max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz), t); - - dist[0] = c0min; - dist[1] = c1min; - -#ifdef __VISIBILITY_FLAG__ - /* this visibility test gives a 5% performance hit, how to solve? 
*/ - return (((c0max >= c0min) && (__float_as_uint(cnodes.x) & visibility))? 1: 0) | - (((c1max >= c1min) && (__float_as_uint(cnodes.y) & visibility))? 2: 0); -#else - return ((c0max >= c0min)? 1: 0) | - ((c1max >= c1min)? 2: 0); -#endif -} - -ccl_device_inline int bvh_aligned_node_intersect_robust(KernelGlobals *kg, - const float3 P, - const float3 idir, - const float t, - const float difl, - const float extmax, - const int nodeAddr, - const uint visibility, - float dist[2]) -{ - - /* fetch node data */ - float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0); - float4 node0 = kernel_tex_fetch(__bvh_nodes, nodeAddr+1); - float4 node1 = kernel_tex_fetch(__bvh_nodes, nodeAddr+2); - float4 node2 = kernel_tex_fetch(__bvh_nodes, nodeAddr+3); - - /* intersect ray against child nodes */ - float c0lox = (node0.x - P.x) * idir.x; - float c0hix = (node0.z - P.x) * idir.x; - float c0loy = (node1.x - P.y) * idir.y; - float c0hiy = (node1.z - P.y) * idir.y; - float c0loz = (node2.x - P.z) * idir.z; - float c0hiz = (node2.z - P.z) * idir.z; - float c0min = max4(min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz), 0.0f); - float c0max = min4(max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz), t); - - float c1lox = (node0.y - P.x) * idir.x; - float c1hix = (node0.w - P.x) * idir.x; - float c1loy = (node1.y - P.y) * idir.y; - float c1hiy = (node1.w - P.y) * idir.y; - float c1loz = (node2.y - P.z) * idir.z; - float c1hiz = (node2.w - P.z) * idir.z; - float c1min = max4(min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz), 0.0f); - float c1max = min4(max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz), t); - - if(difl != 0.0f) { - float hdiff = 1.0f + difl; - float ldiff = 1.0f - difl; - if(__float_as_int(cnodes.z) & PATH_RAY_CURVE) { - c0min = max(ldiff * c0min, c0min - extmax); - c0max = min(hdiff * c0max, c0max + extmax); - } - if(__float_as_int(cnodes.w) & PATH_RAY_CURVE) { - c1min = max(ldiff * c1min, c1min - extmax); - c1max = min(hdiff * c1max, c1max + 
extmax); - } - } - - dist[0] = c0min; - dist[1] = c1min; - -#ifdef __VISIBILITY_FLAG__ - /* this visibility test gives a 5% performance hit, how to solve? */ - return (((c0max >= c0min) && (__float_as_uint(cnodes.x) & visibility))? 1: 0) | - (((c1max >= c1min) && (__float_as_uint(cnodes.y) & visibility))? 2: 0); -#else - return ((c0max >= c0min)? 1: 0) | - ((c1max >= c1min)? 2: 0); -#endif -} - -ccl_device_inline bool bvh_unaligned_node_intersect_child( - KernelGlobals *kg, - const float3 P, - const float3 dir, - const float t, - int nodeAddr, - int child, - float dist[2]) -{ - Transform space = bvh_unaligned_node_fetch_space(kg, nodeAddr, child); - float3 aligned_dir = transform_direction(&space, dir); - float3 aligned_P = transform_point(&space, P); - float3 nrdir = -bvh_inverse_direction(aligned_dir); - float3 tLowerXYZ = aligned_P * nrdir; - float3 tUpperXYZ = tLowerXYZ - nrdir; - const float tNearX = min(tLowerXYZ.x, tUpperXYZ.x); - const float tNearY = min(tLowerXYZ.y, tUpperXYZ.y); - const float tNearZ = min(tLowerXYZ.z, tUpperXYZ.z); - const float tFarX = max(tLowerXYZ.x, tUpperXYZ.x); - const float tFarY = max(tLowerXYZ.y, tUpperXYZ.y); - const float tFarZ = max(tLowerXYZ.z, tUpperXYZ.z); - const float tNear = max4(0.0f, tNearX, tNearY, tNearZ); - const float tFar = min4(t, tFarX, tFarY, tFarZ); - *dist = tNear; - return tNear <= tFar; -} - -ccl_device_inline bool bvh_unaligned_node_intersect_child_robust( - KernelGlobals *kg, - const float3 P, - const float3 dir, - const float t, - const float difl, - int nodeAddr, - int child, - float dist[2]) -{ - Transform space = bvh_unaligned_node_fetch_space(kg, nodeAddr, child); - float3 aligned_dir = transform_direction(&space, dir); - float3 aligned_P = transform_point(&space, P); - float3 nrdir = -bvh_inverse_direction(aligned_dir); - float3 tLowerXYZ = aligned_P * nrdir; - float3 tUpperXYZ = tLowerXYZ - nrdir; - const float tNearX = min(tLowerXYZ.x, tUpperXYZ.x); - const float tNearY = min(tLowerXYZ.y, 
tUpperXYZ.y); - const float tNearZ = min(tLowerXYZ.z, tUpperXYZ.z); - const float tFarX = max(tLowerXYZ.x, tUpperXYZ.x); - const float tFarY = max(tLowerXYZ.y, tUpperXYZ.y); - const float tFarZ = max(tLowerXYZ.z, tUpperXYZ.z); - const float tNear = max4(0.0f, tNearX, tNearY, tNearZ); - const float tFar = min4(t, tFarX, tFarY, tFarZ); - *dist = tNear; - if(difl != 0.0f) { - /* TODO(sergey): Same as for QBVH, needs a proper use. */ - const float round_down = 1.0f - difl; - const float round_up = 1.0f + difl; - return round_down*tNear <= round_up*tFar; - } - else { - return tNear <= tFar; - } -} - -ccl_device_inline int bvh_unaligned_node_intersect(KernelGlobals *kg, - const float3 P, - const float3 dir, - const float3 idir, - const float t, - const int nodeAddr, - const uint visibility, - float dist[2]) -{ - int mask = 0; - float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0); - if(bvh_unaligned_node_intersect_child(kg, P, dir, t, nodeAddr, 0, &dist[0])) { -#ifdef __VISIBILITY_FLAG__ - if((__float_as_uint(cnodes.x) & visibility)) -#endif - { - mask |= 1; - } - } - if(bvh_unaligned_node_intersect_child(kg, P, dir, t, nodeAddr, 1, &dist[1])) { -#ifdef __VISIBILITY_FLAG__ - if((__float_as_uint(cnodes.y) & visibility)) -#endif - { - mask |= 2; - } - } - return mask; -} - -ccl_device_inline int bvh_unaligned_node_intersect_robust(KernelGlobals *kg, - const float3 P, - const float3 dir, - const float3 idir, - const float t, - const float difl, - const float extmax, - const int nodeAddr, - const uint visibility, - float dist[2]) -{ - int mask = 0; - float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0); - if(bvh_unaligned_node_intersect_child_robust(kg, P, dir, t, difl, nodeAddr, 0, &dist[0])) { -#ifdef __VISIBILITY_FLAG__ - if((__float_as_uint(cnodes.x) & visibility)) -#endif - { - mask |= 1; - } - } - if(bvh_unaligned_node_intersect_child_robust(kg, P, dir, t, difl, nodeAddr, 1, &dist[1])) { -#ifdef __VISIBILITY_FLAG__ - if((__float_as_uint(cnodes.y) & 
visibility)) -#endif - { - mask |= 2; - } - } - return mask; -} - -ccl_device_inline int bvh_node_intersect(KernelGlobals *kg, - const float3 P, - const float3 dir, - const float3 idir, - const float t, - const int nodeAddr, - const uint visibility, - float dist[2]) -{ - float4 node = kernel_tex_fetch(__bvh_nodes, nodeAddr); - if(__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) { - return bvh_unaligned_node_intersect(kg, - P, - dir, - idir, - t, - nodeAddr, - visibility, - dist); - } - else { - return bvh_aligned_node_intersect(kg, - P, - idir, - t, - nodeAddr, - visibility, - dist); - } -} - -ccl_device_inline int bvh_node_intersect_robust(KernelGlobals *kg, - const float3 P, - const float3 dir, - const float3 idir, - const float t, - const float difl, - const float extmax, - const int nodeAddr, - const uint visibility, - float dist[2]) -{ - float4 node = kernel_tex_fetch(__bvh_nodes, nodeAddr); - if(__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) { - return bvh_unaligned_node_intersect_robust(kg, - P, - dir, - idir, - t, - difl, - extmax, - nodeAddr, - visibility, - dist); - } - else { - return bvh_aligned_node_intersect_robust(kg, - P, - idir, - t, - difl, - extmax, - nodeAddr, - visibility, - dist); - } -} -#else /* !defined(__KERNEL_SSE2__) */ - -int ccl_device_inline bvh_aligned_node_intersect( - KernelGlobals *kg, - const float3& P, - const float3& dir, - const ssef& tsplat, - const ssef Psplat[3], - const ssef idirsplat[3], - const shuffle_swap_t shufflexyz[3], - const int nodeAddr, - const uint visibility, - float dist[2]) -{ - /* Intersect two child bounding boxes, SSE3 version adapted from Embree */ - const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000)); - - /* fetch node data */ - const ssef *bvh_nodes = (ssef*)kg->__bvh_nodes.data + nodeAddr; - - /* intersect ray against child nodes */ - const ssef tminmaxx = (shuffle_swap(bvh_nodes[1], shufflexyz[0]) - Psplat[0]) * idirsplat[0]; - const ssef tminmaxy = (shuffle_swap(bvh_nodes[2], 
shufflexyz[1]) - Psplat[1]) * idirsplat[1]; - const ssef tminmaxz = (shuffle_swap(bvh_nodes[3], shufflexyz[2]) - Psplat[2]) * idirsplat[2]; - - /* calculate { c0min, c1min, -c0max, -c1max} */ - ssef minmax = max(max(tminmaxx, tminmaxy), max(tminmaxz, tsplat)); - const ssef tminmax = minmax ^ pn; - const sseb lrhit = tminmax <= shuffle<2, 3, 0, 1>(tminmax); - - dist[0] = tminmax[0]; - dist[1] = tminmax[1]; - - int mask = movemask(lrhit); - -# ifdef __VISIBILITY_FLAG__ - /* this visibility test gives a 5% performance hit, how to solve? */ - float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0); - int cmask = (((mask & 1) && (__float_as_uint(cnodes.x) & visibility))? 1: 0) | - (((mask & 2) && (__float_as_uint(cnodes.y) & visibility))? 2: 0); - return cmask; -# else - return mask & 3; -# endif -} - -int ccl_device_inline bvh_aligned_node_intersect_robust( - KernelGlobals *kg, - const float3& P, - const float3& dir, - const ssef& tsplat, - const ssef Psplat[3], - const ssef idirsplat[3], - const shuffle_swap_t shufflexyz[3], - const float difl, - const float extmax, - const int nodeAddr, - const uint visibility, - float dist[2]) -{ - /* Intersect two child bounding boxes, SSE3 version adapted from Embree */ - const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000)); - - /* fetch node data */ - const ssef *bvh_nodes = (ssef*)kg->__bvh_nodes.data + nodeAddr; - - /* intersect ray against child nodes */ - const ssef tminmaxx = (shuffle_swap(bvh_nodes[1], shufflexyz[0]) - Psplat[0]) * idirsplat[0]; - const ssef tminmaxy = (shuffle_swap(bvh_nodes[2], shufflexyz[1]) - Psplat[1]) * idirsplat[1]; - const ssef tminmaxz = (shuffle_swap(bvh_nodes[3], shufflexyz[2]) - Psplat[2]) * idirsplat[2]; - - /* calculate { c0min, c1min, -c0max, -c1max} */ - ssef minmax = max(max(tminmaxx, tminmaxy), max(tminmaxz, tsplat)); - const ssef tminmax = minmax ^ pn; - - if(difl != 0.0f) { - float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0); - float4 *tminmaxview = (float4*)&tminmax; - 
float& c0min = tminmaxview->x, &c1min = tminmaxview->y; - float& c0max = tminmaxview->z, &c1max = tminmaxview->w; - float hdiff = 1.0f + difl; - float ldiff = 1.0f - difl; - if(__float_as_int(cnodes.x) & PATH_RAY_CURVE) { - c0min = max(ldiff * c0min, c0min - extmax); - c0max = min(hdiff * c0max, c0max + extmax); - } - if(__float_as_int(cnodes.y) & PATH_RAY_CURVE) { - c1min = max(ldiff * c1min, c1min - extmax); - c1max = min(hdiff * c1max, c1max + extmax); - } - } - - const sseb lrhit = tminmax <= shuffle<2, 3, 0, 1>(tminmax); - - dist[0] = tminmax[0]; - dist[1] = tminmax[1]; - - int mask = movemask(lrhit); - -# ifdef __VISIBILITY_FLAG__ - /* this visibility test gives a 5% performance hit, how to solve? */ - float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0); - int cmask = (((mask & 1) && (__float_as_uint(cnodes.x) & visibility))? 1: 0) | - (((mask & 2) && (__float_as_uint(cnodes.y) & visibility))? 2: 0); - return cmask; -# else - return mask & 3; -# endif -} - -int ccl_device_inline bvh_unaligned_node_intersect(KernelGlobals *kg, - const float3 P, - const float3 dir, - const ssef& tnear, - const ssef& tfar, - const int nodeAddr, - const uint visibility, - float dist[2]) -{ - Transform space0 = bvh_unaligned_node_fetch_space(kg, nodeAddr, 0); - Transform space1 = bvh_unaligned_node_fetch_space(kg, nodeAddr, 1); - - float3 aligned_dir0 = transform_direction(&space0, dir), - aligned_dir1 = transform_direction(&space1, dir);; - float3 aligned_P0 = transform_point(&space0, P), - aligned_P1 = transform_point(&space1, P); - float3 nrdir0 = -bvh_inverse_direction(aligned_dir0), - nrdir1 = -bvh_inverse_direction(aligned_dir1); - - ssef tLowerX = ssef(aligned_P0.x * nrdir0.x, - aligned_P1.x * nrdir1.x, - 0.0f, 0.0f), - tLowerY = ssef(aligned_P0.y * nrdir0.y, - aligned_P1.y * nrdir1.y, - 0.0f, - 0.0f), - tLowerZ = ssef(aligned_P0.z * nrdir0.z, - aligned_P1.z * nrdir1.z, - 0.0f, - 0.0f); - - ssef tUpperX = tLowerX - ssef(nrdir0.x, nrdir1.x, 0.0f, 0.0f), - tUpperY = 
tLowerY - ssef(nrdir0.y, nrdir1.y, 0.0f, 0.0f), - tUpperZ = tLowerZ - ssef(nrdir0.z, nrdir1.z, 0.0f, 0.0f); - - ssef tnear_x = min(tLowerX, tUpperX); - ssef tnear_y = min(tLowerY, tUpperY); - ssef tnear_z = min(tLowerZ, tUpperZ); - ssef tfar_x = max(tLowerX, tUpperX); - ssef tfar_y = max(tLowerY, tUpperY); - ssef tfar_z = max(tLowerZ, tUpperZ); - - const ssef tNear = max4(tnear_x, tnear_y, tnear_z, tnear); - const ssef tFar = min4(tfar_x, tfar_y, tfar_z, tfar); - sseb vmask = tNear <= tFar; - dist[0] = tNear.f[0]; - dist[1] = tNear.f[1]; - - int mask = (int)movemask(vmask); - -# ifdef __VISIBILITY_FLAG__ - /* this visibility test gives a 5% performance hit, how to solve? */ - float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0); - int cmask = (((mask & 1) && (__float_as_uint(cnodes.x) & visibility))? 1: 0) | - (((mask & 2) && (__float_as_uint(cnodes.y) & visibility))? 2: 0); - return cmask; -# else - return mask & 3; -# endif -} - -int ccl_device_inline bvh_unaligned_node_intersect_robust(KernelGlobals *kg, - const float3 P, - const float3 dir, - const ssef& tnear, - const ssef& tfar, - const float difl, - const int nodeAddr, - const uint visibility, - float dist[2]) -{ - Transform space0 = bvh_unaligned_node_fetch_space(kg, nodeAddr, 0); - Transform space1 = bvh_unaligned_node_fetch_space(kg, nodeAddr, 1); - - float3 aligned_dir0 = transform_direction(&space0, dir), - aligned_dir1 = transform_direction(&space1, dir);; - float3 aligned_P0 = transform_point(&space0, P), - aligned_P1 = transform_point(&space1, P); - float3 nrdir0 = -bvh_inverse_direction(aligned_dir0), - nrdir1 = -bvh_inverse_direction(aligned_dir1); - - ssef tLowerX = ssef(aligned_P0.x * nrdir0.x, - aligned_P1.x * nrdir1.x, - 0.0f, 0.0f), - tLowerY = ssef(aligned_P0.y * nrdir0.y, - aligned_P1.y * nrdir1.y, - 0.0f, - 0.0f), - tLowerZ = ssef(aligned_P0.z * nrdir0.z, - aligned_P1.z * nrdir1.z, - 0.0f, - 0.0f); - - ssef tUpperX = tLowerX - ssef(nrdir0.x, nrdir1.x, 0.0f, 0.0f), - tUpperY = tLowerY - 
ssef(nrdir0.y, nrdir1.y, 0.0f, 0.0f), - tUpperZ = tLowerZ - ssef(nrdir0.z, nrdir1.z, 0.0f, 0.0f); - - ssef tnear_x = min(tLowerX, tUpperX); - ssef tnear_y = min(tLowerY, tUpperY); - ssef tnear_z = min(tLowerZ, tUpperZ); - ssef tfar_x = max(tLowerX, tUpperX); - ssef tfar_y = max(tLowerY, tUpperY); - ssef tfar_z = max(tLowerZ, tUpperZ); - - const ssef tNear = max4(tnear_x, tnear_y, tnear_z, tnear); - const ssef tFar = min4(tfar_x, tfar_y, tfar_z, tfar); - sseb vmask; - if(difl != 0.0f) { - const float round_down = 1.0f - difl; - const float round_up = 1.0f + difl; - vmask = round_down*tNear <= round_up*tFar; - } - else { - vmask = tNear <= tFar; - } - - dist[0] = tNear.f[0]; - dist[1] = tNear.f[1]; - - int mask = (int)movemask(vmask); - -# ifdef __VISIBILITY_FLAG__ - /* this visibility test gives a 5% performance hit, how to solve? */ - float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0); - int cmask = (((mask & 1) && (__float_as_uint(cnodes.x) & visibility))? 1: 0) | - (((mask & 2) && (__float_as_uint(cnodes.y) & visibility))? 
2: 0); - return cmask; -# else - return mask & 3; -# endif -} - -ccl_device_inline int bvh_node_intersect(KernelGlobals *kg, - const float3& P, - const float3& dir, - const ssef& tnear, - const ssef& tfar, - const ssef& tsplat, - const ssef Psplat[3], - const ssef idirsplat[3], - const shuffle_swap_t shufflexyz[3], - const int nodeAddr, - const uint visibility, - float dist[2]) -{ - float4 node = kernel_tex_fetch(__bvh_nodes, nodeAddr); - if(__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) { - return bvh_unaligned_node_intersect(kg, - P, - dir, - tnear, - tfar, - nodeAddr, - visibility, - dist); - } - else { - return bvh_aligned_node_intersect(kg, - P, - dir, - tsplat, - Psplat, - idirsplat, - shufflexyz, - nodeAddr, - visibility, - dist); - } -} - -ccl_device_inline int bvh_node_intersect_robust(KernelGlobals *kg, - const float3& P, - const float3& dir, - const ssef& tnear, - const ssef& tfar, - const ssef& tsplat, - const ssef Psplat[3], - const ssef idirsplat[3], - const shuffle_swap_t shufflexyz[3], - const float difl, - const float extmax, - const int nodeAddr, - const uint visibility, - float dist[2]) -{ - float4 node = kernel_tex_fetch(__bvh_nodes, nodeAddr); - if(__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) { - return bvh_unaligned_node_intersect_robust(kg, - P, - dir, - tnear, - tfar, - difl, - nodeAddr, - visibility, - dist); - } - else { - return bvh_aligned_node_intersect_robust(kg, - P, - dir, - tsplat, - Psplat, - idirsplat, - shufflexyz, - difl, - extmax, - nodeAddr, - visibility, - dist); - } -} -#endif /* !defined(__KERNEL_SSE2__) */ diff --git a/intern/cycles/kernel/geom/geom_bvh_shadow.h b/intern/cycles/kernel/geom/geom_bvh_shadow.h deleted file mode 100644 index a54c6024152..00000000000 --- a/intern/cycles/kernel/geom/geom_bvh_shadow.h +++ /dev/null @@ -1,386 +0,0 @@ -/* - * Adapted from code Copyright 2009-2010 NVIDIA Corporation, - * and code copyright 2009-2012 Intel Corporation - * - * Modifications Copyright 2011-2013, Blender 
Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifdef __QBVH__ -# include "geom_qbvh_shadow.h" -#endif - -#if BVH_FEATURE(BVH_HAIR) -# define NODE_INTERSECT bvh_node_intersect -#else -# define NODE_INTERSECT bvh_aligned_node_intersect -#endif - -/* This is a template BVH traversal function, where various features can be - * enabled/disabled. This way we can compile optimized versions for each case - * without new features slowing things down. - * - * BVH_INSTANCING: object instancing - * BVH_HAIR: hair curve rendering - * BVH_MOTION: motion blur rendering - * - */ - -ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg, - const Ray *ray, - Intersection *isect_array, - const uint max_hits, - uint *num_hits) -{ - /* todo: - * - likely and unlikely for if() statements - * - test restrict attribute for pointers - */ - - /* traversal stack in CUDA thread-local memory */ - int traversalStack[BVH_STACK_SIZE]; - traversalStack[0] = ENTRYPOINT_SENTINEL; - - /* traversal variables in registers */ - int stackPtr = 0; - int nodeAddr = kernel_data.bvh.root; - - /* ray parameters in registers */ - const float tmax = ray->t; - float3 P = ray->P; - float3 dir = bvh_clamp_direction(ray->D); - float3 idir = bvh_inverse_direction(dir); - int object = OBJECT_NONE; - float isect_t = tmax; - -#if BVH_FEATURE(BVH_MOTION) - Transform ob_itfm; -#endif - -#if BVH_FEATURE(BVH_INSTANCING) - int num_hits_in_instance = 0; -#endif - - *num_hits = 0; - 
isect_array->t = tmax; - -#if defined(__KERNEL_SSE2__) - const shuffle_swap_t shuf_identity = shuffle_swap_identity(); - const shuffle_swap_t shuf_swap = shuffle_swap_swap(); - - const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000)); - ssef Psplat[3], idirsplat[3]; -# if BVH_FEATURE(BVH_HAIR) - ssef tnear(0.0f), tfar(isect_t); -# endif - shuffle_swap_t shufflexyz[3]; - - Psplat[0] = ssef(P.x); - Psplat[1] = ssef(P.y); - Psplat[2] = ssef(P.z); - - ssef tsplat(0.0f, 0.0f, -isect_t, -isect_t); - - gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); -#endif /* __KERNEL_SSE2__ */ - - IsectPrecalc isect_precalc; - triangle_intersect_precalc(dir, &isect_precalc); - - /* traversal loop */ - do { - do { - /* traverse internal nodes */ - while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) { - int nodeAddrChild1, traverse_mask; - float dist[2]; - float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0); - -#if !defined(__KERNEL_SSE2__) - traverse_mask = NODE_INTERSECT(kg, - P, -# if BVH_FEATURE(BVH_HAIR) - dir, -# endif - idir, - isect_t, - nodeAddr, - PATH_RAY_SHADOW, - dist); -#else // __KERNEL_SSE2__ - traverse_mask = NODE_INTERSECT(kg, - P, - dir, -# if BVH_FEATURE(BVH_HAIR) - tnear, - tfar, -# endif - tsplat, - Psplat, - idirsplat, - shufflexyz, - nodeAddr, - PATH_RAY_SHADOW, - dist); -#endif // __KERNEL_SSE2__ - - nodeAddr = __float_as_int(cnodes.z); - nodeAddrChild1 = __float_as_int(cnodes.w); - - if(traverse_mask == 3) { - /* Both children were intersected, push the farther one. */ - bool closestChild1 = (dist[1] < dist[0]); - - if(closestChild1) { - int tmp = nodeAddr; - nodeAddr = nodeAddrChild1; - nodeAddrChild1 = tmp; - } - - ++stackPtr; - kernel_assert(stackPtr < BVH_STACK_SIZE); - traversalStack[stackPtr] = nodeAddrChild1; - } - else { - /* One child was intersected. */ - if(traverse_mask == 2) { - nodeAddr = nodeAddrChild1; - } - else if(traverse_mask == 0) { - /* Neither child was intersected. 
*/ - nodeAddr = traversalStack[stackPtr]; - --stackPtr; - } - } - } - - /* if node is leaf, fetch triangle list */ - if(nodeAddr < 0) { - float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)); - int primAddr = __float_as_int(leaf.x); - -#if BVH_FEATURE(BVH_INSTANCING) - if(primAddr >= 0) { -#endif - const int primAddr2 = __float_as_int(leaf.y); - const uint type = __float_as_int(leaf.w); - const uint p_type = type & PRIMITIVE_ALL; - - /* pop */ - nodeAddr = traversalStack[stackPtr]; - --stackPtr; - - /* primitive intersection */ - while(primAddr < primAddr2) { - kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); - - bool hit; - - /* todo: specialized intersect functions which don't fill in - * isect unless needed and check SD_HAS_TRANSPARENT_SHADOW? - * might give a few % performance improvement */ - - switch(p_type) { - case PRIMITIVE_TRIANGLE: { - hit = triangle_intersect(kg, &isect_precalc, isect_array, P, PATH_RAY_SHADOW, object, primAddr); - break; - } -#if BVH_FEATURE(BVH_MOTION) - case PRIMITIVE_MOTION_TRIANGLE: { - hit = motion_triangle_intersect(kg, isect_array, P, dir, ray->time, PATH_RAY_SHADOW, object, primAddr); - break; - } -#endif -#if BVH_FEATURE(BVH_HAIR) - case PRIMITIVE_CURVE: - case PRIMITIVE_MOTION_CURVE: { - if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) - hit = bvh_cardinal_curve_intersect(kg, isect_array, P, dir, PATH_RAY_SHADOW, object, primAddr, ray->time, type, NULL, 0, 0); - else - hit = bvh_curve_intersect(kg, isect_array, P, dir, PATH_RAY_SHADOW, object, primAddr, ray->time, type, NULL, 0, 0); - break; - } -#endif - default: { - hit = false; - break; - } - } - - /* shadow ray early termination */ - if(hit) { - /* detect if this surface has a shader with transparent shadows */ - - /* todo: optimize so primitive visibility flag indicates if - * the primitive has a transparent shadow shader? 
*/ - int prim = kernel_tex_fetch(__prim_index, isect_array->prim); - int shader = 0; - -#ifdef __HAIR__ - if(kernel_tex_fetch(__prim_type, isect_array->prim) & PRIMITIVE_ALL_TRIANGLE) -#endif - { - shader = kernel_tex_fetch(__tri_shader, prim); - } -#ifdef __HAIR__ - else { - float4 str = kernel_tex_fetch(__curves, prim); - shader = __float_as_int(str.z); - } -#endif - int flag = kernel_tex_fetch(__shader_flag, (shader & SHADER_MASK)*2); - - /* if no transparent shadows, all light is blocked */ - if(!(flag & SD_HAS_TRANSPARENT_SHADOW)) { - return true; - } - /* if maximum number of hits reached, block all light */ - else if(*num_hits == max_hits) { - return true; - } - - /* move on to next entry in intersections array */ - isect_array++; - (*num_hits)++; -#if BVH_FEATURE(BVH_INSTANCING) - num_hits_in_instance++; -#endif - - isect_array->t = isect_t; - } - - primAddr++; - } - } -#if BVH_FEATURE(BVH_INSTANCING) - else { - /* instance push */ - object = kernel_tex_fetch(__prim_object, -primAddr-1); - -# if BVH_FEATURE(BVH_MOTION) - bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_itfm); -# else - bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect_t); -# endif - - triangle_intersect_precalc(dir, &isect_precalc); - num_hits_in_instance = 0; - isect_array->t = isect_t; - -# if defined(__KERNEL_SSE2__) - Psplat[0] = ssef(P.x); - Psplat[1] = ssef(P.y); - Psplat[2] = ssef(P.z); - - tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t); -# if BVH_FEATURE(BVH_HAIR) - tfar = ssef(isect_t); -# endif - gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); -# endif - - ++stackPtr; - kernel_assert(stackPtr < BVH_STACK_SIZE); - traversalStack[stackPtr] = ENTRYPOINT_SENTINEL; - - nodeAddr = kernel_tex_fetch(__object_node, object); - } - } -#endif /* FEATURE(BVH_INSTANCING) */ - } while(nodeAddr != ENTRYPOINT_SENTINEL); - -#if BVH_FEATURE(BVH_INSTANCING) - if(stackPtr >= 0) { - kernel_assert(object != OBJECT_NONE); - - 
if(num_hits_in_instance) { - float t_fac; - -# if BVH_FEATURE(BVH_MOTION) - bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm); -# else - bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac); -# endif - - triangle_intersect_precalc(dir, &isect_precalc); - - /* scale isect->t to adjust for instancing */ - for(int i = 0; i < num_hits_in_instance; i++) - (isect_array-i-1)->t *= t_fac; - } - else { - float ignore_t = FLT_MAX; - -# if BVH_FEATURE(BVH_MOTION) - bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &ignore_t, &ob_itfm); -# else - bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &ignore_t); -# endif - triangle_intersect_precalc(dir, &isect_precalc); - } - - isect_t = tmax; - isect_array->t = isect_t; - -# if defined(__KERNEL_SSE2__) - Psplat[0] = ssef(P.x); - Psplat[1] = ssef(P.y); - Psplat[2] = ssef(P.z); - - tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t); -# if BVH_FEATURE(BVH_HAIR) - tfar = ssef(isect_t); -# endif - gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); -# endif - - object = OBJECT_NONE; - nodeAddr = traversalStack[stackPtr]; - --stackPtr; - } -#endif /* FEATURE(BVH_INSTANCING) */ - } while(nodeAddr != ENTRYPOINT_SENTINEL); - - return false; -} - -ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg, - const Ray *ray, - Intersection *isect_array, - const uint max_hits, - uint *num_hits) -{ -#ifdef __QBVH__ - if(kernel_data.bvh.use_qbvh) { - return BVH_FUNCTION_FULL_NAME(QBVH)(kg, - ray, - isect_array, - max_hits, - num_hits); - } - else -#endif - { - kernel_assert(kernel_data.bvh.use_qbvh == false); - return BVH_FUNCTION_FULL_NAME(BVH)(kg, - ray, - isect_array, - max_hits, - num_hits); - } -} - -#undef BVH_FUNCTION_NAME -#undef BVH_FUNCTION_FEATURES -#undef NODE_INTERSECT diff --git a/intern/cycles/kernel/geom/geom_bvh_subsurface.h b/intern/cycles/kernel/geom/geom_bvh_subsurface.h deleted file mode 100644 index 88aaf01d682..00000000000 --- 
a/intern/cycles/kernel/geom/geom_bvh_subsurface.h +++ /dev/null @@ -1,266 +0,0 @@ -/* - * Adapted from code Copyright 2009-2010 NVIDIA Corporation, - * and code copyright 2009-2012 Intel Corporation - * - * Modifications Copyright 2011-2013, Blender Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifdef __QBVH__ -# include "geom_qbvh_subsurface.h" -#endif - -#if BVH_FEATURE(BVH_HAIR) -# define NODE_INTERSECT bvh_node_intersect -#else -# define NODE_INTERSECT bvh_aligned_node_intersect -#endif - -/* This is a template BVH traversal function for subsurface scattering, where - * various features can be enabled/disabled. This way we can compile optimized - * versions for each case without new features slowing things down. 
- * - * BVH_MOTION: motion blur rendering - * - */ - -ccl_device void BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg, - const Ray *ray, - SubsurfaceIntersection *ss_isect, - int subsurface_object, - uint *lcg_state, - int max_hits) -{ - /* todo: - * - test if pushing distance on the stack helps (for non shadow rays) - * - separate version for shadow rays - * - likely and unlikely for if() statements - * - test restrict attribute for pointers - */ - - /* traversal stack in CUDA thread-local memory */ - int traversalStack[BVH_STACK_SIZE]; - traversalStack[0] = ENTRYPOINT_SENTINEL; - - /* traversal variables in registers */ - int stackPtr = 0; - int nodeAddr = kernel_tex_fetch(__object_node, subsurface_object); - - /* ray parameters in registers */ - float3 P = ray->P; - float3 dir = bvh_clamp_direction(ray->D); - float3 idir = bvh_inverse_direction(dir); - int object = OBJECT_NONE; - float isect_t = ray->t; - - ss_isect->num_hits = 0; - - const int object_flag = kernel_tex_fetch(__object_flag, subsurface_object); - if(!(object_flag & SD_TRANSFORM_APPLIED)) { -#if BVH_FEATURE(BVH_MOTION) - Transform ob_itfm; - bvh_instance_motion_push(kg, - subsurface_object, - ray, - &P, - &dir, - &idir, - &isect_t, - &ob_itfm); -#else - bvh_instance_push(kg, subsurface_object, ray, &P, &dir, &idir, &isect_t); -#endif - object = subsurface_object; - } - -#if defined(__KERNEL_SSE2__) - const shuffle_swap_t shuf_identity = shuffle_swap_identity(); - const shuffle_swap_t shuf_swap = shuffle_swap_swap(); - - const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000)); - ssef Psplat[3], idirsplat[3]; -# if BVH_FEATURE(BVH_HAIR) - ssef tnear(0.0f), tfar(isect_t); -# endif - shuffle_swap_t shufflexyz[3]; - - Psplat[0] = ssef(P.x); - Psplat[1] = ssef(P.y); - Psplat[2] = ssef(P.z); - - ssef tsplat(0.0f, 0.0f, -isect_t, -isect_t); - - gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); -#endif - - IsectPrecalc isect_precalc; - triangle_intersect_precalc(dir, 
&isect_precalc); - - /* traversal loop */ - do { - do { - /* traverse internal nodes */ - while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) { - int nodeAddrChild1, traverse_mask; - float dist[2]; - float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0); - -#if !defined(__KERNEL_SSE2__) - traverse_mask = NODE_INTERSECT(kg, - P, -# if BVH_FEATURE(BVH_HAIR) - dir, -# endif - idir, - isect_t, - nodeAddr, - PATH_RAY_ALL_VISIBILITY, - dist); -#else // __KERNEL_SSE2__ - traverse_mask = NODE_INTERSECT(kg, - P, - dir, -# if BVH_FEATURE(BVH_HAIR) - tnear, - tfar, -# endif - tsplat, - Psplat, - idirsplat, - shufflexyz, - nodeAddr, - PATH_RAY_ALL_VISIBILITY, - dist); -#endif // __KERNEL_SSE2__ - - nodeAddr = __float_as_int(cnodes.z); - nodeAddrChild1 = __float_as_int(cnodes.w); - - if(traverse_mask == 3) { - /* Both children were intersected, push the farther one. */ - bool closestChild1 = (dist[1] < dist[0]); - - if(closestChild1) { - int tmp = nodeAddr; - nodeAddr = nodeAddrChild1; - nodeAddrChild1 = tmp; - } - - ++stackPtr; - kernel_assert(stackPtr < BVH_STACK_SIZE); - traversalStack[stackPtr] = nodeAddrChild1; - } - else { - /* One child was intersected. */ - if(traverse_mask == 2) { - nodeAddr = nodeAddrChild1; - } - else if(traverse_mask == 0) { - /* Neither child was intersected. 
*/ - nodeAddr = traversalStack[stackPtr]; - --stackPtr; - } - } - } - - /* if node is leaf, fetch triangle list */ - if(nodeAddr < 0) { - float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)); - int primAddr = __float_as_int(leaf.x); - - const int primAddr2 = __float_as_int(leaf.y); - const uint type = __float_as_int(leaf.w); - - /* pop */ - nodeAddr = traversalStack[stackPtr]; - --stackPtr; - - /* primitive intersection */ - switch(type & PRIMITIVE_ALL) { - case PRIMITIVE_TRIANGLE: { - /* intersect ray against primitive */ - for(; primAddr < primAddr2; primAddr++) { - kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); - triangle_intersect_subsurface(kg, - &isect_precalc, - ss_isect, - P, - object, - primAddr, - isect_t, - lcg_state, - max_hits); - } - break; - } -#if BVH_FEATURE(BVH_MOTION) - case PRIMITIVE_MOTION_TRIANGLE: { - /* intersect ray against primitive */ - for(; primAddr < primAddr2; primAddr++) { - kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); - motion_triangle_intersect_subsurface(kg, - ss_isect, - P, - dir, - ray->time, - object, - primAddr, - isect_t, - lcg_state, - max_hits); - } - break; - } -#endif - default: { - break; - } - } - } - } while(nodeAddr != ENTRYPOINT_SENTINEL); - } while(nodeAddr != ENTRYPOINT_SENTINEL); -} - -ccl_device_inline void BVH_FUNCTION_NAME(KernelGlobals *kg, - const Ray *ray, - SubsurfaceIntersection *ss_isect, - int subsurface_object, - uint *lcg_state, - int max_hits) -{ -#ifdef __QBVH__ - if(kernel_data.bvh.use_qbvh) { - return BVH_FUNCTION_FULL_NAME(QBVH)(kg, - ray, - ss_isect, - subsurface_object, - lcg_state, - max_hits); - } - else -#endif - { - kernel_assert(kernel_data.bvh.use_qbvh == false); - return BVH_FUNCTION_FULL_NAME(BVH)(kg, - ray, - ss_isect, - subsurface_object, - lcg_state, - max_hits); - } -} - -#undef BVH_FUNCTION_NAME -#undef BVH_FUNCTION_FEATURES -#undef NODE_INTERSECT diff --git a/intern/cycles/kernel/geom/geom_bvh_traversal.h 
b/intern/cycles/kernel/geom/geom_bvh_traversal.h deleted file mode 100644 index f409dd5f403..00000000000 --- a/intern/cycles/kernel/geom/geom_bvh_traversal.h +++ /dev/null @@ -1,428 +0,0 @@ -/* - * Adapted from code Copyright 2009-2010 NVIDIA Corporation, - * and code copyright 2009-2012 Intel Corporation - * - * Modifications Copyright 2011-2013, Blender Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifdef __QBVH__ -# include "geom_qbvh_traversal.h" -#endif - -#if BVH_FEATURE(BVH_HAIR) -# define NODE_INTERSECT bvh_node_intersect -# define NODE_INTERSECT_ROBUST bvh_node_intersect_robust -#else -# define NODE_INTERSECT bvh_aligned_node_intersect -# define NODE_INTERSECT_ROBUST bvh_aligned_node_intersect_robust -#endif - -/* This is a template BVH traversal function, where various features can be - * enabled/disabled. This way we can compile optimized versions for each case - * without new features slowing things down. 
- * - * BVH_INSTANCING: object instancing - * BVH_HAIR: hair curve rendering - * BVH_HAIR_MINIMUM_WIDTH: hair curve rendering with minimum width - * BVH_MOTION: motion blur rendering - * - */ - -ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg, - const Ray *ray, - Intersection *isect, - const uint visibility -#if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH) - , uint *lcg_state, - float difl, - float extmax -#endif - ) -{ - /* todo: - * - test if pushing distance on the stack helps (for non shadow rays) - * - separate version for shadow rays - * - likely and unlikely for if() statements - * - test restrict attribute for pointers - */ - - /* traversal stack in CUDA thread-local memory */ - int traversalStack[BVH_STACK_SIZE]; - traversalStack[0] = ENTRYPOINT_SENTINEL; - - /* traversal variables in registers */ - int stackPtr = 0; - int nodeAddr = kernel_data.bvh.root; - - /* ray parameters in registers */ - float3 P = ray->P; - float3 dir = bvh_clamp_direction(ray->D); - float3 idir = bvh_inverse_direction(dir); - int object = OBJECT_NONE; - -#if BVH_FEATURE(BVH_MOTION) - Transform ob_itfm; -#endif - - isect->t = ray->t; - isect->u = 0.0f; - isect->v = 0.0f; - isect->prim = PRIM_NONE; - isect->object = OBJECT_NONE; - - BVH_DEBUG_INIT(); - -#if defined(__KERNEL_SSE2__) - const shuffle_swap_t shuf_identity = shuffle_swap_identity(); - const shuffle_swap_t shuf_swap = shuffle_swap_swap(); - - const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000)); - ssef Psplat[3], idirsplat[3]; -# if BVH_FEATURE(BVH_HAIR) - ssef tnear(0.0f), tfar(isect->t); -# endif - shuffle_swap_t shufflexyz[3]; - - Psplat[0] = ssef(P.x); - Psplat[1] = ssef(P.y); - Psplat[2] = ssef(P.z); - - ssef tsplat(0.0f, 0.0f, -isect->t, -isect->t); - - gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); -#endif - - IsectPrecalc isect_precalc; - triangle_intersect_precalc(dir, &isect_precalc); - - /* traversal loop */ - do { - do { - /* traverse internal nodes */ - while(nodeAddr 
>= 0 && nodeAddr != ENTRYPOINT_SENTINEL) { - int nodeAddrChild1, traverse_mask; - float dist[2]; - float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0); - -#if !defined(__KERNEL_SSE2__) -# if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH) - if(difl != 0.0f) { - traverse_mask = NODE_INTERSECT_ROBUST(kg, - P, -# if BVH_FEATURE(BVH_HAIR) - dir, -# endif - idir, - isect->t, - difl, - extmax, - nodeAddr, - visibility, - dist); - } - else -# endif - { - traverse_mask = NODE_INTERSECT(kg, - P, -# if BVH_FEATURE(BVH_HAIR) - dir, -# endif - idir, - isect->t, - nodeAddr, - visibility, - dist); - } -#else // __KERNEL_SSE2__ -# if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH) - if(difl != 0.0f) { - traverse_mask = NODE_INTERSECT_ROBUST(kg, - P, - dir, -# if BVH_FEATURE(BVH_HAIR) - tnear, - tfar, -# endif - tsplat, - Psplat, - idirsplat, - shufflexyz, - difl, - extmax, - nodeAddr, - visibility, - dist); - } - else -# endif - { - traverse_mask = NODE_INTERSECT(kg, - P, - dir, -# if BVH_FEATURE(BVH_HAIR) - tnear, - tfar, -# endif - tsplat, - Psplat, - idirsplat, - shufflexyz, - nodeAddr, - visibility, - dist); - } -#endif // __KERNEL_SSE2__ - - nodeAddr = __float_as_int(cnodes.z); - nodeAddrChild1 = __float_as_int(cnodes.w); - - if(traverse_mask == 3) { - /* Both children were intersected, push the farther one. */ - bool closestChild1 = (dist[1] < dist[0]); - - if(closestChild1) { - int tmp = nodeAddr; - nodeAddr = nodeAddrChild1; - nodeAddrChild1 = tmp; - } - - ++stackPtr; - kernel_assert(stackPtr < BVH_STACK_SIZE); - traversalStack[stackPtr] = nodeAddrChild1; - } - else { - /* One child was intersected. */ - if(traverse_mask == 2) { - nodeAddr = nodeAddrChild1; - } - else if(traverse_mask == 0) { - /* Neither child was intersected. 
*/ - nodeAddr = traversalStack[stackPtr]; - --stackPtr; - } - } - BVH_DEBUG_NEXT_STEP(); - } - - /* if node is leaf, fetch triangle list */ - if(nodeAddr < 0) { - float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)); - int primAddr = __float_as_int(leaf.x); - -#if BVH_FEATURE(BVH_INSTANCING) - if(primAddr >= 0) { -#endif - const int primAddr2 = __float_as_int(leaf.y); - const uint type = __float_as_int(leaf.w); - - /* pop */ - nodeAddr = traversalStack[stackPtr]; - --stackPtr; - - /* primitive intersection */ - switch(type & PRIMITIVE_ALL) { - case PRIMITIVE_TRIANGLE: { - for(; primAddr < primAddr2; primAddr++) { - BVH_DEBUG_NEXT_STEP(); - kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); - if(triangle_intersect(kg, &isect_precalc, isect, P, visibility, object, primAddr)) { - /* shadow ray early termination */ -#if defined(__KERNEL_SSE2__) - if(visibility == PATH_RAY_SHADOW_OPAQUE) - return true; - tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t); -# if BVH_FEATURE(BVH_HAIR) - tfar = ssef(isect->t); -# endif -#else - if(visibility == PATH_RAY_SHADOW_OPAQUE) - return true; -#endif - } - } - break; - } -#if BVH_FEATURE(BVH_MOTION) - case PRIMITIVE_MOTION_TRIANGLE: { - for(; primAddr < primAddr2; primAddr++) { - BVH_DEBUG_NEXT_STEP(); - kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); - if(motion_triangle_intersect(kg, isect, P, dir, ray->time, visibility, object, primAddr)) { - /* shadow ray early termination */ -# if defined(__KERNEL_SSE2__) - if(visibility == PATH_RAY_SHADOW_OPAQUE) - return true; - tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t); -# if BVH_FEATURE(BVH_HAIR) - tfar = ssef(isect->t); -# endif -# else - if(visibility == PATH_RAY_SHADOW_OPAQUE) - return true; -# endif - } - } - break; - } -#endif /* BVH_FEATURE(BVH_MOTION) */ -#if BVH_FEATURE(BVH_HAIR) - case PRIMITIVE_CURVE: - case PRIMITIVE_MOTION_CURVE: { - for(; primAddr < primAddr2; primAddr++) { - BVH_DEBUG_NEXT_STEP(); - 
kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); - bool hit; - if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) - hit = bvh_cardinal_curve_intersect(kg, isect, P, dir, visibility, object, primAddr, ray->time, type, lcg_state, difl, extmax); - else - hit = bvh_curve_intersect(kg, isect, P, dir, visibility, object, primAddr, ray->time, type, lcg_state, difl, extmax); - if(hit) { - /* shadow ray early termination */ -# if defined(__KERNEL_SSE2__) - if(visibility == PATH_RAY_SHADOW_OPAQUE) - return true; - tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t); -# if BVH_FEATURE(BVH_HAIR) - tfar = ssef(isect->t); -# endif -# else - if(visibility == PATH_RAY_SHADOW_OPAQUE) - return true; -# endif - } - } - break; - } -#endif /* BVH_FEATURE(BVH_HAIR) */ - } - } -#if BVH_FEATURE(BVH_INSTANCING) - else { - /* instance push */ - object = kernel_tex_fetch(__prim_object, -primAddr-1); - -# if BVH_FEATURE(BVH_MOTION) - bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm); -# else - bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t); -# endif - triangle_intersect_precalc(dir, &isect_precalc); - -# if defined(__KERNEL_SSE2__) - Psplat[0] = ssef(P.x); - Psplat[1] = ssef(P.y); - Psplat[2] = ssef(P.z); - - tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t); -# if BVH_FEATURE(BVH_HAIR) - tfar = ssef(isect->t); -# endif - - gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); -# endif - - ++stackPtr; - kernel_assert(stackPtr < BVH_STACK_SIZE); - traversalStack[stackPtr] = ENTRYPOINT_SENTINEL; - - nodeAddr = kernel_tex_fetch(__object_node, object); - - BVH_DEBUG_NEXT_INSTANCE(); - } - } -#endif /* FEATURE(BVH_INSTANCING) */ - } while(nodeAddr != ENTRYPOINT_SENTINEL); - -#if BVH_FEATURE(BVH_INSTANCING) - if(stackPtr >= 0) { - kernel_assert(object != OBJECT_NONE); - - /* instance pop */ -# if BVH_FEATURE(BVH_MOTION) - bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm); -# else - 
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect->t); -# endif - triangle_intersect_precalc(dir, &isect_precalc); - -# if defined(__KERNEL_SSE2__) - Psplat[0] = ssef(P.x); - Psplat[1] = ssef(P.y); - Psplat[2] = ssef(P.z); - - tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t); -# if BVH_FEATURE(BVH_HAIR) - tfar = ssef(isect->t); -# endif - - gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); -# endif - - object = OBJECT_NONE; - nodeAddr = traversalStack[stackPtr]; - --stackPtr; - } -#endif /* FEATURE(BVH_INSTANCING) */ - } while(nodeAddr != ENTRYPOINT_SENTINEL); - - return (isect->prim != PRIM_NONE); -} - -ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg, - const Ray *ray, - Intersection *isect, - const uint visibility -#if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH) - , uint *lcg_state, - float difl, - float extmax -#endif - ) -{ -#ifdef __QBVH__ - if(kernel_data.bvh.use_qbvh) { - return BVH_FUNCTION_FULL_NAME(QBVH)(kg, - ray, - isect, - visibility -#if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH) - , lcg_state, - difl, - extmax -#endif - ); - } - else -#endif - { - kernel_assert(kernel_data.bvh.use_qbvh == false); - return BVH_FUNCTION_FULL_NAME(BVH)(kg, - ray, - isect, - visibility -#if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH) - , lcg_state, - difl, - extmax -#endif - ); - } -} - -#undef BVH_FUNCTION_NAME -#undef BVH_FUNCTION_FEATURES -#undef NODE_INTERSECT -#undef NODE_INTERSECT_ROBUST diff --git a/intern/cycles/kernel/geom/geom_bvh_volume.h b/intern/cycles/kernel/geom/geom_bvh_volume.h deleted file mode 100644 index 5e70ce99f51..00000000000 --- a/intern/cycles/kernel/geom/geom_bvh_volume.h +++ /dev/null @@ -1,324 +0,0 @@ -/* - * Adapted from code Copyright 2009-2010 NVIDIA Corporation, - * and code copyright 2009-2012 Intel Corporation - * - * Modifications Copyright 2011-2014, Blender Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifdef __QBVH__ -# include "geom_qbvh_volume.h" -#endif - -#if BVH_FEATURE(BVH_HAIR) -# define NODE_INTERSECT bvh_node_intersect -#else -# define NODE_INTERSECT bvh_aligned_node_intersect -#endif - -/* This is a template BVH traversal function for volumes, where - * various features can be enabled/disabled. This way we can compile optimized - * versions for each case without new features slowing things down. - * - * BVH_INSTANCING: object instancing - * BVH_MOTION: motion blur rendering - * - */ - -ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg, - const Ray *ray, - Intersection *isect, - const uint visibility) -{ - /* todo: - * - test if pushing distance on the stack helps (for non shadow rays) - * - separate version for shadow rays - * - likely and unlikely for if() statements - * - test restrict attribute for pointers - */ - - /* traversal stack in CUDA thread-local memory */ - int traversalStack[BVH_STACK_SIZE]; - traversalStack[0] = ENTRYPOINT_SENTINEL; - - /* traversal variables in registers */ - int stackPtr = 0; - int nodeAddr = kernel_data.bvh.root; - - /* ray parameters in registers */ - float3 P = ray->P; - float3 dir = bvh_clamp_direction(ray->D); - float3 idir = bvh_inverse_direction(dir); - int object = OBJECT_NONE; - -#if BVH_FEATURE(BVH_MOTION) - Transform ob_itfm; -#endif - - isect->t = ray->t; - isect->u = 0.0f; - isect->v = 0.0f; - isect->prim = PRIM_NONE; - isect->object = OBJECT_NONE; - -#if defined(__KERNEL_SSE2__) - const shuffle_swap_t shuf_identity = shuffle_swap_identity(); - const shuffle_swap_t 
shuf_swap = shuffle_swap_swap(); - - const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000)); - ssef Psplat[3], idirsplat[3]; -# if BVH_FEATURE(BVH_HAIR) - ssef tnear(0.0f), tfar(isect->t); -# endif - shuffle_swap_t shufflexyz[3]; - - Psplat[0] = ssef(P.x); - Psplat[1] = ssef(P.y); - Psplat[2] = ssef(P.z); - - ssef tsplat(0.0f, 0.0f, -isect->t, -isect->t); - - gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); -#endif - - IsectPrecalc isect_precalc; - triangle_intersect_precalc(dir, &isect_precalc); - - /* traversal loop */ - do { - do { - /* traverse internal nodes */ - while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) { - int nodeAddrChild1, traverse_mask; - float dist[2]; - float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0); - -#if !defined(__KERNEL_SSE2__) - traverse_mask = NODE_INTERSECT(kg, - P, -# if BVH_FEATURE(BVH_HAIR) - dir, -# endif - idir, - isect->t, - nodeAddr, - visibility, - dist); -#else // __KERNEL_SSE2__ - traverse_mask = NODE_INTERSECT(kg, - P, - dir, -# if BVH_FEATURE(BVH_HAIR) - tnear, - tfar, -# endif - tsplat, - Psplat, - idirsplat, - shufflexyz, - nodeAddr, - visibility, - dist); -#endif // __KERNEL_SSE2__ - - nodeAddr = __float_as_int(cnodes.z); - nodeAddrChild1 = __float_as_int(cnodes.w); - - if(traverse_mask == 3) { - /* Both children were intersected, push the farther one. */ - bool closestChild1 = (dist[1] < dist[0]); - - if(closestChild1) { - int tmp = nodeAddr; - nodeAddr = nodeAddrChild1; - nodeAddrChild1 = tmp; - } - - ++stackPtr; - kernel_assert(stackPtr < BVH_STACK_SIZE); - traversalStack[stackPtr] = nodeAddrChild1; - } - else { - /* One child was intersected. */ - if(traverse_mask == 2) { - nodeAddr = nodeAddrChild1; - } - else if(traverse_mask == 0) { - /* Neither child was intersected. 
*/ - nodeAddr = traversalStack[stackPtr]; - --stackPtr; - } - } - } - - /* if node is leaf, fetch triangle list */ - if(nodeAddr < 0) { - float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)); - int primAddr = __float_as_int(leaf.x); - -#if BVH_FEATURE(BVH_INSTANCING) - if(primAddr >= 0) { -#endif - const int primAddr2 = __float_as_int(leaf.y); - const uint type = __float_as_int(leaf.w); - - /* pop */ - nodeAddr = traversalStack[stackPtr]; - --stackPtr; - - /* primitive intersection */ - switch(type & PRIMITIVE_ALL) { - case PRIMITIVE_TRIANGLE: { - /* intersect ray against primitive */ - for(; primAddr < primAddr2; primAddr++) { - kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); - /* only primitives from volume object */ - uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object; - int object_flag = kernel_tex_fetch(__object_flag, tri_object); - if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { - continue; - } - triangle_intersect(kg, &isect_precalc, isect, P, visibility, object, primAddr); - } - break; - } -#if BVH_FEATURE(BVH_MOTION) - case PRIMITIVE_MOTION_TRIANGLE: { - /* intersect ray against primitive */ - for(; primAddr < primAddr2; primAddr++) { - kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); - /* only primitives from volume object */ - uint tri_object = (object == OBJECT_NONE)? 
kernel_tex_fetch(__prim_object, primAddr): object; - int object_flag = kernel_tex_fetch(__object_flag, tri_object); - if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { - continue; - } - motion_triangle_intersect(kg, isect, P, dir, ray->time, visibility, object, primAddr); - } - break; - } -#endif - default: { - break; - } - } - } -#if BVH_FEATURE(BVH_INSTANCING) - else { - /* instance push */ - object = kernel_tex_fetch(__prim_object, -primAddr-1); - int object_flag = kernel_tex_fetch(__object_flag, object); - - if(object_flag & SD_OBJECT_HAS_VOLUME) { - -# if BVH_FEATURE(BVH_MOTION) - bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm); -# else - bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t); -# endif - - triangle_intersect_precalc(dir, &isect_precalc); - -# if defined(__KERNEL_SSE2__) - Psplat[0] = ssef(P.x); - Psplat[1] = ssef(P.y); - Psplat[2] = ssef(P.z); - - tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t); -# if BVH_FEATURE(BVH_HAIR) - tfar = ssef(isect->t); -# endif - - gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); -# endif - - ++stackPtr; - kernel_assert(stackPtr < BVH_STACK_SIZE); - traversalStack[stackPtr] = ENTRYPOINT_SENTINEL; - - nodeAddr = kernel_tex_fetch(__object_node, object); - } - else { - /* pop */ - object = OBJECT_NONE; - nodeAddr = traversalStack[stackPtr]; - --stackPtr; - } - } - } -#endif /* FEATURE(BVH_INSTANCING) */ - } while(nodeAddr != ENTRYPOINT_SENTINEL); - -#if BVH_FEATURE(BVH_INSTANCING) - if(stackPtr >= 0) { - kernel_assert(object != OBJECT_NONE); - - /* instance pop */ -# if BVH_FEATURE(BVH_MOTION) - bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm); -# else - bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect->t); -# endif - - triangle_intersect_precalc(dir, &isect_precalc); - -# if defined(__KERNEL_SSE2__) - Psplat[0] = ssef(P.x); - Psplat[1] = ssef(P.y); - Psplat[2] = ssef(P.z); - - tsplat = ssef(0.0f, 0.0f, 
-isect->t, -isect->t); -# if BVH_FEATURE(BVH_HAIR) - tfar = ssef(isect->t); -# endif - - gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); -# endif - - object = OBJECT_NONE; - nodeAddr = traversalStack[stackPtr]; - --stackPtr; - } -#endif /* FEATURE(BVH_MOTION) */ - } while(nodeAddr != ENTRYPOINT_SENTINEL); - - return (isect->prim != PRIM_NONE); -} - -ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg, - const Ray *ray, - Intersection *isect, - const uint visibility) -{ -#ifdef __QBVH__ - if(kernel_data.bvh.use_qbvh) { - return BVH_FUNCTION_FULL_NAME(QBVH)(kg, - ray, - isect, - visibility); - } - else -#endif - { - kernel_assert(kernel_data.bvh.use_qbvh == false); - return BVH_FUNCTION_FULL_NAME(BVH)(kg, - ray, - isect, - visibility); - } -} - -#undef BVH_FUNCTION_NAME -#undef BVH_FUNCTION_FEATURES -#undef NODE_INTERSECT diff --git a/intern/cycles/kernel/geom/geom_bvh_volume_all.h b/intern/cycles/kernel/geom/geom_bvh_volume_all.h deleted file mode 100644 index ab5ac8505a3..00000000000 --- a/intern/cycles/kernel/geom/geom_bvh_volume_all.h +++ /dev/null @@ -1,397 +0,0 @@ -/* - * Adapted from code Copyright 2009-2010 NVIDIA Corporation, - * and code copyright 2009-2012 Intel Corporation - * - * Modifications Copyright 2011-2014, Blender Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifdef __QBVH__ -# include "geom_qbvh_volume_all.h" -#endif - -#if BVH_FEATURE(BVH_HAIR) -# define NODE_INTERSECT bvh_node_intersect -#else -# define NODE_INTERSECT bvh_aligned_node_intersect -#endif - -/* This is a template BVH traversal function for volumes, where - * various features can be enabled/disabled. This way we can compile optimized - * versions for each case without new features slowing things down. - * - * BVH_INSTANCING: object instancing - * BVH_MOTION: motion blur rendering - * - */ - -ccl_device uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg, - const Ray *ray, - Intersection *isect_array, - const uint max_hits, - const uint visibility) -{ - /* todo: - * - test if pushing distance on the stack helps (for non shadow rays) - * - separate version for shadow rays - * - likely and unlikely for if() statements - * - test restrict attribute for pointers - */ - - /* traversal stack in CUDA thread-local memory */ - int traversalStack[BVH_STACK_SIZE]; - traversalStack[0] = ENTRYPOINT_SENTINEL; - - /* traversal variables in registers */ - int stackPtr = 0; - int nodeAddr = kernel_data.bvh.root; - - /* ray parameters in registers */ - const float tmax = ray->t; - float3 P = ray->P; - float3 dir = bvh_clamp_direction(ray->D); - float3 idir = bvh_inverse_direction(dir); - int object = OBJECT_NONE; - float isect_t = tmax; - -#if BVH_FEATURE(BVH_MOTION) - Transform ob_itfm; -#endif - -#if BVH_FEATURE(BVH_INSTANCING) - int num_hits_in_instance = 0; -#endif - - uint num_hits = 0; - isect_array->t = tmax; - -#if defined(__KERNEL_SSE2__) - const shuffle_swap_t shuf_identity = shuffle_swap_identity(); - const shuffle_swap_t shuf_swap = shuffle_swap_swap(); - - const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000)); - ssef Psplat[3], idirsplat[3]; -# if BVH_FEATURE(BVH_HAIR) - ssef tnear(0.0f), tfar(isect_t); -# endif - shuffle_swap_t shufflexyz[3]; - - Psplat[0] = ssef(P.x); - Psplat[1] = ssef(P.y); - Psplat[2] = ssef(P.z); - - ssef tsplat(0.0f, 0.0f, 
-isect_t, -isect_t); - - gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); -#endif - - IsectPrecalc isect_precalc; - triangle_intersect_precalc(dir, &isect_precalc); - - /* traversal loop */ - do { - do { - /* traverse internal nodes */ - while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) { - int nodeAddrChild1, traverse_mask; - float dist[2]; - float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0); - -#if !defined(__KERNEL_SSE2__) - traverse_mask = NODE_INTERSECT(kg, - P, -# if BVH_FEATURE(BVH_HAIR) - dir, -# endif - idir, - isect_t, - nodeAddr, - visibility, - dist); -#else // __KERNEL_SSE2__ - traverse_mask = NODE_INTERSECT(kg, - P, - dir, -# if BVH_FEATURE(BVH_HAIR) - tnear, - tfar, -# endif - tsplat, - Psplat, - idirsplat, - shufflexyz, - nodeAddr, - visibility, - dist); -#endif // __KERNEL_SSE2__ - - nodeAddr = __float_as_int(cnodes.z); - nodeAddrChild1 = __float_as_int(cnodes.w); - - if(traverse_mask == 3) { - /* Both children were intersected, push the farther one. */ - bool closestChild1 = (dist[1] < dist[0]); - - if(closestChild1) { - int tmp = nodeAddr; - nodeAddr = nodeAddrChild1; - nodeAddrChild1 = tmp; - } - - ++stackPtr; - kernel_assert(stackPtr < BVH_STACK_SIZE); - traversalStack[stackPtr] = nodeAddrChild1; - } - else { - /* One child was intersected. */ - if(traverse_mask == 2) { - nodeAddr = nodeAddrChild1; - } - else if(traverse_mask == 0) { - /* Neither child was intersected. 
*/ - nodeAddr = traversalStack[stackPtr]; - --stackPtr; - } - } - } - - /* if node is leaf, fetch triangle list */ - if(nodeAddr < 0) { - float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)); - int primAddr = __float_as_int(leaf.x); - -#if BVH_FEATURE(BVH_INSTANCING) - if(primAddr >= 0) { -#endif - const int primAddr2 = __float_as_int(leaf.y); - const uint type = __float_as_int(leaf.w); - bool hit; - - /* pop */ - nodeAddr = traversalStack[stackPtr]; - --stackPtr; - - /* primitive intersection */ - switch(type & PRIMITIVE_ALL) { - case PRIMITIVE_TRIANGLE: { - /* intersect ray against primitive */ - for(; primAddr < primAddr2; primAddr++) { - kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); - /* only primitives from volume object */ - uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object; - int object_flag = kernel_tex_fetch(__object_flag, tri_object); - if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { - continue; - } - hit = triangle_intersect(kg, &isect_precalc, isect_array, P, visibility, object, primAddr); - if(hit) { - /* Move on to next entry in intersections array. 
*/ - isect_array++; - num_hits++; -#if BVH_FEATURE(BVH_INSTANCING) - num_hits_in_instance++; -#endif - isect_array->t = isect_t; - if(num_hits == max_hits) { -#if BVH_FEATURE(BVH_INSTANCING) -# if BVH_FEATURE(BVH_MOTION) - float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir)); -# else - Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM); - float t_fac = 1.0f / len(transform_direction(&itfm, dir)); -# endif - for(int i = 0; i < num_hits_in_instance; i++) { - (isect_array-i-1)->t *= t_fac; - } -#endif /* BVH_FEATURE(BVH_INSTANCING) */ - return num_hits; - } - } - } - break; - } -#if BVH_FEATURE(BVH_MOTION) - case PRIMITIVE_MOTION_TRIANGLE: { - /* intersect ray against primitive */ - for(; primAddr < primAddr2; primAddr++) { - kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); - /* only primitives from volume object */ - uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object; - int object_flag = kernel_tex_fetch(__object_flag, tri_object); - if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { - continue; - } - hit = motion_triangle_intersect(kg, isect_array, P, dir, ray->time, visibility, object, primAddr); - if(hit) { - /* Move on to next entry in intersections array. 
*/ - isect_array++; - num_hits++; -# if BVH_FEATURE(BVH_INSTANCING) - num_hits_in_instance++; -# endif - isect_array->t = isect_t; - if(num_hits == max_hits) { -# if BVH_FEATURE(BVH_INSTANCING) -# if BVH_FEATURE(BVH_MOTION) - float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir)); -# else - Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM); - float t_fac = 1.0f / len(transform_direction(&itfm, dir)); -# endif - for(int i = 0; i < num_hits_in_instance; i++) { - (isect_array-i-1)->t *= t_fac; - } -# endif /* BVH_FEATURE(BVH_INSTANCING) */ - return num_hits; - } - } - } - break; - } -#endif /* BVH_MOTION */ - default: { - break; - } - } - } -#if BVH_FEATURE(BVH_INSTANCING) - else { - /* instance push */ - object = kernel_tex_fetch(__prim_object, -primAddr-1); - int object_flag = kernel_tex_fetch(__object_flag, object); - - if(object_flag & SD_OBJECT_HAS_VOLUME) { - -# if BVH_FEATURE(BVH_MOTION) - bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_itfm); -# else - bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect_t); -# endif - - triangle_intersect_precalc(dir, &isect_precalc); - num_hits_in_instance = 0; - isect_array->t = isect_t; - -# if defined(__KERNEL_SSE2__) - Psplat[0] = ssef(P.x); - Psplat[1] = ssef(P.y); - Psplat[2] = ssef(P.z); - - tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t); -# if BVH_FEATURE(BVH_HAIR) - tfar = ssef(isect_t); -# endif - - gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); -# endif - - ++stackPtr; - kernel_assert(stackPtr < BVH_STACK_SIZE); - traversalStack[stackPtr] = ENTRYPOINT_SENTINEL; - - nodeAddr = kernel_tex_fetch(__object_node, object); - } - else { - /* pop */ - object = OBJECT_NONE; - nodeAddr = traversalStack[stackPtr]; - --stackPtr; - } - } - } -#endif /* FEATURE(BVH_INSTANCING) */ - } while(nodeAddr != ENTRYPOINT_SENTINEL); - -#if BVH_FEATURE(BVH_INSTANCING) - if(stackPtr >= 0) { - kernel_assert(object != OBJECT_NONE); - - 
if(num_hits_in_instance) { - float t_fac; -# if BVH_FEATURE(BVH_MOTION) - bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm); -# else - bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac); -# endif - triangle_intersect_precalc(dir, &isect_precalc); - /* Scale isect->t to adjust for instancing. */ - for(int i = 0; i < num_hits_in_instance; i++) { - (isect_array-i-1)->t *= t_fac; - } - } - else { - float ignore_t = FLT_MAX; -# if BVH_FEATURE(BVH_MOTION) - bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &ignore_t, &ob_itfm); -# else - bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &ignore_t); -# endif - triangle_intersect_precalc(dir, &isect_precalc); - } - - isect_t = tmax; - isect_array->t = isect_t; - -# if defined(__KERNEL_SSE2__) - Psplat[0] = ssef(P.x); - Psplat[1] = ssef(P.y); - Psplat[2] = ssef(P.z); - - tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t); -# if BVH_FEATURE(BVH_HAIR) - tfar = ssef(isect_t); -# endif - - gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); -# endif - - object = OBJECT_NONE; - nodeAddr = traversalStack[stackPtr]; - --stackPtr; - } -#endif /* FEATURE(BVH_MOTION) */ - } while(nodeAddr != ENTRYPOINT_SENTINEL); - - return num_hits; -} - -ccl_device_inline uint BVH_FUNCTION_NAME(KernelGlobals *kg, - const Ray *ray, - Intersection *isect_array, - const uint max_hits, - const uint visibility) -{ -#ifdef __QBVH__ - if(kernel_data.bvh.use_qbvh) { - return BVH_FUNCTION_FULL_NAME(QBVH)(kg, - ray, - isect_array, - max_hits, - visibility); - } - else -#endif - { - kernel_assert(kernel_data.bvh.use_qbvh == false); - return BVH_FUNCTION_FULL_NAME(BVH)(kg, - ray, - isect_array, - max_hits, - visibility); - } -} - -#undef BVH_FUNCTION_NAME -#undef BVH_FUNCTION_FEATURES -#undef NODE_INTERSECT diff --git a/intern/cycles/kernel/geom/geom_qbvh.h b/intern/cycles/kernel/geom/geom_qbvh.h deleted file mode 100644 index 5eda3213acb..00000000000 --- 
a/intern/cycles/kernel/geom/geom_qbvh.h +++ /dev/null @@ -1,433 +0,0 @@ -/* - * Copyright 2011-2014, Blender Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -struct QBVHStackItem { - int addr; - float dist; -}; - -/* TOOD(sergey): Investigate if using intrinsics helps for both - * stack item swap and float comparison. - */ -ccl_device_inline void qbvh_item_swap(QBVHStackItem *__restrict a, - QBVHStackItem *__restrict b) -{ - QBVHStackItem tmp = *a; - *a = *b; - *b = tmp; -} - -ccl_device_inline void qbvh_stack_sort(QBVHStackItem *__restrict s1, - QBVHStackItem *__restrict s2, - QBVHStackItem *__restrict s3) -{ - if(s2->dist < s1->dist) { qbvh_item_swap(s2, s1); } - if(s3->dist < s2->dist) { qbvh_item_swap(s3, s2); } - if(s2->dist < s1->dist) { qbvh_item_swap(s2, s1); } -} - -ccl_device_inline void qbvh_stack_sort(QBVHStackItem *__restrict s1, - QBVHStackItem *__restrict s2, - QBVHStackItem *__restrict s3, - QBVHStackItem *__restrict s4) -{ - if(s2->dist < s1->dist) { qbvh_item_swap(s2, s1); } - if(s4->dist < s3->dist) { qbvh_item_swap(s4, s3); } - if(s3->dist < s1->dist) { qbvh_item_swap(s3, s1); } - if(s4->dist < s2->dist) { qbvh_item_swap(s4, s2); } - if(s3->dist < s2->dist) { qbvh_item_swap(s3, s2); } -} - -/* Axis-aligned nodes intersection */ - -ccl_device_inline int qbvh_aligned_node_intersect(KernelGlobals *__restrict kg, - const ssef& tnear, - const ssef& tfar, -#ifdef __KERNEL_AVX2__ - const sse3f& org_idir, -#else - const sse3f& 
org, -#endif - const sse3f& idir, - const int near_x, - const int near_y, - const int near_z, - const int far_x, - const int far_y, - const int far_z, - const int nodeAddr, - ssef *__restrict dist) -{ - const int offset = nodeAddr + 1; -#ifdef __KERNEL_AVX2__ - const ssef tnear_x = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_x), idir.x, org_idir.x); - const ssef tnear_y = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_y), idir.y, org_idir.y); - const ssef tnear_z = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_z), idir.z, org_idir.z); - const ssef tfar_x = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_x), idir.x, org_idir.x); - const ssef tfar_y = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_y), idir.y, org_idir.y); - const ssef tfar_z = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_z), idir.z, org_idir.z); -#else - const ssef tnear_x = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_x) - org.x) * idir.x; - const ssef tnear_y = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_y) - org.y) * idir.y; - const ssef tnear_z = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_z) - org.z) * idir.z; - const ssef tfar_x = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_x) - org.x) * idir.x; - const ssef tfar_y = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_y) - org.y) * idir.y; - const ssef tfar_z = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_z) - org.z) * idir.z; -#endif - -#ifdef __KERNEL_SSE41__ - const ssef tNear = maxi(maxi(tnear_x, tnear_y), maxi(tnear_z, tnear)); - const ssef tFar = mini(mini(tfar_x, tfar_y), mini(tfar_z, tfar)); - const sseb vmask = cast(tNear) > cast(tFar); - int mask = (int)movemask(vmask)^0xf; -#else - const ssef tNear = max4(tnear_x, tnear_y, tnear_z, tnear); - const ssef tFar = min4(tfar_x, tfar_y, tfar_z, tfar); - const sseb vmask = tNear <= tFar; - int mask = (int)movemask(vmask); -#endif - *dist = tNear; - return mask; -} - -ccl_device_inline int qbvh_aligned_node_intersect_robust( - KernelGlobals 
*__restrict kg, - const ssef& tnear, - const ssef& tfar, -#ifdef __KERNEL_AVX2__ - const sse3f& P_idir, -#else - const sse3f& P, -#endif - const sse3f& idir, - const int near_x, - const int near_y, - const int near_z, - const int far_x, - const int far_y, - const int far_z, - const int nodeAddr, - const float difl, - ssef *__restrict dist) -{ - const int offset = nodeAddr + 1; -#ifdef __KERNEL_AVX2__ - const ssef tnear_x = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_x), idir.x, P_idir.x); - const ssef tnear_y = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_y), idir.y, P_idir.y); - const ssef tnear_z = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_z), idir.z, P_idir.z); - const ssef tfar_x = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_x), idir.x, P_idir.x); - const ssef tfar_y = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_y), idir.y, P_idir.y); - const ssef tfar_z = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_z), idir.z, P_idir.z); -#else - const ssef tnear_x = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_x) - P.x) * idir.x; - const ssef tnear_y = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_y) - P.y) * idir.y; - const ssef tnear_z = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_z) - P.z) * idir.z; - const ssef tfar_x = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_x) - P.x) * idir.x; - const ssef tfar_y = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_y) - P.y) * idir.y; - const ssef tfar_z = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_z) - P.z) * idir.z; -#endif - - const float round_down = 1.0f - difl; - const float round_up = 1.0f + difl; - const ssef tNear = max4(tnear_x, tnear_y, tnear_z, tnear); - const ssef tFar = min4(tfar_x, tfar_y, tfar_z, tfar); - const sseb vmask = round_down*tNear <= round_up*tFar; - *dist = tNear; - return (int)movemask(vmask); -} - -/* Unaligned nodes intersection */ - -ccl_device_inline int qbvh_unaligned_node_intersect( - KernelGlobals *__restrict kg, - const ssef& tnear, - const 
ssef& tfar, -#ifdef __KERNEL_AVX2__ - const sse3f& org_idir, -#endif - const sse3f& org, - const sse3f& dir, - const sse3f& idir, - const int near_x, - const int near_y, - const int near_z, - const int far_x, - const int far_y, - const int far_z, - const int nodeAddr, - ssef *__restrict dist) -{ - const int offset = nodeAddr; - const ssef tfm_x_x = kernel_tex_fetch_ssef(__bvh_nodes, offset+1); - const ssef tfm_x_y = kernel_tex_fetch_ssef(__bvh_nodes, offset+2); - const ssef tfm_x_z = kernel_tex_fetch_ssef(__bvh_nodes, offset+3); - - const ssef tfm_y_x = kernel_tex_fetch_ssef(__bvh_nodes, offset+4); - const ssef tfm_y_y = kernel_tex_fetch_ssef(__bvh_nodes, offset+5); - const ssef tfm_y_z = kernel_tex_fetch_ssef(__bvh_nodes, offset+6); - - const ssef tfm_z_x = kernel_tex_fetch_ssef(__bvh_nodes, offset+7); - const ssef tfm_z_y = kernel_tex_fetch_ssef(__bvh_nodes, offset+8); - const ssef tfm_z_z = kernel_tex_fetch_ssef(__bvh_nodes, offset+9); - - const ssef tfm_t_x = kernel_tex_fetch_ssef(__bvh_nodes, offset+10); - const ssef tfm_t_y = kernel_tex_fetch_ssef(__bvh_nodes, offset+11); - const ssef tfm_t_z = kernel_tex_fetch_ssef(__bvh_nodes, offset+12); - - const ssef aligned_dir_x = dir.x*tfm_x_x + dir.y*tfm_x_y + dir.z*tfm_x_z, - aligned_dir_y = dir.x*tfm_y_x + dir.y*tfm_y_y + dir.z*tfm_y_z, - aligned_dir_z = dir.x*tfm_z_x + dir.y*tfm_z_y + dir.z*tfm_z_z; - - const ssef aligned_P_x = org.x*tfm_x_x + org.y*tfm_x_y + org.z*tfm_x_z + tfm_t_x, - aligned_P_y = org.x*tfm_y_x + org.y*tfm_y_y + org.z*tfm_y_z + tfm_t_y, - aligned_P_z = org.x*tfm_z_x + org.y*tfm_z_y + org.z*tfm_z_z + tfm_t_z; - - const ssef neg_one(-1.0f, -1.0f, -1.0f, -1.0f); - const ssef nrdir_x = neg_one / aligned_dir_x, - nrdir_y = neg_one / aligned_dir_y, - nrdir_z = neg_one / aligned_dir_z; - - const ssef tlower_x = aligned_P_x * nrdir_x, - tlower_y = aligned_P_y * nrdir_y, - tlower_z = aligned_P_z * nrdir_z; - - const ssef tupper_x = tlower_x - nrdir_x, - tupper_y = tlower_y - nrdir_y, - tupper_z = 
tlower_z - nrdir_z; - -#ifdef __KERNEL_SSE41__ - const ssef tnear_x = mini(tlower_x, tupper_x); - const ssef tnear_y = mini(tlower_y, tupper_y); - const ssef tnear_z = mini(tlower_z, tupper_z); - const ssef tfar_x = maxi(tlower_x, tupper_x); - const ssef tfar_y = maxi(tlower_y, tupper_y); - const ssef tfar_z = maxi(tlower_z, tupper_z); - const ssef tNear = max4(tnear, tnear_x, tnear_y, tnear_z); - const ssef tFar = min4(tfar, tfar_x, tfar_y, tfar_z); - const sseb vmask = tNear <= tFar; - *dist = tNear; - return movemask(vmask); -#else - const ssef tnear_x = min(tlower_x, tupper_x); - const ssef tnear_y = min(tlower_y, tupper_y); - const ssef tnear_z = min(tlower_z, tupper_z); - const ssef tfar_x = max(tlower_x, tupper_x); - const ssef tfar_y = max(tlower_y, tupper_y); - const ssef tfar_z = max(tlower_z, tupper_z); - const ssef tNear = max4(tnear, tnear_x, tnear_y, tnear_z); - const ssef tFar = min4(tfar, tfar_x, tfar_y, tfar_z); - const sseb vmask = tNear <= tFar; - *dist = tNear; - return movemask(vmask); -#endif -} - -ccl_device_inline int qbvh_unaligned_node_intersect_robust( - KernelGlobals *__restrict kg, - const ssef& tnear, - const ssef& tfar, -#ifdef __KERNEL_AVX2__ - const sse3f& P_idir, -#endif - const sse3f& P, - const sse3f& dir, - const sse3f& idir, - const int near_x, - const int near_y, - const int near_z, - const int far_x, - const int far_y, - const int far_z, - const int nodeAddr, - const float difl, - ssef *__restrict dist) -{ - const int offset = nodeAddr; - const ssef tfm_x_x = kernel_tex_fetch_ssef(__bvh_nodes, offset+1); - const ssef tfm_x_y = kernel_tex_fetch_ssef(__bvh_nodes, offset+2); - const ssef tfm_x_z = kernel_tex_fetch_ssef(__bvh_nodes, offset+3); - - const ssef tfm_y_x = kernel_tex_fetch_ssef(__bvh_nodes, offset+4); - const ssef tfm_y_y = kernel_tex_fetch_ssef(__bvh_nodes, offset+5); - const ssef tfm_y_z = kernel_tex_fetch_ssef(__bvh_nodes, offset+6); - - const ssef tfm_z_x = kernel_tex_fetch_ssef(__bvh_nodes, offset+7); - const 
ssef tfm_z_y = kernel_tex_fetch_ssef(__bvh_nodes, offset+8); - const ssef tfm_z_z = kernel_tex_fetch_ssef(__bvh_nodes, offset+9); - - const ssef tfm_t_x = kernel_tex_fetch_ssef(__bvh_nodes, offset+10); - const ssef tfm_t_y = kernel_tex_fetch_ssef(__bvh_nodes, offset+11); - const ssef tfm_t_z = kernel_tex_fetch_ssef(__bvh_nodes, offset+12); - - const ssef aligned_dir_x = dir.x*tfm_x_x + dir.y*tfm_x_y + dir.z*tfm_x_z, - aligned_dir_y = dir.x*tfm_y_x + dir.y*tfm_y_y + dir.z*tfm_y_z, - aligned_dir_z = dir.x*tfm_z_x + dir.y*tfm_z_y + dir.z*tfm_z_z; - - const ssef aligned_P_x = P.x*tfm_x_x + P.y*tfm_x_y + P.z*tfm_x_z + tfm_t_x, - aligned_P_y = P.x*tfm_y_x + P.y*tfm_y_y + P.z*tfm_y_z + tfm_t_y, - aligned_P_z = P.x*tfm_z_x + P.y*tfm_z_y + P.z*tfm_z_z + tfm_t_z; - - const ssef neg_one(-1.0f, -1.0f, -1.0f, -1.0f); - const ssef nrdir_x = neg_one / aligned_dir_x, - nrdir_y = neg_one / aligned_dir_y, - nrdir_z = neg_one / aligned_dir_z; - - const ssef tlower_x = aligned_P_x * nrdir_x, - tlower_y = aligned_P_y * nrdir_y, - tlower_z = aligned_P_z * nrdir_z; - - const ssef tupper_x = tlower_x - nrdir_x, - tupper_y = tlower_y - nrdir_y, - tupper_z = tlower_z - nrdir_z; - - const float round_down = 1.0f - difl; - const float round_up = 1.0f + difl; - -#ifdef __KERNEL_SSE41__ - const ssef tnear_x = mini(tlower_x, tupper_x); - const ssef tnear_y = mini(tlower_y, tupper_y); - const ssef tnear_z = mini(tlower_z, tupper_z); - const ssef tfar_x = maxi(tlower_x, tupper_x); - const ssef tfar_y = maxi(tlower_y, tupper_y); - const ssef tfar_z = maxi(tlower_z, tupper_z); -#else - const ssef tnear_x = min(tlower_x, tupper_x); - const ssef tnear_y = min(tlower_y, tupper_y); - const ssef tnear_z = min(tlower_z, tupper_z); - const ssef tfar_x = max(tlower_x, tupper_x); - const ssef tfar_y = max(tlower_y, tupper_y); - const ssef tfar_z = max(tlower_z, tupper_z); -#endif - const ssef tNear = max4(tnear, tnear_x, tnear_y, tnear_z); - const ssef tFar = min4(tfar, tfar_x, tfar_y, tfar_z); - const sseb 
vmask = round_down*tNear <= round_up*tFar; - *dist = tNear; - return movemask(vmask); -} - -/* Intersectors wrappers. - * - * They'll check node type and call appropriate intersection code. - */ - -ccl_device_inline int qbvh_node_intersect( - KernelGlobals *__restrict kg, - const ssef& tnear, - const ssef& tfar, -#ifdef __KERNEL_AVX2__ - const sse3f& org_idir, -#endif - const sse3f& org, - const sse3f& dir, - const sse3f& idir, - const int near_x, - const int near_y, - const int near_z, - const int far_x, - const int far_y, - const int far_z, - const int nodeAddr, - ssef *__restrict dist) -{ - const int offset = nodeAddr; - const float4 node = kernel_tex_fetch(__bvh_nodes, offset); - if(__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) { - return qbvh_unaligned_node_intersect(kg, - tnear, - tfar, -#ifdef __KERNEL_AVX2__ - org_idir, -#endif - org, - dir, - idir, - near_x, near_y, near_z, - far_x, far_y, far_z, - nodeAddr, - dist); - } - else { - return qbvh_aligned_node_intersect(kg, - tnear, - tfar, -#ifdef __KERNEL_AVX2__ - org_idir, -#else - org, -#endif - idir, - near_x, near_y, near_z, - far_x, far_y, far_z, - nodeAddr, - dist); - } -} - -ccl_device_inline int qbvh_node_intersect_robust( - KernelGlobals *__restrict kg, - const ssef& tnear, - const ssef& tfar, -#ifdef __KERNEL_AVX2__ - const sse3f& P_idir, -#endif - const sse3f& P, - const sse3f& dir, - const sse3f& idir, - const int near_x, - const int near_y, - const int near_z, - const int far_x, - const int far_y, - const int far_z, - const int nodeAddr, - const float difl, - ssef *__restrict dist) -{ - const int offset = nodeAddr; - const float4 node = kernel_tex_fetch(__bvh_nodes, offset); - if(__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) { - return qbvh_unaligned_node_intersect_robust(kg, - tnear, - tfar, -#ifdef __KERNEL_AVX2__ - P_idir, -#endif - P, - dir, - idir, - near_x, near_y, near_z, - far_x, far_y, far_z, - nodeAddr, - difl, - dist); - } - else { - return 
qbvh_aligned_node_intersect_robust(kg, - tnear, - tfar, -#ifdef __KERNEL_AVX2__ - P_idir, -#else - P, -#endif - idir, - near_x, near_y, near_z, - far_x, far_y, far_z, - nodeAddr, - difl, - dist); - } -} diff --git a/intern/cycles/kernel/geom/geom_qbvh_shadow.h b/intern/cycles/kernel/geom/geom_qbvh_shadow.h deleted file mode 100644 index e5e611a0d47..00000000000 --- a/intern/cycles/kernel/geom/geom_qbvh_shadow.h +++ /dev/null @@ -1,449 +0,0 @@ -/* - * Adapted from code Copyright 2009-2010 NVIDIA Corporation, - * and code copyright 2009-2012 Intel Corporation - * - * Modifications Copyright 2011-2014, Blender Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* This is a template BVH traversal function, where various features can be - * enabled/disabled. This way we can compile optimized versions for each case - * without new features slowing things down. - * - * BVH_INSTANCING: object instancing - * BVH_HAIR: hair curve rendering - * BVH_MOTION: motion blur rendering - * - */ - -#if BVH_FEATURE(BVH_HAIR) -# define NODE_INTERSECT qbvh_node_intersect -#else -# define NODE_INTERSECT qbvh_aligned_node_intersect -#endif - -ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, - const Ray *ray, - Intersection *isect_array, - const uint max_hits, - uint *num_hits) -{ - /* TODO(sergey): - * - Likely and unlikely for if() statements. - * - Test restrict attribute for pointers. - */ - - /* Traversal stack in CUDA thread-local memory. 
*/ - QBVHStackItem traversalStack[BVH_QSTACK_SIZE]; - traversalStack[0].addr = ENTRYPOINT_SENTINEL; - - /* Traversal variables in registers. */ - int stackPtr = 0; - int nodeAddr = kernel_data.bvh.root; - - /* Ray parameters in registers. */ - const float tmax = ray->t; - float3 P = ray->P; - float3 dir = bvh_clamp_direction(ray->D); - float3 idir = bvh_inverse_direction(dir); - int object = OBJECT_NONE; - float isect_t = tmax; - -#if BVH_FEATURE(BVH_MOTION) - Transform ob_itfm; -#endif - - *num_hits = 0; - isect_array->t = tmax; - -#ifndef __KERNEL_SSE41__ - if(!isfinite(P.x)) { - return false; - } -#endif - -#if BVH_FEATURE(BVH_INSTANCING) - int num_hits_in_instance = 0; -#endif - - ssef tnear(0.0f), tfar(tmax); -#if BVH_FEATURE(BVH_HAIR) - sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z)); -#endif - sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z)); - -#ifdef __KERNEL_AVX2__ - float3 P_idir = P*idir; - sse3f P_idir4(P_idir.x, P_idir.y, P_idir.z); -#endif -#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__) - sse3f org4(ssef(P.x), ssef(P.y), ssef(P.z)); -#endif - - /* Offsets to select the side that becomes the lower or upper bound. */ - int near_x, near_y, near_z; - int far_x, far_y, far_z; - - if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; } - if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; } - if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; } - - IsectPrecalc isect_precalc; - triangle_intersect_precalc(dir, &isect_precalc); - - /* Traversal loop. */ - do { - do { - /* Traverse internal nodes. */ - while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) { - float4 inodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0); - -#ifdef __VISIBILITY_FLAG__ - if((__float_as_uint(inodes.x) & PATH_RAY_SHADOW) == 0) { - /* Pop. 
*/ - nodeAddr = traversalStack[stackPtr].addr; - --stackPtr; - continue; - } -#endif - - ssef dist; - int traverseChild = NODE_INTERSECT(kg, - tnear, - tfar, -#ifdef __KERNEL_AVX2__ - P_idir4, -#endif -# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__) - org4, -# endif -# if BVH_FEATURE(BVH_HAIR) - dir4, -# endif - idir4, - near_x, near_y, near_z, - far_x, far_y, far_z, - nodeAddr, - &dist); - - if(traverseChild != 0) { - float4 cnodes; -#if BVH_FEATURE(BVH_HAIR) - if(__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) { - cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+13); - } - else -#endif - { - cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+7); - } - - /* One child is hit, continue with that child. */ - int r = __bscf(traverseChild); - if(traverseChild == 0) { - nodeAddr = __float_as_int(cnodes[r]); - continue; - } - - /* Two children are hit, push far child, and continue with - * closer child. - */ - int c0 = __float_as_int(cnodes[r]); - float d0 = ((float*)&dist)[r]; - r = __bscf(traverseChild); - int c1 = __float_as_int(cnodes[r]); - float d1 = ((float*)&dist)[r]; - if(traverseChild == 0) { - if(d1 < d0) { - nodeAddr = c1; - ++stackPtr; - kernel_assert(stackPtr < BVH_QSTACK_SIZE); - traversalStack[stackPtr].addr = c0; - traversalStack[stackPtr].dist = d0; - continue; - } - else { - nodeAddr = c0; - ++stackPtr; - kernel_assert(stackPtr < BVH_QSTACK_SIZE); - traversalStack[stackPtr].addr = c1; - traversalStack[stackPtr].dist = d1; - continue; - } - } - - /* Here starts the slow path for 3 or 4 hit children. We push - * all nodes onto the stack to sort them there. - */ - ++stackPtr; - kernel_assert(stackPtr < BVH_QSTACK_SIZE); - traversalStack[stackPtr].addr = c1; - traversalStack[stackPtr].dist = d1; - ++stackPtr; - kernel_assert(stackPtr < BVH_QSTACK_SIZE); - traversalStack[stackPtr].addr = c0; - traversalStack[stackPtr].dist = d0; - - /* Three children are hit, push all onto stack and sort 3 - * stack items, continue with closest child. 
- */ - r = __bscf(traverseChild); - int c2 = __float_as_int(cnodes[r]); - float d2 = ((float*)&dist)[r]; - if(traverseChild == 0) { - ++stackPtr; - kernel_assert(stackPtr < BVH_QSTACK_SIZE); - traversalStack[stackPtr].addr = c2; - traversalStack[stackPtr].dist = d2; - qbvh_stack_sort(&traversalStack[stackPtr], - &traversalStack[stackPtr - 1], - &traversalStack[stackPtr - 2]); - nodeAddr = traversalStack[stackPtr].addr; - --stackPtr; - continue; - } - - /* Four children are hit, push all onto stack and sort 4 - * stack items, continue with closest child. - */ - r = __bscf(traverseChild); - int c3 = __float_as_int(cnodes[r]); - float d3 = ((float*)&dist)[r]; - ++stackPtr; - kernel_assert(stackPtr < BVH_QSTACK_SIZE); - traversalStack[stackPtr].addr = c3; - traversalStack[stackPtr].dist = d3; - ++stackPtr; - kernel_assert(stackPtr < BVH_QSTACK_SIZE); - traversalStack[stackPtr].addr = c2; - traversalStack[stackPtr].dist = d2; - qbvh_stack_sort(&traversalStack[stackPtr], - &traversalStack[stackPtr - 1], - &traversalStack[stackPtr - 2], - &traversalStack[stackPtr - 3]); - } - - nodeAddr = traversalStack[stackPtr].addr; - --stackPtr; - } - - /* If node is leaf, fetch triangle list. */ - if(nodeAddr < 0) { - float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)); -#ifdef __VISIBILITY_FLAG__ - if((__float_as_uint(leaf.z) & PATH_RAY_SHADOW) == 0) { - /* Pop. */ - nodeAddr = traversalStack[stackPtr].addr; - --stackPtr; - continue; - } -#endif - - int primAddr = __float_as_int(leaf.x); - -#if BVH_FEATURE(BVH_INSTANCING) - if(primAddr >= 0) { -#endif - int primAddr2 = __float_as_int(leaf.y); - const uint type = __float_as_int(leaf.w); - const uint p_type = type & PRIMITIVE_ALL; - - /* Pop. */ - nodeAddr = traversalStack[stackPtr].addr; - --stackPtr; - - /* Primitive intersection. 
*/ - while(primAddr < primAddr2) { - kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); - - bool hit; - - /* todo: specialized intersect functions which don't fill in - * isect unless needed and check SD_HAS_TRANSPARENT_SHADOW? - * might give a few % performance improvement */ - - switch(p_type) { - case PRIMITIVE_TRIANGLE: { - hit = triangle_intersect(kg, &isect_precalc, isect_array, P, PATH_RAY_SHADOW, object, primAddr); - break; - } -#if BVH_FEATURE(BVH_MOTION) - case PRIMITIVE_MOTION_TRIANGLE: { - hit = motion_triangle_intersect(kg, isect_array, P, dir, ray->time, PATH_RAY_SHADOW, object, primAddr); - break; - } -#endif -#if BVH_FEATURE(BVH_HAIR) - case PRIMITIVE_CURVE: - case PRIMITIVE_MOTION_CURVE: { - if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) - hit = bvh_cardinal_curve_intersect(kg, isect_array, P, dir, PATH_RAY_SHADOW, object, primAddr, ray->time, type, NULL, 0, 0); - else - hit = bvh_curve_intersect(kg, isect_array, P, dir, PATH_RAY_SHADOW, object, primAddr, ray->time, type, NULL, 0, 0); - break; - } -#endif - default: { - hit = false; - break; - } - } - - /* Shadow ray early termination. */ - if(hit) { - /* detect if this surface has a shader with transparent shadows */ - - /* todo: optimize so primitive visibility flag indicates if - * the primitive has a transparent shadow shader? 
*/ - int prim = kernel_tex_fetch(__prim_index, isect_array->prim); - int shader = 0; - -#ifdef __HAIR__ - if(kernel_tex_fetch(__prim_type, isect_array->prim) & PRIMITIVE_ALL_TRIANGLE) -#endif - { - shader = kernel_tex_fetch(__tri_shader, prim); - } -#ifdef __HAIR__ - else { - float4 str = kernel_tex_fetch(__curves, prim); - shader = __float_as_int(str.z); - } -#endif - int flag = kernel_tex_fetch(__shader_flag, (shader & SHADER_MASK)*2); - - /* if no transparent shadows, all light is blocked */ - if(!(flag & SD_HAS_TRANSPARENT_SHADOW)) { - return true; - } - /* if maximum number of hits reached, block all light */ - else if(*num_hits == max_hits) { - return true; - } - - /* move on to next entry in intersections array */ - isect_array++; - (*num_hits)++; -#if BVH_FEATURE(BVH_INSTANCING) - num_hits_in_instance++; -#endif - - isect_array->t = isect_t; - } - - primAddr++; - } - } -#if BVH_FEATURE(BVH_INSTANCING) - else { - /* Instance push. */ - object = kernel_tex_fetch(__prim_object, -primAddr-1); - -# if BVH_FEATURE(BVH_MOTION) - bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_itfm); -# else - bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect_t); -# endif - - num_hits_in_instance = 0; - isect_array->t = isect_t; - - if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; } - if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; } - if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; } - tfar = ssef(isect_t); -# if BVH_FEATURE(BVH_HAIR) - dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z)); -# endif - idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z)); -# ifdef __KERNEL_AVX2__ - P_idir = P*idir; - P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z); -# endif -# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__) - org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z)); -# endif - - triangle_intersect_precalc(dir, &isect_precalc); - - ++stackPtr; - kernel_assert(stackPtr < 
BVH_QSTACK_SIZE); - traversalStack[stackPtr].addr = ENTRYPOINT_SENTINEL; - - nodeAddr = kernel_tex_fetch(__object_node, object); - - } - } -#endif /* FEATURE(BVH_INSTANCING) */ - } while(nodeAddr != ENTRYPOINT_SENTINEL); - -#if BVH_FEATURE(BVH_INSTANCING) - if(stackPtr >= 0) { - kernel_assert(object != OBJECT_NONE); - - if(num_hits_in_instance) { - float t_fac; - -# if BVH_FEATURE(BVH_MOTION) - bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm); -# else - bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac); -# endif - - /* scale isect->t to adjust for instancing */ - for(int i = 0; i < num_hits_in_instance; i++) - (isect_array-i-1)->t *= t_fac; - } - else { - float ignore_t = FLT_MAX; - -# if BVH_FEATURE(BVH_MOTION) - bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &ignore_t, &ob_itfm); -# else - bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &ignore_t); -# endif - } - - isect_t = tmax; - isect_array->t = isect_t; - - if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; } - if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; } - if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; } - tfar = ssef(tmax); -# if BVH_FEATURE(BVH_HAIR) - dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z)); -# endif - idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z)); -# ifdef __KERNEL_AVX2__ - P_idir = P*idir; - P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z); -# endif -# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__) - org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z)); -# endif - - triangle_intersect_precalc(dir, &isect_precalc); - - object = OBJECT_NONE; - nodeAddr = traversalStack[stackPtr].addr; - --stackPtr; - } -#endif /* FEATURE(BVH_INSTANCING) */ - } while(nodeAddr != ENTRYPOINT_SENTINEL); - - return false; -} - -#undef NODE_INTERSECT diff --git a/intern/cycles/kernel/geom/geom_qbvh_subsurface.h 
b/intern/cycles/kernel/geom/geom_qbvh_subsurface.h deleted file mode 100644 index 4adaf9c8f3d..00000000000 --- a/intern/cycles/kernel/geom/geom_qbvh_subsurface.h +++ /dev/null @@ -1,299 +0,0 @@ -/* - * Adapted from code Copyright 2009-2010 NVIDIA Corporation, - * and code copyright 2009-2012 Intel Corporation - * - * Modifications Copyright 2011-2014, Blender Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* This is a template BVH traversal function for subsurface scattering, where - * various features can be enabled/disabled. This way we can compile optimized - * versions for each case without new features slowing things down. - * - * BVH_MOTION: motion blur rendering - * - */ - -#if BVH_FEATURE(BVH_HAIR) -# define NODE_INTERSECT qbvh_node_intersect -#else -# define NODE_INTERSECT qbvh_aligned_node_intersect -#endif - -ccl_device void BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, - const Ray *ray, - SubsurfaceIntersection *ss_isect, - int subsurface_object, - uint *lcg_state, - int max_hits) -{ - /* TODO(sergey): - * - Test if pushing distance on the stack helps (for non shadow rays). - * - Separate version for shadow rays. - * - Likely and unlikely for if() statements. - * - SSE for hair. - * - Test restrict attribute for pointers. - */ - - /* Traversal stack in CUDA thread-local memory. */ - QBVHStackItem traversalStack[BVH_QSTACK_SIZE]; - traversalStack[0].addr = ENTRYPOINT_SENTINEL; - - /* Traversal variables in registers. 
*/ - int stackPtr = 0; - int nodeAddr = kernel_tex_fetch(__object_node, subsurface_object); - - /* Ray parameters in registers. */ - float3 P = ray->P; - float3 dir = bvh_clamp_direction(ray->D); - float3 idir = bvh_inverse_direction(dir); - int object = OBJECT_NONE; - float isect_t = ray->t; - - ss_isect->num_hits = 0; - - const int object_flag = kernel_tex_fetch(__object_flag, subsurface_object); - if(!(object_flag & SD_TRANSFORM_APPLIED)) { -#if BVH_FEATURE(BVH_MOTION) - Transform ob_itfm; - bvh_instance_motion_push(kg, - subsurface_object, - ray, - &P, - &dir, - &idir, - &isect_t, - &ob_itfm); -#else - bvh_instance_push(kg, subsurface_object, ray, &P, &dir, &idir, &isect_t); -#endif - object = subsurface_object; - } - -#ifndef __KERNEL_SSE41__ - if(!isfinite(P.x)) { - return; - } -#endif - - ssef tnear(0.0f), tfar(isect_t); -#if BVH_FEATURE(BVH_HAIR) - sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z)); -#endif - sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z)); - -#ifdef __KERNEL_AVX2__ - float3 P_idir = P*idir; - sse3f P_idir4(P_idir.x, P_idir.y, P_idir.z); -#endif -#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__) - sse3f org4(ssef(P.x), ssef(P.y), ssef(P.z)); -#endif - - /* Offsets to select the side that becomes the lower or upper bound. */ - int near_x, near_y, near_z; - int far_x, far_y, far_z; - - if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; } - if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; } - if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; } - - IsectPrecalc isect_precalc; - triangle_intersect_precalc(dir, &isect_precalc); - - /* Traversal loop. */ - do { - do { - /* Traverse internal nodes. 
*/ - while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) { - ssef dist; - - int traverseChild = NODE_INTERSECT(kg, - tnear, - tfar, -#ifdef __KERNEL_AVX2__ - P_idir4, -#endif -#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__) - org4, -#endif -#if BVH_FEATURE(BVH_HAIR) - dir4, -#endif - idir4, - near_x, near_y, near_z, - far_x, far_y, far_z, - nodeAddr, - &dist); - - if(traverseChild != 0) { - float4 inodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0); - float4 cnodes; -#if BVH_FEATURE(BVH_HAIR) - if(__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) { - cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+13); - } - else -#endif - { - cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+7); - } - - /* One child is hit, continue with that child. */ - int r = __bscf(traverseChild); - if(traverseChild == 0) { - nodeAddr = __float_as_int(cnodes[r]); - continue; - } - - /* Two children are hit, push far child, and continue with - * closer child. - */ - int c0 = __float_as_int(cnodes[r]); - float d0 = ((float*)&dist)[r]; - r = __bscf(traverseChild); - int c1 = __float_as_int(cnodes[r]); - float d1 = ((float*)&dist)[r]; - if(traverseChild == 0) { - if(d1 < d0) { - nodeAddr = c1; - ++stackPtr; - kernel_assert(stackPtr < BVH_QSTACK_SIZE); - traversalStack[stackPtr].addr = c0; - traversalStack[stackPtr].dist = d0; - continue; - } - else { - nodeAddr = c0; - ++stackPtr; - kernel_assert(stackPtr < BVH_QSTACK_SIZE); - traversalStack[stackPtr].addr = c1; - traversalStack[stackPtr].dist = d1; - continue; - } - } - - /* Here starts the slow path for 3 or 4 hit children. We push - * all nodes onto the stack to sort them there. 
- */ - ++stackPtr; - kernel_assert(stackPtr < BVH_QSTACK_SIZE); - traversalStack[stackPtr].addr = c1; - traversalStack[stackPtr].dist = d1; - ++stackPtr; - kernel_assert(stackPtr < BVH_QSTACK_SIZE); - traversalStack[stackPtr].addr = c0; - traversalStack[stackPtr].dist = d0; - - /* Three children are hit, push all onto stack and sort 3 - * stack items, continue with closest child. - */ - r = __bscf(traverseChild); - int c2 = __float_as_int(cnodes[r]); - float d2 = ((float*)&dist)[r]; - if(traverseChild == 0) { - ++stackPtr; - kernel_assert(stackPtr < BVH_QSTACK_SIZE); - traversalStack[stackPtr].addr = c2; - traversalStack[stackPtr].dist = d2; - qbvh_stack_sort(&traversalStack[stackPtr], - &traversalStack[stackPtr - 1], - &traversalStack[stackPtr - 2]); - nodeAddr = traversalStack[stackPtr].addr; - --stackPtr; - continue; - } - - /* Four children are hit, push all onto stack and sort 4 - * stack items, continue with closest child. - */ - r = __bscf(traverseChild); - int c3 = __float_as_int(cnodes[r]); - float d3 = ((float*)&dist)[r]; - ++stackPtr; - kernel_assert(stackPtr < BVH_QSTACK_SIZE); - traversalStack[stackPtr].addr = c3; - traversalStack[stackPtr].dist = d3; - ++stackPtr; - kernel_assert(stackPtr < BVH_QSTACK_SIZE); - traversalStack[stackPtr].addr = c2; - traversalStack[stackPtr].dist = d2; - qbvh_stack_sort(&traversalStack[stackPtr], - &traversalStack[stackPtr - 1], - &traversalStack[stackPtr - 2], - &traversalStack[stackPtr - 3]); - } - - nodeAddr = traversalStack[stackPtr].addr; - --stackPtr; - } - - /* If node is leaf, fetch triangle list. */ - if(nodeAddr < 0) { - float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)); - int primAddr = __float_as_int(leaf.x); - - int primAddr2 = __float_as_int(leaf.y); - const uint type = __float_as_int(leaf.w); - - /* Pop. */ - nodeAddr = traversalStack[stackPtr].addr; - --stackPtr; - - /* Primitive intersection. 
*/ - switch(type & PRIMITIVE_ALL) { - case PRIMITIVE_TRIANGLE: { - /* Intersect ray against primitive, */ - for(; primAddr < primAddr2; primAddr++) { - kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); - triangle_intersect_subsurface(kg, - &isect_precalc, - ss_isect, - P, - object, - primAddr, - isect_t, - lcg_state, - max_hits); - } - break; - } -#if BVH_FEATURE(BVH_MOTION) - case PRIMITIVE_MOTION_TRIANGLE: { - /* Intersect ray against primitive. */ - for(; primAddr < primAddr2; primAddr++) { - kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); - motion_triangle_intersect_subsurface(kg, - ss_isect, - P, - dir, - ray->time, - object, - primAddr, - isect_t, - lcg_state, - max_hits); - } - break; - } -#endif - default: - break; - } - } - } while(nodeAddr != ENTRYPOINT_SENTINEL); - } while(nodeAddr != ENTRYPOINT_SENTINEL); -} - -#undef NODE_INTERSECT diff --git a/intern/cycles/kernel/geom/geom_qbvh_traversal.h b/intern/cycles/kernel/geom/geom_qbvh_traversal.h deleted file mode 100644 index 24bf85f46c8..00000000000 --- a/intern/cycles/kernel/geom/geom_qbvh_traversal.h +++ /dev/null @@ -1,465 +0,0 @@ -/* - * Adapted from code Copyright 2009-2010 NVIDIA Corporation, - * and code copyright 2009-2012 Intel Corporation - * - * Modifications Copyright 2011-2014, Blender Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* This is a template BVH traversal function, where various features can be - * enabled/disabled. 
This way we can compile optimized versions for each case - * without new features slowing things down. - * - * BVH_INSTANCING: object instancing - * BVH_HAIR: hair curve rendering - * BVH_HAIR_MINIMUM_WIDTH: hair curve rendering with minimum width - * BVH_MOTION: motion blur rendering - * - */ - -#if BVH_FEATURE(BVH_HAIR) -# define NODE_INTERSECT qbvh_node_intersect -# define NODE_INTERSECT_ROBUST qbvh_node_intersect_robust -#else -# define NODE_INTERSECT qbvh_aligned_node_intersect -# define NODE_INTERSECT_ROBUST qbvh_aligned_node_intersect_robust -#endif - -ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, - const Ray *ray, - Intersection *isect, - const uint visibility -#if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH) - ,uint *lcg_state, - float difl, - float extmax -#endif - ) -{ - /* TODO(sergey): - * - Test if pushing distance on the stack helps (for non shadow rays). - * - Separate version for shadow rays. - * - Likely and unlikely for if() statements. - * - Test restrict attribute for pointers. - */ - - /* Traversal stack in CUDA thread-local memory. */ - QBVHStackItem traversalStack[BVH_QSTACK_SIZE]; - traversalStack[0].addr = ENTRYPOINT_SENTINEL; - traversalStack[0].dist = -FLT_MAX; - - /* Traversal variables in registers. */ - int stackPtr = 0; - int nodeAddr = kernel_data.bvh.root; - float nodeDist = -FLT_MAX; - - /* Ray parameters in registers. 
*/ - float3 P = ray->P; - float3 dir = bvh_clamp_direction(ray->D); - float3 idir = bvh_inverse_direction(dir); - int object = OBJECT_NONE; - -#if BVH_FEATURE(BVH_MOTION) - Transform ob_itfm; -#endif - -#ifndef __KERNEL_SSE41__ - if(!isfinite(P.x)) { - return false; - } -#endif - - isect->t = ray->t; - isect->u = 0.0f; - isect->v = 0.0f; - isect->prim = PRIM_NONE; - isect->object = OBJECT_NONE; - - BVH_DEBUG_INIT(); - - ssef tnear(0.0f), tfar(ray->t); -#if BVH_FEATURE(BVH_HAIR) - sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z)); -#endif - sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z)); - -#ifdef __KERNEL_AVX2__ - float3 P_idir = P*idir; - sse3f P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z); -#endif -#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__) - sse3f org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z)); -#endif - - /* Offsets to select the side that becomes the lower or upper bound. */ - int near_x, near_y, near_z; - int far_x, far_y, far_z; - - if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; } - if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; } - if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; } - - IsectPrecalc isect_precalc; - triangle_intersect_precalc(dir, &isect_precalc); - - /* Traversal loop. */ - do { - do { - /* Traverse internal nodes. */ - while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) { - float4 inodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0); - - if(UNLIKELY(nodeDist > isect->t) -#ifdef __VISIBILITY_FLAG__ - || (__float_as_uint(inodes.x) & visibility) == 0) -#endif - { - /* Pop. 
*/ - nodeAddr = traversalStack[stackPtr].addr; - nodeDist = traversalStack[stackPtr].dist; - --stackPtr; - continue; - } - - int traverseChild; - ssef dist; - - BVH_DEBUG_NEXT_STEP(); - -#if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH) - if(difl != 0.0f) { - /* NOTE: We extend all the child BB instead of fetching - * and checking visibility flags for each of the, - * - * Need to test if doing opposite would be any faster. - */ - traverseChild = NODE_INTERSECT_ROBUST(kg, - tnear, - tfar, -# ifdef __KERNEL_AVX2__ - P_idir4, -# endif -# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__) - org4, -# endif -# if BVH_FEATURE(BVH_HAIR) - dir4, -# endif - idir4, - near_x, near_y, near_z, - far_x, far_y, far_z, - nodeAddr, - difl, - &dist); - } - else -#endif /* BVH_HAIR_MINIMUM_WIDTH */ - { - traverseChild = NODE_INTERSECT(kg, - tnear, - tfar, -#ifdef __KERNEL_AVX2__ - P_idir4, -#endif -#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__) - org4, -#endif -#if BVH_FEATURE(BVH_HAIR) - dir4, -#endif - idir4, - near_x, near_y, near_z, - far_x, far_y, far_z, - nodeAddr, - &dist); - } - - if(traverseChild != 0) { - float4 cnodes; - /* TODO(sergey): Investigate whether moving cnodes upwards - * gives a speedup (will be different cache pattern but will - * avoid extra check here), - */ -#if BVH_FEATURE(BVH_HAIR) - if(__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) { - cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+13); - } - else -#endif - { - cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+7); - } - - /* One child is hit, continue with that child. */ - int r = __bscf(traverseChild); - float d0 = ((float*)&dist)[r]; - if(traverseChild == 0) { - nodeAddr = __float_as_int(cnodes[r]); - nodeDist = d0; - continue; - } - - /* Two children are hit, push far child, and continue with - * closer child. 
- */ - int c0 = __float_as_int(cnodes[r]); - r = __bscf(traverseChild); - int c1 = __float_as_int(cnodes[r]); - float d1 = ((float*)&dist)[r]; - if(traverseChild == 0) { - if(d1 < d0) { - nodeAddr = c1; - nodeDist = d1; - ++stackPtr; - kernel_assert(stackPtr < BVH_QSTACK_SIZE); - traversalStack[stackPtr].addr = c0; - traversalStack[stackPtr].dist = d0; - continue; - } - else { - nodeAddr = c0; - nodeDist = d0; - ++stackPtr; - kernel_assert(stackPtr < BVH_QSTACK_SIZE); - traversalStack[stackPtr].addr = c1; - traversalStack[stackPtr].dist = d1; - continue; - } - } - - /* Here starts the slow path for 3 or 4 hit children. We push - * all nodes onto the stack to sort them there. - */ - ++stackPtr; - kernel_assert(stackPtr < BVH_QSTACK_SIZE); - traversalStack[stackPtr].addr = c1; - traversalStack[stackPtr].dist = d1; - ++stackPtr; - kernel_assert(stackPtr < BVH_QSTACK_SIZE); - traversalStack[stackPtr].addr = c0; - traversalStack[stackPtr].dist = d0; - - /* Three children are hit, push all onto stack and sort 3 - * stack items, continue with closest child. - */ - r = __bscf(traverseChild); - int c2 = __float_as_int(cnodes[r]); - float d2 = ((float*)&dist)[r]; - if(traverseChild == 0) { - ++stackPtr; - kernel_assert(stackPtr < BVH_QSTACK_SIZE); - traversalStack[stackPtr].addr = c2; - traversalStack[stackPtr].dist = d2; - qbvh_stack_sort(&traversalStack[stackPtr], - &traversalStack[stackPtr - 1], - &traversalStack[stackPtr - 2]); - nodeAddr = traversalStack[stackPtr].addr; - nodeDist = traversalStack[stackPtr].dist; - --stackPtr; - continue; - } - - /* Four children are hit, push all onto stack and sort 4 - * stack items, continue with closest child. 
- */ - r = __bscf(traverseChild); - int c3 = __float_as_int(cnodes[r]); - float d3 = ((float*)&dist)[r]; - ++stackPtr; - kernel_assert(stackPtr < BVH_QSTACK_SIZE); - traversalStack[stackPtr].addr = c3; - traversalStack[stackPtr].dist = d3; - ++stackPtr; - kernel_assert(stackPtr < BVH_QSTACK_SIZE); - traversalStack[stackPtr].addr = c2; - traversalStack[stackPtr].dist = d2; - qbvh_stack_sort(&traversalStack[stackPtr], - &traversalStack[stackPtr - 1], - &traversalStack[stackPtr - 2], - &traversalStack[stackPtr - 3]); - } - - nodeAddr = traversalStack[stackPtr].addr; - nodeDist = traversalStack[stackPtr].dist; - --stackPtr; - } - - /* If node is leaf, fetch triangle list. */ - if(nodeAddr < 0) { - float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)); - -#ifdef __VISIBILITY_FLAG__ - if(UNLIKELY((nodeDist > isect->t) || - ((__float_as_uint(leaf.z) & visibility) == 0))) -#else - if(UNLIKELY((nodeDist > isect->t))) -#endif - { - /* Pop. */ - nodeAddr = traversalStack[stackPtr].addr; - nodeDist = traversalStack[stackPtr].dist; - --stackPtr; - continue; - } - - int primAddr = __float_as_int(leaf.x); - -#if BVH_FEATURE(BVH_INSTANCING) - if(primAddr >= 0) { -#endif - int primAddr2 = __float_as_int(leaf.y); - const uint type = __float_as_int(leaf.w); - - /* Pop. */ - nodeAddr = traversalStack[stackPtr].addr; - nodeDist = traversalStack[stackPtr].dist; - --stackPtr; - - /* Primitive intersection. */ - switch(type & PRIMITIVE_ALL) { - case PRIMITIVE_TRIANGLE: { - for(; primAddr < primAddr2; primAddr++) { - BVH_DEBUG_NEXT_STEP(); - kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); - if(triangle_intersect(kg, &isect_precalc, isect, P, visibility, object, primAddr)) { - tfar = ssef(isect->t); - /* Shadow ray early termination. 
*/ - if(visibility == PATH_RAY_SHADOW_OPAQUE) - return true; - } - } - break; - } -#if BVH_FEATURE(BVH_MOTION) - case PRIMITIVE_MOTION_TRIANGLE: { - for(; primAddr < primAddr2; primAddr++) { - BVH_DEBUG_NEXT_STEP(); - kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); - if(motion_triangle_intersect(kg, isect, P, dir, ray->time, visibility, object, primAddr)) { - tfar = ssef(isect->t); - /* Shadow ray early termination. */ - if(visibility == PATH_RAY_SHADOW_OPAQUE) - return true; - } - } - break; - } -#endif /* BVH_FEATURE(BVH_MOTION) */ -#if BVH_FEATURE(BVH_HAIR) - case PRIMITIVE_CURVE: - case PRIMITIVE_MOTION_CURVE: { - for(; primAddr < primAddr2; primAddr++) { - BVH_DEBUG_NEXT_STEP(); - kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); - bool hit; - if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) - hit = bvh_cardinal_curve_intersect(kg, isect, P, dir, visibility, object, primAddr, ray->time, type, lcg_state, difl, extmax); - else - hit = bvh_curve_intersect(kg, isect, P, dir, visibility, object, primAddr, ray->time, type, lcg_state, difl, extmax); - if(hit) { - tfar = ssef(isect->t); - /* Shadow ray early termination. */ - if(visibility == PATH_RAY_SHADOW_OPAQUE) - return true; - } - } - break; - } -#endif /* BVH_FEATURE(BVH_HAIR) */ - } - } -#if BVH_FEATURE(BVH_INSTANCING) - else { - /* Instance push. 
*/ - object = kernel_tex_fetch(__prim_object, -primAddr-1); - -# if BVH_FEATURE(BVH_MOTION) - qbvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect->t, &nodeDist, &ob_itfm); -# else - qbvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t, &nodeDist); -# endif - - if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; } - if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; } - if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; } - tfar = ssef(isect->t); -# if BVH_FEATURE(BVH_HAIR) - dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z)); -# endif - idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z)); -# ifdef __KERNEL_AVX2__ - P_idir = P*idir; - P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z); -# endif -# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__) - org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z)); -# endif - - triangle_intersect_precalc(dir, &isect_precalc); - - ++stackPtr; - kernel_assert(stackPtr < BVH_QSTACK_SIZE); - traversalStack[stackPtr].addr = ENTRYPOINT_SENTINEL; - traversalStack[stackPtr].dist = -FLT_MAX; - - nodeAddr = kernel_tex_fetch(__object_node, object); - - BVH_DEBUG_NEXT_INSTANCE(); - } - } -#endif /* FEATURE(BVH_INSTANCING) */ - } while(nodeAddr != ENTRYPOINT_SENTINEL); - -#if BVH_FEATURE(BVH_INSTANCING) - if(stackPtr >= 0) { - kernel_assert(object != OBJECT_NONE); - - /* Instance pop. 
*/ -# if BVH_FEATURE(BVH_MOTION) - bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm); -# else - bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect->t); -# endif - - if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; } - if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; } - if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; } - tfar = ssef(isect->t); -# if BVH_FEATURE(BVH_HAIR) - dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z)); -# endif - idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z)); -# ifdef __KERNEL_AVX2__ - P_idir = P*idir; - P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z); -# endif -# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__) - org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z)); -# endif - - triangle_intersect_precalc(dir, &isect_precalc); - - object = OBJECT_NONE; - nodeAddr = traversalStack[stackPtr].addr; - nodeDist = traversalStack[stackPtr].dist; - --stackPtr; - } -#endif /* FEATURE(BVH_INSTANCING) */ - } while(nodeAddr != ENTRYPOINT_SENTINEL); - - return (isect->prim != PRIM_NONE); -} - -#undef NODE_INTERSECT -#undef NODE_INTERSECT_ROBUST diff --git a/intern/cycles/kernel/geom/geom_qbvh_volume.h b/intern/cycles/kernel/geom/geom_qbvh_volume.h deleted file mode 100644 index da21ede9e12..00000000000 --- a/intern/cycles/kernel/geom/geom_qbvh_volume.h +++ /dev/null @@ -1,374 +0,0 @@ -/* - * Adapted from code Copyright 2009-2010 NVIDIA Corporation, - * and code copyright 2009-2012 Intel Corporation - * - * Modifications Copyright 2011-2014, Blender Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* This is a template BVH traversal function for volumes, where - * various features can be enabled/disabled. This way we can compile optimized - * versions for each case without new features slowing things down. - * - * BVH_INSTANCING: object instancing - * BVH_MOTION: motion blur rendering - * - */ - -#if BVH_FEATURE(BVH_HAIR) -# define NODE_INTERSECT qbvh_node_intersect -#else -# define NODE_INTERSECT qbvh_aligned_node_intersect -#endif - -ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, - const Ray *ray, - Intersection *isect, - const uint visibility) -{ - /* TODO(sergey): - * - Test if pushing distance on the stack helps. - * - Likely and unlikely for if() statements. - * - Test restrict attribute for pointers. - */ - - /* Traversal stack in CUDA thread-local memory. */ - QBVHStackItem traversalStack[BVH_QSTACK_SIZE]; - traversalStack[0].addr = ENTRYPOINT_SENTINEL; - - /* Traversal variables in registers. */ - int stackPtr = 0; - int nodeAddr = kernel_data.bvh.root; - - /* Ray parameters in registers. 
*/ - float3 P = ray->P; - float3 dir = bvh_clamp_direction(ray->D); - float3 idir = bvh_inverse_direction(dir); - int object = OBJECT_NONE; - -#if BVH_FEATURE(BVH_MOTION) - Transform ob_itfm; -#endif - -#ifndef __KERNEL_SSE41__ - if(!isfinite(P.x)) { - return false; - } -#endif - - isect->t = ray->t; - isect->u = 0.0f; - isect->v = 0.0f; - isect->prim = PRIM_NONE; - isect->object = OBJECT_NONE; - - ssef tnear(0.0f), tfar(ray->t); -#if BVH_FEATURE(BVH_HAIR) - sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z)); -#endif - sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z)); - -#ifdef __KERNEL_AVX2__ - float3 P_idir = P*idir; - sse3f P_idir4(P_idir.x, P_idir.y, P_idir.z); -#endif -#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__) - sse3f org4(ssef(P.x), ssef(P.y), ssef(P.z)); -#endif - - /* Offsets to select the side that becomes the lower or upper bound. */ - int near_x, near_y, near_z; - int far_x, far_y, far_z; - - if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; } - if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; } - if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; } - - IsectPrecalc isect_precalc; - triangle_intersect_precalc(dir, &isect_precalc); - - /* Traversal loop. */ - do { - do { - /* Traverse internal nodes. */ - while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) { -#ifdef __VISIBILITY_FLAG__ - float4 inodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0); - if((__float_as_uint(inodes.x) & visibility) == 0) { - /* Pop. 
*/ - nodeAddr = traversalStack[stackPtr].addr; - --stackPtr; - continue; - } -#endif - - ssef dist; - int traverseChild = NODE_INTERSECT(kg, - tnear, - tfar, -#ifdef __KERNEL_AVX2__ - P_idir4, -#endif -#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__) - org4, -#endif -#if BVH_FEATURE(BVH_HAIR) - dir4, -#endif - idir4, - near_x, near_y, near_z, - far_x, far_y, far_z, - nodeAddr, - &dist); - - if(traverseChild != 0) { - float4 cnodes; -#if BVH_FEATURE(BVH_HAIR) - if(__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) { - cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+13); - } - else -#endif - { - cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+7); - } - - /* One child is hit, continue with that child. */ - int r = __bscf(traverseChild); - if(traverseChild == 0) { - nodeAddr = __float_as_int(cnodes[r]); - continue; - } - - /* Two children are hit, push far child, and continue with - * closer child. - */ - int c0 = __float_as_int(cnodes[r]); - float d0 = ((float*)&dist)[r]; - r = __bscf(traverseChild); - int c1 = __float_as_int(cnodes[r]); - float d1 = ((float*)&dist)[r]; - if(traverseChild == 0) { - if(d1 < d0) { - nodeAddr = c1; - ++stackPtr; - kernel_assert(stackPtr < BVH_QSTACK_SIZE); - traversalStack[stackPtr].addr = c0; - traversalStack[stackPtr].dist = d0; - continue; - } - else { - nodeAddr = c0; - ++stackPtr; - kernel_assert(stackPtr < BVH_QSTACK_SIZE); - traversalStack[stackPtr].addr = c1; - traversalStack[stackPtr].dist = d1; - continue; - } - } - - /* Here starts the slow path for 3 or 4 hit children. We push - * all nodes onto the stack to sort them there. - */ - ++stackPtr; - kernel_assert(stackPtr < BVH_QSTACK_SIZE); - traversalStack[stackPtr].addr = c1; - traversalStack[stackPtr].dist = d1; - ++stackPtr; - kernel_assert(stackPtr < BVH_QSTACK_SIZE); - traversalStack[stackPtr].addr = c0; - traversalStack[stackPtr].dist = d0; - - /* Three children are hit, push all onto stack and sort 3 - * stack items, continue with closest child. 
- */ - r = __bscf(traverseChild); - int c2 = __float_as_int(cnodes[r]); - float d2 = ((float*)&dist)[r]; - if(traverseChild == 0) { - ++stackPtr; - kernel_assert(stackPtr < BVH_QSTACK_SIZE); - traversalStack[stackPtr].addr = c2; - traversalStack[stackPtr].dist = d2; - qbvh_stack_sort(&traversalStack[stackPtr], - &traversalStack[stackPtr - 1], - &traversalStack[stackPtr - 2]); - nodeAddr = traversalStack[stackPtr].addr; - --stackPtr; - continue; - } - - /* Four children are hit, push all onto stack and sort 4 - * stack items, continue with closest child. - */ - r = __bscf(traverseChild); - int c3 = __float_as_int(cnodes[r]); - float d3 = ((float*)&dist)[r]; - ++stackPtr; - kernel_assert(stackPtr < BVH_QSTACK_SIZE); - traversalStack[stackPtr].addr = c3; - traversalStack[stackPtr].dist = d3; - ++stackPtr; - kernel_assert(stackPtr < BVH_QSTACK_SIZE); - traversalStack[stackPtr].addr = c2; - traversalStack[stackPtr].dist = d2; - qbvh_stack_sort(&traversalStack[stackPtr], - &traversalStack[stackPtr - 1], - &traversalStack[stackPtr - 2], - &traversalStack[stackPtr - 3]); - } - - nodeAddr = traversalStack[stackPtr].addr; - --stackPtr; - } - - /* If node is leaf, fetch triangle list. */ - if(nodeAddr < 0) { - float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)); - int primAddr = __float_as_int(leaf.x); - -#if BVH_FEATURE(BVH_INSTANCING) - if(primAddr >= 0) { -#endif - int primAddr2 = __float_as_int(leaf.y); - const uint type = __float_as_int(leaf.w); - const uint p_type = type & PRIMITIVE_ALL; - - /* Pop. */ - nodeAddr = traversalStack[stackPtr].addr; - --stackPtr; - - /* Primitive intersection. */ - switch(p_type) { - case PRIMITIVE_TRIANGLE: { - for(; primAddr < primAddr2; primAddr++) { - kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); - /* Only primitives from volume object. */ - uint tri_object = (object == OBJECT_NONE)? 
kernel_tex_fetch(__prim_object, primAddr): object; - int object_flag = kernel_tex_fetch(__object_flag, tri_object); - if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { - continue; - } - /* Intersect ray against primitive. */ - triangle_intersect(kg, &isect_precalc, isect, P, visibility, object, primAddr); - } - break; - } -#if BVH_FEATURE(BVH_MOTION) - case PRIMITIVE_MOTION_TRIANGLE: { - for(; primAddr < primAddr2; primAddr++) { - kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); - /* Only primitives from volume object. */ - uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object; - int object_flag = kernel_tex_fetch(__object_flag, tri_object); - if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { - continue; - } - /* Intersect ray against primitive. */ - motion_triangle_intersect(kg, isect, P, dir, ray->time, visibility, object, primAddr); - } - break; - } -#endif - } - } -#if BVH_FEATURE(BVH_INSTANCING) - else { - /* Instance push. */ - object = kernel_tex_fetch(__prim_object, -primAddr-1); - int object_flag = kernel_tex_fetch(__object_flag, object); - - if(object_flag & SD_OBJECT_HAS_VOLUME) { - -# if BVH_FEATURE(BVH_MOTION) - bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm); -# else - bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t); -# endif - - if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; } - if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; } - if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; } - tfar = ssef(isect->t); -# if BVH_FEATURE(BVH_HAIR) - dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z)); -# endif - idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z)); -# ifdef __KERNEL_AVX2__ - P_idir = P*idir; - P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z); -# endif -# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__) - org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z)); -# endif - - 
triangle_intersect_precalc(dir, &isect_precalc); - - ++stackPtr; - kernel_assert(stackPtr < BVH_QSTACK_SIZE); - traversalStack[stackPtr].addr = ENTRYPOINT_SENTINEL; - - nodeAddr = kernel_tex_fetch(__object_node, object); - } - else { - /* Pop. */ - object = OBJECT_NONE; - nodeAddr = traversalStack[stackPtr].addr; - --stackPtr; - } - } - } -#endif /* FEATURE(BVH_INSTANCING) */ - } while(nodeAddr != ENTRYPOINT_SENTINEL); - -#if BVH_FEATURE(BVH_INSTANCING) - if(stackPtr >= 0) { - kernel_assert(object != OBJECT_NONE); - - /* Instance pop. */ -# if BVH_FEATURE(BVH_MOTION) - bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm); -# else - bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect->t); -# endif - - if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; } - if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; } - if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; } - tfar = ssef(isect->t); -# if BVH_FEATURE(BVH_HAIR) - dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z)); -# endif - idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z)); -# ifdef __KERNEL_AVX2__ - P_idir = P*idir; - P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z); -# endif -# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__) - org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z)); -# endif - - triangle_intersect_precalc(dir, &isect_precalc); - - object = OBJECT_NONE; - nodeAddr = traversalStack[stackPtr].addr; - --stackPtr; - } -#endif /* FEATURE(BVH_INSTANCING) */ - } while(nodeAddr != ENTRYPOINT_SENTINEL); - - return (isect->prim != PRIM_NONE); -} - -#undef NODE_INTERSECT diff --git a/intern/cycles/kernel/geom/geom_qbvh_volume_all.h b/intern/cycles/kernel/geom/geom_qbvh_volume_all.h deleted file mode 100644 index 8a31775fae3..00000000000 --- a/intern/cycles/kernel/geom/geom_qbvh_volume_all.h +++ /dev/null @@ -1,446 +0,0 @@ -/* - * Adapted from code Copyright 2009-2010 NVIDIA Corporation, - * and code 
copyright 2009-2012 Intel Corporation - * - * Modifications Copyright 2011-2014, Blender Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* This is a template BVH traversal function for volumes, where - * various features can be enabled/disabled. This way we can compile optimized - * versions for each case without new features slowing things down. - * - * BVH_INSTANCING: object instancing - * BVH_MOTION: motion blur rendering - * - */ - -#if BVH_FEATURE(BVH_HAIR) -# define NODE_INTERSECT qbvh_node_intersect -#else -# define NODE_INTERSECT qbvh_aligned_node_intersect -#endif - -ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, - const Ray *ray, - Intersection *isect_array, - const uint max_hits, - const uint visibility) -{ - /* TODO(sergey): - * - Test if pushing distance on the stack helps. - * - Likely and unlikely for if() statements. - * - Test restrict attribute for pointers. - */ - - /* Traversal stack in CUDA thread-local memory. */ - QBVHStackItem traversalStack[BVH_QSTACK_SIZE]; - traversalStack[0].addr = ENTRYPOINT_SENTINEL; - - /* Traversal variables in registers. */ - int stackPtr = 0; - int nodeAddr = kernel_data.bvh.root; - - /* Ray parameters in registers. 
*/ - const float tmax = ray->t; - float3 P = ray->P; - float3 dir = bvh_clamp_direction(ray->D); - float3 idir = bvh_inverse_direction(dir); - int object = OBJECT_NONE; - float isect_t = tmax; - -#if BVH_FEATURE(BVH_MOTION) - Transform ob_itfm; -#endif - - uint num_hits = 0; - isect_array->t = tmax; - -#ifndef __KERNEL_SSE41__ - if(!isfinite(P.x)) { - return false; - } -#endif - -#if BVH_FEATURE(BVH_INSTANCING) - int num_hits_in_instance = 0; -#endif - - ssef tnear(0.0f), tfar(isect_t); -#if BVH_FEATURE(BVH_HAIR) - sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z)); -#endif - sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z)); - -#ifdef __KERNEL_AVX2__ - float3 P_idir = P*idir; - sse3f P_idir4(P_idir.x, P_idir.y, P_idir.z); -#endif -#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__) - sse3f org4(ssef(P.x), ssef(P.y), ssef(P.z)); -#endif - - /* Offsets to select the side that becomes the lower or upper bound. */ - int near_x, near_y, near_z; - int far_x, far_y, far_z; - - if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; } - if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; } - if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; } - - IsectPrecalc isect_precalc; - triangle_intersect_precalc(dir, &isect_precalc); - - /* Traversal loop. */ - do { - do { - /* Traverse internal nodes. */ - while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) { -#ifdef __VISIBILITY_FLAG__ - float4 inodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0); - if((__float_as_uint(inodes.x) & visibility) == 0) { - /* Pop. 
*/ - nodeAddr = traversalStack[stackPtr].addr; - --stackPtr; - continue; - } -#endif - - ssef dist; - int traverseChild = NODE_INTERSECT(kg, - tnear, - tfar, -#ifdef __KERNEL_AVX2__ - P_idir4, -#endif -#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__) - org4, -#endif -#if BVH_FEATURE(BVH_HAIR) - dir4, -#endif - idir4, - near_x, near_y, near_z, - far_x, far_y, far_z, - nodeAddr, - &dist); - - if(traverseChild != 0) { - float4 cnodes; -#if BVH_FEATURE(BVH_HAIR) - if(__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) { - cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+13); - } - else -#endif - { - cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+7); - } - - /* One child is hit, continue with that child. */ - int r = __bscf(traverseChild); - if(traverseChild == 0) { - nodeAddr = __float_as_int(cnodes[r]); - continue; - } - - /* Two children are hit, push far child, and continue with - * closer child. - */ - int c0 = __float_as_int(cnodes[r]); - float d0 = ((float*)&dist)[r]; - r = __bscf(traverseChild); - int c1 = __float_as_int(cnodes[r]); - float d1 = ((float*)&dist)[r]; - if(traverseChild == 0) { - if(d1 < d0) { - nodeAddr = c1; - ++stackPtr; - kernel_assert(stackPtr < BVH_QSTACK_SIZE); - traversalStack[stackPtr].addr = c0; - traversalStack[stackPtr].dist = d0; - continue; - } - else { - nodeAddr = c0; - ++stackPtr; - kernel_assert(stackPtr < BVH_QSTACK_SIZE); - traversalStack[stackPtr].addr = c1; - traversalStack[stackPtr].dist = d1; - continue; - } - } - - /* Here starts the slow path for 3 or 4 hit children. We push - * all nodes onto the stack to sort them there. - */ - ++stackPtr; - kernel_assert(stackPtr < BVH_QSTACK_SIZE); - traversalStack[stackPtr].addr = c1; - traversalStack[stackPtr].dist = d1; - ++stackPtr; - kernel_assert(stackPtr < BVH_QSTACK_SIZE); - traversalStack[stackPtr].addr = c0; - traversalStack[stackPtr].dist = d0; - - /* Three children are hit, push all onto stack and sort 3 - * stack items, continue with closest child. 
- */ - r = __bscf(traverseChild); - int c2 = __float_as_int(cnodes[r]); - float d2 = ((float*)&dist)[r]; - if(traverseChild == 0) { - ++stackPtr; - kernel_assert(stackPtr < BVH_QSTACK_SIZE); - traversalStack[stackPtr].addr = c2; - traversalStack[stackPtr].dist = d2; - qbvh_stack_sort(&traversalStack[stackPtr], - &traversalStack[stackPtr - 1], - &traversalStack[stackPtr - 2]); - nodeAddr = traversalStack[stackPtr].addr; - --stackPtr; - continue; - } - - /* Four children are hit, push all onto stack and sort 4 - * stack items, continue with closest child. - */ - r = __bscf(traverseChild); - int c3 = __float_as_int(cnodes[r]); - float d3 = ((float*)&dist)[r]; - ++stackPtr; - kernel_assert(stackPtr < BVH_QSTACK_SIZE); - traversalStack[stackPtr].addr = c3; - traversalStack[stackPtr].dist = d3; - ++stackPtr; - kernel_assert(stackPtr < BVH_QSTACK_SIZE); - traversalStack[stackPtr].addr = c2; - traversalStack[stackPtr].dist = d2; - qbvh_stack_sort(&traversalStack[stackPtr], - &traversalStack[stackPtr - 1], - &traversalStack[stackPtr - 2], - &traversalStack[stackPtr - 3]); - } - - nodeAddr = traversalStack[stackPtr].addr; - --stackPtr; - } - - /* If node is leaf, fetch triangle list. */ - if(nodeAddr < 0) { - float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)); - int primAddr = __float_as_int(leaf.x); - -#if BVH_FEATURE(BVH_INSTANCING) - if(primAddr >= 0) { -#endif - int primAddr2 = __float_as_int(leaf.y); - const uint type = __float_as_int(leaf.w); - const uint p_type = type & PRIMITIVE_ALL; - bool hit; - - /* Pop. */ - nodeAddr = traversalStack[stackPtr].addr; - --stackPtr; - - /* Primitive intersection. */ - switch(p_type) { - case PRIMITIVE_TRIANGLE: { - for(; primAddr < primAddr2; primAddr++) { - kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); - /* Only primitives from volume object. */ - uint tri_object = (object == OBJECT_NONE)? 
kernel_tex_fetch(__prim_object, primAddr): object; - int object_flag = kernel_tex_fetch(__object_flag, tri_object); - if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { - continue; - } - /* Intersect ray against primitive. */ - hit = triangle_intersect(kg, &isect_precalc, isect_array, P, visibility, object, primAddr); - if(hit) { - /* Move on to next entry in intersections array. */ - isect_array++; - num_hits++; -#if BVH_FEATURE(BVH_INSTANCING) - num_hits_in_instance++; -#endif - isect_array->t = isect_t; - if(num_hits == max_hits) { -#if BVH_FEATURE(BVH_INSTANCING) -# if BVH_FEATURE(BVH_MOTION) - float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir)); -# else - Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM); - float t_fac = 1.0f / len(transform_direction(&itfm, dir)); -# endif - for(int i = 0; i < num_hits_in_instance; i++) { - (isect_array-i-1)->t *= t_fac; - } -#endif /* BVH_FEATURE(BVH_INSTANCING) */ - return num_hits; - } - } - } - break; - } -#if BVH_FEATURE(BVH_MOTION) - case PRIMITIVE_MOTION_TRIANGLE: { - for(; primAddr < primAddr2; primAddr++) { - kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); - /* Only primitives from volume object. */ - uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object; - int object_flag = kernel_tex_fetch(__object_flag, tri_object); - if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { - continue; - } - /* Intersect ray against primitive. */ - hit = motion_triangle_intersect(kg, isect_array, P, dir, ray->time, visibility, object, primAddr); - if(hit) { - /* Move on to next entry in intersections array. 
*/ - isect_array++; - num_hits++; -# if BVH_FEATURE(BVH_INSTANCING) - num_hits_in_instance++; -# endif - isect_array->t = isect_t; - if(num_hits == max_hits) { -# if BVH_FEATURE(BVH_INSTANCING) -# if BVH_FEATURE(BVH_MOTION) - float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir)); -# else - Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM); - float t_fac = 1.0f / len(transform_direction(&itfm, dir)); -# endif - for(int i = 0; i < num_hits_in_instance; i++) { - (isect_array-i-1)->t *= t_fac; - } -# endif /* BVH_FEATURE(BVH_INSTANCING) */ - return num_hits; - } - } - } - break; - } -#endif - } - } -#if BVH_FEATURE(BVH_INSTANCING) - else { - /* Instance push. */ - object = kernel_tex_fetch(__prim_object, -primAddr-1); - int object_flag = kernel_tex_fetch(__object_flag, object); - - if(object_flag & SD_OBJECT_HAS_VOLUME) { - -# if BVH_FEATURE(BVH_MOTION) - bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_itfm); -# else - bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect_t); -# endif - - if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; } - if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; } - if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; } - tfar = ssef(isect_t); - idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z)); -# if BVH_FEATURE(BVH_HAIR) - dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z)); -# endif -# ifdef __KERNEL_AVX2__ - P_idir = P*idir; - P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z); -# endif -# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__) - org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z)); -# endif - - triangle_intersect_precalc(dir, &isect_precalc); - num_hits_in_instance = 0; - isect_array->t = isect_t; - - ++stackPtr; - kernel_assert(stackPtr < BVH_QSTACK_SIZE); - traversalStack[stackPtr].addr = ENTRYPOINT_SENTINEL; - - nodeAddr = kernel_tex_fetch(__object_node, object); - } - else { - /* Pop. 
*/ - object = OBJECT_NONE; - nodeAddr = traversalStack[stackPtr].addr; - --stackPtr; - } - } - } -#endif /* FEATURE(BVH_INSTANCING) */ - } while(nodeAddr != ENTRYPOINT_SENTINEL); - -#if BVH_FEATURE(BVH_INSTANCING) - if(stackPtr >= 0) { - kernel_assert(object != OBJECT_NONE); - - /* Instance pop. */ - if(num_hits_in_instance) { - float t_fac; -# if BVH_FEATURE(BVH_MOTION) - bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm); -# else - bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac); -# endif - triangle_intersect_precalc(dir, &isect_precalc); - /* Scale isect->t to adjust for instancing. */ - for(int i = 0; i < num_hits_in_instance; i++) { - (isect_array-i-1)->t *= t_fac; - } - } - else { - float ignore_t = FLT_MAX; -# if BVH_FEATURE(BVH_MOTION) - bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &ignore_t, &ob_itfm); -# else - bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &ignore_t); -# endif - triangle_intersect_precalc(dir, &isect_precalc); - } - - if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; } - if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; } - if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; } - tfar = ssef(isect_t); -# if BVH_FEATURE(BVH_HAIR) - dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z)); -# endif - idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z)); -# ifdef __KERNEL_AVX2__ - P_idir = P*idir; - P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z); -# endif -# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__) - org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z)); -# endif - - triangle_intersect_precalc(dir, &isect_precalc); - isect_t = tmax; - isect_array->t = isect_t; - - object = OBJECT_NONE; - nodeAddr = traversalStack[stackPtr].addr; - --stackPtr; - } -#endif /* FEATURE(BVH_INSTANCING) */ - } while(nodeAddr != ENTRYPOINT_SENTINEL); - - return num_hits; -} - -#undef NODE_INTERSECT diff --git 
a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h index 3c3503eab8b..d5b31037723 100644 --- a/intern/cycles/kernel/kernel_path.h +++ b/intern/cycles/kernel/kernel_path.h @@ -25,6 +25,7 @@ #include "kernel_camera.h" #include "geom/geom.h" +#include "bvh/bvh.h" #include "kernel_accumulate.h" #include "kernel_shader.h" diff --git a/intern/cycles/kernel/kernels/opencl/kernel.cl b/intern/cycles/kernel/kernels/opencl/kernel.cl index aad06ed5c76..37907cd8fdc 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel.cl @@ -35,6 +35,7 @@ # include "../../kernel_montecarlo.h" # include "../../kernel_projection.h" # include "../../geom/geom.h" +# include "../../bvh/bvh.h" # include "../../kernel_accumulate.h" # include "../../kernel_camera.h" diff --git a/intern/cycles/kernel/osl/osl_services.cpp b/intern/cycles/kernel/osl/osl_services.cpp index acc6887cb17..2bb2be5e6b3 100644 --- a/intern/cycles/kernel/osl/osl_services.cpp +++ b/intern/cycles/kernel/osl/osl_services.cpp @@ -47,6 +47,7 @@ #include "kernel_camera.h" #include "kernels/cpu/kernel_cpu_image.h" #include "geom/geom.h" +#include "bvh/bvh.h" #include "kernel_projection.h" #include "kernel_accumulate.h" diff --git a/intern/cycles/kernel/split/kernel_split_common.h b/intern/cycles/kernel/split/kernel_split_common.h index e1c7e2cea99..88d6dab04d0 100644 --- a/intern/cycles/kernel/split/kernel_split_common.h +++ b/intern/cycles/kernel/split/kernel_split_common.h @@ -31,6 +31,7 @@ #include "kernel_camera.h" #include "geom/geom.h" +#include "bvh/bvh.h" #include "kernel_accumulate.h" #include "kernel_shader.h" -- cgit v1.2.3