diff options
Diffstat (limited to 'intern/cycles')
-rw-r--r-- | intern/cycles/kernel/bvh/bvh.h | 442 | ||||
-rw-r--r-- | intern/cycles/kernel/geom/geom_curve_intersect.h | 4 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_compat_optix.h | 89 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_globals.h | 41 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_path.h | 29 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_shader.h | 14 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_shadow.h | 54 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_subsurface.h | 20 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_types.h | 37 | ||||
-rw-r--r-- | intern/cycles/kernel/kernels/optix/kernel_optix.cu | 294 |
10 files changed, 826 insertions, 198 deletions
diff --git a/intern/cycles/kernel/bvh/bvh.h b/intern/cycles/kernel/bvh/bvh.h index 162b2fb5cdb..d0bc1fe4b36 100644 --- a/intern/cycles/kernel/bvh/bvh.h +++ b/intern/cycles/kernel/bvh/bvh.h @@ -33,136 +33,140 @@ CCL_NAMESPACE_BEGIN #include "kernel/bvh/bvh_types.h" +#ifndef __KERNEL_OPTIX__ + /* Common QBVH functions. */ -#ifdef __QBVH__ -# include "kernel/bvh/qbvh_nodes.h" -# ifdef __KERNEL_AVX2__ -# include "kernel/bvh/obvh_nodes.h" +# ifdef __QBVH__ +# include "kernel/bvh/qbvh_nodes.h" +# ifdef __KERNEL_AVX2__ +# include "kernel/bvh/obvh_nodes.h" +# endif # endif -#endif /* Regular BVH traversal */ -#include "kernel/bvh/bvh_nodes.h" +# include "kernel/bvh/bvh_nodes.h" -#define BVH_FUNCTION_NAME bvh_intersect -#define BVH_FUNCTION_FEATURES 0 -#include "kernel/bvh/bvh_traversal.h" - -#if defined(__INSTANCING__) -# define BVH_FUNCTION_NAME bvh_intersect_instancing -# define BVH_FUNCTION_FEATURES BVH_INSTANCING +# define BVH_FUNCTION_NAME bvh_intersect +# define BVH_FUNCTION_FEATURES 0 # include "kernel/bvh/bvh_traversal.h" -#endif -#if defined(__HAIR__) -# define BVH_FUNCTION_NAME bvh_intersect_hair -# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_HAIR -# include "kernel/bvh/bvh_traversal.h" -#endif +# if defined(__INSTANCING__) +# define BVH_FUNCTION_NAME bvh_intersect_instancing +# define BVH_FUNCTION_FEATURES BVH_INSTANCING +# include "kernel/bvh/bvh_traversal.h" +# endif -#if defined(__OBJECT_MOTION__) -# define BVH_FUNCTION_NAME bvh_intersect_motion -# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_MOTION -# include "kernel/bvh/bvh_traversal.h" -#endif +# if defined(__HAIR__) +# define BVH_FUNCTION_NAME bvh_intersect_hair +# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_HAIR +# include "kernel/bvh/bvh_traversal.h" +# endif -#if defined(__HAIR__) && defined(__OBJECT_MOTION__) -# define BVH_FUNCTION_NAME bvh_intersect_hair_motion -# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_HAIR | BVH_MOTION -# include "kernel/bvh/bvh_traversal.h" -#endif +# if defined(__OBJECT_MOTION__) +# define BVH_FUNCTION_NAME bvh_intersect_motion +# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_MOTION +# include "kernel/bvh/bvh_traversal.h" +# endif -/* Subsurface scattering BVH traversal */ +# if defined(__HAIR__) && defined(__OBJECT_MOTION__) +# define BVH_FUNCTION_NAME bvh_intersect_hair_motion +# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_HAIR | BVH_MOTION +# include "kernel/bvh/bvh_traversal.h" +# endif -#if defined(__BVH_LOCAL__) -# define BVH_FUNCTION_NAME bvh_intersect_local -# define BVH_FUNCTION_FEATURES BVH_HAIR -# include "kernel/bvh/bvh_local.h" +/* Subsurface scattering BVH traversal */ -# if defined(__OBJECT_MOTION__) -# define BVH_FUNCTION_NAME bvh_intersect_local_motion -# define BVH_FUNCTION_FEATURES BVH_MOTION | BVH_HAIR +# if defined(__BVH_LOCAL__) +# define BVH_FUNCTION_NAME bvh_intersect_local +# define BVH_FUNCTION_FEATURES BVH_HAIR # include "kernel/bvh/bvh_local.h" -# endif -#endif /* __BVH_LOCAL__ */ - -/* Volume BVH traversal */ -#if defined(__VOLUME__) -# define BVH_FUNCTION_NAME bvh_intersect_volume -# define BVH_FUNCTION_FEATURES BVH_HAIR -# include "kernel/bvh/bvh_volume.h" +# if defined(__OBJECT_MOTION__) +# define BVH_FUNCTION_NAME bvh_intersect_local_motion +# define BVH_FUNCTION_FEATURES BVH_MOTION | BVH_HAIR +# include "kernel/bvh/bvh_local.h" +# endif +# endif /* __BVH_LOCAL__ */ -# if defined(__INSTANCING__) -# define BVH_FUNCTION_NAME bvh_intersect_volume_instancing -# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_HAIR -# include "kernel/bvh/bvh_volume.h" -# endif +/* Volume BVH traversal */ -# if defined(__OBJECT_MOTION__) -# define BVH_FUNCTION_NAME bvh_intersect_volume_motion -# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_MOTION | BVH_HAIR +# if defined(__VOLUME__) +# define BVH_FUNCTION_NAME bvh_intersect_volume +# define BVH_FUNCTION_FEATURES BVH_HAIR # include "kernel/bvh/bvh_volume.h" -# endif -#endif /* __VOLUME__ */ - -/* Record all intersections - Shadow BVH traversal */ -#if defined(__SHADOW_RECORD_ALL__) -# define BVH_FUNCTION_NAME bvh_intersect_shadow_all -# define BVH_FUNCTION_FEATURES 0 -# include "kernel/bvh/bvh_shadow_all.h" +# if defined(__INSTANCING__) +# define BVH_FUNCTION_NAME bvh_intersect_volume_instancing +# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_HAIR +# include "kernel/bvh/bvh_volume.h" +# endif -# if defined(__INSTANCING__) -# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_instancing -# define BVH_FUNCTION_FEATURES BVH_INSTANCING -# include "kernel/bvh/bvh_shadow_all.h" -# endif +# if defined(__OBJECT_MOTION__) +# define BVH_FUNCTION_NAME bvh_intersect_volume_motion +# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_MOTION | BVH_HAIR +# include "kernel/bvh/bvh_volume.h" +# endif +# endif /* __VOLUME__ */ -# if defined(__HAIR__) -# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_hair -# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_HAIR -# include "kernel/bvh/bvh_shadow_all.h" -# endif +/* Record all intersections - Shadow BVH traversal */ -# if defined(__OBJECT_MOTION__) -# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_motion -# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_MOTION +# if defined(__SHADOW_RECORD_ALL__) +# define BVH_FUNCTION_NAME bvh_intersect_shadow_all +# define BVH_FUNCTION_FEATURES 0 # include "kernel/bvh/bvh_shadow_all.h" -# endif -# if defined(__HAIR__) && defined(__OBJECT_MOTION__) -# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_hair_motion -# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_HAIR | BVH_MOTION -# include "kernel/bvh/bvh_shadow_all.h" -# endif -#endif /* __SHADOW_RECORD_ALL__ */ +# if defined(__INSTANCING__) +# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_instancing +# define BVH_FUNCTION_FEATURES BVH_INSTANCING +# include "kernel/bvh/bvh_shadow_all.h" +# endif + +# if defined(__HAIR__) +# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_hair +# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_HAIR +# include "kernel/bvh/bvh_shadow_all.h" +# endif + +# if defined(__OBJECT_MOTION__) +# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_motion +# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_MOTION +# include "kernel/bvh/bvh_shadow_all.h" +# endif + +# if defined(__HAIR__) && defined(__OBJECT_MOTION__) +# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_hair_motion +# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_HAIR | BVH_MOTION +# include "kernel/bvh/bvh_shadow_all.h" +# endif +# endif /* __SHADOW_RECORD_ALL__ */ /* Record all intersections - Volume BVH traversal */ -#if defined(__VOLUME_RECORD_ALL__) -# define BVH_FUNCTION_NAME bvh_intersect_volume_all -# define BVH_FUNCTION_FEATURES BVH_HAIR -# include "kernel/bvh/bvh_volume_all.h" - -# if defined(__INSTANCING__) -# define BVH_FUNCTION_NAME bvh_intersect_volume_all_instancing -# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_HAIR +# if defined(__VOLUME_RECORD_ALL__) +# define BVH_FUNCTION_NAME bvh_intersect_volume_all +# define BVH_FUNCTION_FEATURES BVH_HAIR # include "kernel/bvh/bvh_volume_all.h" -# endif -# if defined(__OBJECT_MOTION__) -# define BVH_FUNCTION_NAME bvh_intersect_volume_all_motion -# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_MOTION | BVH_HAIR -# include "kernel/bvh/bvh_volume_all.h" -# endif -#endif /* __VOLUME_RECORD_ALL__ */ +# if defined(__INSTANCING__) +# define BVH_FUNCTION_NAME bvh_intersect_volume_all_instancing +# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_HAIR +# include "kernel/bvh/bvh_volume_all.h" +# endif + +# if defined(__OBJECT_MOTION__) +# define BVH_FUNCTION_NAME bvh_intersect_volume_all_motion +# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_MOTION | BVH_HAIR +# include "kernel/bvh/bvh_volume_all.h" +# endif +# endif /* __VOLUME_RECORD_ALL__ */ + +# undef BVH_FEATURE +# undef BVH_NAME_JOIN +# undef BVH_NAME_EVAL +# undef BVH_FUNCTION_FULL_NAME -#undef BVH_FEATURE -#undef BVH_NAME_JOIN -#undef BVH_NAME_EVAL -#undef BVH_FUNCTION_FULL_NAME +#endif /* __KERNEL_OPTIX__ */ ccl_device_inline bool scene_intersect_valid(const Ray *ray) { @@ -173,8 +177,10 @@ ccl_device_inline bool scene_intersect_valid(const Ray *ray) * such cases. * From production scenes so far it seems it's enough to test first element * only. + * Scene intersection may also called with empty rays for conditional trace + * calls that evaluate to false, so filter those out. */ - return isfinite_safe(ray->P.x) && isfinite_safe(ray->D.x); + return isfinite_safe(ray->P.x) && isfinite_safe(ray->D.x) && len_squared(ray->D) != 0.0f; } ccl_device_intersect bool scene_intersect(KernelGlobals *kg, @@ -184,10 +190,46 @@ ccl_device_intersect bool scene_intersect(KernelGlobals *kg, { PROFILING_INIT(kg, PROFILING_INTERSECT); +#ifdef __KERNEL_OPTIX__ + uint p0 = 0; + uint p1 = 0; + uint p2 = 0; + uint p3 = 0; + uint p4 = visibility; + uint p5 = PRIMITIVE_NONE; + + optixTrace(scene_intersect_valid(ray) ? kernel_data.bvh.scene : 0, + ray->P, + ray->D, + 0.0f, + ray->t, + ray->time, + 0xFF, + OPTIX_RAY_FLAG_NONE, + 0, + 0, + 0, // SBT offset for PG_HITD + p0, + p1, + p2, + p3, + p4, + p5); + + isect->t = __uint_as_float(p0); + isect->u = __uint_as_float(p1); + isect->v = __uint_as_float(p2); + isect->prim = p3; + isect->object = p4; + isect->type = p5; + + return p5 != PRIMITIVE_NONE; +#else /* __KERNEL_OPTIX__ */ if (!scene_intersect_valid(ray)) { return false; } -#ifdef __EMBREE__ + +# ifdef __EMBREE__ if (kernel_data.bvh.scene) { isect->t = ray->t; CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_REGULAR); @@ -202,42 +244,41 @@ ccl_device_intersect bool scene_intersect(KernelGlobals *kg, } return false; } -#endif /* __EMBREE__ */ -#ifdef __OBJECT_MOTION__ +# endif /* __EMBREE__ */ + +# ifdef __OBJECT_MOTION__ if (kernel_data.bvh.have_motion) { -# ifdef __HAIR__ +# ifdef __HAIR__ if (kernel_data.bvh.have_curves) { return bvh_intersect_hair_motion(kg, ray, isect, visibility); } -# endif /* __HAIR__ */ +# endif /* __HAIR__ */ return bvh_intersect_motion(kg, ray, isect, visibility); } -#endif /* __OBJECT_MOTION__ */ +# endif /* __OBJECT_MOTION__ */ -#ifdef __HAIR__ +# ifdef __HAIR__ if (kernel_data.bvh.have_curves) { return bvh_intersect_hair(kg, ray, isect, visibility); } -#endif /* __HAIR__ */ - -#ifdef __KERNEL_CPU__ +# endif /* __HAIR__ */ -# ifdef __INSTANCING__ +# ifdef __KERNEL_CPU__ +# ifdef __INSTANCING__ if (kernel_data.bvh.have_instancing) { return bvh_intersect_instancing(kg, ray, isect, visibility); } -# endif /* __INSTANCING__ */ +# endif /* __INSTANCING__ */ return bvh_intersect(kg, ray, isect, visibility); -#else /* __KERNEL_CPU__ */ - -# ifdef __INSTANCING__ +# else /* __KERNEL_CPU__ */ +# ifdef __INSTANCING__ return bvh_intersect_instancing(kg, ray, isect, visibility); -# else +# else return bvh_intersect(kg, ray, isect, visibility); -# endif /* __INSTANCING__ */ - -#endif /* __KERNEL_CPU__ */ +# endif /* __INSTANCING__ */ +# endif /* __KERNEL_CPU__ */ +#endif /* __KERNEL_OPTIX__ */ } #ifdef __BVH_LOCAL__ @@ -250,11 +291,43 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals *kg, { PROFILING_INIT(kg, PROFILING_INTERSECT_LOCAL); +# ifdef __KERNEL_OPTIX__ + uint p0 = ((uint64_t)lcg_state) & 0xFFFFFFFF; + uint p1 = (((uint64_t)lcg_state) >> 32) & 0xFFFFFFFF; + uint p2 = ((uint64_t)local_isect) & 0xFFFFFFFF; + uint p3 = (((uint64_t)local_isect) >> 32) & 0xFFFFFFFF; + uint p4 = local_object; + // Is set to zero on miss or if ray is aborted, so can be used as return value + uint p5 = max_hits; + + local_isect->num_hits = 0; // Initialize hit count to zero + optixTrace(scene_intersect_valid(ray) ? kernel_data.bvh.scene : 0, + ray->P, + ray->D, + 0.0f, + ray->t, + ray->time, + // Need to always call into __anyhit__kernel_optix_local_hit + 0xFF, + OPTIX_RAY_FLAG_ENFORCE_ANYHIT, + 1, + 0, + 0, // SBT offset for PG_HITL + p0, + p1, + p2, + p3, + p4, + p5); + + return p5; +# else /* __KERNEL_OPTIX__ */ if (!scene_intersect_valid(ray)) { local_isect->num_hits = 0; return false; } -# ifdef __EMBREE__ + +# ifdef __EMBREE__ if (kernel_data.bvh.scene) { CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_SSS); ctx.lcg_state = lcg_state; @@ -296,13 +369,15 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals *kg, return local_isect->num_hits > 0; } -# endif /* __EMBREE__ */ -# ifdef __OBJECT_MOTION__ +# endif /* __EMBREE__ */ + +# ifdef __OBJECT_MOTION__ if (kernel_data.bvh.have_motion) { return bvh_intersect_local_motion(kg, ray, local_isect, local_object, lcg_state, max_hits); } -# endif /* __OBJECT_MOTION__ */ +# endif /* __OBJECT_MOTION__ */ return bvh_intersect_local(kg, ray, local_isect, local_object, lcg_state, max_hits); +# endif /* __KERNEL_OPTIX__ */ } #endif @@ -316,11 +391,41 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals *kg, { PROFILING_INIT(kg, PROFILING_INTERSECT_SHADOW_ALL); +# ifdef __KERNEL_OPTIX__ + uint p0 = ((uint64_t)isect) & 0xFFFFFFFF; + uint p1 = (((uint64_t)isect) >> 32) & 0xFFFFFFFF; + uint p3 = max_hits; + uint p4 = visibility; + uint p5 = false; + + *num_hits = 0; // Initialize hit count to zero + optixTrace(scene_intersect_valid(ray) ? kernel_data.bvh.scene : 0, + ray->P, + ray->D, + 0.0f, + ray->t, + ray->time, + // Need to always call into __anyhit__kernel_optix_shadow_all_hit + 0xFF, + OPTIX_RAY_FLAG_ENFORCE_ANYHIT, + 2, + 0, + 0, // SBT offset for PG_HITS + p0, + p1, + *num_hits, + p3, + p4, + p5); + + return p5; +# else /* __KERNEL_OPTIX__ */ if (!scene_intersect_valid(ray)) { *num_hits = 0; return false; } -# ifdef __EMBREE__ + +# ifdef __EMBREE__ if (kernel_data.bvh.scene) { CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_SHADOW_ALL); ctx.isect_s = isect; @@ -337,32 +442,41 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals *kg, *num_hits = ctx.num_hits; return rtc_ray.tfar == -INFINITY; } -# endif -# ifdef __OBJECT_MOTION__ +# endif /* __EMBREE__ */ + +# ifdef __OBJECT_MOTION__ if (kernel_data.bvh.have_motion) { -# ifdef __HAIR__ +# ifdef __HAIR__ if (kernel_data.bvh.have_curves) { return bvh_intersect_shadow_all_hair_motion(kg, ray, isect, visibility, max_hits, num_hits); } -# endif /* __HAIR__ */ +# endif /* __HAIR__ */ return bvh_intersect_shadow_all_motion(kg, ray, isect, visibility, max_hits, num_hits); } -# endif /* __OBJECT_MOTION__ */ +# endif /* __OBJECT_MOTION__ */ -# ifdef __HAIR__ +# ifdef __HAIR__ if (kernel_data.bvh.have_curves) { return bvh_intersect_shadow_all_hair(kg, ray, isect, visibility, max_hits, num_hits); } -# endif /* __HAIR__ */ +# endif /* __HAIR__ */ -# ifdef __INSTANCING__ +# ifdef __KERNEL_CPU__ +# ifdef __INSTANCING__ if (kernel_data.bvh.have_instancing) { return bvh_intersect_shadow_all_instancing(kg, ray, isect, visibility, max_hits, num_hits); } -# endif /* __INSTANCING__ */ - +# endif /* __INSTANCING__ */ return bvh_intersect_shadow_all(kg, ray, isect, visibility, max_hits, num_hits); +# else +# ifdef __INSTANCING__ + return bvh_intersect_shadow_all_instancing(kg, ray, isect, visibility, max_hits, num_hits); +# else + return bvh_intersect_shadow_all(kg, ray, isect, visibility, max_hits, num_hits); +# endif /* __INSTANCING__ */ +# endif /* __KERNEL_CPU__ */ +# endif /* __KERNEL_OPTIX__ */ } #endif /* __SHADOW_RECORD_ALL__ */ @@ -374,30 +488,67 @@ ccl_device_intersect bool scene_intersect_volume(KernelGlobals *kg, { PROFILING_INIT(kg, PROFILING_INTERSECT_VOLUME); +# ifdef __KERNEL_OPTIX__ + uint p0 = 0; + uint p1 = 0; + uint p2 = 0; + uint p3 = 0; + uint p4 = visibility; + uint p5 = PRIMITIVE_NONE; + + optixTrace(scene_intersect_valid(ray) ? kernel_data.bvh.scene : 0, + ray->P, + ray->D, + 0.0f, + ray->t, + ray->time, + // Visibility mask set to only intersect objects with volumes + 0x02, + OPTIX_RAY_FLAG_NONE, + 0, + 0, + 0, // SBT offset for PG_HITD + p0, + p1, + p2, + p3, + p4, + p5); + + isect->t = __uint_as_float(p0); + isect->u = __uint_as_float(p1); + isect->v = __uint_as_float(p2); + isect->prim = p3; + isect->object = p4; + isect->type = p5; + + return p5 != PRIMITIVE_NONE; +# else /* __KERNEL_OPTIX__ */ if (!scene_intersect_valid(ray)) { return false; } -# ifdef __OBJECT_MOTION__ +# ifdef __OBJECT_MOTION__ if (kernel_data.bvh.have_motion) { return bvh_intersect_volume_motion(kg, ray, isect, visibility); } -# endif /* __OBJECT_MOTION__ */ +# endif /* __OBJECT_MOTION__ */ -# ifdef __KERNEL_CPU__ -# ifdef __INSTANCING__ +# ifdef __KERNEL_CPU__ +# ifdef __INSTANCING__ if (kernel_data.bvh.have_instancing) { return bvh_intersect_volume_instancing(kg, ray, isect, visibility); } -# endif /* __INSTANCING__ */ +# endif /* __INSTANCING__ */ return bvh_intersect_volume(kg, ray, isect, visibility); -# else /* __KERNEL_CPU__ */ -# ifdef __INSTANCING__ +# else /* __KERNEL_CPU__ */ +# ifdef __INSTANCING__ return bvh_intersect_volume_instancing(kg, ray, isect, visibility); -# else +# else return bvh_intersect_volume(kg, ray, isect, visibility); -# endif /* __INSTANCING__ */ -# endif /* __KERNEL_CPU__ */ +# endif /* __INSTANCING__ */ +# endif /* __KERNEL_CPU__ */ +# endif /* __KERNEL_OPTIX__ */ } #endif /* __VOLUME__ */ @@ -413,6 +564,7 @@ ccl_device_intersect uint scene_intersect_volume_all(KernelGlobals *kg, if (!scene_intersect_valid(ray)) { return false; } + # ifdef __EMBREE__ if (kernel_data.bvh.scene) { CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_VOLUME_ALL); diff --git a/intern/cycles/kernel/geom/geom_curve_intersect.h b/intern/cycles/kernel/geom/geom_curve_intersect.h index 0327ebf8890..7a770470150 100644 --- a/intern/cycles/kernel/geom/geom_curve_intersect.h +++ b/intern/cycles/kernel/geom/geom_curve_intersect.h @@ -38,12 +38,14 @@ ccl_device_forceinline bool cardinal_curve_intersect(KernelGlobals *kg, { const bool is_curve_primitive = (type & PRIMITIVE_CURVE); +# ifndef __KERNEL_OPTIX__ /* see OptiX motion flag OPTIX_MOTION_FLAG_[START|END]_VANISH */ if (!is_curve_primitive && kernel_data.bvh.use_bvh_steps) { const float2 prim_time = kernel_tex_fetch(__prim_time, curveAddr); if (time < prim_time.x || time > prim_time.y) { return false; } } +# endif int segment = PRIMITIVE_UNPACK_SEGMENT(type); float epsilon = 0.0f; @@ -505,12 +507,14 @@ ccl_device_forceinline bool curve_intersect(KernelGlobals *kg, const bool is_curve_primitive = (type & PRIMITIVE_CURVE); +# ifndef __KERNEL_OPTIX__ /* see OptiX motion flag OPTIX_MOTION_FLAG_[START|END]_VANISH */ if (!is_curve_primitive && kernel_data.bvh.use_bvh_steps) { const float2 prim_time = kernel_tex_fetch(__prim_time, curveAddr); if (time < prim_time.x || time > prim_time.y) { return false; } } +# endif int segment = PRIMITIVE_UNPACK_SEGMENT(type); /* curve Intersection check */ diff --git a/intern/cycles/kernel/kernel_compat_optix.h b/intern/cycles/kernel/kernel_compat_optix.h new file mode 100644 index 00000000000..61b9d87a020 --- /dev/null +++ b/intern/cycles/kernel/kernel_compat_optix.h @@ -0,0 +1,89 @@ +/* + * Copyright 2019, NVIDIA Corporation. + * Copyright 2019, Blender Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __KERNEL_COMPAT_OPTIX_H__ +#define __KERNEL_COMPAT_OPTIX_H__ + +#define OPTIX_DONT_INCLUDE_CUDA +#include <optix.h> + +#define __KERNEL_GPU__ +#define __KERNEL_CUDA__ // OptiX kernels are implicitly CUDA kernels too +#define __KERNEL_OPTIX__ +#define CCL_NAMESPACE_BEGIN +#define CCL_NAMESPACE_END + +#ifndef ATTR_FALLTHROUGH +# define ATTR_FALLTHROUGH +#endif + +typedef unsigned int uint32_t; +typedef unsigned long long uint64_t; +typedef unsigned short half; +typedef unsigned long long CUtexObject; + +#define FLT_MIN 1.175494350822287507969e-38f +#define FLT_MAX 340282346638528859811704183484516925440.0f + +__device__ half __float2half(const float f) +{ + half val; + asm("{ cvt.rn.f16.f32 %0, %1;}\n" : "=h"(val) : "f"(f)); + return val; +} + +/* Selective nodes compilation. */ +#ifndef __NODES_MAX_GROUP__ +# define __NODES_MAX_GROUP__ NODE_GROUP_LEVEL_MAX +#endif +#ifndef __NODES_FEATURES__ +# define __NODES_FEATURES__ NODE_FEATURE_ALL +#endif + +#define ccl_device \ + __device__ __forceinline__ // Function calls are bad for OptiX performance, so inline everything +#define ccl_device_inline ccl_device +#define ccl_device_forceinline ccl_device +#define ccl_device_noinline __device__ __noinline__ +#define ccl_device_noinline_cpu ccl_device +#define ccl_global +#define ccl_static_constant __constant__ +#define ccl_constant const +#define ccl_local +#define ccl_local_param +#define ccl_private +#define ccl_may_alias +#define ccl_addr_space +#define ccl_restrict __restrict__ +#define ccl_ref +#define ccl_align(n) __align__(n) + +// Zero initialize structs to help the compiler figure out scoping +#define ccl_optional_struct_init = {} + +#define kernel_data __params.data // See kernel_globals.h +#define kernel_tex_array(t) __params.t +#define kernel_tex_fetch(t, index) __params.t[(index)] + +#define kernel_assert(cond) + +/* Types */ + +#include "util/util_half.h" +#include "util/util_types.h" + +#endif /* __KERNEL_COMPAT_OPTIX_H__ */ diff --git a/intern/cycles/kernel/kernel_globals.h b/intern/cycles/kernel/kernel_globals.h index 9dbf3b7ea2e..a440021b6b9 100644 --- a/intern/cycles/kernel/kernel_globals.h +++ b/intern/cycles/kernel/kernel_globals.h @@ -90,12 +90,43 @@ typedef struct KernelGlobals { #endif /* __KERNEL_CPU__ */ +#ifdef __KERNEL_OPTIX__ + +typedef struct ShaderParams { + uint4 *input; + float4 *output; + int type; + int filter; + int sx; + int offset; + int sample; +} ShaderParams; + +typedef struct KernelParams { + WorkTile tile; + KernelData data; + ShaderParams shader; +# define KERNEL_TEX(type, name) const type *name; +# include "kernel/kernel_textures.h" +} KernelParams; + +typedef struct KernelGlobals { +# ifdef __VOLUME__ + VolumeState volume_state; +# endif + Intersection hits_stack[64]; +} KernelGlobals; + +extern "C" __constant__ KernelParams __params; + +#else /* __KERNEL_OPTIX__ */ + /* For CUDA, constant memory textures must be globals, so we can't put them * into a struct. As a result we don't actually use this struct and use actual * globals and simply pass along a NULL pointer everywhere, which we hope gets * optimized out. */ -#ifdef __KERNEL_CUDA__ +# ifdef __KERNEL_CUDA__ __constant__ KernelData __data; typedef struct KernelGlobals { @@ -103,10 +134,12 @@ typedef struct KernelGlobals { Intersection hits_stack[64]; } KernelGlobals; -# define KERNEL_TEX(type, name) const __constant__ __device__ type *name; -# include "kernel/kernel_textures.h" +# define KERNEL_TEX(type, name) const __constant__ __device__ type *name; +# include "kernel/kernel_textures.h" + +# endif /* __KERNEL_CUDA__ */ -#endif /* __KERNEL_CUDA__ */ +#endif /* __KERNEL_OPTIX__ */ /* OpenCL */ diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h index 0dc55eba14a..d45ffe9c7df 100644 --- a/intern/cycles/kernel/kernel_path.h +++ b/intern/cycles/kernel/kernel_path.h @@ -326,13 +326,19 @@ ccl_device_forceinline bool kernel_path_shader_apply(KernelGlobals *kg, return true; } -ccl_device_noinline void kernel_path_ao(KernelGlobals *kg, - ShaderData *sd, - ShaderData *emission_sd, - PathRadiance *L, - ccl_addr_space PathState *state, - float3 throughput, - float3 ao_alpha) +#ifdef __KERNEL_OPTIX__ +ccl_device_inline /* inline trace calls */ +#else +ccl_device_noinline +#endif + void + kernel_path_ao(KernelGlobals *kg, + ShaderData *sd, + ShaderData *emission_sd, + PathRadiance *L, + ccl_addr_space PathState *state, + float3 throughput, + float3 ao_alpha) { PROFILING_INIT(kg, PROFILING_AO); @@ -655,9 +661,11 @@ ccl_device void kernel_path_trace( kernel_path_trace_setup(kg, sample, x, y, &rng_hash, &ray); +# ifndef __KERNEL_OPTIX__ if (ray.t == 0.0f) { return; } +# endif /* Initialize state. */ float3 throughput = make_float3(1.0f, 1.0f, 1.0f); @@ -671,6 +679,13 @@ ccl_device void kernel_path_trace( PathState state; path_state_init(kg, emission_sd, &state, rng_hash, sample, &ray); +# ifdef __KERNEL_OPTIX__ + /* Force struct into local memory to avoid costly spilling on trace calls. */ + if (pass_stride < 0) /* This is never executed and just prevents the compiler from doing SROA. */ + for (int i = 0; i < sizeof(L); ++i) + reinterpret_cast<unsigned char *>(&L)[-pass_stride + i] = 0; +# endif + /* Integrate. */ kernel_path_integrate(kg, &state, throughput, &ray, &L, buffer, emission_sd); diff --git a/intern/cycles/kernel/kernel_shader.h b/intern/cycles/kernel/kernel_shader.h index b8202326cdf..7ccb99cad2a 100644 --- a/intern/cycles/kernel/kernel_shader.h +++ b/intern/cycles/kernel/kernel_shader.h @@ -48,10 +48,16 @@ ccl_device void shader_setup_object_transforms(KernelGlobals *kg, ShaderData *sd } #endif -ccl_device_noinline void shader_setup_from_ray(KernelGlobals *kg, - ShaderData *sd, - const Intersection *isect, - const Ray *ray) +#ifdef __KERNEL_OPTIX__ +ccl_device_inline +#else +ccl_device_noinline +#endif + void + shader_setup_from_ray(KernelGlobals *kg, + ShaderData *sd, + const Intersection *isect, + const Ray *ray) { PROFILING_INIT(kg, PROFILING_SHADER_SETUP); diff --git a/intern/cycles/kernel/kernel_shadow.h b/intern/cycles/kernel/kernel_shadow.h index c02d7d77faf..61fcc61264a 100644 --- a/intern/cycles/kernel/kernel_shadow.h +++ b/intern/cycles/kernel/kernel_shadow.h @@ -17,13 +17,6 @@ CCL_NAMESPACE_BEGIN #ifdef __VOLUME__ -typedef struct VolumeState { -# ifdef __SPLIT_KERNEL__ -# else - PathState ps; -# endif -} VolumeState; - /* Get PathState ready for use for volume stack evaluation. */ # ifdef __SPLIT_KERNEL__ ccl_addr_space @@ -55,16 +48,15 @@ ccl_addr_space /* Attenuate throughput accordingly to the given intersection event. * Returns true if the throughput is zero and traversal can be aborted. */ -ccl_device_forceinline bool shadow_handle_transparent_isect( - KernelGlobals *kg, - ShaderData *shadow_sd, - ccl_addr_space PathState *state, +ccl_device_forceinline bool shadow_handle_transparent_isect(KernelGlobals *kg, + ShaderData *shadow_sd, + ccl_addr_space PathState *state, #ifdef __VOLUME__ - ccl_addr_space struct PathState *volume_state, + ccl_addr_space PathState *volume_state, #endif - Intersection *isect, - Ray *ray, - float3 *throughput) + Intersection *isect, + Ray *ray, + float3 *throughput) { #ifdef __VOLUME__ /* Attenuation between last surface and next surface. */ @@ -163,7 +155,11 @@ ccl_device bool shadow_blocked_transparent_all_loop(KernelGlobals *kg, uint num_hits; const bool blocked = scene_intersect_shadow_all(kg, ray, hits, visibility, max_hits, &num_hits); # ifdef __VOLUME__ +# ifdef __KERNEL_OPTIX__ + VolumeState &volume_state = kg->volume_state; +# else VolumeState volume_state; +# endif # endif /* If no opaque surface found but we did find transparent hits, * shade them. @@ -302,7 +298,11 @@ ccl_device bool shadow_blocked_transparent_stepped_loop(KernelGlobals *kg, float3 *shadow) { # ifdef __VOLUME__ +# ifdef __KERNEL_OPTIX__ + VolumeState &volume_state = kg->volume_state; +# else VolumeState volume_state; +# endif # endif if (blocked && is_transparent_isect) { float3 throughput = make_float3(1.0f, 1.0f, 1.0f); @@ -387,32 +387,38 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, ShaderData *sd, ShaderData *shadow_sd, ccl_addr_space PathState *state, - Ray *ray_input, + Ray *ray, float3 *shadow) { - Ray *ray = ray_input; - Intersection isect; - /* Some common early checks. */ *shadow = make_float3(1.0f, 1.0f, 1.0f); +#if !defined(__KERNEL_OPTIX__) + /* Some common early checks. + * Avoid conditional trace call in OptiX though, since those hurt performance there. + */ if (ray->t == 0.0f) { return false; } +#endif #ifdef __SHADOW_TRICKS__ const uint visibility = (state->flag & PATH_RAY_SHADOW_CATCHER) ? PATH_RAY_SHADOW_NON_CATCHER : PATH_RAY_SHADOW; #else const uint visibility = PATH_RAY_SHADOW; #endif - /* Do actual shadow shading. */ - /* First of all, we check if integrator requires transparent shadows. + /* Do actual shadow shading. + * First of all, we check if integrator requires transparent shadows. * if not, we use simplest and fastest ever way to calculate occlusion. + * Do not do this in OptiX to avoid the additional trace call. */ -#ifdef __TRANSPARENT_SHADOWS__ +#if !defined(__KERNEL_OPTIX__) || !defined(__TRANSPARENT_SHADOWS__) + Intersection isect; +# ifdef __TRANSPARENT_SHADOWS__ if (!kernel_data.integrator.transparent_shadows) -#endif +# endif { return shadow_blocked_opaque(kg, shadow_sd, state, visibility, ray, &isect, shadow); } +#endif #ifdef __TRANSPARENT_SHADOWS__ # ifdef __SHADOW_RECORD_ALL__ /* For the transparent shadows we try to use record-all logic on the @@ -426,7 +432,7 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, return true; } const uint max_hits = transparent_max_bounce - state->transparent_bounce - 1; -# ifdef __KERNEL_GPU__ +# if defined(__KERNEL_GPU__) && !defined(__KERNEL_OPTIX__) /* On GPU we do tricky with tracing opaque ray first, this avoids speed * regressions in some files. * diff --git a/intern/cycles/kernel/kernel_subsurface.h b/intern/cycles/kernel/kernel_subsurface.h index 8dc1904058d..dbe2c12ce81 100644 --- a/intern/cycles/kernel/kernel_subsurface.h +++ b/intern/cycles/kernel/kernel_subsurface.h @@ -353,13 +353,19 @@ ccl_device void subsurface_random_walk_coefficients(const ShaderClosure *sc, *weight = safe_divide_color(bssrdf->weight, A); } -ccl_device_noinline bool subsurface_random_walk(KernelGlobals *kg, - LocalIntersection *ss_isect, - ShaderData *sd, - ccl_addr_space PathState *state, - const ShaderClosure *sc, - const float bssrdf_u, - const float bssrdf_v) +#ifdef __KERNEL_OPTIX__ +ccl_device_inline /* inline trace calls */ +#else +ccl_device_noinline +#endif + bool + subsurface_random_walk(KernelGlobals *kg, + LocalIntersection *ss_isect, + ShaderData *sd, + ccl_addr_space PathState *state, + const ShaderClosure *sc, + const float bssrdf_u, + const float bssrdf_v) { /* Sample diffuse surface scatter into the object. */ float3 D; diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h index f0054691b54..7aef34b00a2 100644 --- a/intern/cycles/kernel/kernel_types.h +++ b/intern/cycles/kernel/kernel_types.h @@ -143,6 +143,13 @@ CCL_NAMESPACE_BEGIN # endif #endif /* __KERNEL_CUDA__ */ +#ifdef __KERNEL_OPTIX__ +# undef __BAKING__ +# undef __BRANCHED_PATH__ +/* TODO(pmours): Cannot use optixTrace in non-inlined functions */ +# undef __SHADER_RAYTRACE__ +#endif /* __KERNEL_OPTIX__ */ + #ifdef __KERNEL_OPENCL__ #endif /* __KERNEL_OPENCL__ */ @@ -1056,6 +1063,15 @@ typedef struct PathState { #endif } PathState; +#ifdef __VOLUME__ +typedef struct VolumeState { +# ifdef __SPLIT_KERNEL__ +# else + PathState ps; +# endif +} VolumeState; +#endif + /* Struct to gather multiple nearby intersections. */ typedef struct LocalIntersection { Ray ray; @@ -1343,9 +1359,12 @@ typedef enum KernelBVHLayout { BVH_LAYOUT_BVH2 = (1 << 0), BVH_LAYOUT_BVH4 = (1 << 1), BVH_LAYOUT_BVH8 = (1 << 2), + BVH_LAYOUT_EMBREE = (1 << 3), + BVH_LAYOUT_OPTIX = (1 << 4), + BVH_LAYOUT_DEFAULT = BVH_LAYOUT_BVH8, - BVH_LAYOUT_ALL = (unsigned int)(-1), + BVH_LAYOUT_ALL = (unsigned int)(~0u), } KernelBVHLayout; typedef struct KernelBVH { @@ -1357,14 +1376,18 @@ typedef struct KernelBVH { int bvh_layout; int use_bvh_steps; - /* Embree */ -#ifdef __EMBREE__ + /* Custom BVH */ +#ifdef __KERNEL_OPTIX__ + OptixTraversableHandle scene; +#else +# ifdef __EMBREE__ RTCScene scene; -# ifndef __KERNEL_64_BIT__ - int pad1; +# ifndef __KERNEL_64_BIT__ + int pad2; +# endif +# else + int scene, pad2; # endif -#else - int pad1, pad2; #endif } KernelBVH; static_assert_align(KernelBVH, 16); diff --git a/intern/cycles/kernel/kernels/optix/kernel_optix.cu b/intern/cycles/kernel/kernels/optix/kernel_optix.cu new file mode 100644 index 00000000000..c7223a49d79 --- /dev/null +++ b/intern/cycles/kernel/kernels/optix/kernel_optix.cu @@ -0,0 +1,294 @@ +/* + * Copyright 2019, NVIDIA Corporation. + * Copyright 2019, Blender Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernel/kernel_compat_optix.h" +#include "util/util_atomic.h" +#include "kernel/kernel_types.h" +#include "kernel/kernel_globals.h" +#include "../cuda/kernel_cuda_image.h" // Texture lookup uses normal CUDA intrinsics + +#include "kernel/kernel_path.h" +#include "kernel/kernel_bake.h" + +template<typename T> ccl_device_forceinline T *get_payload_ptr_0() +{ + return (T *)(((uint64_t)optixGetPayload_1() << 32) | optixGetPayload_0()); +} +template<typename T> ccl_device_forceinline T *get_payload_ptr_2() +{ + return (T *)(((uint64_t)optixGetPayload_3() << 32) | optixGetPayload_2()); +} + +template<bool always = false> ccl_device_forceinline uint get_object_id() +{ +#ifdef __OBJECT_MOTION__ + // Always get the the instance ID from the TLAS + // There might be a motion transform node between TLAS and BLAS which does not have one + uint object = optixGetInstanceIdFromHandle(optixGetTransformListHandle(0)); +#else + uint object = optixGetInstanceId(); +#endif + // Choose between always returning object ID or only for instances + if (always) + // Can just remove the high bit since instace always contains object ID + return object & 0x7FFFFF; + // Set to OBJECT_NONE if this is not an instanced object + else if (object & 0x800000) + object = OBJECT_NONE; + return object; +} + +extern "C" __global__ void __raygen__kernel_optix_path_trace() +{ + KernelGlobals kg; // Allocate stack storage for common data + + const uint3 launch_index = optixGetLaunchIndex(); + // Keep threads for same pixel together to improve occupancy of warps + uint pixel_offset = launch_index.x / __params.tile.num_samples; + uint sample_offset = launch_index.x % __params.tile.num_samples; + + kernel_path_trace(&kg, + __params.tile.buffer, + __params.tile.start_sample + sample_offset, + __params.tile.x + pixel_offset, + __params.tile.y + launch_index.y, + __params.tile.offset, + __params.tile.stride); +} + +#ifdef __BAKING__ +extern "C" __global__ void __raygen__kernel_optix_bake() +{ + KernelGlobals kg; + const ShaderParams &p = __params.shader; + kernel_bake_evaluate(&kg, + p.input, + p.output, + (ShaderEvalType)p.type, + p.filter, + p.sx + optixGetLaunchIndex().x, + p.offset, + p.sample); +} +#endif + +extern "C" __global__ void __raygen__kernel_optix_displace() +{ + KernelGlobals kg; + const ShaderParams &p = __params.shader; + kernel_displace_evaluate(&kg, p.input, p.output, p.sx + optixGetLaunchIndex().x); +} + +extern "C" __global__ void __raygen__kernel_optix_background() +{ + KernelGlobals kg; + const ShaderParams &p = __params.shader; + kernel_background_evaluate(&kg, p.input, p.output, p.sx + optixGetLaunchIndex().x); +} + +extern "C" __global__ void __miss__kernel_optix_miss() +{ + // 'kernel_path_lamp_emission' checks intersection distance, so need to set it even on a miss + optixSetPayload_0(__float_as_uint(optixGetRayTmax())); + optixSetPayload_5(PRIMITIVE_NONE); +} + +extern "C" __global__ void __anyhit__kernel_optix_local_hit() +{ +#ifdef __BVH_LOCAL__ + const uint object = get_object_id<true>(); + if (object != optixGetPayload_4() /* local_object */) { + // Only intersect with matching object + return optixIgnoreIntersection(); + } + + int hit = 0; + uint *const lcg_state = get_payload_ptr_0<uint>(); + LocalIntersection *const local_isect = get_payload_ptr_2<LocalIntersection>(); + + if (lcg_state) { + const uint max_hits = optixGetPayload_5(); + for (int i = min(max_hits, local_isect->num_hits) - 1; i >= 0; --i) { + if (optixGetRayTmax() == local_isect->hits[i].t) { + return optixIgnoreIntersection(); + } + } + + hit = local_isect->num_hits++; + + if (local_isect->num_hits > max_hits) { + hit = lcg_step_uint(lcg_state) % local_isect->num_hits; + if (hit >= max_hits) { + return optixIgnoreIntersection(); + } + } + } + else { + if (local_isect->num_hits && optixGetRayTmax() > local_isect->hits[0].t) { + // Record closest intersection only (do not terminate ray here, since there is no guarantee + // about distance ordering in anyhit) + return optixIgnoreIntersection(); + } + + local_isect->num_hits = 1; + } + + Intersection *isect = &local_isect->hits[hit]; + isect->t = optixGetRayTmax(); + isect->prim = optixGetPrimitiveIndex(); + isect->object = get_object_id(); + isect->type = kernel_tex_fetch(__prim_type, isect->prim); + + if (optixIsTriangleHit()) { + const float2 barycentrics = optixGetTriangleBarycentrics(); + isect->u = 1.0f - barycentrics.y - barycentrics.x; + isect->v = barycentrics.x; + } + else { + isect->u = __uint_as_float(optixGetAttribute_0()); + isect->v = __uint_as_float(optixGetAttribute_1()); + } + + // Record geometric normal + const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, isect->prim); + const float3 tri_a = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 0)); + const float3 tri_b = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 1)); + const float3 tri_c = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 2)); + local_isect->Ng[hit] = normalize(cross(tri_b - tri_a, tri_c - tri_a)); + + // Continue tracing (without this the trace call would return after the first hit) + optixIgnoreIntersection(); +#endif +} + +extern "C" __global__ void __anyhit__kernel_optix_shadow_all_hit() +{ +#ifdef __SHADOW_RECORD_ALL__ + const uint prim = optixGetPrimitiveIndex(); +# ifdef __VISIBILITY_FLAG__ + const uint visibility = optixGetPayload_4(); + if ((kernel_tex_fetch(__prim_visibility, prim) & visibility) == 0) { + return optixIgnoreIntersection(); + } +# endif + + // Offset into array with num_hits + Intersection *const isect = get_payload_ptr_0<Intersection>() + optixGetPayload_2(); + isect->t = optixGetRayTmax(); + isect->prim = prim; + isect->object = get_object_id(); + isect->type = kernel_tex_fetch(__prim_type, prim); + + if (optixIsTriangleHit()) { + const float2 barycentrics = optixGetTriangleBarycentrics(); + isect->u = 1.0f - barycentrics.y - barycentrics.x; + isect->v = barycentrics.x; + } + else { + isect->u = __uint_as_float(optixGetAttribute_0()); + isect->v = __uint_as_float(optixGetAttribute_1()); + } + +# ifdef __TRANSPARENT_SHADOWS__ + // Detect if this surface has a shader with transparent shadows + if (!shader_transparent_shadow(NULL, isect) || optixGetPayload_2() >= optixGetPayload_3()) { +# endif + // This is an opaque hit or the hit limit has been reached, abort traversal + optixSetPayload_5(true); + return optixTerminateRay(); +# ifdef __TRANSPARENT_SHADOWS__ + } + + // TODO(pmours): Do we need REQUIRE_UNIQUE_ANYHIT for this to work? + optixSetPayload_2(optixGetPayload_2() + 1); // num_hits++ + + // Continue tracing + optixIgnoreIntersection(); +# endif +#endif +} + +extern "C" __global__ void __anyhit__kernel_optix_visibility_test() +{ + uint visibility = optixGetPayload_4(); +#ifdef __VISIBILITY_FLAG__ + const uint prim = optixGetPrimitiveIndex(); + if ((kernel_tex_fetch(__prim_visibility, prim) & visibility) == 0) + return optixIgnoreIntersection(); +#endif + + // Shadow ray early termination + if (visibility & PATH_RAY_SHADOW_OPAQUE) + return optixTerminateRay(); +} + +extern "C" __global__ void __closesthit__kernel_optix_hit() +{ + optixSetPayload_0(__float_as_uint(optixGetRayTmax())); // Intersection distance + optixSetPayload_3(optixGetPrimitiveIndex()); + optixSetPayload_4(get_object_id()); + // Can be PRIMITIVE_TRIANGLE and PRIMITIVE_MOTION_TRIANGLE or curve type and segment index + optixSetPayload_5(kernel_tex_fetch(__prim_type, optixGetPrimitiveIndex())); + + if (optixIsTriangleHit()) { + const float2 barycentrics = optixGetTriangleBarycentrics(); + optixSetPayload_1(__float_as_uint(1.0f - barycentrics.y - barycentrics.x)); + optixSetPayload_2(__float_as_uint(barycentrics.x)); + } + else { + optixSetPayload_1(optixGetAttribute_0()); + optixSetPayload_2(optixGetAttribute_1()); + } +} + +#ifdef __HAIR__ +extern "C" __global__ void __intersection__curve() +{ + const uint prim = optixGetPrimitiveIndex(); + const uint object = get_object_id<true>(); + const uint type = kernel_tex_fetch(__prim_type, prim); + const uint visibility = optixGetPayload_4(); + + const float3 P = optixGetObjectRayOrigin(); + const float3 dir = optixGetObjectRayDirection(); + +# ifdef __OBJECT_MOTION__ + const float time = optixGetRayTime(); +# else + const float time = 0.0f; +# endif + + Intersection isect; + isect.t = optixGetRayTmax(); + + if (!(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) ? + curve_intersect(NULL, &isect, P, dir, visibility, object, prim, time, type) : + cardinal_curve_intersect(NULL, &isect, P, dir, visibility, object, prim, time, type)) { + optixReportIntersection(isect.t, + type & PRIMITIVE_ALL, + __float_as_int(isect.u), // Attribute_0 + __float_as_int(isect.v)); // Attribute_1 + } +} +#endif + +#ifdef __KERNEL_DEBUG__ +extern "C" __global__ void __exception__kernel_optix_exception() +{ + printf("Unhandled exception occured: code %d!\n", optixGetExceptionCode()); +} +#endif |