Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/intern
diff options
context:
space:
mode:
authorPatrick Mours <pmours@nvidia.com>2019-09-12 15:46:47 +0300
committerBrecht Van Lommel <brechtvanlommel@gmail.com>2019-09-13 12:46:22 +0300
commit53932f1f068501bfb095c407a7777a964dc5ec1c (patch)
tree82dfda8cd109d61ab0bf3601e5caf140001146c8 /intern
parent7eb293a37b868ffab53be056b85d6e1f5444f62f (diff)
Cycles: add Optix support in the kernel
This adds all the kernel side changes for the Optix backend. Ref D5363
Diffstat (limited to 'intern')
-rw-r--r--intern/cycles/kernel/bvh/bvh.h442
-rw-r--r--intern/cycles/kernel/geom/geom_curve_intersect.h4
-rw-r--r--intern/cycles/kernel/kernel_compat_optix.h89
-rw-r--r--intern/cycles/kernel/kernel_globals.h41
-rw-r--r--intern/cycles/kernel/kernel_path.h29
-rw-r--r--intern/cycles/kernel/kernel_shader.h14
-rw-r--r--intern/cycles/kernel/kernel_shadow.h54
-rw-r--r--intern/cycles/kernel/kernel_subsurface.h20
-rw-r--r--intern/cycles/kernel/kernel_types.h37
-rw-r--r--intern/cycles/kernel/kernels/optix/kernel_optix.cu294
10 files changed, 826 insertions, 198 deletions
diff --git a/intern/cycles/kernel/bvh/bvh.h b/intern/cycles/kernel/bvh/bvh.h
index 162b2fb5cdb..d0bc1fe4b36 100644
--- a/intern/cycles/kernel/bvh/bvh.h
+++ b/intern/cycles/kernel/bvh/bvh.h
@@ -33,136 +33,140 @@ CCL_NAMESPACE_BEGIN
#include "kernel/bvh/bvh_types.h"
+#ifndef __KERNEL_OPTIX__
+
/* Common QBVH functions. */
-#ifdef __QBVH__
-# include "kernel/bvh/qbvh_nodes.h"
-# ifdef __KERNEL_AVX2__
-# include "kernel/bvh/obvh_nodes.h"
+# ifdef __QBVH__
+# include "kernel/bvh/qbvh_nodes.h"
+# ifdef __KERNEL_AVX2__
+# include "kernel/bvh/obvh_nodes.h"
+# endif
# endif
-#endif
/* Regular BVH traversal */
-#include "kernel/bvh/bvh_nodes.h"
+# include "kernel/bvh/bvh_nodes.h"
-#define BVH_FUNCTION_NAME bvh_intersect
-#define BVH_FUNCTION_FEATURES 0
-#include "kernel/bvh/bvh_traversal.h"
-
-#if defined(__INSTANCING__)
-# define BVH_FUNCTION_NAME bvh_intersect_instancing
-# define BVH_FUNCTION_FEATURES BVH_INSTANCING
+# define BVH_FUNCTION_NAME bvh_intersect
+# define BVH_FUNCTION_FEATURES 0
# include "kernel/bvh/bvh_traversal.h"
-#endif
-#if defined(__HAIR__)
-# define BVH_FUNCTION_NAME bvh_intersect_hair
-# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_HAIR
-# include "kernel/bvh/bvh_traversal.h"
-#endif
+# if defined(__INSTANCING__)
+# define BVH_FUNCTION_NAME bvh_intersect_instancing
+# define BVH_FUNCTION_FEATURES BVH_INSTANCING
+# include "kernel/bvh/bvh_traversal.h"
+# endif
-#if defined(__OBJECT_MOTION__)
-# define BVH_FUNCTION_NAME bvh_intersect_motion
-# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_MOTION
-# include "kernel/bvh/bvh_traversal.h"
-#endif
+# if defined(__HAIR__)
+# define BVH_FUNCTION_NAME bvh_intersect_hair
+# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_HAIR
+# include "kernel/bvh/bvh_traversal.h"
+# endif
-#if defined(__HAIR__) && defined(__OBJECT_MOTION__)
-# define BVH_FUNCTION_NAME bvh_intersect_hair_motion
-# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_HAIR | BVH_MOTION
-# include "kernel/bvh/bvh_traversal.h"
-#endif
+# if defined(__OBJECT_MOTION__)
+# define BVH_FUNCTION_NAME bvh_intersect_motion
+# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_MOTION
+# include "kernel/bvh/bvh_traversal.h"
+# endif
-/* Subsurface scattering BVH traversal */
+# if defined(__HAIR__) && defined(__OBJECT_MOTION__)
+# define BVH_FUNCTION_NAME bvh_intersect_hair_motion
+# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_HAIR | BVH_MOTION
+# include "kernel/bvh/bvh_traversal.h"
+# endif
-#if defined(__BVH_LOCAL__)
-# define BVH_FUNCTION_NAME bvh_intersect_local
-# define BVH_FUNCTION_FEATURES BVH_HAIR
-# include "kernel/bvh/bvh_local.h"
+/* Subsurface scattering BVH traversal */
-# if defined(__OBJECT_MOTION__)
-# define BVH_FUNCTION_NAME bvh_intersect_local_motion
-# define BVH_FUNCTION_FEATURES BVH_MOTION | BVH_HAIR
+# if defined(__BVH_LOCAL__)
+# define BVH_FUNCTION_NAME bvh_intersect_local
+# define BVH_FUNCTION_FEATURES BVH_HAIR
# include "kernel/bvh/bvh_local.h"
-# endif
-#endif /* __BVH_LOCAL__ */
-
-/* Volume BVH traversal */
-#if defined(__VOLUME__)
-# define BVH_FUNCTION_NAME bvh_intersect_volume
-# define BVH_FUNCTION_FEATURES BVH_HAIR
-# include "kernel/bvh/bvh_volume.h"
+# if defined(__OBJECT_MOTION__)
+# define BVH_FUNCTION_NAME bvh_intersect_local_motion
+# define BVH_FUNCTION_FEATURES BVH_MOTION | BVH_HAIR
+# include "kernel/bvh/bvh_local.h"
+# endif
+# endif /* __BVH_LOCAL__ */
-# if defined(__INSTANCING__)
-# define BVH_FUNCTION_NAME bvh_intersect_volume_instancing
-# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_HAIR
-# include "kernel/bvh/bvh_volume.h"
-# endif
+/* Volume BVH traversal */
-# if defined(__OBJECT_MOTION__)
-# define BVH_FUNCTION_NAME bvh_intersect_volume_motion
-# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_MOTION | BVH_HAIR
+# if defined(__VOLUME__)
+# define BVH_FUNCTION_NAME bvh_intersect_volume
+# define BVH_FUNCTION_FEATURES BVH_HAIR
# include "kernel/bvh/bvh_volume.h"
-# endif
-#endif /* __VOLUME__ */
-
-/* Record all intersections - Shadow BVH traversal */
-#if defined(__SHADOW_RECORD_ALL__)
-# define BVH_FUNCTION_NAME bvh_intersect_shadow_all
-# define BVH_FUNCTION_FEATURES 0
-# include "kernel/bvh/bvh_shadow_all.h"
+# if defined(__INSTANCING__)
+# define BVH_FUNCTION_NAME bvh_intersect_volume_instancing
+# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_HAIR
+# include "kernel/bvh/bvh_volume.h"
+# endif
-# if defined(__INSTANCING__)
-# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_instancing
-# define BVH_FUNCTION_FEATURES BVH_INSTANCING
-# include "kernel/bvh/bvh_shadow_all.h"
-# endif
+# if defined(__OBJECT_MOTION__)
+# define BVH_FUNCTION_NAME bvh_intersect_volume_motion
+# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_MOTION | BVH_HAIR
+# include "kernel/bvh/bvh_volume.h"
+# endif
+# endif /* __VOLUME__ */
-# if defined(__HAIR__)
-# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_hair
-# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_HAIR
-# include "kernel/bvh/bvh_shadow_all.h"
-# endif
+/* Record all intersections - Shadow BVH traversal */
-# if defined(__OBJECT_MOTION__)
-# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_motion
-# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_MOTION
+# if defined(__SHADOW_RECORD_ALL__)
+# define BVH_FUNCTION_NAME bvh_intersect_shadow_all
+# define BVH_FUNCTION_FEATURES 0
# include "kernel/bvh/bvh_shadow_all.h"
-# endif
-# if defined(__HAIR__) && defined(__OBJECT_MOTION__)
-# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_hair_motion
-# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_HAIR | BVH_MOTION
-# include "kernel/bvh/bvh_shadow_all.h"
-# endif
-#endif /* __SHADOW_RECORD_ALL__ */
+# if defined(__INSTANCING__)
+# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_instancing
+# define BVH_FUNCTION_FEATURES BVH_INSTANCING
+# include "kernel/bvh/bvh_shadow_all.h"
+# endif
+
+# if defined(__HAIR__)
+# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_hair
+# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_HAIR
+# include "kernel/bvh/bvh_shadow_all.h"
+# endif
+
+# if defined(__OBJECT_MOTION__)
+# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_motion
+# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_MOTION
+# include "kernel/bvh/bvh_shadow_all.h"
+# endif
+
+# if defined(__HAIR__) && defined(__OBJECT_MOTION__)
+# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_hair_motion
+# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_HAIR | BVH_MOTION
+# include "kernel/bvh/bvh_shadow_all.h"
+# endif
+# endif /* __SHADOW_RECORD_ALL__ */
/* Record all intersections - Volume BVH traversal */
-#if defined(__VOLUME_RECORD_ALL__)
-# define BVH_FUNCTION_NAME bvh_intersect_volume_all
-# define BVH_FUNCTION_FEATURES BVH_HAIR
-# include "kernel/bvh/bvh_volume_all.h"
-
-# if defined(__INSTANCING__)
-# define BVH_FUNCTION_NAME bvh_intersect_volume_all_instancing
-# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_HAIR
+# if defined(__VOLUME_RECORD_ALL__)
+# define BVH_FUNCTION_NAME bvh_intersect_volume_all
+# define BVH_FUNCTION_FEATURES BVH_HAIR
# include "kernel/bvh/bvh_volume_all.h"
-# endif
-# if defined(__OBJECT_MOTION__)
-# define BVH_FUNCTION_NAME bvh_intersect_volume_all_motion
-# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_MOTION | BVH_HAIR
-# include "kernel/bvh/bvh_volume_all.h"
-# endif
-#endif /* __VOLUME_RECORD_ALL__ */
+# if defined(__INSTANCING__)
+# define BVH_FUNCTION_NAME bvh_intersect_volume_all_instancing
+# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_HAIR
+# include "kernel/bvh/bvh_volume_all.h"
+# endif
+
+# if defined(__OBJECT_MOTION__)
+# define BVH_FUNCTION_NAME bvh_intersect_volume_all_motion
+# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_MOTION | BVH_HAIR
+# include "kernel/bvh/bvh_volume_all.h"
+# endif
+# endif /* __VOLUME_RECORD_ALL__ */
+
+# undef BVH_FEATURE
+# undef BVH_NAME_JOIN
+# undef BVH_NAME_EVAL
+# undef BVH_FUNCTION_FULL_NAME
-#undef BVH_FEATURE
-#undef BVH_NAME_JOIN
-#undef BVH_NAME_EVAL
-#undef BVH_FUNCTION_FULL_NAME
+#endif /* __KERNEL_OPTIX__ */
ccl_device_inline bool scene_intersect_valid(const Ray *ray)
{
@@ -173,8 +177,10 @@ ccl_device_inline bool scene_intersect_valid(const Ray *ray)
* such cases.
* From production scenes so far it seems it's enough to test first element
* only.
+ * Scene intersection may also called with empty rays for conditional trace
+ * calls that evaluate to false, so filter those out.
*/
- return isfinite_safe(ray->P.x) && isfinite_safe(ray->D.x);
+ return isfinite_safe(ray->P.x) && isfinite_safe(ray->D.x) && len_squared(ray->D) != 0.0f;
}
ccl_device_intersect bool scene_intersect(KernelGlobals *kg,
@@ -184,10 +190,46 @@ ccl_device_intersect bool scene_intersect(KernelGlobals *kg,
{
PROFILING_INIT(kg, PROFILING_INTERSECT);
+#ifdef __KERNEL_OPTIX__
+ uint p0 = 0;
+ uint p1 = 0;
+ uint p2 = 0;
+ uint p3 = 0;
+ uint p4 = visibility;
+ uint p5 = PRIMITIVE_NONE;
+
+ optixTrace(scene_intersect_valid(ray) ? kernel_data.bvh.scene : 0,
+ ray->P,
+ ray->D,
+ 0.0f,
+ ray->t,
+ ray->time,
+ 0xFF,
+ OPTIX_RAY_FLAG_NONE,
+ 0,
+ 0,
+ 0, // SBT offset for PG_HITD
+ p0,
+ p1,
+ p2,
+ p3,
+ p4,
+ p5);
+
+ isect->t = __uint_as_float(p0);
+ isect->u = __uint_as_float(p1);
+ isect->v = __uint_as_float(p2);
+ isect->prim = p3;
+ isect->object = p4;
+ isect->type = p5;
+
+ return p5 != PRIMITIVE_NONE;
+#else /* __KERNEL_OPTIX__ */
if (!scene_intersect_valid(ray)) {
return false;
}
-#ifdef __EMBREE__
+
+# ifdef __EMBREE__
if (kernel_data.bvh.scene) {
isect->t = ray->t;
CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_REGULAR);
@@ -202,42 +244,41 @@ ccl_device_intersect bool scene_intersect(KernelGlobals *kg,
}
return false;
}
-#endif /* __EMBREE__ */
-#ifdef __OBJECT_MOTION__
+# endif /* __EMBREE__ */
+
+# ifdef __OBJECT_MOTION__
if (kernel_data.bvh.have_motion) {
-# ifdef __HAIR__
+# ifdef __HAIR__
if (kernel_data.bvh.have_curves) {
return bvh_intersect_hair_motion(kg, ray, isect, visibility);
}
-# endif /* __HAIR__ */
+# endif /* __HAIR__ */
return bvh_intersect_motion(kg, ray, isect, visibility);
}
-#endif /* __OBJECT_MOTION__ */
+# endif /* __OBJECT_MOTION__ */
-#ifdef __HAIR__
+# ifdef __HAIR__
if (kernel_data.bvh.have_curves) {
return bvh_intersect_hair(kg, ray, isect, visibility);
}
-#endif /* __HAIR__ */
-
-#ifdef __KERNEL_CPU__
+# endif /* __HAIR__ */
-# ifdef __INSTANCING__
+# ifdef __KERNEL_CPU__
+# ifdef __INSTANCING__
if (kernel_data.bvh.have_instancing) {
return bvh_intersect_instancing(kg, ray, isect, visibility);
}
-# endif /* __INSTANCING__ */
+# endif /* __INSTANCING__ */
return bvh_intersect(kg, ray, isect, visibility);
-#else /* __KERNEL_CPU__ */
-
-# ifdef __INSTANCING__
+# else /* __KERNEL_CPU__ */
+# ifdef __INSTANCING__
return bvh_intersect_instancing(kg, ray, isect, visibility);
-# else
+# else
return bvh_intersect(kg, ray, isect, visibility);
-# endif /* __INSTANCING__ */
-
-#endif /* __KERNEL_CPU__ */
+# endif /* __INSTANCING__ */
+# endif /* __KERNEL_CPU__ */
+#endif /* __KERNEL_OPTIX__ */
}
#ifdef __BVH_LOCAL__
@@ -250,11 +291,43 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals *kg,
{
PROFILING_INIT(kg, PROFILING_INTERSECT_LOCAL);
+# ifdef __KERNEL_OPTIX__
+ uint p0 = ((uint64_t)lcg_state) & 0xFFFFFFFF;
+ uint p1 = (((uint64_t)lcg_state) >> 32) & 0xFFFFFFFF;
+ uint p2 = ((uint64_t)local_isect) & 0xFFFFFFFF;
+ uint p3 = (((uint64_t)local_isect) >> 32) & 0xFFFFFFFF;
+ uint p4 = local_object;
+ // Is set to zero on miss or if ray is aborted, so can be used as return value
+ uint p5 = max_hits;
+
+ local_isect->num_hits = 0; // Initialize hit count to zero
+ optixTrace(scene_intersect_valid(ray) ? kernel_data.bvh.scene : 0,
+ ray->P,
+ ray->D,
+ 0.0f,
+ ray->t,
+ ray->time,
+ // Need to always call into __anyhit__kernel_optix_local_hit
+ 0xFF,
+ OPTIX_RAY_FLAG_ENFORCE_ANYHIT,
+ 1,
+ 0,
+ 0, // SBT offset for PG_HITL
+ p0,
+ p1,
+ p2,
+ p3,
+ p4,
+ p5);
+
+ return p5;
+# else /* __KERNEL_OPTIX__ */
if (!scene_intersect_valid(ray)) {
local_isect->num_hits = 0;
return false;
}
-# ifdef __EMBREE__
+
+# ifdef __EMBREE__
if (kernel_data.bvh.scene) {
CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_SSS);
ctx.lcg_state = lcg_state;
@@ -296,13 +369,15 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals *kg,
return local_isect->num_hits > 0;
}
-# endif /* __EMBREE__ */
-# ifdef __OBJECT_MOTION__
+# endif /* __EMBREE__ */
+
+# ifdef __OBJECT_MOTION__
if (kernel_data.bvh.have_motion) {
return bvh_intersect_local_motion(kg, ray, local_isect, local_object, lcg_state, max_hits);
}
-# endif /* __OBJECT_MOTION__ */
+# endif /* __OBJECT_MOTION__ */
return bvh_intersect_local(kg, ray, local_isect, local_object, lcg_state, max_hits);
+# endif /* __KERNEL_OPTIX__ */
}
#endif
@@ -316,11 +391,41 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals *kg,
{
PROFILING_INIT(kg, PROFILING_INTERSECT_SHADOW_ALL);
+# ifdef __KERNEL_OPTIX__
+ uint p0 = ((uint64_t)isect) & 0xFFFFFFFF;
+ uint p1 = (((uint64_t)isect) >> 32) & 0xFFFFFFFF;
+ uint p3 = max_hits;
+ uint p4 = visibility;
+ uint p5 = false;
+
+ *num_hits = 0; // Initialize hit count to zero
+ optixTrace(scene_intersect_valid(ray) ? kernel_data.bvh.scene : 0,
+ ray->P,
+ ray->D,
+ 0.0f,
+ ray->t,
+ ray->time,
+ // Need to always call into __anyhit__kernel_optix_shadow_all_hit
+ 0xFF,
+ OPTIX_RAY_FLAG_ENFORCE_ANYHIT,
+ 2,
+ 0,
+ 0, // SBT offset for PG_HITS
+ p0,
+ p1,
+ *num_hits,
+ p3,
+ p4,
+ p5);
+
+ return p5;
+# else /* __KERNEL_OPTIX__ */
if (!scene_intersect_valid(ray)) {
*num_hits = 0;
return false;
}
-# ifdef __EMBREE__
+
+# ifdef __EMBREE__
if (kernel_data.bvh.scene) {
CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_SHADOW_ALL);
ctx.isect_s = isect;
@@ -337,32 +442,41 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals *kg,
*num_hits = ctx.num_hits;
return rtc_ray.tfar == -INFINITY;
}
-# endif
-# ifdef __OBJECT_MOTION__
+# endif /* __EMBREE__ */
+
+# ifdef __OBJECT_MOTION__
if (kernel_data.bvh.have_motion) {
-# ifdef __HAIR__
+# ifdef __HAIR__
if (kernel_data.bvh.have_curves) {
return bvh_intersect_shadow_all_hair_motion(kg, ray, isect, visibility, max_hits, num_hits);
}
-# endif /* __HAIR__ */
+# endif /* __HAIR__ */
return bvh_intersect_shadow_all_motion(kg, ray, isect, visibility, max_hits, num_hits);
}
-# endif /* __OBJECT_MOTION__ */
+# endif /* __OBJECT_MOTION__ */
-# ifdef __HAIR__
+# ifdef __HAIR__
if (kernel_data.bvh.have_curves) {
return bvh_intersect_shadow_all_hair(kg, ray, isect, visibility, max_hits, num_hits);
}
-# endif /* __HAIR__ */
+# endif /* __HAIR__ */
-# ifdef __INSTANCING__
+# ifdef __KERNEL_CPU__
+# ifdef __INSTANCING__
if (kernel_data.bvh.have_instancing) {
return bvh_intersect_shadow_all_instancing(kg, ray, isect, visibility, max_hits, num_hits);
}
-# endif /* __INSTANCING__ */
-
+# endif /* __INSTANCING__ */
return bvh_intersect_shadow_all(kg, ray, isect, visibility, max_hits, num_hits);
+# else
+# ifdef __INSTANCING__
+ return bvh_intersect_shadow_all_instancing(kg, ray, isect, visibility, max_hits, num_hits);
+# else
+ return bvh_intersect_shadow_all(kg, ray, isect, visibility, max_hits, num_hits);
+# endif /* __INSTANCING__ */
+# endif /* __KERNEL_CPU__ */
+# endif /* __KERNEL_OPTIX__ */
}
#endif /* __SHADOW_RECORD_ALL__ */
@@ -374,30 +488,67 @@ ccl_device_intersect bool scene_intersect_volume(KernelGlobals *kg,
{
PROFILING_INIT(kg, PROFILING_INTERSECT_VOLUME);
+# ifdef __KERNEL_OPTIX__
+ uint p0 = 0;
+ uint p1 = 0;
+ uint p2 = 0;
+ uint p3 = 0;
+ uint p4 = visibility;
+ uint p5 = PRIMITIVE_NONE;
+
+ optixTrace(scene_intersect_valid(ray) ? kernel_data.bvh.scene : 0,
+ ray->P,
+ ray->D,
+ 0.0f,
+ ray->t,
+ ray->time,
+ // Visibility mask set to only intersect objects with volumes
+ 0x02,
+ OPTIX_RAY_FLAG_NONE,
+ 0,
+ 0,
+ 0, // SBT offset for PG_HITD
+ p0,
+ p1,
+ p2,
+ p3,
+ p4,
+ p5);
+
+ isect->t = __uint_as_float(p0);
+ isect->u = __uint_as_float(p1);
+ isect->v = __uint_as_float(p2);
+ isect->prim = p3;
+ isect->object = p4;
+ isect->type = p5;
+
+ return p5 != PRIMITIVE_NONE;
+# else /* __KERNEL_OPTIX__ */
if (!scene_intersect_valid(ray)) {
return false;
}
-# ifdef __OBJECT_MOTION__
+# ifdef __OBJECT_MOTION__
if (kernel_data.bvh.have_motion) {
return bvh_intersect_volume_motion(kg, ray, isect, visibility);
}
-# endif /* __OBJECT_MOTION__ */
+# endif /* __OBJECT_MOTION__ */
-# ifdef __KERNEL_CPU__
-# ifdef __INSTANCING__
+# ifdef __KERNEL_CPU__
+# ifdef __INSTANCING__
if (kernel_data.bvh.have_instancing) {
return bvh_intersect_volume_instancing(kg, ray, isect, visibility);
}
-# endif /* __INSTANCING__ */
+# endif /* __INSTANCING__ */
return bvh_intersect_volume(kg, ray, isect, visibility);
-# else /* __KERNEL_CPU__ */
-# ifdef __INSTANCING__
+# else /* __KERNEL_CPU__ */
+# ifdef __INSTANCING__
return bvh_intersect_volume_instancing(kg, ray, isect, visibility);
-# else
+# else
return bvh_intersect_volume(kg, ray, isect, visibility);
-# endif /* __INSTANCING__ */
-# endif /* __KERNEL_CPU__ */
+# endif /* __INSTANCING__ */
+# endif /* __KERNEL_CPU__ */
+# endif /* __KERNEL_OPTIX__ */
}
#endif /* __VOLUME__ */
@@ -413,6 +564,7 @@ ccl_device_intersect uint scene_intersect_volume_all(KernelGlobals *kg,
if (!scene_intersect_valid(ray)) {
return false;
}
+
# ifdef __EMBREE__
if (kernel_data.bvh.scene) {
CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_VOLUME_ALL);
diff --git a/intern/cycles/kernel/geom/geom_curve_intersect.h b/intern/cycles/kernel/geom/geom_curve_intersect.h
index 0327ebf8890..7a770470150 100644
--- a/intern/cycles/kernel/geom/geom_curve_intersect.h
+++ b/intern/cycles/kernel/geom/geom_curve_intersect.h
@@ -38,12 +38,14 @@ ccl_device_forceinline bool cardinal_curve_intersect(KernelGlobals *kg,
{
const bool is_curve_primitive = (type & PRIMITIVE_CURVE);
+# ifndef __KERNEL_OPTIX__ /* see OptiX motion flag OPTIX_MOTION_FLAG_[START|END]_VANISH */
if (!is_curve_primitive && kernel_data.bvh.use_bvh_steps) {
const float2 prim_time = kernel_tex_fetch(__prim_time, curveAddr);
if (time < prim_time.x || time > prim_time.y) {
return false;
}
}
+# endif
int segment = PRIMITIVE_UNPACK_SEGMENT(type);
float epsilon = 0.0f;
@@ -505,12 +507,14 @@ ccl_device_forceinline bool curve_intersect(KernelGlobals *kg,
const bool is_curve_primitive = (type & PRIMITIVE_CURVE);
+# ifndef __KERNEL_OPTIX__ /* see OptiX motion flag OPTIX_MOTION_FLAG_[START|END]_VANISH */
if (!is_curve_primitive && kernel_data.bvh.use_bvh_steps) {
const float2 prim_time = kernel_tex_fetch(__prim_time, curveAddr);
if (time < prim_time.x || time > prim_time.y) {
return false;
}
}
+# endif
int segment = PRIMITIVE_UNPACK_SEGMENT(type);
/* curve Intersection check */
diff --git a/intern/cycles/kernel/kernel_compat_optix.h b/intern/cycles/kernel/kernel_compat_optix.h
new file mode 100644
index 00000000000..61b9d87a020
--- /dev/null
+++ b/intern/cycles/kernel/kernel_compat_optix.h
@@ -0,0 +1,89 @@
+/*
+ * Copyright 2019, NVIDIA Corporation.
+ * Copyright 2019, Blender Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __KERNEL_COMPAT_OPTIX_H__
+#define __KERNEL_COMPAT_OPTIX_H__
+
+#define OPTIX_DONT_INCLUDE_CUDA
+#include <optix.h>
+
+#define __KERNEL_GPU__
+#define __KERNEL_CUDA__ // OptiX kernels are implicitly CUDA kernels too
+#define __KERNEL_OPTIX__
+#define CCL_NAMESPACE_BEGIN
+#define CCL_NAMESPACE_END
+
+#ifndef ATTR_FALLTHROUGH
+# define ATTR_FALLTHROUGH
+#endif
+
+typedef unsigned int uint32_t;
+typedef unsigned long long uint64_t;
+typedef unsigned short half;
+typedef unsigned long long CUtexObject;
+
+#define FLT_MIN 1.175494350822287507969e-38f
+#define FLT_MAX 340282346638528859811704183484516925440.0f
+
+__device__ half __float2half(const float f)
+{
+ half val;
+ asm("{ cvt.rn.f16.f32 %0, %1;}\n" : "=h"(val) : "f"(f));
+ return val;
+}
+
+/* Selective nodes compilation. */
+#ifndef __NODES_MAX_GROUP__
+# define __NODES_MAX_GROUP__ NODE_GROUP_LEVEL_MAX
+#endif
+#ifndef __NODES_FEATURES__
+# define __NODES_FEATURES__ NODE_FEATURE_ALL
+#endif
+
+#define ccl_device \
+ __device__ __forceinline__ // Function calls are bad for OptiX performance, so inline everything
+#define ccl_device_inline ccl_device
+#define ccl_device_forceinline ccl_device
+#define ccl_device_noinline __device__ __noinline__
+#define ccl_device_noinline_cpu ccl_device
+#define ccl_global
+#define ccl_static_constant __constant__
+#define ccl_constant const
+#define ccl_local
+#define ccl_local_param
+#define ccl_private
+#define ccl_may_alias
+#define ccl_addr_space
+#define ccl_restrict __restrict__
+#define ccl_ref
+#define ccl_align(n) __align__(n)
+
+// Zero initialize structs to help the compiler figure out scoping
+#define ccl_optional_struct_init = {}
+
+#define kernel_data __params.data // See kernel_globals.h
+#define kernel_tex_array(t) __params.t
+#define kernel_tex_fetch(t, index) __params.t[(index)]
+
+#define kernel_assert(cond)
+
+/* Types */
+
+#include "util/util_half.h"
+#include "util/util_types.h"
+
+#endif /* __KERNEL_COMPAT_OPTIX_H__ */
diff --git a/intern/cycles/kernel/kernel_globals.h b/intern/cycles/kernel/kernel_globals.h
index 9dbf3b7ea2e..a440021b6b9 100644
--- a/intern/cycles/kernel/kernel_globals.h
+++ b/intern/cycles/kernel/kernel_globals.h
@@ -90,12 +90,43 @@ typedef struct KernelGlobals {
#endif /* __KERNEL_CPU__ */
+#ifdef __KERNEL_OPTIX__
+
+typedef struct ShaderParams {
+ uint4 *input;
+ float4 *output;
+ int type;
+ int filter;
+ int sx;
+ int offset;
+ int sample;
+} ShaderParams;
+
+typedef struct KernelParams {
+ WorkTile tile;
+ KernelData data;
+ ShaderParams shader;
+# define KERNEL_TEX(type, name) const type *name;
+# include "kernel/kernel_textures.h"
+} KernelParams;
+
+typedef struct KernelGlobals {
+# ifdef __VOLUME__
+ VolumeState volume_state;
+# endif
+ Intersection hits_stack[64];
+} KernelGlobals;
+
+extern "C" __constant__ KernelParams __params;
+
+#else /* __KERNEL_OPTIX__ */
+
/* For CUDA, constant memory textures must be globals, so we can't put them
* into a struct. As a result we don't actually use this struct and use actual
* globals and simply pass along a NULL pointer everywhere, which we hope gets
* optimized out. */
-#ifdef __KERNEL_CUDA__
+# ifdef __KERNEL_CUDA__
__constant__ KernelData __data;
typedef struct KernelGlobals {
@@ -103,10 +134,12 @@ typedef struct KernelGlobals {
Intersection hits_stack[64];
} KernelGlobals;
-# define KERNEL_TEX(type, name) const __constant__ __device__ type *name;
-# include "kernel/kernel_textures.h"
+# define KERNEL_TEX(type, name) const __constant__ __device__ type *name;
+# include "kernel/kernel_textures.h"
+
+# endif /* __KERNEL_CUDA__ */
-#endif /* __KERNEL_CUDA__ */
+#endif /* __KERNEL_OPTIX__ */
/* OpenCL */
diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h
index 0dc55eba14a..d45ffe9c7df 100644
--- a/intern/cycles/kernel/kernel_path.h
+++ b/intern/cycles/kernel/kernel_path.h
@@ -326,13 +326,19 @@ ccl_device_forceinline bool kernel_path_shader_apply(KernelGlobals *kg,
return true;
}
-ccl_device_noinline void kernel_path_ao(KernelGlobals *kg,
- ShaderData *sd,
- ShaderData *emission_sd,
- PathRadiance *L,
- ccl_addr_space PathState *state,
- float3 throughput,
- float3 ao_alpha)
+#ifdef __KERNEL_OPTIX__
+ccl_device_inline /* inline trace calls */
+#else
+ccl_device_noinline
+#endif
+ void
+ kernel_path_ao(KernelGlobals *kg,
+ ShaderData *sd,
+ ShaderData *emission_sd,
+ PathRadiance *L,
+ ccl_addr_space PathState *state,
+ float3 throughput,
+ float3 ao_alpha)
{
PROFILING_INIT(kg, PROFILING_AO);
@@ -655,9 +661,11 @@ ccl_device void kernel_path_trace(
kernel_path_trace_setup(kg, sample, x, y, &rng_hash, &ray);
+# ifndef __KERNEL_OPTIX__
if (ray.t == 0.0f) {
return;
}
+# endif
/* Initialize state. */
float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
@@ -671,6 +679,13 @@ ccl_device void kernel_path_trace(
PathState state;
path_state_init(kg, emission_sd, &state, rng_hash, sample, &ray);
+# ifdef __KERNEL_OPTIX__
+ /* Force struct into local memory to avoid costly spilling on trace calls. */
+ if (pass_stride < 0) /* This is never executed and just prevents the compiler from doing SROA. */
+ for (int i = 0; i < sizeof(L); ++i)
+ reinterpret_cast<unsigned char *>(&L)[-pass_stride + i] = 0;
+# endif
+
/* Integrate. */
kernel_path_integrate(kg, &state, throughput, &ray, &L, buffer, emission_sd);
diff --git a/intern/cycles/kernel/kernel_shader.h b/intern/cycles/kernel/kernel_shader.h
index b8202326cdf..7ccb99cad2a 100644
--- a/intern/cycles/kernel/kernel_shader.h
+++ b/intern/cycles/kernel/kernel_shader.h
@@ -48,10 +48,16 @@ ccl_device void shader_setup_object_transforms(KernelGlobals *kg, ShaderData *sd
}
#endif
-ccl_device_noinline void shader_setup_from_ray(KernelGlobals *kg,
- ShaderData *sd,
- const Intersection *isect,
- const Ray *ray)
+#ifdef __KERNEL_OPTIX__
+ccl_device_inline
+#else
+ccl_device_noinline
+#endif
+ void
+ shader_setup_from_ray(KernelGlobals *kg,
+ ShaderData *sd,
+ const Intersection *isect,
+ const Ray *ray)
{
PROFILING_INIT(kg, PROFILING_SHADER_SETUP);
diff --git a/intern/cycles/kernel/kernel_shadow.h b/intern/cycles/kernel/kernel_shadow.h
index c02d7d77faf..61fcc61264a 100644
--- a/intern/cycles/kernel/kernel_shadow.h
+++ b/intern/cycles/kernel/kernel_shadow.h
@@ -17,13 +17,6 @@
CCL_NAMESPACE_BEGIN
#ifdef __VOLUME__
-typedef struct VolumeState {
-# ifdef __SPLIT_KERNEL__
-# else
- PathState ps;
-# endif
-} VolumeState;
-
/* Get PathState ready for use for volume stack evaluation. */
# ifdef __SPLIT_KERNEL__
ccl_addr_space
@@ -55,16 +48,15 @@ ccl_addr_space
/* Attenuate throughput accordingly to the given intersection event.
* Returns true if the throughput is zero and traversal can be aborted.
*/
-ccl_device_forceinline bool shadow_handle_transparent_isect(
- KernelGlobals *kg,
- ShaderData *shadow_sd,
- ccl_addr_space PathState *state,
+ccl_device_forceinline bool shadow_handle_transparent_isect(KernelGlobals *kg,
+ ShaderData *shadow_sd,
+ ccl_addr_space PathState *state,
#ifdef __VOLUME__
- ccl_addr_space struct PathState *volume_state,
+ ccl_addr_space PathState *volume_state,
#endif
- Intersection *isect,
- Ray *ray,
- float3 *throughput)
+ Intersection *isect,
+ Ray *ray,
+ float3 *throughput)
{
#ifdef __VOLUME__
/* Attenuation between last surface and next surface. */
@@ -163,7 +155,11 @@ ccl_device bool shadow_blocked_transparent_all_loop(KernelGlobals *kg,
uint num_hits;
const bool blocked = scene_intersect_shadow_all(kg, ray, hits, visibility, max_hits, &num_hits);
# ifdef __VOLUME__
+# ifdef __KERNEL_OPTIX__
+ VolumeState &volume_state = kg->volume_state;
+# else
VolumeState volume_state;
+# endif
# endif
/* If no opaque surface found but we did find transparent hits,
* shade them.
@@ -302,7 +298,11 @@ ccl_device bool shadow_blocked_transparent_stepped_loop(KernelGlobals *kg,
float3 *shadow)
{
# ifdef __VOLUME__
+# ifdef __KERNEL_OPTIX__
+ VolumeState &volume_state = kg->volume_state;
+# else
VolumeState volume_state;
+# endif
# endif
if (blocked && is_transparent_isect) {
float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
@@ -387,32 +387,38 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg,
ShaderData *sd,
ShaderData *shadow_sd,
ccl_addr_space PathState *state,
- Ray *ray_input,
+ Ray *ray,
float3 *shadow)
{
- Ray *ray = ray_input;
- Intersection isect;
- /* Some common early checks. */
*shadow = make_float3(1.0f, 1.0f, 1.0f);
+#if !defined(__KERNEL_OPTIX__)
+ /* Some common early checks.
+ * Avoid conditional trace call in OptiX though, since those hurt performance there.
+ */
if (ray->t == 0.0f) {
return false;
}
+#endif
#ifdef __SHADOW_TRICKS__
const uint visibility = (state->flag & PATH_RAY_SHADOW_CATCHER) ? PATH_RAY_SHADOW_NON_CATCHER :
PATH_RAY_SHADOW;
#else
const uint visibility = PATH_RAY_SHADOW;
#endif
- /* Do actual shadow shading. */
- /* First of all, we check if integrator requires transparent shadows.
+ /* Do actual shadow shading.
+ * First of all, we check if integrator requires transparent shadows.
* if not, we use simplest and fastest ever way to calculate occlusion.
+ * Do not do this in OptiX to avoid the additional trace call.
*/
-#ifdef __TRANSPARENT_SHADOWS__
+#if !defined(__KERNEL_OPTIX__) || !defined(__TRANSPARENT_SHADOWS__)
+ Intersection isect;
+# ifdef __TRANSPARENT_SHADOWS__
if (!kernel_data.integrator.transparent_shadows)
-#endif
+# endif
{
return shadow_blocked_opaque(kg, shadow_sd, state, visibility, ray, &isect, shadow);
}
+#endif
#ifdef __TRANSPARENT_SHADOWS__
# ifdef __SHADOW_RECORD_ALL__
/* For the transparent shadows we try to use record-all logic on the
@@ -426,7 +432,7 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg,
return true;
}
const uint max_hits = transparent_max_bounce - state->transparent_bounce - 1;
-# ifdef __KERNEL_GPU__
+# if defined(__KERNEL_GPU__) && !defined(__KERNEL_OPTIX__)
/* On GPU we do tricky with tracing opaque ray first, this avoids speed
* regressions in some files.
*
diff --git a/intern/cycles/kernel/kernel_subsurface.h b/intern/cycles/kernel/kernel_subsurface.h
index 8dc1904058d..dbe2c12ce81 100644
--- a/intern/cycles/kernel/kernel_subsurface.h
+++ b/intern/cycles/kernel/kernel_subsurface.h
@@ -353,13 +353,19 @@ ccl_device void subsurface_random_walk_coefficients(const ShaderClosure *sc,
*weight = safe_divide_color(bssrdf->weight, A);
}
-ccl_device_noinline bool subsurface_random_walk(KernelGlobals *kg,
- LocalIntersection *ss_isect,
- ShaderData *sd,
- ccl_addr_space PathState *state,
- const ShaderClosure *sc,
- const float bssrdf_u,
- const float bssrdf_v)
+#ifdef __KERNEL_OPTIX__
+ccl_device_inline /* inline trace calls */
+#else
+ccl_device_noinline
+#endif
+ bool
+ subsurface_random_walk(KernelGlobals *kg,
+ LocalIntersection *ss_isect,
+ ShaderData *sd,
+ ccl_addr_space PathState *state,
+ const ShaderClosure *sc,
+ const float bssrdf_u,
+ const float bssrdf_v)
{
/* Sample diffuse surface scatter into the object. */
float3 D;
diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h
index f0054691b54..7aef34b00a2 100644
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ -143,6 +143,13 @@ CCL_NAMESPACE_BEGIN
# endif
#endif /* __KERNEL_CUDA__ */
+#ifdef __KERNEL_OPTIX__
+# undef __BAKING__
+# undef __BRANCHED_PATH__
+/* TODO(pmours): Cannot use optixTrace in non-inlined functions */
+# undef __SHADER_RAYTRACE__
+#endif /* __KERNEL_OPTIX__ */
+
#ifdef __KERNEL_OPENCL__
#endif /* __KERNEL_OPENCL__ */
@@ -1056,6 +1063,15 @@ typedef struct PathState {
#endif
} PathState;
+#ifdef __VOLUME__
+typedef struct VolumeState {
+# ifdef __SPLIT_KERNEL__
+# else
+ PathState ps;
+# endif
+} VolumeState;
+#endif
+
/* Struct to gather multiple nearby intersections. */
typedef struct LocalIntersection {
Ray ray;
@@ -1343,9 +1359,12 @@ typedef enum KernelBVHLayout {
BVH_LAYOUT_BVH2 = (1 << 0),
BVH_LAYOUT_BVH4 = (1 << 1),
BVH_LAYOUT_BVH8 = (1 << 2),
+
BVH_LAYOUT_EMBREE = (1 << 3),
+ BVH_LAYOUT_OPTIX = (1 << 4),
+
BVH_LAYOUT_DEFAULT = BVH_LAYOUT_BVH8,
- BVH_LAYOUT_ALL = (unsigned int)(-1),
+ BVH_LAYOUT_ALL = (unsigned int)(~0u),
} KernelBVHLayout;
typedef struct KernelBVH {
@@ -1357,14 +1376,18 @@ typedef struct KernelBVH {
int bvh_layout;
int use_bvh_steps;
- /* Embree */
-#ifdef __EMBREE__
+ /* Custom BVH */
+#ifdef __KERNEL_OPTIX__
+ OptixTraversableHandle scene;
+#else
+# ifdef __EMBREE__
RTCScene scene;
-# ifndef __KERNEL_64_BIT__
- int pad1;
+# ifndef __KERNEL_64_BIT__
+ int pad2;
+# endif
+# else
+ int scene, pad2;
# endif
-#else
- int pad1, pad2;
#endif
} KernelBVH;
static_assert_align(KernelBVH, 16);
diff --git a/intern/cycles/kernel/kernels/optix/kernel_optix.cu b/intern/cycles/kernel/kernels/optix/kernel_optix.cu
new file mode 100644
index 00000000000..c7223a49d79
--- /dev/null
+++ b/intern/cycles/kernel/kernels/optix/kernel_optix.cu
@@ -0,0 +1,294 @@
+/*
+ * Copyright 2019, NVIDIA Corporation.
+ * Copyright 2019, Blender Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernel/kernel_compat_optix.h"
+#include "util/util_atomic.h"
+#include "kernel/kernel_types.h"
+#include "kernel/kernel_globals.h"
+#include "../cuda/kernel_cuda_image.h" // Texture lookup uses normal CUDA intrinsics
+
+#include "kernel/kernel_path.h"
+#include "kernel/kernel_bake.h"
+
+template<typename T> ccl_device_forceinline T *get_payload_ptr_0()
+{
+ return (T *)(((uint64_t)optixGetPayload_1() << 32) | optixGetPayload_0());
+}
+template<typename T> ccl_device_forceinline T *get_payload_ptr_2()
+{
+ return (T *)(((uint64_t)optixGetPayload_3() << 32) | optixGetPayload_2());
+}
+
+template<bool always = false> ccl_device_forceinline uint get_object_id()
+{
+#ifdef __OBJECT_MOTION__
+ // Always get the the instance ID from the TLAS
+ // There might be a motion transform node between TLAS and BLAS which does not have one
+ uint object = optixGetInstanceIdFromHandle(optixGetTransformListHandle(0));
+#else
+ uint object = optixGetInstanceId();
+#endif
+ // Choose between always returning object ID or only for instances
+ if (always)
+ // Can just remove the high bit since instace always contains object ID
+ return object & 0x7FFFFF;
+ // Set to OBJECT_NONE if this is not an instanced object
+ else if (object & 0x800000)
+ object = OBJECT_NONE;
+ return object;
+}
+
+extern "C" __global__ void __raygen__kernel_optix_path_trace()
+{
+ KernelGlobals kg; // Allocate stack storage for common data
+
+ const uint3 launch_index = optixGetLaunchIndex();
+ // Keep threads for same pixel together to improve occupancy of warps
+ uint pixel_offset = launch_index.x / __params.tile.num_samples;
+ uint sample_offset = launch_index.x % __params.tile.num_samples;
+
+ kernel_path_trace(&kg,
+ __params.tile.buffer,
+ __params.tile.start_sample + sample_offset,
+ __params.tile.x + pixel_offset,
+ __params.tile.y + launch_index.y,
+ __params.tile.offset,
+ __params.tile.stride);
+}
+
+#ifdef __BAKING__
+extern "C" __global__ void __raygen__kernel_optix_bake()
+{
+ KernelGlobals kg;
+ const ShaderParams &p = __params.shader;
+ kernel_bake_evaluate(&kg,
+ p.input,
+ p.output,
+ (ShaderEvalType)p.type,
+ p.filter,
+ p.sx + optixGetLaunchIndex().x,
+ p.offset,
+ p.sample);
+}
+#endif
+
+extern "C" __global__ void __raygen__kernel_optix_displace()
+{
+ KernelGlobals kg;
+ const ShaderParams &p = __params.shader;
+ kernel_displace_evaluate(&kg, p.input, p.output, p.sx + optixGetLaunchIndex().x);
+}
+
+extern "C" __global__ void __raygen__kernel_optix_background()
+{
+ KernelGlobals kg;
+ const ShaderParams &p = __params.shader;
+ kernel_background_evaluate(&kg, p.input, p.output, p.sx + optixGetLaunchIndex().x);
+}
+
+extern "C" __global__ void __miss__kernel_optix_miss()
+{
+ // 'kernel_path_lamp_emission' checks intersection distance, so need to set it even on a miss
+ optixSetPayload_0(__float_as_uint(optixGetRayTmax()));
+ optixSetPayload_5(PRIMITIVE_NONE);
+}
+
+extern "C" __global__ void __anyhit__kernel_optix_local_hit()
+{
+#ifdef __BVH_LOCAL__
+ const uint object = get_object_id<true>();
+ if (object != optixGetPayload_4() /* local_object */) {
+ // Only intersect with matching object
+ return optixIgnoreIntersection();
+ }
+
+ int hit = 0;
+ uint *const lcg_state = get_payload_ptr_0<uint>();
+ LocalIntersection *const local_isect = get_payload_ptr_2<LocalIntersection>();
+
+ if (lcg_state) {
+ const uint max_hits = optixGetPayload_5();
+ for (int i = min(max_hits, local_isect->num_hits) - 1; i >= 0; --i) {
+ if (optixGetRayTmax() == local_isect->hits[i].t) {
+ return optixIgnoreIntersection();
+ }
+ }
+
+ hit = local_isect->num_hits++;
+
+ if (local_isect->num_hits > max_hits) {
+ hit = lcg_step_uint(lcg_state) % local_isect->num_hits;
+ if (hit >= max_hits) {
+ return optixIgnoreIntersection();
+ }
+ }
+ }
+ else {
+ if (local_isect->num_hits && optixGetRayTmax() > local_isect->hits[0].t) {
+ // Record closest intersection only (do not terminate ray here, since there is no guarantee
+ // about distance ordering in anyhit)
+ return optixIgnoreIntersection();
+ }
+
+ local_isect->num_hits = 1;
+ }
+
+ Intersection *isect = &local_isect->hits[hit];
+ isect->t = optixGetRayTmax();
+ isect->prim = optixGetPrimitiveIndex();
+ isect->object = get_object_id();
+ isect->type = kernel_tex_fetch(__prim_type, isect->prim);
+
+ if (optixIsTriangleHit()) {
+ const float2 barycentrics = optixGetTriangleBarycentrics();
+ isect->u = 1.0f - barycentrics.y - barycentrics.x;
+ isect->v = barycentrics.x;
+ }
+ else {
+ isect->u = __uint_as_float(optixGetAttribute_0());
+ isect->v = __uint_as_float(optixGetAttribute_1());
+ }
+
+ // Record geometric normal
+ const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, isect->prim);
+ const float3 tri_a = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 0));
+ const float3 tri_b = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 1));
+ const float3 tri_c = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 2));
+ local_isect->Ng[hit] = normalize(cross(tri_b - tri_a, tri_c - tri_a));
+
+ // Continue tracing (without this the trace call would return after the first hit)
+ optixIgnoreIntersection();
+#endif
+}
+
+extern "C" __global__ void __anyhit__kernel_optix_shadow_all_hit()
+{
+#ifdef __SHADOW_RECORD_ALL__
+ const uint prim = optixGetPrimitiveIndex();
+# ifdef __VISIBILITY_FLAG__
+ const uint visibility = optixGetPayload_4();
+ if ((kernel_tex_fetch(__prim_visibility, prim) & visibility) == 0) {
+ return optixIgnoreIntersection();
+ }
+# endif
+
+ // Offset into array with num_hits
+ Intersection *const isect = get_payload_ptr_0<Intersection>() + optixGetPayload_2();
+ isect->t = optixGetRayTmax();
+ isect->prim = prim;
+ isect->object = get_object_id();
+ isect->type = kernel_tex_fetch(__prim_type, prim);
+
+ if (optixIsTriangleHit()) {
+ const float2 barycentrics = optixGetTriangleBarycentrics();
+ isect->u = 1.0f - barycentrics.y - barycentrics.x;
+ isect->v = barycentrics.x;
+ }
+ else {
+ isect->u = __uint_as_float(optixGetAttribute_0());
+ isect->v = __uint_as_float(optixGetAttribute_1());
+ }
+
+# ifdef __TRANSPARENT_SHADOWS__
+ // Detect if this surface has a shader with transparent shadows
+ if (!shader_transparent_shadow(NULL, isect) || optixGetPayload_2() >= optixGetPayload_3()) {
+# endif
+ // This is an opaque hit or the hit limit has been reached, abort traversal
+ optixSetPayload_5(true);
+ return optixTerminateRay();
+# ifdef __TRANSPARENT_SHADOWS__
+ }
+
+ // TODO(pmours): Do we need REQUIRE_UNIQUE_ANYHIT for this to work?
+ optixSetPayload_2(optixGetPayload_2() + 1); // num_hits++
+
+ // Continue tracing
+ optixIgnoreIntersection();
+# endif
+#endif
+}
+
+extern "C" __global__ void __anyhit__kernel_optix_visibility_test()
+{
+ uint visibility = optixGetPayload_4();
+#ifdef __VISIBILITY_FLAG__
+ const uint prim = optixGetPrimitiveIndex();
+ if ((kernel_tex_fetch(__prim_visibility, prim) & visibility) == 0)
+ return optixIgnoreIntersection();
+#endif
+
+ // Shadow ray early termination
+ if (visibility & PATH_RAY_SHADOW_OPAQUE)
+ return optixTerminateRay();
+}
+
+extern "C" __global__ void __closesthit__kernel_optix_hit()
+{
+ optixSetPayload_0(__float_as_uint(optixGetRayTmax())); // Intersection distance
+ optixSetPayload_3(optixGetPrimitiveIndex());
+ optixSetPayload_4(get_object_id());
+ // Can be PRIMITIVE_TRIANGLE and PRIMITIVE_MOTION_TRIANGLE or curve type and segment index
+ optixSetPayload_5(kernel_tex_fetch(__prim_type, optixGetPrimitiveIndex()));
+
+ if (optixIsTriangleHit()) {
+ const float2 barycentrics = optixGetTriangleBarycentrics();
+ optixSetPayload_1(__float_as_uint(1.0f - barycentrics.y - barycentrics.x));
+ optixSetPayload_2(__float_as_uint(barycentrics.x));
+ }
+ else {
+ optixSetPayload_1(optixGetAttribute_0());
+ optixSetPayload_2(optixGetAttribute_1());
+ }
+}
+
+#ifdef __HAIR__
+extern "C" __global__ void __intersection__curve()
+{
+ const uint prim = optixGetPrimitiveIndex();
+ const uint object = get_object_id<true>();
+ const uint type = kernel_tex_fetch(__prim_type, prim);
+ const uint visibility = optixGetPayload_4();
+
+ const float3 P = optixGetObjectRayOrigin();
+ const float3 dir = optixGetObjectRayDirection();
+
+# ifdef __OBJECT_MOTION__
+ const float time = optixGetRayTime();
+# else
+ const float time = 0.0f;
+# endif
+
+ Intersection isect;
+ isect.t = optixGetRayTmax();
+
+ if (!(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) ?
+ curve_intersect(NULL, &isect, P, dir, visibility, object, prim, time, type) :
+ cardinal_curve_intersect(NULL, &isect, P, dir, visibility, object, prim, time, type)) {
+ optixReportIntersection(isect.t,
+ type & PRIMITIVE_ALL,
+ __float_as_int(isect.u), // Attribute_0
+ __float_as_int(isect.v)); // Attribute_1
+ }
+}
+#endif
+
+#ifdef __KERNEL_DEBUG__
+extern "C" __global__ void __exception__kernel_optix_exception()
+{
+ printf("Unhandled exception occured: code %d!\n", optixGetExceptionCode());
+}
+#endif