Cycles: Add support for native OptiX curve primitive

This patch adds support for the curve primitive from OptiX to Cycles. It's currently hidden behind a debug option, since there can still be some slight rendering differences (because no backface culling is performed and something seems off with endcaps). The curve primitive was added with the OptiX 7.1 SDK and requires an r450 driver or newer, so this also updates the codebase to be able to build with the new SDK.

Reviewed By: brecht

Differential Revision: https://developer.blender.org/D8223
author: Patrick Mours <pmours@nvidia.com> 2020-07-06 13:25:54 +0300
committer: Patrick Mours <pmours@nvidia.com> 2020-07-07 16:39:02 +0300
commit: 737bd549b6eeee81f0573ad8e305fb8d888d82ec (patch)
tree: fc8aaca871557d652287cb3d4f9ee3033ff06e46 /intern/cycles/device
parent: 95f0f312799e10e4a9f5c884f8f20ec76d0ff363 (diff)
1 files changed, 247 insertions, 53 deletions
diff --git a/intern/cycles/device/device_optix.cpp b/intern/cycles/device/device_optix.cpp
index ececca3df53..266222c74c5 100644
--- a/intern/cycles/device/device_optix.cpp
+++ b/intern/cycles/device/device_optix.cpp
@@ -131,8 +131,12 @@ class OptiXDevice : public CUDADevice {
     PG_RGEN,
     PG_MISS,
     PG_HITD,  // Default hit group
-    PG_HITL,  // __BVH_LOCAL__ hit group
     PG_HITS,  // __SHADOW_RECORD_ALL__ hit group
+    PG_HITL,  // __BVH_LOCAL__ hit group (only used for triangles)
+#  if OPTIX_ABI_VERSION >= 36
+    PG_HITD_MOTION,
+    PG_HITS_MOTION,
+#  endif
 #  ifdef WITH_CYCLES_DEBUG
     PG_EXCP,
 #  endif
@@ -177,6 +181,7 @@ class OptiXDevice : public CUDADevice {
   OptixDeviceContext context = NULL;
 
   OptixModule optix_module = NULL;  // All necessary OptiX kernels are in one module
+  OptixModule builtin_modules[2] = {};
   OptixPipeline pipelines[NUM_PIPELINES] = {};
 
   bool motion_blur = false;
@@ -264,6 +269,9 @@ class OptiXDevice : public CUDADevice {
     // Unload modules
     if (optix_module != NULL)
       optixModuleDestroy(optix_module);
+    for (unsigned int i = 0; i < 2; ++i)
+      if (builtin_modules[i] != NULL)
+        optixModuleDestroy(builtin_modules[i]);
     for (unsigned int i = 0; i < NUM_PIPELINES; ++i)
       if (pipelines[i] != NULL)
         optixPipelineDestroy(pipelines[i]);
@@ -338,6 +346,12 @@ class OptiXDevice : public CUDADevice {
       optixModuleDestroy(optix_module);
       optix_module = NULL;
     }
+    for (unsigned int i = 0; i < 2; ++i) {
+      if (builtin_modules[i] != NULL) {
+        optixModuleDestroy(builtin_modules[i]);
+        builtin_modules[i] = NULL;
+      }
+    }
     for (unsigned int i = 0; i < NUM_PIPELINES; ++i) {
       if (pipelines[i] != NULL) {
         optixPipelineDestroy(pipelines[i]);
@@ -369,6 +383,18 @@ class OptiXDevice : public CUDADevice {
 #  endif
     pipeline_options.pipelineLaunchParamsVariableName = "__params";  // See kernel_globals.h
 
+#  if OPTIX_ABI_VERSION >= 36
+    pipeline_options.usesPrimitiveTypeFlags = OPTIX_PRIMITIVE_TYPE_FLAGS_TRIANGLE;
+    if (requested_features.use_hair) {
+      if (DebugFlags().optix.curves_api && requested_features.use_hair_thick) {
+        pipeline_options.usesPrimitiveTypeFlags |= OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_CUBIC_BSPLINE;
+      }
+      else {
+        pipeline_options.usesPrimitiveTypeFlags |= OPTIX_PRIMITIVE_TYPE_FLAGS_CUSTOM;
+      }
+    }
+#  endif
+
     // Keep track of whether motion blur is enabled, so to enable/disable motion in BVH builds
     // This is necessary since objects may be reported to have motion if the Vector pass is
     // active, but may still need to be rendered without motion blur if that isn't active as well
@@ -442,6 +468,34 @@ class OptiXDevice : public CUDADevice {
         group_descs[PG_HITD].hitgroup.entryFunctionNameIS = "__intersection__curve_ribbon";
         group_descs[PG_HITS].hitgroup.entryFunctionNameIS = "__intersection__curve_ribbon";
       }
+
+#  if OPTIX_ABI_VERSION >= 36
+      if (DebugFlags().optix.curves_api && requested_features.use_hair_thick) {
+        OptixBuiltinISOptions builtin_options;
+        builtin_options.builtinISModuleType = OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE;
+        builtin_options.usesMotionBlur = false;
+
+        check_result_optix_ret(optixBuiltinISModuleGet(
+            context, &module_options, &pipeline_options, &builtin_options, &builtin_modules[0]));
+
+        group_descs[PG_HITD].hitgroup.moduleIS = builtin_modules[0];
+        group_descs[PG_HITD].hitgroup.entryFunctionNameIS = nullptr;
+        group_descs[PG_HITS].hitgroup.moduleIS = builtin_modules[0];
+        group_descs[PG_HITS].hitgroup.entryFunctionNameIS = nullptr;
+
+        if (motion_blur) {
+          builtin_options.usesMotionBlur = true;
+
+          check_result_optix_ret(optixBuiltinISModuleGet(
+              context, &module_options, &pipeline_options, &builtin_options, &builtin_modules[1]));
+
+          group_descs[PG_HITD_MOTION] = group_descs[PG_HITD];
+          group_descs[PG_HITD_MOTION].hitgroup.moduleIS = builtin_modules[1];
+          group_descs[PG_HITS_MOTION] = group_descs[PG_HITS];
+          group_descs[PG_HITS_MOTION].hitgroup.moduleIS = builtin_modules[1];
+        }
+      }
+#  endif
     }
 
     if (requested_features.use_subsurface || requested_features.use_shader_raytrace) {
@@ -493,8 +547,14 @@ class OptiXDevice : public CUDADevice {
     unsigned int trace_css = stack_size[PG_HITD].cssCH;
     // This is based on the maximum of closest-hit and any-hit/intersection programs
     trace_css = std::max(trace_css, stack_size[PG_HITD].cssIS + stack_size[PG_HITD].cssAH);
-    trace_css = std::max(trace_css, stack_size[PG_HITL].cssIS + stack_size[PG_HITL].cssAH);
     trace_css = std::max(trace_css, stack_size[PG_HITS].cssIS + stack_size[PG_HITS].cssAH);
+    trace_css = std::max(trace_css, stack_size[PG_HITL].cssIS + stack_size[PG_HITL].cssAH);
+#  if OPTIX_ABI_VERSION >= 36
+    trace_css = std::max(trace_css,
+                         stack_size[PG_HITD_MOTION].cssIS + stack_size[PG_HITD_MOTION].cssAH);
+    trace_css = std::max(trace_css,
+                         stack_size[PG_HITS_MOTION].cssIS + stack_size[PG_HITS_MOTION].cssAH);
+#  endif
 
     OptixPipelineLinkOptions link_options;
     link_options.maxTraceDepth = 1;
@@ -503,17 +563,23 @@ class OptiXDevice : public CUDADevice {
 #  else
     link_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_LINEINFO;
 #  endif
-    link_options.overrideUsesMotionBlur = pipeline_options.usesMotionBlur;
+#  if OPTIX_ABI_VERSION < 24
+    link_options.overrideUsesMotionBlur = motion_blur;
+#  endif
 
     {  // Create path tracing pipeline
       OptixProgramGroup pipeline_groups[] = {
-          groups[PG_RGEN],
-          groups[PG_MISS],
-          groups[PG_HITD],
-          groups[PG_HITS],
-          groups[PG_HITL],
+        groups[PG_RGEN],
+        groups[PG_MISS],
+        groups[PG_HITD],
+        groups[PG_HITS],
+        groups[PG_HITL],
+#  if OPTIX_ABI_VERSION >= 36
+        groups[PG_HITD_MOTION],
+        groups[PG_HITS_MOTION],
+#  endif
 #  ifdef WITH_CYCLES_DEBUG
-          groups[PG_EXCP],
+        groups[PG_EXCP],
 #  endif
       };
       check_result_optix_ret(
@@ -530,8 +596,8 @@ class OptiXDevice : public CUDADevice {
       const unsigned int css = stack_size[PG_RGEN].cssRG + link_options.maxTraceDepth * trace_css;
 
       // Set stack size depending on pipeline options
-      check_result_optix_ret(optixPipelineSetStackSize(
-          pipelines[PIP_PATH_TRACE], 0, 0, css, (pipeline_options.usesMotionBlur ? 3 : 2)));
+      check_result_optix_ret(
+          optixPipelineSetStackSize(pipelines[PIP_PATH_TRACE], 0, 0, css, (motion_blur ? 3 : 2)));
     }
 
     // Only need to create shader evaluation pipeline if one of these features is used:
@@ -541,15 +607,19 @@ class OptiXDevice : public CUDADevice {
 
     if (use_shader_eval_pipeline) {  // Create shader evaluation pipeline
       OptixProgramGroup pipeline_groups[] = {
-          groups[PG_BAKE],
-          groups[PG_DISP],
-          groups[PG_BACK],
-          groups[PG_MISS],
-          groups[PG_HITD],
-          groups[PG_HITS],
-          groups[PG_HITL],
+        groups[PG_BAKE],
+        groups[PG_DISP],
+        groups[PG_BACK],
+        groups[PG_MISS],
+        groups[PG_HITD],
+        groups[PG_HITS],
+        groups[PG_HITL],
+#  if OPTIX_ABI_VERSION >= 36
+        groups[PG_HITD_MOTION],
+        groups[PG_HITS_MOTION],
+#  endif
 #  ifdef WITH_CYCLES_DEBUG
-          groups[PG_EXCP],
+        groups[PG_EXCP],
 #  endif
       };
       check_result_optix_ret(
@@ -672,7 +742,11 @@ class OptiXDevice : public CUDADevice {
       sbt_params.missRecordCount = 1;
       sbt_params.hitgroupRecordBase = sbt_data.device_pointer + PG_HITD * sizeof(SbtRecord);
       sbt_params.hitgroupRecordStrideInBytes = sizeof(SbtRecord);
-      sbt_params.hitgroupRecordCount = 3;  // PG_HITD, PG_HITL, PG_HITS
+#  if OPTIX_ABI_VERSION >= 36
+      sbt_params.hitgroupRecordCount = 5;  // PG_HITD(_MOTION), PG_HITS(_MOTION), PG_HITL
+#  else
+      sbt_params.hitgroupRecordCount = 3;  // PG_HITD, PG_HITS, PG_HITL
+#  endif
 
       // Launch the ray generation program
       check_result_optix(optixLaunch(pipelines[PIP_PATH_TRACE],
@@ -836,7 +910,9 @@ class OptiXDevice : public CUDADevice {
         assert(task.denoising.optix_input_passes >= 1 && task.denoising.optix_input_passes <= 3);
         denoiser_options.inputKind = static_cast<OptixDenoiserInputKind>(
             OPTIX_DENOISER_INPUT_RGB + (task.denoising.optix_input_passes - 1));
+#  if OPTIX_ABI_VERSION < 28
         denoiser_options.pixelFormat = OPTIX_PIXEL_FORMAT_FLOAT3;
+#  endif
         check_result_optix_ret(optixDenoiserCreate(context, &denoiser_options, &denoiser));
         check_result_optix_ret(
             optixDenoiserSetModel(denoiser, OPTIX_DENOISER_MODEL_KIND_HDR, NULL, 0));
@@ -849,7 +925,11 @@ class OptiXDevice : public CUDADevice {
       check_result_optix_ret(
           optixDenoiserComputeMemoryResources(denoiser, rect_size.x, rect_size.y, &sizes));
 
+#  if OPTIX_ABI_VERSION < 28
       const size_t scratch_size = sizes.recommendedScratchSizeInBytes;
+#  else
+      const size_t scratch_size = sizes.withOverlapScratchSizeInBytes;
+#  endif
       const size_t scratch_offset = sizes.stateSizeInBytes;
 
       // Allocate denoiser state if tile size has changed since last setup
@@ -993,7 +1073,11 @@ class OptiXDevice : public CUDADevice {
       sbt_params.missRecordCount = 1;
       sbt_params.hitgroupRecordBase = sbt_data.device_pointer + PG_HITD * sizeof(SbtRecord);
       sbt_params.hitgroupRecordStrideInBytes = sizeof(SbtRecord);
-      sbt_params.hitgroupRecordCount = 3;  // PG_HITD, PG_HITL, PG_HITS
+#  if OPTIX_ABI_VERSION >= 36
+      sbt_params.hitgroupRecordCount = 5;  // PG_HITD(_MOTION), PG_HITS(_MOTION), PG_HITL
+#  else
+      sbt_params.hitgroupRecordCount = 3;  // PG_HITD, PG_HITS, PG_HITL
+#  endif
 
       check_result_optix(optixLaunch(pipelines[PIP_SHADER_EVAL],
                                      cuda_stream[thread_index],
@@ -1070,7 +1154,7 @@ class OptiXDevice : public CUDADevice {
                                            &build_input,
                                            1,
                                            temp_mem.device_pointer,
-                                           temp_mem.device_size,
+                                           sizes.tempSizeInBytes,
                                            out_data,
                                            sizes.outputSizeInBytes,
                                            &out_handle,
@@ -1142,7 +1226,6 @@ class OptiXDevice : public CUDADevice {
           continue;
         }
 
-        const size_t num_curves = hair->num_curves();
         const size_t num_segments = hair->num_segments();
 
         size_t num_motion_steps = 1;
@@ -1152,7 +1235,18 @@ class OptiXDevice : public CUDADevice {
         }
 
         device_vector<OptixAabb> aabb_data(this, "temp_aabb_data", MEM_READ_ONLY);
-        aabb_data.alloc(num_segments * num_motion_steps);
+#  if OPTIX_ABI_VERSION >= 36
+        device_vector<int> index_data(this, "temp_index_data", MEM_READ_ONLY);
+        device_vector<float4> vertex_data(this, "temp_vertex_data", MEM_READ_ONLY);
+        // Four control points for each curve segment
+        const size_t num_vertices = num_segments * 4;
+        if (DebugFlags().optix.curves_api && hair->curve_shape == CURVE_THICK) {
+          index_data.alloc(num_segments);
+          vertex_data.alloc(num_vertices * num_motion_steps);
+        }
+        else
+#  endif
+          aabb_data.alloc(num_segments * num_motion_steps);
 
         // Get AABBs for each motion step
         for (size_t step = 0; step < num_motion_steps; ++step) {
@@ -1165,44 +1259,127 @@ class OptiXDevice : public CUDADevice {
             keys = motion_keys->data_float3() + attr_offset * hair->curve_keys.size();
           }
 
-          size_t i = step * num_segments;
-          for (size_t j = 0; j < num_curves; ++j) {
-            const Hair::Curve c = hair->get_curve(j);
-
-            for (size_t k = 0; k < c.num_segments(); ++i, ++k) {
-              BoundBox bounds = BoundBox::empty;
-              c.bounds_grow(k, keys, hair->curve_radius.data(), bounds);
-
-              aabb_data[i].minX = bounds.min.x;
-              aabb_data[i].minY = bounds.min.y;
-              aabb_data[i].minZ = bounds.min.z;
-              aabb_data[i].maxX = bounds.max.x;
-              aabb_data[i].maxY = bounds.max.y;
-              aabb_data[i].maxZ = bounds.max.z;
+          for (size_t j = 0, i = 0; j < hair->num_curves(); ++j) {
+            const Hair::Curve curve = hair->get_curve(j);
+
+            for (int segment = 0; segment < curve.num_segments(); ++segment, ++i) {
+#  if OPTIX_ABI_VERSION >= 36
+              if (DebugFlags().optix.curves_api && hair->curve_shape == CURVE_THICK) {
+                int k0 = curve.first_key + segment;
+                int k1 = k0 + 1;
+                int ka = max(k0 - 1, curve.first_key);
+                int kb = min(k1 + 1, curve.first_key + curve.num_keys - 1);
+
+                const float4 px = make_float4(keys[ka].x, keys[k0].x, keys[k1].x, keys[kb].x);
+                const float4 py = make_float4(keys[ka].y, keys[k0].y, keys[k1].y, keys[kb].y);
+                const float4 pz = make_float4(keys[ka].z, keys[k0].z, keys[k1].z, keys[kb].z);
+                const float4 pw = make_float4(hair->curve_radius[ka],
+                                              hair->curve_radius[k0],
+                                              hair->curve_radius[k1],
+                                              hair->curve_radius[kb]);
+
+                // Convert Catmull-Rom data to B-spline control points
+                static const float4 cr2bsp0 = make_float4(+7, -4, +5, -2) / 6.f;
+                static const float4 cr2bsp1 = make_float4(-2, 11, -4, +1) / 6.f;
+                static const float4 cr2bsp2 = make_float4(+1, -4, 11, -2) / 6.f;
+                static const float4 cr2bsp3 = make_float4(-2, +5, -4, +7) / 6.f;
+
+                index_data[i] = i * 4;
+                float4 *const v = vertex_data.data() + step * num_vertices + index_data[i];
+                v[0] = make_float4(
+                    dot(cr2bsp0, px), dot(cr2bsp0, py), dot(cr2bsp0, pz), dot(cr2bsp0, pw));
+                v[1] = make_float4(
+                    dot(cr2bsp1, px), dot(cr2bsp1, py), dot(cr2bsp1, pz), dot(cr2bsp1, pw));
+                v[2] = make_float4(
+                    dot(cr2bsp2, px), dot(cr2bsp2, py), dot(cr2bsp2, pz), dot(cr2bsp2, pw));
+                v[3] = make_float4(
+                    dot(cr2bsp3, px), dot(cr2bsp3, py), dot(cr2bsp3, pz), dot(cr2bsp3, pw));
+              }
+              else
+#  endif
+              {
+                BoundBox bounds = BoundBox::empty;
+                curve.bounds_grow(segment, keys, hair->curve_radius.data(), bounds);
+
+                const size_t index = step * num_segments + i;
+                aabb_data[index].minX = bounds.min.x;
+                aabb_data[index].minY = bounds.min.y;
+                aabb_data[index].minZ = bounds.min.z;
+                aabb_data[index].maxX = bounds.max.x;
+                aabb_data[index].maxY = bounds.max.y;
+                aabb_data[index].maxZ = bounds.max.z;
+              }
             }
           }
         }
 
         // Upload AABB data to GPU
         aabb_data.copy_to_device();
+#  if OPTIX_ABI_VERSION >= 36
+        index_data.copy_to_device();
+        vertex_data.copy_to_device();
+#  endif
 
         vector<device_ptr> aabb_ptrs;
         aabb_ptrs.reserve(num_motion_steps);
+#  if OPTIX_ABI_VERSION >= 36
+        vector<device_ptr> width_ptrs;
+        vector<device_ptr> vertex_ptrs;
+        width_ptrs.reserve(num_motion_steps);
+        vertex_ptrs.reserve(num_motion_steps);
+#  endif
         for (size_t step = 0; step < num_motion_steps; ++step) {
           aabb_ptrs.push_back(aabb_data.device_pointer + step * num_segments * sizeof(OptixAabb));
+#  if OPTIX_ABI_VERSION >= 36
+          const device_ptr base_ptr = vertex_data.device_pointer +
+                                      step * num_vertices * sizeof(float4);
+          width_ptrs.push_back(base_ptr + 3 * sizeof(float));  // Offset by vertex size
+          vertex_ptrs.push_back(base_ptr);
+#  endif
         }
 
-        // Disable visibility test anyhit program, since it is already checked during intersection
-        // Those trace calls that require anyhit can force it with OPTIX_RAY_FLAG_ENFORCE_ANYHIT
-        unsigned int build_flags = OPTIX_GEOMETRY_FLAG_DISABLE_ANYHIT;
+        // Force a single any-hit call, so shadow record-all behavior works correctly
+        unsigned int build_flags = OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL;
         OptixBuildInput build_input = {};
-        build_input.type = OPTIX_BUILD_INPUT_TYPE_CUSTOM_PRIMITIVES;
-        build_input.aabbArray.aabbBuffers = (CUdeviceptr *)aabb_ptrs.data();
-        build_input.aabbArray.numPrimitives = num_segments;
-        build_input.aabbArray.strideInBytes = sizeof(OptixAabb);
-        build_input.aabbArray.flags = &build_flags;
-        build_input.aabbArray.numSbtRecords = 1;
-        build_input.aabbArray.primitiveIndexOffset = hair->optix_prim_offset;
+#  if OPTIX_ABI_VERSION >= 36
+        if (DebugFlags().optix.curves_api && hair->curve_shape == CURVE_THICK) {
+          build_input.type = OPTIX_BUILD_INPUT_TYPE_CURVES;
+          build_input.curveArray.curveType = OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE;
+          build_input.curveArray.numPrimitives = num_segments;
+          build_input.curveArray.vertexBuffers = (CUdeviceptr *)vertex_ptrs.data();
+          build_input.curveArray.numVertices = num_vertices;
+          build_input.curveArray.vertexStrideInBytes = sizeof(float4);
+          build_input.curveArray.widthBuffers = (CUdeviceptr *)width_ptrs.data();
+          build_input.curveArray.widthStrideInBytes = sizeof(float4);
+          build_input.curveArray.indexBuffer = (CUdeviceptr)index_data.device_pointer;
+          build_input.curveArray.indexStrideInBytes = sizeof(int);
+          build_input.curveArray.flag = build_flags;
+          build_input.curveArray.primitiveIndexOffset = hair->optix_prim_offset;
+        }
+        else
+#  endif
+        {
+          // Disable visibility test any-hit program, since it is already checked during
+          // intersection. Those trace calls that require anyhit can force it with a ray flag.
+          build_flags |= OPTIX_GEOMETRY_FLAG_DISABLE_ANYHIT;
+
+          build_input.type = OPTIX_BUILD_INPUT_TYPE_CUSTOM_PRIMITIVES;
+#  if OPTIX_ABI_VERSION < 23
+          build_input.aabbArray.aabbBuffers = (CUdeviceptr *)aabb_ptrs.data();
+          build_input.aabbArray.numPrimitives = num_segments;
+          build_input.aabbArray.strideInBytes = sizeof(OptixAabb);
+          build_input.aabbArray.flags = &build_flags;
+          build_input.aabbArray.numSbtRecords = 1;
+          build_input.aabbArray.primitiveIndexOffset = hair->optix_prim_offset;
+#  else
+          build_input.customPrimitiveArray.aabbBuffers = (CUdeviceptr *)aabb_ptrs.data();
+          build_input.customPrimitiveArray.numPrimitives = num_segments;
+          build_input.customPrimitiveArray.strideInBytes = sizeof(OptixAabb);
+          build_input.customPrimitiveArray.flags = &build_flags;
+          build_input.customPrimitiveArray.numSbtRecords = 1;
+          build_input.customPrimitiveArray.primitiveIndexOffset = hair->optix_prim_offset;
+#  endif
+        }
 
         // Allocate memory for new BLAS and build it
         OptixTraversableHandle handle;
@@ -1257,8 +1434,8 @@ class OptiXDevice : public CUDADevice {
           vertex_ptrs.push_back(vertex_data.device_pointer + num_verts * step * sizeof(float3));
         }
 
-        // No special build flags for triangle primitives
-        unsigned int build_flags = OPTIX_GEOMETRY_FLAG_NONE;
+        // Force a single any-hit call, so shadow record-all behavior works correctly
+        unsigned int build_flags = OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL;
         OptixBuildInput build_input = {};
         build_input.type = OPTIX_BUILD_INPUT_TYPE_TRIANGLES;
         build_input.triangleArray.vertexBuffers = (CUdeviceptr *)vertex_ptrs.data();
@@ -1324,9 +1501,26 @@ class OptiXDevice : public CUDADevice {
       // Set user instance ID to object index
       instance.instanceId = ob->get_device_index();
 
-      // Volumes have a special bit set in the visibility mask so a trace can mask only volumes
-      // See 'scene_intersect_volume' in bvh.h
-      instance.visibilityMask = (ob->geometry->has_volume ? 3 : 1);
+      // Have to have at least one bit in the mask, or else instance would always be culled
+      instance.visibilityMask = 1;
+
+      if (ob->geometry->has_volume) {
+        // Volumes have a special bit set in the visibility mask so a trace can mask only volumes
+        instance.visibilityMask |= 2;
+      }
+
+      if (ob->geometry->type == Geometry::HAIR) {
+        // Same applies to curves (so they can be skipped in local trace calls)
+        instance.visibilityMask |= 4;
+
+#  if OPTIX_ABI_VERSION >= 36
+        if (motion_blur && ob->geometry->has_motion_blur() && DebugFlags().optix.curves_api &&
+            static_cast<const Hair *>(ob->geometry)->curve_shape == CURVE_THICK) {
+          // Select between motion blur and non-motion blur built-in intersection module
+          instance.sbtOffset = PG_HITD_MOTION - PG_HITD;
+        }
+#  endif
+      }
 
       // Insert motion traversable if object has motion
       if (motion_blur && ob->use_motion()) {
author	Patrick Mours <pmours@nvidia.com>	2020-07-06 13:25:54 +0300
committer	Patrick Mours <pmours@nvidia.com>	2020-07-07 16:39:02 +0300
commit	737bd549b6eeee81f0573ad8e305fb8d888d82ec (patch)
tree	fc8aaca871557d652287cb3d4f9ee3033ff06e46 /intern/cycles/device
parent	95f0f312799e10e4a9f5c884f8f20ec76d0ff363 (diff)