1 files changed, 109 insertions, 78 deletions
diff --git a/intern/cycles/device/device_optix.cpp b/intern/cycles/device/device_optix.cpp
index 43b1fb30baf..95234845f98 100644
--- a/intern/cycles/device/device_optix.cpp
+++ b/intern/cycles/device/device_optix.cpp
@@ -18,6 +18,7 @@
 #ifdef WITH_OPTIX
 
 #  include "bvh/bvh.h"
+#  include "bvh/bvh_optix.h"
 #  include "device/cuda/device_cuda.h"
 #  include "device/device_denoising.h"
 #  include "device/device_intern.h"
@@ -137,9 +138,6 @@ class OptiXDevice : public CUDADevice {
     PG_HITD_MOTION,
     PG_HITS_MOTION,
 #  endif
-#  ifdef WITH_CYCLES_DEBUG
-    PG_EXCP,
-#  endif
     PG_BAKE,  // kernel_bake_evaluate
     PG_DISP,  // kernel_displace_evaluate
     PG_BACK,  // kernel_background_evaluate
@@ -232,6 +230,9 @@ class OptiXDevice : public CUDADevice {
           }
         };
 #  endif
+#  if OPTIX_ABI_VERSION >= 41 && defined(WITH_CYCLES_DEBUG)
+    options.validationMode = OPTIX_DEVICE_CONTEXT_VALIDATION_MODE_ALL;
+#  endif
     check_result_optix(optixDeviceContextCreate(cuContext, &options, &context));
 #  ifdef WITH_CYCLES_LOGGING
     check_result_optix(optixDeviceContextSetLogCallback(
@@ -368,6 +369,12 @@ class OptiXDevice : public CUDADevice {
     module_options.optLevel = OPTIX_COMPILE_OPTIMIZATION_LEVEL_3;
     module_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_LINEINFO;
 #  endif
+
+#  if OPTIX_ABI_VERSION >= 41
+    module_options.boundValues = nullptr;
+    module_options.numBoundValues = 0;
+#  endif
+
     OptixPipelineCompileOptions pipeline_options;
     // Default to no motion blur and two-level graph, since it is the fastest option
     pipeline_options.usesMotionBlur = false;
@@ -375,12 +382,7 @@ class OptiXDevice : public CUDADevice {
         OPTIX_TRAVERSABLE_GRAPH_FLAG_ALLOW_SINGLE_LEVEL_INSTANCING;
     pipeline_options.numPayloadValues = 6;
     pipeline_options.numAttributeValues = 2;  // u, v
-#  ifdef WITH_CYCLES_DEBUG
-    pipeline_options.exceptionFlags = OPTIX_EXCEPTION_FLAG_STACK_OVERFLOW |
-                                      OPTIX_EXCEPTION_FLAG_TRACE_DEPTH;
-#  else
     pipeline_options.exceptionFlags = OPTIX_EXCEPTION_FLAG_NONE;
-#  endif
     pipeline_options.pipelineLaunchParamsVariableName = "__params";  // See kernel_globals.h
 
 #  if OPTIX_ABI_VERSION >= 36
@@ -505,12 +507,6 @@ class OptiXDevice : public CUDADevice {
       group_descs[PG_HITL].hitgroup.entryFunctionNameAH = "__anyhit__kernel_optix_local_hit";
     }
 
-#  ifdef WITH_CYCLES_DEBUG
-    group_descs[PG_EXCP].kind = OPTIX_PROGRAM_GROUP_KIND_EXCEPTION;
-    group_descs[PG_EXCP].exception.module = optix_module;
-    group_descs[PG_EXCP].exception.entryFunctionName = "__exception__kernel_optix_exception";
-#  endif
-
     if (requested_features.use_baking) {
       group_descs[PG_BAKE].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
       group_descs[PG_BAKE].raygen.module = optix_module;
@@ -578,9 +574,6 @@ class OptiXDevice : public CUDADevice {
         groups[PG_HITD_MOTION],
         groups[PG_HITS_MOTION],
 #  endif
-#  ifdef WITH_CYCLES_DEBUG
-        groups[PG_EXCP],
-#  endif
       };
       check_result_optix_ret(
           optixPipelineCreate(context,
@@ -618,9 +611,6 @@ class OptiXDevice : public CUDADevice {
         groups[PG_HITD_MOTION],
         groups[PG_HITS_MOTION],
 #  endif
-#  ifdef WITH_CYCLES_DEBUG
-        groups[PG_EXCP],
-#  endif
       };
       check_result_optix_ret(
           optixPipelineCreate(context,
@@ -734,9 +724,6 @@ class OptiXDevice : public CUDADevice {
 
       OptixShaderBindingTable sbt_params = {};
       sbt_params.raygenRecord = sbt_data.device_pointer + PG_RGEN * sizeof(SbtRecord);
-#  ifdef WITH_CYCLES_DEBUG
-      sbt_params.exceptionRecord = sbt_data.device_pointer + PG_EXCP * sizeof(SbtRecord);
-#  endif
       sbt_params.missRecordBase = sbt_data.device_pointer + PG_MISS * sizeof(SbtRecord);
       sbt_params.missRecordStrideInBytes = sizeof(SbtRecord);
       sbt_params.missRecordCount = 1;
@@ -1064,9 +1051,6 @@ class OptiXDevice : public CUDADevice {
 
       OptixShaderBindingTable sbt_params = {};
       sbt_params.raygenRecord = sbt_data.device_pointer + rgen_index * sizeof(SbtRecord);
-#  ifdef WITH_CYCLES_DEBUG
-      sbt_params.exceptionRecord = sbt_data.device_pointer + PG_EXCP * sizeof(SbtRecord);
-#  endif
       sbt_params.missRecordBase = sbt_data.device_pointer + PG_MISS * sizeof(SbtRecord);
       sbt_params.missRecordStrideInBytes = sizeof(SbtRecord);
       sbt_params.missRecordCount = 1;
@@ -1095,23 +1079,23 @@ class OptiXDevice : public CUDADevice {
 
   bool build_optix_bvh(const OptixBuildInput &build_input,
                        uint16_t num_motion_steps,
-                       OptixTraversableHandle &out_handle)
+                       OptixTraversableHandle &out_handle,
+                       CUdeviceptr &out_data,
+                       OptixBuildOperation operation)
   {
-    out_handle = 0;
-
     const CUDAContextScope scope(cuContext);
 
     // Compute memory usage
     OptixAccelBufferSizes sizes = {};
     OptixAccelBuildOptions options;
-    options.operation = OPTIX_BUILD_OPERATION_BUILD;
+    options.operation = operation;
     if (background) {
       // Prefer best performance and lowest memory consumption in background
       options.buildFlags = OPTIX_BUILD_FLAG_PREFER_FAST_TRACE | OPTIX_BUILD_FLAG_ALLOW_COMPACTION;
     }
     else {
       // Prefer fast updates in viewport
-      options.buildFlags = OPTIX_BUILD_FLAG_PREFER_FAST_BUILD;
+      options.buildFlags = OPTIX_BUILD_FLAG_PREFER_FAST_BUILD | OPTIX_BUILD_FLAG_ALLOW_UPDATE;
     }
 
     options.motionOptions.numKeys = num_motion_steps;
@@ -1136,8 +1120,10 @@ class OptiXDevice : public CUDADevice {
       move_textures_to_host(size - free, false);
     }
 
-    CUdeviceptr out_data = 0;
-    check_result_cuda_ret(cuMemAlloc(&out_data, sizes.outputSizeInBytes));
+    if (operation == OPTIX_BUILD_OPERATION_BUILD) {
+      check_result_cuda_ret(cuMemAlloc(&out_data, sizes.outputSizeInBytes));
+    }
+
     as_mem.push_back(out_data);
 
     // Finally build the acceleration structure
@@ -1204,23 +1190,49 @@ class OptiXDevice : public CUDADevice {
     unordered_map<Geometry *, OptixTraversableHandle> geometry;
     geometry.reserve(bvh->geometry.size());
 
-    // Free all previous acceleration structures
+    // Free all previous acceleration structures that cannot be refit
+    std::set<CUdeviceptr> refit_mem;
+
+    for (Geometry *geom : bvh->geometry) {
+      if (static_cast<BVHOptiX *>(geom->bvh)->do_refit) {
+        refit_mem.insert(static_cast<BVHOptiX *>(geom->bvh)->optix_data_handle);
+      }
+    }
+
     for (CUdeviceptr mem : as_mem) {
-      cuMemFree(mem);
+      if (refit_mem.find(mem) == refit_mem.end()) {
+        cuMemFree(mem);
+      }
     }
+
     as_mem.clear();
 
     // Build bottom level acceleration structures (BLAS)
     // Note: Always keep this logic in sync with bvh_optix.cpp!
     for (Object *ob : bvh->objects) {
       // Skip geometry for which acceleration structure already exists
-      Geometry *geom = ob->geometry;
+      Geometry *geom = ob->get_geometry();
       if (geometry.find(geom) != geometry.end())
         continue;
 
-      if (geom->type == Geometry::HAIR) {
+      OptixTraversableHandle handle;
+      OptixBuildOperation operation;
+      CUdeviceptr out_data;
+      // Refit is only possible in viewport for now.
+      if (static_cast<BVHOptiX *>(geom->bvh)->do_refit && !background) {
+        out_data = static_cast<BVHOptiX *>(geom->bvh)->optix_data_handle;
+        handle = static_cast<BVHOptiX *>(geom->bvh)->optix_handle;
+        operation = OPTIX_BUILD_OPERATION_UPDATE;
+      }
+      else {
+        out_data = 0;
+        handle = 0;
+        operation = OPTIX_BUILD_OPERATION_BUILD;
+      }
+
+      if (geom->geometry_type == Geometry::HAIR) {
         // Build BLAS for curve primitives
-        Hair *const hair = static_cast<Hair *const>(ob->geometry);
+        Hair *const hair = static_cast<Hair *const>(ob->get_geometry());
         if (hair->num_curves() == 0) {
           continue;
         }
@@ -1229,8 +1241,8 @@ class OptiXDevice : public CUDADevice {
 
         size_t num_motion_steps = 1;
         Attribute *motion_keys = hair->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
-        if (motion_blur && hair->use_motion_blur && motion_keys) {
-          num_motion_steps = hair->motion_steps;
+        if (motion_blur && hair->get_use_motion_blur() && motion_keys) {
+          num_motion_steps = hair->get_motion_steps();
         }
 
         device_vector<OptixAabb> aabb_data(this, "temp_aabb_data", MEM_READ_ONLY);
@@ -1250,16 +1262,19 @@ class OptiXDevice : public CUDADevice {
         // Get AABBs for each motion step
         for (size_t step = 0; step < num_motion_steps; ++step) {
           // The center step for motion vertices is not stored in the attribute
-          const float3 *keys = hair->curve_keys.data();
+          const float3 *keys = hair->get_curve_keys().data();
           size_t center_step = (num_motion_steps - 1) / 2;
           if (step != center_step) {
             size_t attr_offset = (step > center_step) ? step - 1 : step;
             // Technically this is a float4 array, but sizeof(float3) is the same as sizeof(float4)
-            keys = motion_keys->data_float3() + attr_offset * hair->curve_keys.size();
+            keys = motion_keys->data_float3() + attr_offset * hair->get_curve_keys().size();
           }
 
           for (size_t j = 0, i = 0; j < hair->num_curves(); ++j) {
             const Hair::Curve curve = hair->get_curve(j);
+#  if OPTIX_ABI_VERSION >= 36
+            const array<float> &curve_radius = hair->get_curve_radius();
+#  endif
 
             for (int segment = 0; segment < curve.num_segments(); ++segment, ++i) {
 #  if OPTIX_ABI_VERSION >= 36
@@ -1272,10 +1287,8 @@ class OptiXDevice : public CUDADevice {
                 const float4 px = make_float4(keys[ka].x, keys[k0].x, keys[k1].x, keys[kb].x);
                 const float4 py = make_float4(keys[ka].y, keys[k0].y, keys[k1].y, keys[kb].y);
                 const float4 pz = make_float4(keys[ka].z, keys[k0].z, keys[k1].z, keys[kb].z);
-                const float4 pw = make_float4(hair->curve_radius[ka],
-                                              hair->curve_radius[k0],
-                                              hair->curve_radius[k1],
-                                              hair->curve_radius[kb]);
+                const float4 pw = make_float4(
+                    curve_radius[ka], curve_radius[k0], curve_radius[k1], curve_radius[kb]);
 
                 // Convert Catmull-Rom data to Bezier spline
                 static const float4 cr2bsp0 = make_float4(+7, -4, +5, -2) / 6.f;
@@ -1298,7 +1311,7 @@ class OptiXDevice : public CUDADevice {
 #  endif
               {
                 BoundBox bounds = BoundBox::empty;
-                curve.bounds_grow(segment, keys, hair->curve_radius.data(), bounds);
+                curve.bounds_grow(segment, keys, hair->get_curve_radius().data(), bounds);
 
                 const size_t index = step * num_segments + i;
                 aabb_data[index].minX = bounds.min.x;
@@ -1381,37 +1394,41 @@ class OptiXDevice : public CUDADevice {
         }
 
         // Allocate memory for new BLAS and build it
-        OptixTraversableHandle handle;
-        if (build_optix_bvh(build_input, num_motion_steps, handle)) {
-          geometry.insert({ob->geometry, handle});
+        if (build_optix_bvh(build_input, num_motion_steps, handle, out_data, operation)) {
+          geometry.insert({ob->get_geometry(), handle});
+          static_cast<BVHOptiX *>(geom->bvh)->optix_data_handle = out_data;
+          static_cast<BVHOptiX *>(geom->bvh)->optix_handle = handle;
+          static_cast<BVHOptiX *>(geom->bvh)->do_refit = false;
         }
         else {
           return false;
         }
       }
-      else if (geom->type == Geometry::MESH || geom->type == Geometry::VOLUME) {
+      else if (geom->geometry_type == Geometry::MESH || geom->geometry_type == Geometry::VOLUME) {
         // Build BLAS for triangle primitives
-        Mesh *const mesh = static_cast<Mesh *const>(ob->geometry);
+        Mesh *const mesh = static_cast<Mesh *const>(ob->get_geometry());
         if (mesh->num_triangles() == 0) {
           continue;
         }
 
-        const size_t num_verts = mesh->verts.size();
+        const size_t num_verts = mesh->get_verts().size();
 
         size_t num_motion_steps = 1;
         Attribute *motion_keys = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
-        if (motion_blur && mesh->use_motion_blur && motion_keys) {
-          num_motion_steps = mesh->motion_steps;
+        if (motion_blur && mesh->get_use_motion_blur() && motion_keys) {
+          num_motion_steps = mesh->get_motion_steps();
         }
 
         device_vector<int> index_data(this, "temp_index_data", MEM_READ_ONLY);
-        index_data.alloc(mesh->triangles.size());
-        memcpy(index_data.data(), mesh->triangles.data(), mesh->triangles.size() * sizeof(int));
+        index_data.alloc(mesh->get_triangles().size());
+        memcpy(index_data.data(),
+               mesh->get_triangles().data(),
+               mesh->get_triangles().size() * sizeof(int));
         device_vector<float3> vertex_data(this, "temp_vertex_data", MEM_READ_ONLY);
         vertex_data.alloc(num_verts * num_motion_steps);
 
         for (size_t step = 0; step < num_motion_steps; ++step) {
-          const float3 *verts = mesh->verts.data();
+          const float3 *verts = mesh->get_verts().data();
 
           size_t center_step = (num_motion_steps - 1) / 2;
           // The center step for motion vertices is not stored in the attribute
@@ -1453,9 +1470,11 @@ class OptiXDevice : public CUDADevice {
         build_input.triangleArray.primitiveIndexOffset = mesh->optix_prim_offset;
 
         // Allocate memory for new BLAS and build it
-        OptixTraversableHandle handle;
-        if (build_optix_bvh(build_input, num_motion_steps, handle)) {
-          geometry.insert({ob->geometry, handle});
+        if (build_optix_bvh(build_input, num_motion_steps, handle, out_data, operation)) {
+          geometry.insert({ob->get_geometry(), handle});
+          static_cast<BVHOptiX *>(geom->bvh)->optix_data_handle = out_data;
+          static_cast<BVHOptiX *>(geom->bvh)->optix_handle = handle;
+          static_cast<BVHOptiX *>(geom->bvh)->do_refit = false;
         }
         else {
           return false;
@@ -1464,8 +1483,10 @@ class OptiXDevice : public CUDADevice {
     }
 
     // Fill instance descriptions
+#  if OPTIX_ABI_VERSION < 41
     device_vector<OptixAabb> aabbs(this, "tlas_aabbs", MEM_READ_ONLY);
     aabbs.alloc(bvh->objects.size());
+#  endif
     device_vector<OptixInstance> instances(this, "tlas_instances", MEM_READ_ONLY);
     instances.alloc(bvh->objects.size());
 
@@ -1475,12 +1496,13 @@ class OptiXDevice : public CUDADevice {
         continue;
 
       // Create separate instance for triangle/curve meshes of an object
-      auto handle_it = geometry.find(ob->geometry);
+      const auto handle_it = geometry.find(ob->get_geometry());
       if (handle_it == geometry.end()) {
         continue;
       }
       OptixTraversableHandle handle = handle_it->second;
 
+#  if OPTIX_ABI_VERSION < 41
       OptixAabb &aabb = aabbs[num_instances];
       aabb.minX = ob->bounds.min.x;
       aabb.minY = ob->bounds.min.y;
@@ -1488,6 +1510,7 @@ class OptiXDevice : public CUDADevice {
       aabb.maxX = ob->bounds.max.x;
       aabb.maxY = ob->bounds.max.y;
       aabb.maxZ = ob->bounds.max.z;
+#  endif
 
       OptixInstance &instance = instances[num_instances++];
       memset(&instance, 0, sizeof(instance));
@@ -1503,18 +1526,19 @@ class OptiXDevice : public CUDADevice {
       // Have to have at least one bit in the mask, or else instance would always be culled
       instance.visibilityMask = 1;
 
-      if (ob->geometry->has_volume) {
+      if (ob->get_geometry()->has_volume) {
         // Volumes have a special bit set in the visibility mask so a trace can mask only volumes
         instance.visibilityMask |= 2;
       }
 
-      if (ob->geometry->type == Geometry::HAIR) {
+      if (ob->get_geometry()->geometry_type == Geometry::HAIR) {
         // Same applies to curves (so they can be skipped in local trace calls)
         instance.visibilityMask |= 4;
 
 #  if OPTIX_ABI_VERSION >= 36
-        if (motion_blur && ob->geometry->has_motion_blur() && DebugFlags().optix.curves_api &&
-            static_cast<const Hair *>(ob->geometry)->curve_shape == CURVE_THICK) {
+        if (motion_blur && ob->get_geometry()->has_motion_blur() &&
+            DebugFlags().optix.curves_api &&
+            static_cast<const Hair *>(ob->get_geometry())->curve_shape == CURVE_THICK) {
           // Select between motion blur and non-motion blur built-in intersection module
           instance.sbtOffset = PG_HITD_MOTION - PG_HITD;
         }
@@ -1523,7 +1547,7 @@ class OptiXDevice : public CUDADevice {
 
       // Insert motion traversable if object has motion
       if (motion_blur && ob->use_motion()) {
-        size_t motion_keys = max(ob->motion.size(), 2) - 2;
+        size_t motion_keys = max(ob->get_motion().size(), 2) - 2;
         size_t motion_transform_size = sizeof(OptixSRTMotionTransform) +
                                        motion_keys * sizeof(OptixSRTData);
 
@@ -1537,16 +1561,17 @@ class OptiXDevice : public CUDADevice {
         OptixSRTMotionTransform &motion_transform = *reinterpret_cast<OptixSRTMotionTransform *>(
             new uint8_t[motion_transform_size]);
         motion_transform.child = handle;
-        motion_transform.motionOptions.numKeys = ob->motion.size();
+        motion_transform.motionOptions.numKeys = ob->get_motion().size();
         motion_transform.motionOptions.flags = OPTIX_MOTION_FLAG_NONE;
         motion_transform.motionOptions.timeBegin = 0.0f;
         motion_transform.motionOptions.timeEnd = 1.0f;
 
         OptixSRTData *const srt_data = motion_transform.srtData;
-        array<DecomposedTransform> decomp(ob->motion.size());
-        transform_motion_decompose(decomp.data(), ob->motion.data(), ob->motion.size());
+        array<DecomposedTransform> decomp(ob->get_motion().size());
+        transform_motion_decompose(
+            decomp.data(), ob->get_motion().data(), ob->get_motion().size());
 
-        for (size_t i = 0; i < ob->motion.size(); ++i) {
+        for (size_t i = 0; i < ob->get_motion().size(); ++i) {
           // Scale
           srt_data[i].sx = decomp[i].y.w;  // scale.x.x
           srt_data[i].sy = decomp[i].z.w;  // scale.y.y
@@ -1593,9 +1618,9 @@ class OptiXDevice : public CUDADevice {
       else {
         instance.traversableHandle = handle;
 
-        if (ob->geometry->is_instanced()) {
+        if (ob->get_geometry()->is_instanced()) {
           // Set transform matrix
-          memcpy(instance.transform, &ob->tfm, sizeof(instance.transform));
+          memcpy(instance.transform, &ob->get_tfm(), sizeof(instance.transform));
         }
         else {
           // Disable instance transform if geometry already has it applied to vertex data
@@ -1608,20 +1633,26 @@ class OptiXDevice : public CUDADevice {
     }
 
     // Upload instance descriptions
+#  if OPTIX_ABI_VERSION < 41
     aabbs.resize(num_instances);
     aabbs.copy_to_device();
+#  endif
     instances.resize(num_instances);
     instances.copy_to_device();
 
     // Build top-level acceleration structure (TLAS)
     OptixBuildInput build_input = {};
     build_input.type = OPTIX_BUILD_INPUT_TYPE_INSTANCES;
-    build_input.instanceArray.instances = instances.device_pointer;
-    build_input.instanceArray.numInstances = num_instances;
+#  if OPTIX_ABI_VERSION < 41  // Instance AABBs no longer need to be set since OptiX 7.2
     build_input.instanceArray.aabbs = aabbs.device_pointer;
     build_input.instanceArray.numAabbs = num_instances;
+#  endif
+    build_input.instanceArray.instances = instances.device_pointer;
+    build_input.instanceArray.numInstances = num_instances;
 
-    return build_optix_bvh(build_input, 0, tlas_handle);
+    CUdeviceptr out_data = 0;
+    tlas_handle = 0;
+    return build_optix_bvh(build_input, 0, tlas_handle, out_data, OPTIX_BUILD_OPERATION_BUILD);
   }
 
   void const_copy_to(const char *name, void *host, size_t size) override
@@ -1725,8 +1756,8 @@ bool device_optix_init()
   const OptixResult result = optixInit();
 
   if (result == OPTIX_ERROR_UNSUPPORTED_ABI_VERSION) {
-    VLOG(1) << "OptiX initialization failed because driver does not support ABI version "
-            << OPTIX_ABI_VERSION;
+    VLOG(1) << "OptiX initialization failed because the installed NVIDIA driver is too old. "
+               "Please update to the latest driver first!";
     return false;
   }
   else if (result != OPTIX_SUCCESS) {