Cycles: add support for BVH refit in OptiX

This avoids recomputing the BVH for geometries whose topology is unchanged but whose vertices are modified (for example, a simple character animation), and gives up to a 40% speedup for BVH building. Refitting is only available for viewport renders at the moment.

Reviewed By: pmoursnv, brecht

Differential Revision: https://developer.blender.org/D9353
author: Kévin Dietrich <kevin.dietrich@mailoo.org> 2020-10-30 19:14:17 +0300
committer: Kévin Dietrich <kevin.dietrich@mailoo.org> 2020-11-03 20:05:29 +0300
commit: 57d1aea64f0c54be499aea0ae0a28109e212f321 (patch)
tree: 1a8428f4a61d28be0c75aa88c44184602fd186ea /intern
parent: 216880bb4733a98af8bb6525c237b71b9dff2062 (diff)
3 files changed, 57 insertions, 16 deletions
diff --git a/intern/cycles/bvh/bvh_optix.cpp b/intern/cycles/bvh/bvh_optix.cpp
index 0527c0eeda8..f2666688123 100644
--- a/intern/cycles/bvh/bvh_optix.cpp
+++ b/intern/cycles/bvh/bvh_optix.cpp
@@ -37,6 +37,9 @@ BVHOptiX::BVHOptiX(const BVHParams &params_,
                    const vector<Object *> &objects_)
     : BVH(params_, geometry_, objects_)
 {
+  optix_handle = 0;
+  optix_data_handle = 0;
+  do_refit = false;
 }
 
 BVHOptiX::~BVHOptiX()
@@ -216,8 +219,7 @@ void BVHOptiX::pack_nodes(const BVHNode *)
 
 void BVHOptiX::refit_nodes()
 {
-  // TODO(pmours): Implement?
-  VLOG(1) << "Refit is not yet implemented for OptiX BVH.";
+  do_refit = true;
 }
 
 BVHNode *BVHOptiX::widen_children_nodes(const BVHNode *)
diff --git a/intern/cycles/bvh/bvh_optix.h b/intern/cycles/bvh/bvh_optix.h
index e4745b093b5..663cba67260 100644
--- a/intern/cycles/bvh/bvh_optix.h
+++ b/intern/cycles/bvh/bvh_optix.h
@@ -33,6 +33,10 @@ class BVHOptiX : public BVH {
   friend class BVH;
 
  public:
+  uint64_t optix_handle;
+  uint64_t optix_data_handle;
+  bool do_refit;
+
   BVHOptiX(const BVHParams &params,
            const vector<Geometry *> &geometry,
            const vector<Object *> &objects);
diff --git a/intern/cycles/device/device_optix.cpp b/intern/cycles/device/device_optix.cpp
index 0d9c8dc7ce4..5558354d03c 100644
--- a/intern/cycles/device/device_optix.cpp
+++ b/intern/cycles/device/device_optix.cpp
@@ -18,6 +18,7 @@
 #ifdef WITH_OPTIX
 
 #  include "bvh/bvh.h"
+#  include "bvh/bvh_optix.h"
 #  include "device/cuda/device_cuda.h"
 #  include "device/device_denoising.h"
 #  include "device/device_intern.h"
@@ -1078,23 +1079,23 @@ class OptiXDevice : public CUDADevice {
 
   bool build_optix_bvh(const OptixBuildInput &build_input,
                        uint16_t num_motion_steps,
-                       OptixTraversableHandle &out_handle)
+                       OptixTraversableHandle &out_handle,
+                       CUdeviceptr &out_data,
+                       OptixBuildOperation operation)
   {
-    out_handle = 0;
-
     const CUDAContextScope scope(cuContext);
 
     // Compute memory usage
     OptixAccelBufferSizes sizes = {};
     OptixAccelBuildOptions options;
-    options.operation = OPTIX_BUILD_OPERATION_BUILD;
+    options.operation = operation;
     if (background) {
       // Prefer best performance and lowest memory consumption in background
       options.buildFlags = OPTIX_BUILD_FLAG_PREFER_FAST_TRACE | OPTIX_BUILD_FLAG_ALLOW_COMPACTION;
     }
     else {
       // Prefer fast updates in viewport
-      options.buildFlags = OPTIX_BUILD_FLAG_PREFER_FAST_BUILD;
+      options.buildFlags = OPTIX_BUILD_FLAG_PREFER_FAST_BUILD | OPTIX_BUILD_FLAG_ALLOW_UPDATE;
     }
 
     options.motionOptions.numKeys = num_motion_steps;
@@ -1119,8 +1120,10 @@ class OptiXDevice : public CUDADevice {
       move_textures_to_host(size - free, false);
     }
 
-    CUdeviceptr out_data = 0;
-    check_result_cuda_ret(cuMemAlloc(&out_data, sizes.outputSizeInBytes));
+    if (operation == OPTIX_BUILD_OPERATION_BUILD) {
+      check_result_cuda_ret(cuMemAlloc(&out_data, sizes.outputSizeInBytes));
+    }
+
     as_mem.push_back(out_data);
 
     // Finally build the acceleration structure
@@ -1187,10 +1190,21 @@ class OptiXDevice : public CUDADevice {
     unordered_map<Geometry *, OptixTraversableHandle> geometry;
     geometry.reserve(bvh->geometry.size());
 
-    // Free all previous acceleration structures
+    // Free all previous acceleration structures which can not be refit
+    std::set<CUdeviceptr> refit_mem;
+
+    for (Geometry *geom : bvh->geometry) {
+      if (static_cast<BVHOptiX *>(geom->bvh)->do_refit) {
+        refit_mem.insert(static_cast<BVHOptiX *>(geom->bvh)->optix_data_handle);
+      }
+    }
+
     for (CUdeviceptr mem : as_mem) {
-      cuMemFree(mem);
+      if (refit_mem.find(mem) == refit_mem.end()) {
+        cuMemFree(mem);
+      }
     }
+
     as_mem.clear();
 
     // Build bottom level acceleration structures (BLAS)
@@ -1201,6 +1215,21 @@ class OptiXDevice : public CUDADevice {
       if (geometry.find(geom) != geometry.end())
         continue;
 
+      OptixTraversableHandle handle;
+      OptixBuildOperation operation;
+      CUdeviceptr out_data;
+      // Refit is only possible in viewport for now.
+      if (static_cast<BVHOptiX *>(geom->bvh)->do_refit && !background) {
+        out_data = static_cast<BVHOptiX *>(geom->bvh)->optix_data_handle;
+        handle = static_cast<BVHOptiX *>(geom->bvh)->optix_handle;
+        operation = OPTIX_BUILD_OPERATION_UPDATE;
+      }
+      else {
+        out_data = 0;
+        handle = 0;
+        operation = OPTIX_BUILD_OPERATION_BUILD;
+      }
+
       if (geom->type == Geometry::HAIR) {
         // Build BLAS for curve primitives
         Hair *const hair = static_cast<Hair *const>(ob->geometry);
@@ -1364,9 +1393,11 @@ class OptiXDevice : public CUDADevice {
         }
 
         // Allocate memory for new BLAS and build it
-        OptixTraversableHandle handle;
-        if (build_optix_bvh(build_input, num_motion_steps, handle)) {
+        if (build_optix_bvh(build_input, num_motion_steps, handle, out_data, operation)) {
           geometry.insert({ob->geometry, handle});
+          static_cast<BVHOptiX *>(geom->bvh)->optix_data_handle = out_data;
+          static_cast<BVHOptiX *>(geom->bvh)->optix_handle = handle;
+          static_cast<BVHOptiX *>(geom->bvh)->do_refit = false;
         }
         else {
           return false;
@@ -1436,9 +1467,11 @@ class OptiXDevice : public CUDADevice {
         build_input.triangleArray.primitiveIndexOffset = mesh->optix_prim_offset;
 
         // Allocate memory for new BLAS and build it
-        OptixTraversableHandle handle;
-        if (build_optix_bvh(build_input, num_motion_steps, handle)) {
+        if (build_optix_bvh(build_input, num_motion_steps, handle, out_data, operation)) {
           geometry.insert({ob->geometry, handle});
+          static_cast<BVHOptiX *>(geom->bvh)->optix_data_handle = out_data;
+          static_cast<BVHOptiX *>(geom->bvh)->optix_handle = handle;
+          static_cast<BVHOptiX *>(geom->bvh)->do_refit = false;
         }
         else {
           return false;
@@ -1612,7 +1645,9 @@ class OptiXDevice : public CUDADevice {
     build_input.instanceArray.instances = instances.device_pointer;
     build_input.instanceArray.numInstances = num_instances;
 
-    return build_optix_bvh(build_input, 0, tlas_handle);
+    CUdeviceptr out_data = 0;
+    tlas_handle = 0;
+    return build_optix_bvh(build_input, 0, tlas_handle, out_data, OPTIX_BUILD_OPERATION_BUILD);
   }
 
   void const_copy_to(const char *name, void *host, size_t size) override
author	Kévin Dietrich <kevin.dietrich@mailoo.org>	2020-10-30 19:14:17 +0300
committer	Kévin Dietrich <kevin.dietrich@mailoo.org>	2020-11-03 20:05:29 +0300
commit	57d1aea64f0c54be499aea0ae0a28109e212f321 (patch)
tree	1a8428f4a61d28be0c75aa88c44184602fd186ea /intern
parent	216880bb4733a98af8bb6525c237b71b9dff2062 (diff)