Cycles: Reintroduce device-only memory handling that got lost in Cycles X merge

Somehow only a part of rBf4f8b6dde32b0438e0b97a6d8ebeb89802987127 ended up in Cycles X, causing the issue that commit fixed, "OPTIX_ERROR_INVALID_VALUE" when the system is out of memory, to show up again. This adds the missing changes to fix that problem. Maniphest Tasks: T93620 Differential Revision: https://developer.blender.org/D13488
author: Patrick Mours <pmours@nvidia.com> 2021-12-06 16:58:35 +0300
committer: Patrick Mours <pmours@nvidia.com> 2021-12-07 20:50:10 +0300
commit: e14f8c2dd765a5f20d652899434174daa039804b (patch)
tree: 3e37b27e8100a353c9dfecbe43bebb840bd3a791 /intern/cycles/device/optix/device_impl.cpp
parent: a37dac0a884ff9acd36eb887e8cd36df501c337b (diff)
1 files changed, 18 insertions, 7 deletions
diff --git a/intern/cycles/device/optix/device_impl.cpp b/intern/cycles/device/optix/device_impl.cpp
index a0c748fb6cd..da3c1ac57d1 100644
--- a/intern/cycles/device/optix/device_impl.cpp
+++ b/intern/cycles/device/optix/device_impl.cpp
@@ -46,14 +46,14 @@
 CCL_NAMESPACE_BEGIN
 
 OptiXDevice::Denoiser::Denoiser(OptiXDevice *device)
-    : device(device), queue(device), state(device, "__denoiser_state")
+    : device(device), queue(device), state(device, "__denoiser_state", true)
 {
 }
 
 OptiXDevice::OptiXDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
     : CUDADevice(info, stats, profiler),
       sbt_data(this, "__sbt", MEM_READ_ONLY),
-      launch_params(this, "__params"),
+      launch_params(this, "__params", false),
       denoiser_(this)
 {
   /* Make the CUDA context current. */
@@ -523,7 +523,7 @@ class OptiXDevice::DenoiseContext {
       : denoise_params(task.params),
         render_buffers(task.render_buffers),
         buffer_params(task.buffer_params),
-        guiding_buffer(device, "denoiser guiding passes buffer"),
+        guiding_buffer(device, "denoiser guiding passes buffer", true),
         num_samples(task.num_samples)
   {
     num_input_passes = 1;
@@ -1015,6 +1015,13 @@ bool OptiXDevice::build_optix_bvh(BVHOptiX *bvh,
                                   const OptixBuildInput &build_input,
                                   uint16_t num_motion_steps)
 {
+  /* Allocate and build acceleration structures only one at a time, to prevent parallel builds
+   * from running out of memory (since both original and compacted acceleration structure memory
+   * may be allocated at the same time for the duration of this function). The builds would
+   * otherwise happen on the same CUDA stream anyway. */
+  static thread_mutex mutex;
+  thread_scoped_lock lock(mutex);
+
   const CUDAContextScope scope(this);
 
   const bool use_fast_trace_bvh = (bvh->params.bvh_type == BVH_TYPE_STATIC);
@@ -1040,13 +1047,14 @@ bool OptiXDevice::build_optix_bvh(BVHOptiX *bvh,
   optix_assert(optixAccelComputeMemoryUsage(context, &options, &build_input, 1, &sizes));
 
   /* Allocate required output buffers. */
-  device_only_memory<char> temp_mem(this, "optix temp as build mem");
+  device_only_memory<char> temp_mem(this, "optix temp as build mem", true);
   temp_mem.alloc_to_device(align_up(sizes.tempSizeInBytes, 8) + 8);
   if (!temp_mem.device_pointer) {
     /* Make sure temporary memory allocation succeeded. */
     return false;
   }
 
+  /* Acceleration structure memory has to be allocated on the device (not allowed on the host). */
   device_only_memory<char> &out_data = *bvh->as_data;
   if (operation == OPTIX_BUILD_OPERATION_BUILD) {
     assert(out_data.device == this);
@@ -1095,12 +1103,13 @@ bool OptiXDevice::build_optix_bvh(BVHOptiX *bvh,
 
     /* There is no point compacting if the size does not change. */
     if (compacted_size < sizes.outputSizeInBytes) {
-      device_only_memory<char> compacted_data(this, "optix compacted as");
+      device_only_memory<char> compacted_data(this, "optix compacted as", false);
       compacted_data.alloc_to_device(compacted_size);
-      if (!compacted_data.device_pointer)
+      if (!compacted_data.device_pointer) {
         /* Do not compact if memory allocation for compacted acceleration structure fails.
          * Can just use the uncompacted one then, so succeed here regardless. */
         return !have_error();
+      }
 
       optix_assert(optixAccelCompact(
           context, NULL, out_handle, compacted_data.device_pointer, compacted_size, &out_handle));
@@ -1111,6 +1120,8 @@ bool OptiXDevice::build_optix_bvh(BVHOptiX *bvh,
 
       std::swap(out_data.device_size, compacted_data.device_size);
       std::swap(out_data.device_pointer, compacted_data.device_pointer);
+      /* Original acceleration structure memory is freed when 'compacted_data' goes out of scope.
+       */
     }
   }
 
@@ -1208,7 +1219,7 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
               const float4 pw = make_float4(
                   curve_radius[ka], curve_radius[k0], curve_radius[k1], curve_radius[kb]);
 
-              /* Convert Catmull-Rom data to Bezier spline. */
+              /* Convert Catmull-Rom data to B-spline. */
               static const float4 cr2bsp0 = make_float4(+7, -4, +5, -2) / 6.f;
               static const float4 cr2bsp1 = make_float4(-2, 11, -4, +1) / 6.f;
               static const float4 cr2bsp2 = make_float4(+1, -4, 11, -2) / 6.f;
author	Patrick Mours <pmours@nvidia.com>	2021-12-06 16:58:35 +0300
committer	Patrick Mours <pmours@nvidia.com>	2021-12-07 20:50:10 +0300
commit	e14f8c2dd765a5f20d652899434174daa039804b (patch)
tree	3e37b27e8100a353c9dfecbe43bebb840bd3a791 /intern/cycles/device/optix/device_impl.cpp
parent	a37dac0a884ff9acd36eb887e8cd36df501c337b (diff)