Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSergey Sharybin <sergey@blender.org>2021-10-05 16:05:12 +0300
committerSergey Sharybin <sergey@blender.org>2021-10-06 16:46:32 +0300
commitc6275da852eab77e2cea1ae601a43a2dbaad6c27 (patch)
treeb1b810367a23465e6b9188d9f862599bad07c3b3 /intern/cycles/integrator
parente41dddd29a17a77e60bde6a2336fcd3937819bec (diff)
Fix T91922: Cycles artifacts with high volume nested level
Make the volume stack conditionally allocated, potentially based on the actual nesting level of objects in the scene. Currently the nesting level is estimated from the number of volume objects. This is an inexpensive check which is probably enough in practice to get almost perfect memory usage and performance. The conditional allocation is a bit tricky. For the CPU we declare and define the maximum possible volume stack, because there are only that many integrator states on the CPU. On the GPU we declare the outer SoA to have all volume stack elements, but only allocate the ones actually needed. The actually used volume stack size is passed as a pre-processor define, which seems to be the easiest and fastest approach for the GPU state copy. There seems to be no speed regression in the demo files on an RTX 6000. Note that scenes with a high nesting level of volumes will now be slower but correct. Differential Revision: https://developer.blender.org/D12759
Diffstat (limited to 'intern/cycles/integrator')
-rw-r--r--intern/cycles/integrator/path_trace_work_gpu.cpp18
1 file changed, 16 insertions, 2 deletions
diff --git a/intern/cycles/integrator/path_trace_work_gpu.cpp b/intern/cycles/integrator/path_trace_work_gpu.cpp
index c29b0fb039e..8af8f9a02e2 100644
--- a/intern/cycles/integrator/path_trace_work_gpu.cpp
+++ b/intern/cycles/integrator/path_trace_work_gpu.cpp
@@ -23,6 +23,7 @@
#include "render/buffers.h"
#include "render/scene.h"
#include "util/util_logging.h"
+#include "util/util_string.h"
#include "util/util_tbb.h"
#include "util/util_time.h"
@@ -30,7 +31,7 @@
CCL_NAMESPACE_BEGIN
-static size_t estimate_single_state_size()
+static size_t estimate_single_state_size(DeviceScene *device_scene)
{
size_t state_size = 0;
@@ -45,12 +46,14 @@ static size_t estimate_single_state_size()
break; \
} \
}
+#define KERNEL_STRUCT_VOLUME_STACK_SIZE (device_scene->data.volume_stack_size)
#include "kernel/integrator/integrator_state_template.h"
#undef KERNEL_STRUCT_BEGIN
#undef KERNEL_STRUCT_MEMBER
#undef KERNEL_STRUCT_ARRAY_MEMBER
#undef KERNEL_STRUCT_END
#undef KERNEL_STRUCT_END_ARRAY
+#undef KERNEL_STRUCT_VOLUME_STACK_SIZE
return state_size;
}
@@ -72,7 +75,7 @@ PathTraceWorkGPU::PathTraceWorkGPU(Device *device,
num_queued_paths_(device, "num_queued_paths", MEM_READ_WRITE),
work_tiles_(device, "work_tiles", MEM_READ_WRITE),
display_rgba_half_(device, "display buffer half", MEM_READ_WRITE),
- max_num_paths_(queue_->num_concurrent_states(estimate_single_state_size())),
+ max_num_paths_(queue_->num_concurrent_states(estimate_single_state_size(device_scene))),
min_num_active_paths_(queue_->num_concurrent_busy_states()),
max_active_path_index_(0)
{
@@ -125,12 +128,23 @@ void PathTraceWorkGPU::alloc_integrator_soa()
break; \
} \
}
+#define KERNEL_STRUCT_VOLUME_STACK_SIZE (device_scene_->data.volume_stack_size)
#include "kernel/integrator/integrator_state_template.h"
#undef KERNEL_STRUCT_BEGIN
#undef KERNEL_STRUCT_MEMBER
#undef KERNEL_STRUCT_ARRAY_MEMBER
#undef KERNEL_STRUCT_END
#undef KERNEL_STRUCT_END_ARRAY
+#undef KERNEL_STRUCT_VOLUME_STACK_SIZE
+
+ if (VLOG_IS_ON(3)) {
+ size_t total_soa_size = 0;
+ for (auto &&soa_memory : integrator_state_soa_) {
+ total_soa_size += soa_memory->memory_size();
+ }
+
+ VLOG(3) << "GPU SoA state size: " << string_human_readable_size(total_soa_size);
+ }
}
void PathTraceWorkGPU::alloc_integrator_queue()