diff options
author | Sergey Sharybin <sergey@blender.org> | 2021-10-05 16:52:49 +0300 |
---|---|---|
committer | Sergey Sharybin <sergey@blender.org> | 2021-10-05 17:09:31 +0300 |
commit | 9a0850c8c25ea0c28f6ac313f076fd6a8563d0b4 (patch) | |
tree | d21ef9ae4eaf6011d4ea20041980d5df0453091c /intern | |
parent | b1e6e63c22249edfb501a7579efa22810ea55aee (diff) |
Cycles: Fix wrong GPU state calculation
The state size is currently only used for logging, but it is better to fix
it so that it matches reality.
The issue was caused by decoupling the number of shadow intersections
and using a much higher number for the CPU. This caused the total state
size on the GPU to be logged as tens of gigabytes instead of hundreds of
megabytes.
Differential Revision: https://developer.blender.org/D12755
Diffstat (limited to 'intern')
-rw-r--r-- | intern/cycles/integrator/path_trace_work_gpu.cpp | 27 |
1 files changed, 26 insertions, 1 deletions
```diff
diff --git a/intern/cycles/integrator/path_trace_work_gpu.cpp b/intern/cycles/integrator/path_trace_work_gpu.cpp
index 17c49f244d2..7babc9d09fa 100644
--- a/intern/cycles/integrator/path_trace_work_gpu.cpp
+++ b/intern/cycles/integrator/path_trace_work_gpu.cpp
@@ -30,6 +30,31 @@
 
 CCL_NAMESPACE_BEGIN
 
+static size_t estimate_single_state_size()
+{
+  size_t state_size = 0;
+
+#define KERNEL_STRUCT_BEGIN(name) for (int array_index = 0;; array_index++) {
+#define KERNEL_STRUCT_MEMBER(parent_struct, type, name, feature) state_size += sizeof(type);
+#define KERNEL_STRUCT_ARRAY_MEMBER(parent_struct, type, name, feature) state_size += sizeof(type);
+#define KERNEL_STRUCT_END(name) \
+  break; \
+  }
+#define KERNEL_STRUCT_END_ARRAY(name, cpu_array_size, gpu_array_size) \
+  if (array_index == gpu_array_size - 1) { \
+    break; \
+  } \
+  }
+#include "kernel/integrator/integrator_state_template.h"
+#undef KERNEL_STRUCT_BEGIN
+#undef KERNEL_STRUCT_MEMBER
+#undef KERNEL_STRUCT_ARRAY_MEMBER
+#undef KERNEL_STRUCT_END
+#undef KERNEL_STRUCT_END_ARRAY
+
+  return state_size;
+}
+
 PathTraceWorkGPU::PathTraceWorkGPU(Device *device,
                                    Film *film,
                                    DeviceScene *device_scene,
@@ -47,7 +72,7 @@ PathTraceWorkGPU::PathTraceWorkGPU(Device *device,
       num_queued_paths_(device, "num_queued_paths", MEM_READ_WRITE),
       work_tiles_(device, "work_tiles", MEM_READ_WRITE),
       display_rgba_half_(device, "display buffer half", MEM_READ_WRITE),
-      max_num_paths_(queue_->num_concurrent_states(sizeof(IntegratorStateCPU))),
+      max_num_paths_(queue_->num_concurrent_states(estimate_single_state_size())),
       min_num_active_paths_(queue_->num_concurrent_busy_states()),
       max_active_path_index_(0)
 {
```