From 9a0850c8c25ea0c28f6ac313f076fd6a8563d0b4 Mon Sep 17 00:00:00 2001
From: Sergey Sharybin <sergey@blender.org>
Date: Tue, 5 Oct 2021 15:52:49 +0200
Subject: Cycles: Fix wrong GPU state calculation

Currently was only used for logging, but better to fix the size so
that it matches reality.

The issue was caused by decoupling number of shadow intersections
and using much higher number for CPU. This caused the total state
on GPU to be logged as 10s of gigabytes instead of 100s of megabytes.

Differential Revision: https://developer.blender.org/D12755
---
 intern/cycles/integrator/path_trace_work_gpu.cpp | 27 +++++++++++++++++++++++-
 1 file changed, 26 insertions(+), 1 deletion(-)

(limited to 'intern')

diff --git a/intern/cycles/integrator/path_trace_work_gpu.cpp b/intern/cycles/integrator/path_trace_work_gpu.cpp
index 17c49f244d2..7babc9d09fa 100644
--- a/intern/cycles/integrator/path_trace_work_gpu.cpp
+++ b/intern/cycles/integrator/path_trace_work_gpu.cpp
@@ -30,6 +30,31 @@
 
 CCL_NAMESPACE_BEGIN
 
+static size_t estimate_single_state_size()
+{
+  size_t state_size = 0;
+
+#define KERNEL_STRUCT_BEGIN(name) for (int array_index = 0;; array_index++) {
+#define KERNEL_STRUCT_MEMBER(parent_struct, type, name, feature) state_size += sizeof(type);
+#define KERNEL_STRUCT_ARRAY_MEMBER(parent_struct, type, name, feature) state_size += sizeof(type);
+#define KERNEL_STRUCT_END(name) \
+  break; \
+  }
+#define KERNEL_STRUCT_END_ARRAY(name, cpu_array_size, gpu_array_size) \
+  if (array_index == gpu_array_size - 1) { \
+    break; \
+  } \
+  }
+#include "kernel/integrator/integrator_state_template.h"
+#undef KERNEL_STRUCT_BEGIN
+#undef KERNEL_STRUCT_MEMBER
+#undef KERNEL_STRUCT_ARRAY_MEMBER
+#undef KERNEL_STRUCT_END
+#undef KERNEL_STRUCT_END_ARRAY
+
+  return state_size;
+}
+
 PathTraceWorkGPU::PathTraceWorkGPU(Device *device,
                                    Film *film,
                                    DeviceScene *device_scene,
@@ -47,7 +72,7 @@ PathTraceWorkGPU::PathTraceWorkGPU(Device *device,
       num_queued_paths_(device, "num_queued_paths", MEM_READ_WRITE),
       work_tiles_(device, "work_tiles", MEM_READ_WRITE),
       display_rgba_half_(device, "display buffer half", MEM_READ_WRITE),
-      max_num_paths_(queue_->num_concurrent_states(sizeof(IntegratorStateCPU))),
+      max_num_paths_(queue_->num_concurrent_states(estimate_single_state_size())),
       min_num_active_paths_(queue_->num_concurrent_busy_states()),
       max_active_path_index_(0)
 {
-- 
cgit v1.2.3