From 4d4113adc2623c50888b63eaca3a055d8cdf3045 Mon Sep 17 00:00:00 2001 From: Brecht Van Lommel Date: Wed, 29 Sep 2021 14:53:47 +0200 Subject: Cycles: record large number of transparent shadow intersections on CPU. This way the CPU needs fewer intersection calls; only on the GPU do we need to save memory and do this in small steps. Ref T87836 --- intern/cycles/integrator/path_trace_work_gpu.cpp | 4 ++-- intern/cycles/kernel/integrator/integrator_state.h | 18 +++++++++++++----- .../kernel/integrator/integrator_state_template.h | 10 +++++++--- .../cycles/kernel/integrator/integrator_state_util.h | 11 ++++++++--- 4 files changed, 30 insertions(+), 13 deletions(-) (limited to 'intern') diff --git a/intern/cycles/integrator/path_trace_work_gpu.cpp b/intern/cycles/integrator/path_trace_work_gpu.cpp index 450e8aaac04..e41d8d1d252 100644 --- a/intern/cycles/integrator/path_trace_work_gpu.cpp +++ b/intern/cycles/integrator/path_trace_work_gpu.cpp @@ -95,8 +95,8 @@ void PathTraceWorkGPU::alloc_integrator_soa() #define KERNEL_STRUCT_END(name) \ break; \ } -#define KERNEL_STRUCT_END_ARRAY(name, array_size) \ - if (array_index == array_size - 1) { \ +#define KERNEL_STRUCT_END_ARRAY(name, cpu_array_size, gpu_array_size) \ + if (array_index == gpu_array_size - 1) { \ break; \ } \ } diff --git a/intern/cycles/kernel/integrator/integrator_state.h b/intern/cycles/kernel/integrator/integrator_state.h index 094446be02c..f745ad3f4b9 100644 --- a/intern/cycles/kernel/integrator/integrator_state.h +++ b/intern/cycles/kernel/integrator/integrator_state.h @@ -60,7 +60,15 @@ CCL_NAMESPACE_BEGIN * TODO: these could be made dynamic depending on the features used in the scene. 
*/ #define INTEGRATOR_VOLUME_STACK_SIZE VOLUME_STACK_SIZE -#define INTEGRATOR_SHADOW_ISECT_SIZE 4 + +#define INTEGRATOR_SHADOW_ISECT_SIZE_CPU 1024 +#define INTEGRATOR_SHADOW_ISECT_SIZE_GPU 4 + +#ifdef __KERNEL_CPU__ +# define INTEGRATOR_SHADOW_ISECT_SIZE INTEGRATOR_SHADOW_ISECT_SIZE_CPU +#else +# define INTEGRATOR_SHADOW_ISECT_SIZE INTEGRATOR_SHADOW_ISECT_SIZE_GPU +#endif /* Data structures */ @@ -74,9 +82,9 @@ typedef struct IntegratorStateCPU { #define KERNEL_STRUCT_END(name) \ } \ name; -#define KERNEL_STRUCT_END_ARRAY(name, size) \ +#define KERNEL_STRUCT_END_ARRAY(name, cpu_size, gpu_size) \ } \ - name[size]; + name[cpu_size]; #include "kernel/integrator/integrator_state_template.h" #undef KERNEL_STRUCT_BEGIN #undef KERNEL_STRUCT_MEMBER @@ -103,9 +111,9 @@ typedef struct IntegratorStateGPU { #define KERNEL_STRUCT_END(name) \ } \ name; -#define KERNEL_STRUCT_END_ARRAY(name, size) \ +#define KERNEL_STRUCT_END_ARRAY(name, cpu_size, gpu_size) \ } \ - name[size]; + name[gpu_size]; #include "kernel/integrator/integrator_state_template.h" #undef KERNEL_STRUCT_BEGIN #undef KERNEL_STRUCT_MEMBER diff --git a/intern/cycles/kernel/integrator/integrator_state_template.h b/intern/cycles/kernel/integrator/integrator_state_template.h index 41dd1bfcdbf..0d8126c64aa 100644 --- a/intern/cycles/kernel/integrator/integrator_state_template.h +++ b/intern/cycles/kernel/integrator/integrator_state_template.h @@ -107,7 +107,7 @@ KERNEL_STRUCT_END(subsurface) KERNEL_STRUCT_BEGIN(volume_stack) KERNEL_STRUCT_ARRAY_MEMBER(volume_stack, int, object, KERNEL_FEATURE_VOLUME) KERNEL_STRUCT_ARRAY_MEMBER(volume_stack, int, shader, KERNEL_FEATURE_VOLUME) -KERNEL_STRUCT_END_ARRAY(volume_stack, INTEGRATOR_VOLUME_STACK_SIZE) +KERNEL_STRUCT_END_ARRAY(volume_stack, INTEGRATOR_VOLUME_STACK_SIZE, INTEGRATOR_VOLUME_STACK_SIZE) /********************************* Shadow Path State **************************/ @@ -153,11 +153,15 @@ KERNEL_STRUCT_ARRAY_MEMBER(shadow_isect, int, object, 
KERNEL_FEATURE_PATH_TRACIN KERNEL_STRUCT_ARRAY_MEMBER(shadow_isect, int, type, KERNEL_FEATURE_PATH_TRACING) /* TODO: exclude for GPU. */ KERNEL_STRUCT_ARRAY_MEMBER(shadow_isect, float3, Ng, KERNEL_FEATURE_PATH_TRACING) -KERNEL_STRUCT_END_ARRAY(shadow_isect, INTEGRATOR_SHADOW_ISECT_SIZE) +KERNEL_STRUCT_END_ARRAY(shadow_isect, + INTEGRATOR_SHADOW_ISECT_SIZE_CPU, + INTEGRATOR_SHADOW_ISECT_SIZE_GPU) /**************************** Shadow Volume Stack *****************************/ KERNEL_STRUCT_BEGIN(shadow_volume_stack) KERNEL_STRUCT_ARRAY_MEMBER(shadow_volume_stack, int, object, KERNEL_FEATURE_VOLUME) KERNEL_STRUCT_ARRAY_MEMBER(shadow_volume_stack, int, shader, KERNEL_FEATURE_VOLUME) -KERNEL_STRUCT_END_ARRAY(shadow_volume_stack, INTEGRATOR_VOLUME_STACK_SIZE) +KERNEL_STRUCT_END_ARRAY(shadow_volume_stack, + INTEGRATOR_VOLUME_STACK_SIZE, + INTEGRATOR_VOLUME_STACK_SIZE) diff --git a/intern/cycles/kernel/integrator/integrator_state_util.h b/intern/cycles/kernel/integrator/integrator_state_util.h index cdf412fe22f..08d6cb00114 100644 --- a/intern/cycles/kernel/integrator/integrator_state_util.h +++ b/intern/cycles/kernel/integrator/integrator_state_util.h @@ -217,10 +217,10 @@ ccl_device_inline void integrator_state_copy_only(const IntegratorState to_state while (false) \ ; -# define KERNEL_STRUCT_END_ARRAY(name, array_size) \ +# define KERNEL_STRUCT_END_ARRAY(name, cpu_array_size, gpu_array_size) \ ++index; \ } \ - while (index < array_size) \ + while (index < gpu_array_size) \ ; # include "kernel/integrator/integrator_state_template.h" @@ -264,7 +264,12 @@ ccl_device_inline void integrator_state_shadow_catcher_split(INTEGRATOR_STATE_AR IntegratorStateCPU *ccl_restrict split_state = state + 1; - *split_state = *state; + /* Only copy the required subset, since shadow intersections are big and irrelevant here. 
*/ + split_state->path = state->path; + split_state->ray = state->ray; + split_state->isect = state->isect; + memcpy(split_state->volume_stack, state->volume_stack, sizeof(state->volume_stack)); + split_state->shadow_path = state->shadow_path; split_state->path.flag |= PATH_RAY_SHADOW_CATCHER_PASS; #endif -- cgit v1.2.3