diff options
author | Brecht Van Lommel <brecht> | 2021-10-21 16:14:30 +0300 |
---|---|---|
committer | Brecht Van Lommel <brecht@blender.org> | 2021-10-21 16:38:03 +0300 |
commit | df004637643241136a3294a63c7d4ca865cdea98 (patch) | |
tree | cfd103da7148e930b2b6bfdf1f4848824b3e6d64 /intern/cycles/device | |
parent | fd560ef2af6aef06e6dad00854bfdd3fd81a8d6f (diff) |
Cycles: add shadow path compaction for GPU rendering
Similar to main path compaction that happens before adding work tiles, this
compacts shadow paths before launching kernels that may add shadow paths.
Compaction is only done when more than 50% of the space is wasted.
It's not a clear win in all scenes; some are up to 1.5% slower. This is likely
caused by the different order of scheduling kernels having an unpredictable
performance impact. Still, compaction feels like the right thing to do to avoid
cases where a few shadow paths can hold up a lot of main paths.
Differential Revision: https://developer.blender.org/D12944
Diffstat (limited to 'intern/cycles/device')
-rw-r--r-- | intern/cycles/device/cuda/queue.cpp | 2 | ||||
-rw-r--r-- | intern/cycles/device/device_kernel.cpp | 6 | ||||
-rw-r--r-- | intern/cycles/device/hip/queue.cpp | 2 |
3 files changed, 10 insertions, 0 deletions
diff --git a/intern/cycles/device/cuda/queue.cpp b/intern/cycles/device/cuda/queue.cpp index 6b2c9a40082..09352a84181 100644 --- a/intern/cycles/device/cuda/queue.cpp +++ b/intern/cycles/device/cuda/queue.cpp @@ -113,6 +113,8 @@ bool CUDADeviceQueue::enqueue(DeviceKernel kernel, const int work_size, void *ar case DEVICE_KERNEL_INTEGRATOR_TERMINATED_PATHS_ARRAY: case DEVICE_KERNEL_INTEGRATOR_SORTED_PATHS_ARRAY: case DEVICE_KERNEL_INTEGRATOR_COMPACT_PATHS_ARRAY: + case DEVICE_KERNEL_INTEGRATOR_TERMINATED_SHADOW_PATHS_ARRAY: + case DEVICE_KERNEL_INTEGRATOR_COMPACT_SHADOW_PATHS_ARRAY: /* See parall_active_index.h for why this amount of shared memory is needed. */ shared_mem_bytes = (num_threads_per_block + 1) * sizeof(int); break; diff --git a/intern/cycles/device/device_kernel.cpp b/intern/cycles/device/device_kernel.cpp index e0833331b77..1e282aac57e 100644 --- a/intern/cycles/device/device_kernel.cpp +++ b/intern/cycles/device/device_kernel.cpp @@ -64,6 +64,12 @@ const char *device_kernel_as_string(DeviceKernel kernel) return "integrator_compact_paths_array"; case DEVICE_KERNEL_INTEGRATOR_COMPACT_STATES: return "integrator_compact_states"; + case DEVICE_KERNEL_INTEGRATOR_TERMINATED_SHADOW_PATHS_ARRAY: + return "integrator_terminated_shadow_paths_array"; + case DEVICE_KERNEL_INTEGRATOR_COMPACT_SHADOW_PATHS_ARRAY: + return "integrator_compact_shadow_paths_array"; + case DEVICE_KERNEL_INTEGRATOR_COMPACT_SHADOW_STATES: + return "integrator_compact_shadow_states"; case DEVICE_KERNEL_INTEGRATOR_RESET: return "integrator_reset"; case DEVICE_KERNEL_INTEGRATOR_SHADOW_CATCHER_COUNT_POSSIBLE_SPLITS: diff --git a/intern/cycles/device/hip/queue.cpp b/intern/cycles/device/hip/queue.cpp index a612f59fb32..0f053ccbeb5 100644 --- a/intern/cycles/device/hip/queue.cpp +++ b/intern/cycles/device/hip/queue.cpp @@ -113,6 +113,8 @@ bool HIPDeviceQueue::enqueue(DeviceKernel kernel, const int work_size, void *arg case DEVICE_KERNEL_INTEGRATOR_TERMINATED_PATHS_ARRAY: case 
DEVICE_KERNEL_INTEGRATOR_SORTED_PATHS_ARRAY: case DEVICE_KERNEL_INTEGRATOR_COMPACT_PATHS_ARRAY: + case DEVICE_KERNEL_INTEGRATOR_TERMINATED_SHADOW_PATHS_ARRAY: + case DEVICE_KERNEL_INTEGRATOR_COMPACT_SHADOW_PATHS_ARRAY: /* See parall_active_index.h for why this amount of shared memory is needed. */ shared_mem_bytes = (num_threads_per_block + 1) * sizeof(int); break; |