git.blender.org/blender.git
author     Brecht Van Lommel <brecht>              2021-10-21 16:14:30 +0300
committer  Brecht Van Lommel <brecht@blender.org>  2021-10-21 16:38:03 +0300
commit     df004637643241136a3294a63c7d4ca865cdea98
tree       cfd103da7148e930b2b6bfdf1f4848824b3e6d64  /intern/cycles/kernel/device
parent     fd560ef2af6aef06e6dad00854bfdd3fd81a8d6f
Cycles: add shadow path compaction for GPU rendering
Similar to the main path compaction that happens before adding work tiles, this compacts shadow paths before launching kernels that may add shadow paths.

Only do it when more than 50% of the space is wasted. It's not a clear win in all scenes; some are up to 1.5% slower, likely because the different kernel scheduling order has an unpredictable performance impact. Still, compaction feels like the right thing to do to avoid cases where a few shadow paths can hold up a lot of main paths.

Differential Revision: https://developer.blender.org/D12944
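As a note on the 50% threshold mentioned above, a minimal host-side sketch of that kind of heuristic is below. The function and variable names (`shadow_compaction_worthwhile`, `num_active_shadow_paths`, `max_used_shadow_state`) are illustrative assumptions, not the actual Cycles host API; the real orchestration is not part of this diff.

/* Illustrative only: decide whether shadow-path compaction is worth scheduling.
 * Compaction pays off when more than 50% of the occupied shadow-state range is
 * wasted on terminated paths, per the commit message. */
static bool shadow_compaction_worthwhile(const int num_active_shadow_paths,
                                         const int max_used_shadow_state)
{
  /* The highest state index still in use defines the occupied range. */
  const int occupied = max_used_shadow_state + 1;
  const int wasted = occupied - num_active_shadow_paths;
  return wasted > occupied / 2;
}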
Diffstat (limited to 'intern/cycles/kernel/device')
-rw-r--r--  intern/cycles/kernel/device/gpu/kernel.h | 41
1 file changed, 41 insertions(+), 0 deletions(-)
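As a rough CPU-side reference for what the three new device kernels below do together: indices of terminated (free) slots are gathered from the front region, indices of live shadow paths found past `num_active_paths` are gathered separately, and each live path is then moved into a free front slot, mirroring the pairwise moves done by `kernel_gpu_integrator_compact_shadow_states`. The sketch below is a hypothetical reference, not the Cycles host code; `ShadowState` and `compact_shadow_states_reference` are made-up names, and only the `queued_kernel` field tested by the kernels is modeled.

#include <vector>

/* Hypothetical stand-in for a per-path shadow state. */
struct ShadowState {
  int queued_kernel; /* 0 means the shadow path has terminated. */
};

/* CPU reference of the compaction sequence. num_active_paths is assumed to be
 * the total number of live shadow paths, so every live path found at an index
 * >= num_active_paths has a matching free slot below num_active_paths. */
static void compact_shadow_states_reference(std::vector<ShadowState> &states,
                                            const int num_active_paths)
{
  std::vector<int> terminated_indices; /* free slots in [0, num_active_paths) */
  std::vector<int> active_indices;     /* live paths at state >= num_active_paths */

  for (int state = 0; state < num_active_paths; state++) {
    if (states[state].queued_kernel == 0) {
      terminated_indices.push_back(state);
    }
  }
  for (int state = num_active_paths; state < (int)states.size(); state++) {
    if (states[state].queued_kernel != 0) {
      active_indices.push_back(state);
    }
  }

  /* Pairwise moves, one per work item in the GPU version. */
  const int work_size = (int)active_indices.size();
  for (int i = 0; i < work_size; i++) {
    const int from_state = active_indices[i];
    const int to_state = terminated_indices[i];
    states[to_state] = states[from_state];
    states[from_state].queued_kernel = 0; /* source slot is now free */
  }
}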
diff --git a/intern/cycles/kernel/device/gpu/kernel.h b/intern/cycles/kernel/device/gpu/kernel.h
index fcb398f7e6d..eeac09d4b29 100644
--- a/intern/cycles/kernel/device/gpu/kernel.h
+++ b/intern/cycles/kernel/device/gpu/kernel.h
@@ -281,6 +281,18 @@ extern "C" __global__ void __launch_bounds__(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE)
       });
 }
 
+extern "C" __global__ void __launch_bounds__(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE)
+    kernel_gpu_integrator_terminated_shadow_paths_array(int num_states,
+                                                         int *indices,
+                                                         int *num_indices,
+                                                         int indices_offset)
+{
+  gpu_parallel_active_index_array<GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE>(
+      num_states, indices + indices_offset, num_indices, [](const int state) {
+        return (INTEGRATOR_STATE(state, shadow_path, queued_kernel) == 0);
+      });
+}
+
 extern "C" __global__ void __launch_bounds__(GPU_PARALLEL_SORTED_INDEX_DEFAULT_BLOCK_SIZE)
     kernel_gpu_integrator_sorted_paths_array(int num_states,
                                              int num_states_limit,
@@ -332,6 +344,35 @@ extern "C" __global__ void __launch_bounds__(GPU_PARALLEL_SORTED_INDEX_DEFAULT_BLOCK_SIZE)
   }
 }
 
+extern "C" __global__ void __launch_bounds__(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE)
+    kernel_gpu_integrator_compact_shadow_paths_array(int num_states,
+                                                      int *indices,
+                                                      int *num_indices,
+                                                      int num_active_paths)
+{
+  gpu_parallel_active_index_array<GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE>(
+      num_states, indices, num_indices, [num_active_paths](const int state) {
+        return (state >= num_active_paths) &&
+               (INTEGRATOR_STATE(state, shadow_path, queued_kernel) != 0);
+      });
+}
+
+extern "C" __global__ void __launch_bounds__(GPU_PARALLEL_SORTED_INDEX_DEFAULT_BLOCK_SIZE)
+    kernel_gpu_integrator_compact_shadow_states(const int *active_terminated_states,
+                                                 const int active_states_offset,
+                                                 const int terminated_states_offset,
+                                                 const int work_size)
+{
+  const int global_index = ccl_gpu_global_id_x();
+
+  if (global_index < work_size) {
+    const int from_state = active_terminated_states[active_states_offset + global_index];
+    const int to_state = active_terminated_states[terminated_states_offset + global_index];
+
+    integrator_shadow_state_move(NULL, to_state, from_state);
+  }
+}
+
 extern "C" __global__ void __launch_bounds__(GPU_PARALLEL_PREFIX_SUM_DEFAULT_BLOCK_SIZE)
     kernel_gpu_prefix_sum(int *counter, int *prefix_sum, int num_values)
 {