Cycles: make ambient occlusion pass take into account transparency again

This takes advantage of the new decoupled main and shadow paths. For the CPU we just store two nested structs in the integrator state, one for direct light shadows and one for AO. For the GPU we restrict the number of shade surface states to be executed based on the available space in the shadow paths queue.

This also helps improve performance in benchmark scenes with an AO pass, since it is no longer necessary to use the shader raytracing kernel there, which has worse performance.

Differential Revision: https://developer.blender.org/D12900
author: Brecht Van Lommel <brecht@blender.org> 2021-10-17 19:08:00 +0300
committer: Brecht Van Lommel <brecht@blender.org> 2021-10-20 18:50:31 +0300
commit: cccfa597ba69944817e0913944cf3c3d0a6e1165 (patch)
tree: 331fe58a76d3413bd247d745f56ba3b0f35dbeeb /intern/cycles/kernel/device/gpu/kernel.h
parent: 001f548227c413a4fdbee275744ea8bea886081a (diff)
1 files changed, 17 insertions, 5 deletions
diff --git a/intern/cycles/kernel/device/gpu/kernel.h b/intern/cycles/kernel/device/gpu/kernel.h
index b6df74e835a..fcb398f7e6d 100644
--- a/intern/cycles/kernel/device/gpu/kernel.h
+++ b/intern/cycles/kernel/device/gpu/kernel.h
@@ -282,11 +282,22 @@ extern "C" __global__ void __launch_bounds__(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_B
 }
 
 extern "C" __global__ void __launch_bounds__(GPU_PARALLEL_SORTED_INDEX_DEFAULT_BLOCK_SIZE)
-    kernel_gpu_integrator_sorted_paths_array(
-        int num_states, int *indices, int *num_indices, int *key_prefix_sum, int kernel)
+    kernel_gpu_integrator_sorted_paths_array(int num_states,
+                                             int num_states_limit,
+                                             int *indices,
+                                             int *num_indices,
+                                             int *key_counter,
+                                             int *key_prefix_sum,
+                                             int kernel)
 {
   gpu_parallel_sorted_index_array<GPU_PARALLEL_SORTED_INDEX_DEFAULT_BLOCK_SIZE>(
-      num_states, indices, num_indices, key_prefix_sum, [kernel](const int state) {
+      num_states,
+      num_states_limit,
+      indices,
+      num_indices,
+      key_counter,
+      key_prefix_sum,
+      [kernel](const int state) {
         return (INTEGRATOR_STATE(state, path, queued_kernel) == kernel) ?
                    INTEGRATOR_STATE(state, path, shader_sort_key) :
                    GPU_PARALLEL_SORTED_INDEX_INACTIVE_KEY;
@@ -322,9 +333,10 @@ extern "C" __global__ void __launch_bounds__(GPU_PARALLEL_SORTED_INDEX_DEFAULT_B
 }
 
 extern "C" __global__ void __launch_bounds__(GPU_PARALLEL_PREFIX_SUM_DEFAULT_BLOCK_SIZE)
-    kernel_gpu_prefix_sum(int *values, int num_values)
+    kernel_gpu_prefix_sum(int *counter, int *prefix_sum, int num_values)
 {
-  gpu_parallel_prefix_sum<GPU_PARALLEL_PREFIX_SUM_DEFAULT_BLOCK_SIZE>(values, num_values);
+  gpu_parallel_prefix_sum<GPU_PARALLEL_PREFIX_SUM_DEFAULT_BLOCK_SIZE>(
+      counter, prefix_sum, num_values);
 }
 
 /* --------------------------------------------------------------------
author	Brecht Van Lommel <brecht@blender.org>	2021-10-17 19:08:00 +0300
committer	Brecht Van Lommel <brecht@blender.org>	2021-10-20 18:50:31 +0300
commit	cccfa597ba69944817e0913944cf3c3d0a6e1165 (patch)
tree	331fe58a76d3413bd247d745f56ba3b0f35dbeeb /intern/cycles/kernel/device/gpu/kernel.h
parent	001f548227c413a4fdbee275744ea8bea886081a (diff)