Merge branch 'blender-v3.1-release'

author: Brecht Van Lommel <brecht@blender.org> 2022-02-15 03:05:47 +0300
committer: Brecht Van Lommel <brecht@blender.org> 2022-02-15 03:05:47 +0300
commit: a9a05d559798d9378f57d923dd18c9e63d7145ef (patch)
tree: f4b7410002e8b993d34ef820f17270a2454a847c
parent: 3d1e97825712bdaadf4038ba99fa7a0a87f7294c (diff)
parent: facd9d82682b30e14e3a7db8fe6af830428d65cc (diff)
2 files changed, 60 insertions, 26 deletions
diff --git a/intern/cycles/kernel/device/gpu/kernel.h b/intern/cycles/kernel/device/gpu/kernel.h
index 5dacf2910be..26ab99766ad 100644
--- a/intern/cycles/kernel/device/gpu/kernel.h
+++ b/intern/cycles/kernel/device/gpu/kernel.h
@@ -283,7 +283,10 @@ ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE)
   ccl_gpu_kernel_lambda_pass.kernel_index = kernel_index;
 
   gpu_parallel_active_index_array(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE,
-      num_states, indices, num_indices, ccl_gpu_kernel_lambda_pass);
+                                  num_states,
+                                  indices,
+                                  num_indices,
+                                  ccl_gpu_kernel_lambda_pass);
 }
 
 ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE)
@@ -298,7 +301,10 @@ ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE)
   ccl_gpu_kernel_lambda_pass.kernel_index = kernel_index;
 
   gpu_parallel_active_index_array(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE,
-      num_states, indices, num_indices, ccl_gpu_kernel_lambda_pass);
+                                  num_states,
+                                  indices,
+                                  num_indices,
+                                  ccl_gpu_kernel_lambda_pass);
 }
 
 ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE)
@@ -310,7 +316,10 @@ ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE)
   ccl_gpu_kernel_lambda(INTEGRATOR_STATE(state, path, queued_kernel) != 0);
 
   gpu_parallel_active_index_array(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE,
-      num_states, indices, num_indices, ccl_gpu_kernel_lambda_pass);
+                                  num_states,
+                                  indices,
+                                  num_indices,
+                                  ccl_gpu_kernel_lambda_pass);
 }
 
 ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE)
@@ -323,7 +332,10 @@ ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE)
   ccl_gpu_kernel_lambda(INTEGRATOR_STATE(state, path, queued_kernel) == 0);
 
   gpu_parallel_active_index_array(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE,
-      num_states, indices + indices_offset, num_indices, ccl_gpu_kernel_lambda_pass);
+                                  num_states,
+                                  indices + indices_offset,
+                                  num_indices,
+                                  ccl_gpu_kernel_lambda_pass);
 }
 
 ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE)
@@ -336,7 +348,10 @@ ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE)
   ccl_gpu_kernel_lambda(INTEGRATOR_STATE(state, shadow_path, queued_kernel) == 0);
 
   gpu_parallel_active_index_array(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE,
-      num_states, indices + indices_offset, num_indices, ccl_gpu_kernel_lambda_pass);
+                                  num_states,
+                                  indices + indices_offset,
+                                  num_indices,
+                                  ccl_gpu_kernel_lambda_pass);
 }
 
 ccl_gpu_kernel_threads(GPU_PARALLEL_SORTED_INDEX_DEFAULT_BLOCK_SIZE)
@@ -379,7 +394,10 @@ ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE)
   ccl_gpu_kernel_lambda_pass.num_active_paths = num_active_paths;
 
   gpu_parallel_active_index_array(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE,
-      num_states, indices, num_indices, ccl_gpu_kernel_lambda_pass);
+                                  num_states,
+                                  indices,
+                                  num_indices,
+                                  ccl_gpu_kernel_lambda_pass);
 }
 
 ccl_gpu_kernel_threads(GPU_PARALLEL_SORTED_INDEX_DEFAULT_BLOCK_SIZE)
@@ -412,7 +430,10 @@ ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE)
   ccl_gpu_kernel_lambda_pass.num_active_paths = num_active_paths;
 
   gpu_parallel_active_index_array(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE,
-      num_states, indices, num_indices, ccl_gpu_kernel_lambda_pass);
+                                  num_states,
+                                  indices,
+                                  num_indices,
+                                  ccl_gpu_kernel_lambda_pass);
 }
 
 ccl_gpu_kernel_threads(GPU_PARALLEL_SORTED_INDEX_DEFAULT_BLOCK_SIZE)
diff --git a/intern/cycles/kernel/device/gpu/parallel_active_index.h b/intern/cycles/kernel/device/gpu/parallel_active_index.h
index 32dbe0ddaa3..7d7266d5edf 100644
--- a/intern/cycles/kernel/device/gpu/parallel_active_index.h
+++ b/intern/cycles/kernel/device/gpu/parallel_active_index.h
@@ -22,19 +22,20 @@ CCL_NAMESPACE_BEGIN
 template<uint blocksize, typename IsActiveOp>
 __device__
 #endif
-void gpu_parallel_active_index_array_impl(const uint num_states,
-                                          ccl_global int *indices,
-                                          ccl_global int *num_indices,
+    void
+    gpu_parallel_active_index_array_impl(const uint num_states,
+                                         ccl_global int *indices,
+                                         ccl_global int *num_indices,
 #ifdef __KERNEL_METAL__
-                                          const uint is_active,
-                                          const uint blocksize,
-                                          const int thread_index,
-                                          const uint state_index,
-                                          const int ccl_gpu_warp_size,
-                                          const int thread_warp,
-                                          const int warp_index,
-                                          const int num_warps,
-                                          threadgroup int *warp_offset)
+                                         const uint is_active,
+                                         const uint blocksize,
+                                         const int thread_index,
+                                         const uint state_index,
+                                         const int ccl_gpu_warp_size,
+                                         const int thread_warp,
+                                         const int warp_index,
+                                         const int num_warps,
+                                         threadgroup int *warp_offset)
 {
 #else
                                           IsActiveOp is_active_op)
@@ -65,7 +66,7 @@ void gpu_parallel_active_index_array_impl(const uint num_states,
   ccl_gpu_syncthreads();
 
   /* Last thread in block converts per-warp sizes to offsets, increments global size of
-    * index array and gets offset to write to. */
+   * index array and gets offset to write to. */
   if (thread_index == blocksize - 1) {
     /* TODO: parallelize this. */
     int offset = 0;
@@ -91,15 +92,27 @@ void gpu_parallel_active_index_array_impl(const uint num_states,
 #ifdef __KERNEL_METAL__
 
 #  define gpu_parallel_active_index_array(dummy, num_states, indices, num_indices, is_active_op) \
-  const uint is_active = (ccl_gpu_global_id_x() < num_states) ? is_active_op(ccl_gpu_global_id_x()) : 0; \
-  gpu_parallel_active_index_array_impl(num_states, indices, num_indices, is_active, \
-    metal_local_size, metal_local_id, metal_global_id, simdgroup_size, simd_lane_index, \
-    simd_group_index, num_simd_groups, simdgroup_offset)
+    const uint is_active = (ccl_gpu_global_id_x() < num_states) ? \
+                               is_active_op(ccl_gpu_global_id_x()) : \
+                               0; \
+    gpu_parallel_active_index_array_impl(num_states, \
+                                         indices, \
+                                         num_indices, \
+                                         is_active, \
+                                         metal_local_size, \
+                                         metal_local_id, \
+                                         metal_global_id, \
+                                         simdgroup_size, \
+                                         simd_lane_index, \
+                                         simd_group_index, \
+                                         num_simd_groups, \
+                                         simdgroup_offset)
 
 #else
 
-#  define gpu_parallel_active_index_array(blocksize, num_states, indices, num_indices, is_active_op) \
-  gpu_parallel_active_index_array_impl<blocksize>(num_states, indices, num_indices, is_active_op)
+#  define gpu_parallel_active_index_array( \
+      blocksize, num_states, indices, num_indices, is_active_op) \
+    gpu_parallel_active_index_array_impl<blocksize>(num_states, indices, num_indices, is_active_op)
 
 #endif
author	Brecht Van Lommel <brecht@blender.org>	2022-02-15 03:05:47 +0300
committer	Brecht Van Lommel <brecht@blender.org>	2022-02-15 03:05:47 +0300
commit	a9a05d559798d9378f57d923dd18c9e63d7145ef (patch)
tree	f4b7410002e8b993d34ef820f17270a2454a847c
parent	3d1e97825712bdaadf4038ba99fa7a0a87f7294c (diff)
parent	facd9d82682b30e14e3a7db8fe6af830428d65cc (diff)