diff options
author | Brecht Van Lommel <brecht@blender.org> | 2022-02-15 03:05:47 +0300 |
---|---|---|
committer | Brecht Van Lommel <brecht@blender.org> | 2022-02-15 03:05:47 +0300 |
commit | a9a05d559798d9378f57d923dd18c9e63d7145ef (patch) | |
tree | f4b7410002e8b993d34ef820f17270a2454a847c | |
parent | 3d1e97825712bdaadf4038ba99fa7a0a87f7294c (diff) | |
parent | facd9d82682b30e14e3a7db8fe6af830428d65cc (diff) |
Merge branch 'blender-v3.1-release'
-rw-r--r-- | intern/cycles/kernel/device/gpu/kernel.h | 35 | ||||
-rw-r--r-- | intern/cycles/kernel/device/gpu/parallel_active_index.h | 51 |
2 files changed, 60 insertions, 26 deletions
diff --git a/intern/cycles/kernel/device/gpu/kernel.h b/intern/cycles/kernel/device/gpu/kernel.h index 5dacf2910be..26ab99766ad 100644 --- a/intern/cycles/kernel/device/gpu/kernel.h +++ b/intern/cycles/kernel/device/gpu/kernel.h @@ -283,7 +283,10 @@ ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE) ccl_gpu_kernel_lambda_pass.kernel_index = kernel_index; gpu_parallel_active_index_array(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE, - num_states, indices, num_indices, ccl_gpu_kernel_lambda_pass); + num_states, + indices, + num_indices, + ccl_gpu_kernel_lambda_pass); } ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE) @@ -298,7 +301,10 @@ ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE) ccl_gpu_kernel_lambda_pass.kernel_index = kernel_index; gpu_parallel_active_index_array(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE, - num_states, indices, num_indices, ccl_gpu_kernel_lambda_pass); + num_states, + indices, + num_indices, + ccl_gpu_kernel_lambda_pass); } ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE) @@ -310,7 +316,10 @@ ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE) ccl_gpu_kernel_lambda(INTEGRATOR_STATE(state, path, queued_kernel) != 0); gpu_parallel_active_index_array(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE, - num_states, indices, num_indices, ccl_gpu_kernel_lambda_pass); + num_states, + indices, + num_indices, + ccl_gpu_kernel_lambda_pass); } ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE) @@ -323,7 +332,10 @@ ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE) ccl_gpu_kernel_lambda(INTEGRATOR_STATE(state, path, queued_kernel) == 0); gpu_parallel_active_index_array(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE, - num_states, indices + indices_offset, num_indices, ccl_gpu_kernel_lambda_pass); + num_states, + indices + indices_offset, + num_indices, + ccl_gpu_kernel_lambda_pass); } ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE) @@ -336,7 +348,10 @@ ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE) ccl_gpu_kernel_lambda(INTEGRATOR_STATE(state, shadow_path, queued_kernel) == 0); gpu_parallel_active_index_array(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE, - num_states, indices + indices_offset, num_indices, ccl_gpu_kernel_lambda_pass); + num_states, + indices + indices_offset, + num_indices, + ccl_gpu_kernel_lambda_pass); } ccl_gpu_kernel_threads(GPU_PARALLEL_SORTED_INDEX_DEFAULT_BLOCK_SIZE) @@ -379,7 +394,10 @@ ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE) ccl_gpu_kernel_lambda_pass.num_active_paths = num_active_paths; gpu_parallel_active_index_array(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE, - num_states, indices, num_indices, ccl_gpu_kernel_lambda_pass); + num_states, + indices, + num_indices, + ccl_gpu_kernel_lambda_pass); } ccl_gpu_kernel_threads(GPU_PARALLEL_SORTED_INDEX_DEFAULT_BLOCK_SIZE) @@ -412,7 +430,10 @@ ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE) ccl_gpu_kernel_lambda_pass.num_active_paths = num_active_paths; gpu_parallel_active_index_array(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE, - num_states, indices, num_indices, ccl_gpu_kernel_lambda_pass); + num_states, + indices, + num_indices, + ccl_gpu_kernel_lambda_pass); } ccl_gpu_kernel_threads(GPU_PARALLEL_SORTED_INDEX_DEFAULT_BLOCK_SIZE) diff --git a/intern/cycles/kernel/device/gpu/parallel_active_index.h b/intern/cycles/kernel/device/gpu/parallel_active_index.h index 32dbe0ddaa3..7d7266d5edf 100644 --- a/intern/cycles/kernel/device/gpu/parallel_active_index.h +++ b/intern/cycles/kernel/device/gpu/parallel_active_index.h @@ -22,19 +22,20 @@ CCL_NAMESPACE_BEGIN template<uint blocksize, typename IsActiveOp> __device__ #endif -void gpu_parallel_active_index_array_impl(const uint num_states, - ccl_global int *indices, - ccl_global int *num_indices, + void + gpu_parallel_active_index_array_impl(const uint num_states, + ccl_global int *indices, + ccl_global int *num_indices, #ifdef __KERNEL_METAL__ - const uint is_active, - const uint blocksize, - const int thread_index, - const uint state_index, - const int ccl_gpu_warp_size, - const int thread_warp, - const int warp_index, - const int num_warps, - threadgroup int *warp_offset) + const uint is_active, + const uint blocksize, + const int thread_index, + const uint state_index, + const int ccl_gpu_warp_size, + const int thread_warp, + const int warp_index, + const int num_warps, + threadgroup int *warp_offset) { #else IsActiveOp is_active_op) @@ -65,7 +66,7 @@ void gpu_parallel_active_index_array_impl(const uint num_states, ccl_gpu_syncthreads(); /* Last thread in block converts per-warp sizes to offsets, increments global size of - * index array and gets offset to write to. */ + * index array and gets offset to write to. */ if (thread_index == blocksize - 1) { /* TODO: parallelize this. */ int offset = 0; @@ -91,15 +92,27 @@ void gpu_parallel_active_index_array_impl(const uint num_states, #ifdef __KERNEL_METAL__ # define gpu_parallel_active_index_array(dummy, num_states, indices, num_indices, is_active_op) \ - const uint is_active = (ccl_gpu_global_id_x() < num_states) ? is_active_op(ccl_gpu_global_id_x()) : 0; \ - gpu_parallel_active_index_array_impl(num_states, indices, num_indices, is_active, \ - metal_local_size, metal_local_id, metal_global_id, simdgroup_size, simd_lane_index, \ - simd_group_index, num_simd_groups, simdgroup_offset) + const uint is_active = (ccl_gpu_global_id_x() < num_states) ? \ + is_active_op(ccl_gpu_global_id_x()) : \ + 0; \ + gpu_parallel_active_index_array_impl(num_states, \ + indices, \ + num_indices, \ + is_active, \ + metal_local_size, \ + metal_local_id, \ + metal_global_id, \ + simdgroup_size, \ + simd_lane_index, \ + simd_group_index, \ + num_simd_groups, \ + simdgroup_offset) #else -# define gpu_parallel_active_index_array(blocksize, num_states, indices, num_indices, is_active_op) \ - gpu_parallel_active_index_array_impl<blocksize>(num_states, indices, num_indices, is_active_op) +# define gpu_parallel_active_index_array( \ + blocksize, num_states, indices, num_indices, is_active_op) \ + gpu_parallel_active_index_array_impl<blocksize>(num_states, indices, num_indices, is_active_op) #endif |