diff options
author | Michael Jones <michael_p_jones@apple.com> | 2021-11-18 16:25:05 +0300 |
---|---|---|
committer | Brecht Van Lommel <brecht@blender.org> | 2021-11-18 16:38:02 +0300 |
commit | d19e35873f67c90b251ca38e007a83aa1eada211 (patch) | |
tree | 7e8659acea7f12b188077ada225e113b5df35e60 /intern/cycles/kernel/device/gpu | |
parent | c0d52db783eb3a6288c9af04298b2358fec76357 (diff) |
Cycles: several small fixes and additions for MSL
This patch contains many small leftover fixes and additions that are
required for Metal enablement:

- Address space fixes and a few other small compile fixes
- Addition of missing functionality to the Metal adapter headers
- Addition of various scattered `__KERNEL_METAL__` blocks (e.g. for
  atomic support and maths functions)
Ref T92212
Differential Revision: https://developer.blender.org/D13263
Diffstat (limited to 'intern/cycles/kernel/device/gpu')
-rw-r--r-- | intern/cycles/kernel/device/gpu/kernel.h | 4 | ||||
-rw-r--r-- | intern/cycles/kernel/device/gpu/parallel_active_index.h | 4 |
2 files changed, 4 insertions, 4 deletions
```diff
diff --git a/intern/cycles/kernel/device/gpu/kernel.h b/intern/cycles/kernel/device/gpu/kernel.h
index dd0c6dd6893..60332af752c 100644
--- a/intern/cycles/kernel/device/gpu/kernel.h
+++ b/intern/cycles/kernel/device/gpu/kernel.h
@@ -464,7 +464,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
   const auto num_active_pixels_mask = ccl_gpu_ballot(!converged);
   const int lane_id = ccl_gpu_thread_idx_x % ccl_gpu_warp_size;
   if (lane_id == 0) {
-    atomic_fetch_and_add_uint32(num_active_pixels, ccl_gpu_popc(num_active_pixels_mask));
+    atomic_fetch_and_add_uint32(num_active_pixels, popcount(num_active_pixels_mask));
   }
 }
 
@@ -892,6 +892,6 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
   const auto can_split_mask = ccl_gpu_ballot(can_split);
   const int lane_id = ccl_gpu_thread_idx_x % ccl_gpu_warp_size;
   if (lane_id == 0) {
-    atomic_fetch_and_add_uint32(num_possible_splits, ccl_gpu_popc(can_split_mask));
+    atomic_fetch_and_add_uint32(num_possible_splits, popcount(can_split_mask));
   }
 }
diff --git a/intern/cycles/kernel/device/gpu/parallel_active_index.h b/intern/cycles/kernel/device/gpu/parallel_active_index.h
index f667ede2712..a5320edcb3c 100644
--- a/intern/cycles/kernel/device/gpu/parallel_active_index.h
+++ b/intern/cycles/kernel/device/gpu/parallel_active_index.h
@@ -85,8 +85,8 @@ __device__ void gpu_parallel_active_index_array(const uint num_states,
   const uint is_active = (state_index < num_states) ? is_active_op(state_index) : 0;
 
   /* For each thread within a warp compute how many other active states precede it. */
-  const uint thread_offset = ccl_gpu_popc(ccl_gpu_ballot(is_active) &
-                                          ccl_gpu_thread_mask(thread_warp));
+  const uint thread_offset = popcount(ccl_gpu_ballot(is_active) &
+                                      ccl_gpu_thread_mask(thread_warp));
 
   /* Last thread in warp stores number of active states for each warp. */
   if (thread_warp == ccl_gpu_warp_size - 1) {
```