diff options
Diffstat (limited to 'intern/cycles/util/atomic.h')
-rw-r--r-- | intern/cycles/util/atomic.h | 56 |
1 files changed, 56 insertions, 0 deletions
diff --git a/intern/cycles/util/atomic.h b/intern/cycles/util/atomic.h index faba411c769..afc3fd019df 100644 --- a/intern/cycles/util/atomic.h +++ b/intern/cycles/util/atomic.h @@ -63,6 +63,62 @@ ccl_device_inline float atomic_compare_and_swap_float(volatile float *dest, # endif /* __KERNEL_CUDA__ */ +# ifdef __KERNEL_METAL__ + +// global address space versions +ccl_device_inline float atomic_add_and_fetch_float(volatile ccl_global float *_source, + const float operand) +{ + volatile ccl_global atomic_int *source = (ccl_global atomic_int *)_source; + union { + int int_value; + float float_value; + } new_value, prev_value; + prev_value.int_value = atomic_load_explicit(source, memory_order_relaxed); + do { + new_value.float_value = prev_value.float_value + operand; + } while (!atomic_compare_exchange_weak_explicit(source, + &prev_value.int_value, + new_value.int_value, + memory_order_relaxed, + memory_order_relaxed)); + + return new_value.float_value; +} + +# define atomic_fetch_and_add_uint32(p, x) \ + atomic_fetch_add_explicit((device atomic_uint *)p, x, memory_order_relaxed) +# define atomic_fetch_and_sub_uint32(p, x) \ + atomic_fetch_sub_explicit((device atomic_uint *)p, x, memory_order_relaxed) +# define atomic_fetch_and_inc_uint32(p) \ + atomic_fetch_add_explicit((device atomic_uint *)p, 1, memory_order_relaxed) +# define atomic_fetch_and_dec_uint32(p) \ + atomic_fetch_sub_explicit((device atomic_uint *)p, 1, memory_order_relaxed) +# define atomic_fetch_and_or_uint32(p, x) \ + atomic_fetch_or_explicit((device atomic_uint *)p, x, memory_order_relaxed) + +ccl_device_inline float atomic_compare_and_swap_float(volatile ccl_global float *dest, + const float old_val, + const float new_val) +{ + int prev_value; + prev_value = __float_as_int(old_val); + atomic_compare_exchange_weak_explicit((ccl_global atomic_int *)dest, + &prev_value, + __float_as_int(new_val), + memory_order_relaxed, + memory_order_relaxed); + return __int_as_float(prev_value); +} + +# define atomic_store(p, x) atomic_store_explicit(p, x, memory_order_relaxed) +# define atomic_fetch(p) atomic_load_explicit(p, memory_order_relaxed) + +# define CCL_LOCAL_MEM_FENCE mem_flags::mem_threadgroup +# define ccl_barrier(flags) threadgroup_barrier(flags) + +# endif /* __KERNEL_METAL__ */ + #endif /* __KERNEL_GPU__ */ #endif /* __UTIL_ATOMIC_H__ */ |