From ba3ae9ea273f7e596607281ffd77871e5a44fca7 Mon Sep 17 00:00:00 2001
From: Bastien Montagne
Date: Mon, 9 May 2016 17:03:08 +0200
Subject: Cleanup and refactor our atomic library.

This commit:
* Removes most of the dirty internal details from the public atomic_ops.h file, and moves them into the private /intern subdir.
* Removes unused 'architectures' (__apple__ and jemalloc).
* Splits each implementation into its own file.
* Makes use of C99's limits.h system header to determine pointer and int size, instead of a fixed, hardcoded list of architectures.
* Introduces new 'faked' atomic ops for floats.

Note that we may add a lot more real and 'faked' atomic operations over integers and floats (multiplication, division, bitshift, bitwise booleans, etc.), as needs arise.

Reviewers: sergey, campbellbarton

Differential Revision: https://developer.blender.org/D1982
---
 intern/atomic/atomic_ops.h | 505 +++++----------------------------------------
 1 file changed, 51 insertions(+), 454 deletions(-)

(limited to 'intern/atomic/atomic_ops.h')

diff --git a/intern/atomic/atomic_ops.h b/intern/atomic/atomic_ops.h
index dd1bdd2328d..e4e1bdc1c09 100644
--- a/intern/atomic/atomic_ops.h
+++ b/intern/atomic/atomic_ops.h
@@ -1,11 +1,11 @@
 /*
- * Adopted from jemalloc with this license:
+ * Original code from jemalloc with this license:
  *
  * Copyright (C) 2002-2013 Jason Evans <jasone@canonware.com>.
  * All rights reserved.
  * Copyright (C) 2007-2012 Mozilla Foundation. All rights reserved.
  * Copyright (C) 2009-2013 Facebook, Inc. All rights reserved.
-
+ *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
  * 1. Redistributions of source code must retain the above copyright notice(s),
@@ -13,7 +13,7 @@
  * 2. Redistributions in binary form must reproduce the above copyright notice(s),
  *    this list of conditions and the following disclaimer in the documentation
  *    and/or other materials provided with the distribution.
-
+ *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY EXPRESS
  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
@@ -24,64 +24,59 @@
  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ***** BEGIN GPL LICENSE BLOCK *****
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * The Original Code is Copyright (C) 2016 Blender Foundation.
+ * All rights reserved.
+ *
+ * The Original Code is: adapted from jemalloc.
+ *
+ * ***** END GPL LICENSE BLOCK *****
+ */
+
+/**
+ * \file atomic_ops.h
+ * \ingroup Atomic
+ *
+ * \author Copyright (C) 2016 Blender Foundation, adapted from jemalloc.
+ * \brief Provides a wrapper around system-specific atomic primitives, and some extensions (faked atomic operations
+ * over float numbers).
  */
 
 #ifndef __ATOMIC_OPS_H__
 #define __ATOMIC_OPS_H__
 
-#include <assert.h>
-
-#if defined (__APPLE__)
-# include <libkern/OSAtomic.h>
-#elif defined(_MSC_VER)
-# define NOGDI
-# ifndef NOMINMAX
-# define NOMINMAX
-# endif
-# define WIN32_LEAN_AND_MEAN
-# include <windows.h>
-#elif defined(__arm__)
+#if defined(__arm__)
 /* Attempt to fix compilation error on Debian armel kernel.
  * arm7 architecture does have both 32 and 64bit atomics, however
  * it's gcc doesn't have __GCC_HAVE_SYNC_COMPARE_AND_SWAP_n defined. */
 # define JE_FORCE_SYNC_COMPARE_AND_SWAP_1
-# define JE_FORCE_SYNC_COMPARE_AND_SWAP_8
 # define JE_FORCE_SYNC_COMPARE_AND_SWAP_4
+# define JE_FORCE_SYNC_COMPARE_AND_SWAP_8
 #endif
 
-/* needed for int types */
-#include "../../source/blender/blenlib/BLI_sys_types.h"
-#include <stdlib.h>
-#include <stddef.h>
-
-/* little macro so inline keyword works */
-#if defined(_MSC_VER)
-# define ATOMIC_INLINE static __forceinline
-#else
-# if (defined(__APPLE__) && defined(__ppc__))
-/* static inline __attribute__ here breaks osx ppc gcc42 build */
-# define ATOMIC_INLINE static __attribute__((always_inline))
-# else
-# define ATOMIC_INLINE static inline __attribute__((always_inline))
-# endif
-#endif
-
-/* This is becoming a bit nastier that it was originally foreseen,
- * consider using autoconfig detection instead.
- */
-#if defined(_M_X64) || defined(__amd64__) || defined(__x86_64__) || defined(__s390x__) || defined(__powerpc64__) || defined(__aarch64__) || (defined(__sparc__) && defined(__arch64__)) || defined(__alpha__) || defined(__mips64)
-# define LG_SIZEOF_PTR 3
-# define LG_SIZEOF_INT 2
-#else
-# define LG_SIZEOF_PTR 2
-# define LG_SIZEOF_INT 2
-#endif
+#include "intern/atomic_ops_utils.h"
 
-/************************/
+/******************************************************************************/
 /* Function prototypes. */
 
-#if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3)
+#if (LG_SIZEOF_PTR == 8 || LG_SIZEOF_INT == 8)
 ATOMIC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x);
 ATOMIC_INLINE uint64_t atomic_sub_uint64(uint64_t *p, uint64_t x);
 ATOMIC_INLINE uint64_t atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new);
@@ -102,420 +97,22 @@ ATOMIC_INLINE unsigned atomic_add_u(unsigned *p, unsigned x);
 ATOMIC_INLINE unsigned atomic_sub_u(unsigned *p, unsigned x);
 ATOMIC_INLINE unsigned atomic_cas_u(unsigned *v, unsigned old, unsigned _new);
 
-/******************************************************************************/
-/* 64-bit operations. */
-#if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3)
-# ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8
-ATOMIC_INLINE uint64_t
-atomic_add_uint64(uint64_t *p, uint64_t x)
-{
-    return __sync_add_and_fetch(p, x);
-}
-
-ATOMIC_INLINE uint64_t
-atomic_sub_uint64(uint64_t *p, uint64_t x)
-{
-    return __sync_sub_and_fetch(p, x);
-}
-
-ATOMIC_INLINE uint64_t
-atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new)
-{
-    return __sync_val_compare_and_swap(v, old, _new);
-}
-#elif (defined(_MSC_VER))
-ATOMIC_INLINE uint64_t
-atomic_add_uint64(uint64_t *p, uint64_t x)
-{
-    return InterlockedExchangeAdd64((int64_t *)p, (int64_t)x) + x;
-}
-
-ATOMIC_INLINE uint64_t
-atomic_sub_uint64(uint64_t *p, uint64_t x)
-{
-    return InterlockedExchangeAdd64((int64_t *)p, -((int64_t)x)) - x;
-}
-
-ATOMIC_INLINE uint64_t
-atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new)
-{
-    return InterlockedCompareExchange64((int64_t *)v, _new, old);
-}
-#elif (defined(__APPLE__))
-ATOMIC_INLINE uint64_t
-atomic_add_uint64(uint64_t *p, uint64_t x)
-{
-    return (uint64_t)OSAtomicAdd64((int64_t)x, (int64_t *)p);
-}
-
-ATOMIC_INLINE uint64_t
-atomic_sub_uint64(uint64_t *p, uint64_t x)
-{
-    return (uint64_t)OSAtomicAdd64(-((int64_t)x), (int64_t *)p);
-}
-
-ATOMIC_INLINE uint64_t
-atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new)
-{
-    uint64_t init_val = *v;
-    OSAtomicCompareAndSwap64((int64_t)old, (int64_t)_new, (int64_t *)v);
-    return init_val;
-}
-# elif (defined(__amd64__) || defined(__x86_64__))
-ATOMIC_INLINE uint64_t
-atomic_add_uint64(uint64_t *p, uint64_t x)
-{
-    asm volatile (
-        "lock; xaddq %0, %1;"
-        : "+r" (x), "=m" (*p) /* Outputs. */
-        : "m" (*p) /* Inputs. */
-        );
-    return x;
-}
-
-ATOMIC_INLINE uint64_t
-atomic_sub_uint64(uint64_t *p, uint64_t x)
-{
-    x = (uint64_t)(-(int64_t)x);
-    asm volatile (
-        "lock; xaddq %0, %1;"
-        : "+r" (x), "=m" (*p) /* Outputs. */
-        : "m" (*p) /* Inputs. */
-        );
-    return x;
-}
-
-ATOMIC_INLINE uint64_t
-atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new)
-{
-    uint64_t ret;
-    asm volatile (
-        "lock; cmpxchgq %2,%1"
-        : "=a" (ret), "+m" (*v)
-        : "r" (_new), "0" (old)
-        : "memory");
-    return ret;
-}
-
-# elif (defined(JEMALLOC_ATOMIC9))
-ATOMIC_INLINE uint64_t
-atomic_add_uint64(uint64_t *p, uint64_t x)
-{
-    /*
-     * atomic_fetchadd_64() doesn't exist, but we only ever use this
-     * function on LP64 systems, so atomic_fetchadd_long() will do.
-     */
-    assert(sizeof(uint64_t) == sizeof(unsigned long));
-
-    return atomic_fetchadd_long(p, (unsigned long)x) + x;
-}
-
-ATOMIC_INLINE uint64_t
-atomic_sub_uint64(uint64_t *p, uint64_t x)
-{
-    assert(sizeof(uint64_t) == sizeof(unsigned long));
-
-    return atomic_fetchadd_long(p, (unsigned long)(-(long)x)) - x;
-}
-
-ATOMIC_INLINE uint64_t
-atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new)
-{
-    assert(sizeof(uint64_t) == sizeof(unsigned long));
-
-    return atomic_cmpset_long(v, old, _new);
-}
-# elif (defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_8))
-ATOMIC_INLINE uint64_t
-atomic_add_uint64(uint64_t *p, uint64_t x)
-{
-    return __sync_add_and_fetch(p, x);
-}
-
-ATOMIC_INLINE uint64_t
-atomic_sub_uint64(uint64_t *p, uint64_t x)
-{
-    return __sync_sub_and_fetch(p, x);
-}
-
-ATOMIC_INLINE uint64_t
-atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new)
-{
-    return __sync_val_compare_and_swap(v, old, _new);
-}
-# else
-# error "Missing implementation for 64-bit atomic operations"
-# endif
-#endif
+/* WARNING! Float 'atomics' are really faked ones; they are actually closer to some kind of spinlock-sync'ed operation,
+ * which means they are only efficient if collisions are highly unlikely (i.e. if the probability of two threads
+ * working on the same pointer at the same time is very low). */
+ATOMIC_INLINE float atomic_add_fl(float *p, const float x);
 
 /******************************************************************************/
-/* 32-bit operations. */
-#ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4
-ATOMIC_INLINE uint32_t
-atomic_add_uint32(uint32_t *p, uint32_t x)
-{
-    return __sync_add_and_fetch(p, x);
-}
-
-ATOMIC_INLINE uint32_t
-atomic_sub_uint32(uint32_t *p, uint32_t x)
-{
-    return __sync_sub_and_fetch(p, x);
-}
-
-ATOMIC_INLINE uint32_t
-atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _new)
-{
-    return __sync_val_compare_and_swap(v, old, _new);
-}
-#elif (defined(_MSC_VER))
-ATOMIC_INLINE uint32_t
-atomic_add_uint32(uint32_t *p, uint32_t x)
-{
-    return InterlockedExchangeAdd(p, x) + x;
-}
-
-ATOMIC_INLINE uint32_t
-atomic_sub_uint32(uint32_t *p, uint32_t x)
-{
-    return InterlockedExchangeAdd(p, -((int32_t)x)) - x;
-}
-
-ATOMIC_INLINE uint32_t
-atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _new)
-{
-    return InterlockedCompareExchange((long *)v, _new, old);
-}
-#elif (defined(__APPLE__))
-ATOMIC_INLINE uint32_t
-atomic_add_uint32(uint32_t *p, uint32_t x)
-{
-    return (uint32_t)OSAtomicAdd32((int32_t)x, (int32_t *)p);
-}
-
-ATOMIC_INLINE uint32_t
-atomic_sub_uint32(uint32_t *p, uint32_t x)
-{
-    return (uint32_t)OSAtomicAdd32(-((int32_t)x), (int32_t *)p);
-}
-
-ATOMIC_INLINE uint32_t
-atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _new)
-{
-    uint32_t init_val = *v;
-    OSAtomicCompareAndSwap32((int32_t)old, (int32_t)_new, (int32_t *)v);
-    return init_val;
-}
-#elif (defined(__i386__) || defined(__amd64__) || defined(__x86_64__))
-ATOMIC_INLINE uint32_t
-atomic_add_uint32(uint32_t *p, uint32_t x)
-{
-    asm volatile (
-        "lock; xaddl %0, %1;"
-        : "+r" (x), "=m" (*p) /* Outputs. */
-        : "m" (*p) /* Inputs. */
-        );
-    return x;
-}
-
-ATOMIC_INLINE uint32_t
-atomic_sub_uint32(uint32_t *p, uint32_t x)
-{
-    x = (uint32_t)(-(int32_t)x);
-    asm volatile (
-        "lock; xaddl %0, %1;"
-        : "+r" (x), "=m" (*p) /* Outputs. */
-        : "m" (*p) /* Inputs. */
-        );
-    return x;
-}
-
-ATOMIC_INLINE uint32_t
-atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _new)
-{
-    uint32_t ret;
-    asm volatile (
-        "lock; cmpxchgl %2,%1"
-        : "=a" (ret), "+m" (*v)
-        : "r" (_new), "0" (old)
-        : "memory");
-    return ret;
-}
-#elif (defined(JEMALLOC_ATOMIC9))
-ATOMIC_INLINE uint32_t
-atomic_add_uint32(uint32_t *p, uint32_t x)
-{
-    return atomic_fetchadd_32(p, x) + x;
-}
+/* Include system-dependent implementations. */
 
-ATOMIC_INLINE uint32_t
-atomic_sub_uint32(uint32_t *p, uint32_t x)
-{
-    return atomic_fetchadd_32(p, (uint32_t)(-(int32_t)x)) - x;
-}
-
-ATOMIC_INLINE uint32_t
-atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _new)
-{
-    return atomic_cmpset_32(v, old, _new);
-}
-#elif defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_4)
-ATOMIC_INLINE uint32_t
-atomic_add_uint32(uint32_t *p, uint32_t x)
-{
-    return __sync_add_and_fetch(p, x);
-}
-
-ATOMIC_INLINE uint32_t
-atomic_sub_uint32(uint32_t *p, uint32_t x)
-{
-    return __sync_sub_and_fetch(p, x);
-}
-
-ATOMIC_INLINE uint32_t
-atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _new)
-{
-    return __sync_val_compare_and_swap(v, old, _new);
-}
-#else
-# error "Missing implementation for 32-bit atomic operations"
-#endif
-
-/******************************************************************************/
-/* 8-bit operations. */
-#ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_1
-ATOMIC_INLINE uint8_t
-atomic_fetch_and_or_uint8(uint8_t *p, uint8_t b)
-{
-    return __sync_fetch_and_or(p, b);
-}
-ATOMIC_INLINE uint8_t
-atomic_fetch_and_and_uint8(uint8_t *p, uint8_t b)
-{
-    return __sync_fetch_and_and(p, b);
-}
-#elif (defined(_MSC_VER))
-#include <intrin.h>
-#pragma intrinsic(_InterlockedAnd8)
-ATOMIC_INLINE uint8_t
-atomic_fetch_and_or_uint8(uint8_t *p, uint8_t b)
-{
-#if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3)
-    return InterlockedOr8((char *)p, (char)b);
-#else
-    return _InterlockedOr8((char *)p, (char)b);
-#endif
-}
-ATOMIC_INLINE uint8_t
-atomic_fetch_and_and_uint8(uint8_t *p, uint8_t b)
-{
-#if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3)
-    return InterlockedAnd8((char *)p, (char)b);
-#else
-    return _InterlockedAnd8((char *)p, (char)b);
-#endif
-}
-#elif defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_1)
-ATOMIC_INLINE uint8_t
-atomic_fetch_and_or_uint8(uint8_t *p, uint8_t b)
-{
-    return __sync_fetch_and_or(p, b);
-}
-ATOMIC_INLINE uint8_t
-atomic_fetch_and_and_uint8(uint8_t *p, uint8_t b)
-{
-    return __sync_fetch_and_and(p, b);
-}
+/* Note that we are using _unix flavor as fallback here (it will raise preprocessor errors as needed). */
+#if defined(_MSC_VER)
+# include "intern/atomic_ops_msvc.h"
 #else
-# error "Missing implementation for 8-bit atomic operations"
-#endif
-
-/******************************************************************************/
-/* size_t operations. */
-ATOMIC_INLINE size_t
-atomic_add_z(size_t *p, size_t x)
-{
-    assert(sizeof(size_t) == 1 << LG_SIZEOF_PTR);
-
-#if (LG_SIZEOF_PTR == 3)
-    return (size_t)atomic_add_uint64((uint64_t *)p, (uint64_t)x);
-#elif (LG_SIZEOF_PTR == 2)
-    return (size_t)atomic_add_uint32((uint32_t *)p, (uint32_t)x);
+# include "intern/atomic_ops_unix.h"
 #endif
-}
-
-ATOMIC_INLINE size_t
-atomic_sub_z(size_t *p, size_t x)
-{
-    assert(sizeof(size_t) == 1 << LG_SIZEOF_PTR);
-
-#if (LG_SIZEOF_PTR == 3)
-    return (size_t)atomic_add_uint64((uint64_t *)p,
-                                     (uint64_t)-((int64_t)x));
-#elif (LG_SIZEOF_PTR == 2)
-    return (size_t)atomic_add_uint32((uint32_t *)p,
-                                     (uint32_t)-((int32_t)x));
-#endif
-}
-
-ATOMIC_INLINE size_t
-atomic_cas_z(size_t *v, size_t old, size_t _new)
-{
-    assert(sizeof(size_t) == 1 << LG_SIZEOF_PTR);
-
-#if (LG_SIZEOF_PTR == 3)
-    return (size_t)atomic_cas_uint64((uint64_t *)v,
-                                     (uint64_t)old,
-                                     (uint64_t)_new);
-#elif (LG_SIZEOF_PTR == 2)
-    return (size_t)atomic_cas_uint32((uint32_t *)v,
-                                     (uint32_t)old,
-                                     (uint32_t)_new);
-#endif
-}
-
-/******************************************************************************/
-/* unsigned operations. */
-ATOMIC_INLINE unsigned
-atomic_add_u(unsigned *p, unsigned x)
-{
-    assert(sizeof(unsigned) == 1 << LG_SIZEOF_INT);
-
-#if (LG_SIZEOF_INT == 3)
-    return (unsigned)atomic_add_uint64((uint64_t *)p, (uint64_t)x);
-#elif (LG_SIZEOF_INT == 2)
-    return (unsigned)atomic_add_uint32((uint32_t *)p, (uint32_t)x);
-#endif
-}
-
-ATOMIC_INLINE unsigned
-atomic_sub_u(unsigned *p, unsigned x)
-{
-    assert(sizeof(unsigned) == 1 << LG_SIZEOF_INT);
-
-#if (LG_SIZEOF_INT == 3)
-    return (unsigned)atomic_add_uint64((uint64_t *)p,
-                                       (uint64_t)-((int64_t)x));
-#elif (LG_SIZEOF_INT == 2)
-    return (unsigned)atomic_add_uint32((uint32_t *)p,
-                                       (uint32_t)-((int32_t)x));
-#endif
-}
-
-ATOMIC_INLINE unsigned
-atomic_cas_u(unsigned *v, unsigned old, unsigned _new)
-{
-    assert(sizeof(unsigned) == 1 << LG_SIZEOF_INT);
-
-#if (LG_SIZEOF_PTR == 3)
-    return (unsigned)atomic_cas_uint64((uint64_t *)v,
-                                       (uint64_t)old,
-                                       (uint64_t)_new);
-#elif (LG_SIZEOF_PTR == 2)
-    return (unsigned)atomic_cas_uint32((uint32_t *)v,
-                                       (uint32_t)old,
-                                       (uint32_t)_new);
-#endif
-}
+/* Include 'fake' atomic extensions, built over real atomic primitives. */
+#include "intern/atomic_ops_ext.h"
 
 #endif /* __ATOMIC_OPS_H__ */
--
cgit v1.2.3
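
Editor's note: the new intern/atomic_ops_utils.h header is outside this diff, so the limits.h-based size detection mentioned in the commit message is not visible above. The sketch below is only an illustration of the idea (not the commit's actual code): integer-range checks replace the hardcoded architecture list, and the refactored header now compares LG_SIZEOF_PTR / LG_SIZEOF_INT against byte sizes (8/4) rather than the old log2 values (3/2). UINT_MAX comes from C99's limits.h; UINTPTR_MAX is the companion stdint.h constant.

#include <limits.h>  /* UINT_MAX */
#include <stdint.h>  /* UINTPTR_MAX */

/* Pointer size in bytes, derived from the pointer-sized integer range. */
#if (UINTPTR_MAX == 0xFFFFFFFF)
#  define LG_SIZEOF_PTR 4
#elif (UINTPTR_MAX == 0xFFFFFFFFFFFFFFFF)
#  define LG_SIZEOF_PTR 8
#else
#  error "Cannot find pointer size"
#endif

/* Int size in bytes, derived from the int range. */
#if (UINT_MAX == 0xFFFFFFFF)
#  define LG_SIZEOF_INT 4
#elif (UINT_MAX == 0xFFFFFFFFFFFFFFFF)
#  define LG_SIZEOF_INT 8
#else
#  error "Cannot find int size"
#endif

This works on any conforming compiler, which is why the old "consider using autoconfig detection instead" comment could be dropped along with the architecture list.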
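Editor's note: the 'faked' float atomic declared above (atomic_add_fl) is implemented in the new intern/atomic_ops_ext.h, which is also outside this diff. As a rough, hypothetical sketch of the technique the WARNING describes (not the commit's actual code), such an operation can be built as a compare-and-swap loop over the float's bit pattern, using the atomic_cas_uint32() primitive declared in this header and memcpy-based type punning:

#include <stdint.h>
#include <string.h>

/* Assumes 'float' and 'uint32_t' have the same size (IEEE-754 single precision). */
ATOMIC_INLINE float atomic_add_fl(float *p, const float x)
{
    float oldval, newval;
    uint32_t old_bits, new_bits, prev_bits;

    do {
        /* Collisions are assumed rare, so this loop nearly always runs once. */
        oldval = *p;
        newval = oldval + x;
        /* Type-pun through memcpy to obtain the float bit patterns. */
        memcpy(&old_bits, &oldval, sizeof(old_bits));
        memcpy(&new_bits, &newval, sizeof(new_bits));
        prev_bits = atomic_cas_uint32((uint32_t *)p, old_bits, new_bits);
    } while (prev_bits != old_bits);  /* Another thread changed *p meanwhile; retry. */

    return newval;
}

This is exactly why the WARNING stresses low collision probability: every lost race costs another read/add/CAS round trip, so the operation only performs well when contention on *p is low.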