diff options
author | Sylvain Jeaugey <sjeaugey@nvidia.com> | 2017-04-04 19:47:52 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-04-04 19:47:52 +0300 |
commit | ccfc4567dc3e2a37fb42cfbc64d10eb526e7da7b (patch) | |
tree | ee0aad129b1c0c3081fbbbb722a1cc5861a08eef | |
parent | 649f04d07711440ec2699ce7ec1af4ab71d6dbf9 (diff) | |
parent | 8241cd7b6ed1425eeb88fd380090575978e358f4 (diff) |
Merge pull request #78 from ilya-biryukov/master
Fix compilation error when compiling with 'clang -x cuda'.
-rw-r--r-- | src/common_kernel.h | 52 |
1 files changed, 26 insertions, 26 deletions
diff --git a/src/common_kernel.h b/src/common_kernel.h
index 28fbc85..cc71f8a 100644
--- a/src/common_kernel.h
+++ b/src/common_kernel.h
@@ -30,6 +30,32 @@
 #define BAR(type, barid, nthreads) \
     BAR_EXPAND(type, barid, ROUNDUP(nthreads, WARP_SIZE))
 
+template<typename T> inline __device__
+T vFetch(const volatile T* ptr) {
+  return *ptr;
+}
+
+#ifdef CUDA_HAS_HALF
+template<> inline __device__
+half vFetch<half>(const volatile half* ptr) {
+  half r;
+  r.x = ptr->x;
+  return r;
+}
+#endif
+
+template<typename T> inline __device__
+void vStore(volatile T* ptr, const T val) {
+  *ptr = val;
+}
+
+#ifdef CUDA_HAS_HALF
+template<> inline __device__
+void vStore<half>(volatile half* ptr, const half val) {
+  ptr->x = val.x;
+}
+#endif
+
 __device__ unsigned int spinct;
 
 // Spin wait until func evaluates to true
@@ -225,32 +251,6 @@ __device__ inline volatile T* AlignUp(volatile T * ptr, size_t align) {
   return reinterpret_cast<volatile T*>(ALIGNUP(ptrval, align));
 }
 
-template<typename T> inline __device__
-T vFetch(const volatile T* ptr) {
-  return *ptr;
-}
-
-#ifdef CUDA_HAS_HALF
-template<> inline __device__
-half vFetch<half>(const volatile half* ptr) {
-  half r;
-  r.x = ptr->x;
-  return r;
-}
-#endif
-
-template<typename T> inline __device__
-void vStore(volatile T* ptr, const T val) {
-  *ptr = val;
-}
-
-#ifdef CUDA_HAS_HALF
-template<> inline __device__
-void vStore<half>(volatile half* ptr, const half val) {
-  ptr->x = val.x;
-}
-#endif
-
 // Assumptions:
 // - there is exactly 1 block
 // - THREADS is the number of producer threads