diff options
author | Ilya Biryukov <ibiryukov@google.com> | 2017-03-16 14:01:11 +0300 |
---|---|---|
committer | Ilya Biryukov <ibiryukov@google.com> | 2017-03-16 14:01:11 +0300 |
commit | 8241cd7b6ed1425eeb88fd380090575978e358f4 (patch) | |
tree | a686a7eadbaed0605d2bd0b4da08c5fe06816642 /src | |
parent | 7fef264bfa3fce60907b1cd6808257c64e222604 (diff) |
Fix compilation error when compiling with 'clang -x cuda'.
Functions vFetch and vStore are not found by ADL with clang,
so they need to be declared before usage in ReduceCopy.
Diffstat (limited to 'src')
-rw-r--r-- | src/common_kernel.h | 52 |
1 file changed, 26 insertions, 26 deletions
diff --git a/src/common_kernel.h b/src/common_kernel.h
index 28fbc85..cc71f8a 100644
--- a/src/common_kernel.h
+++ b/src/common_kernel.h
@@ -30,6 +30,32 @@
 #define BAR(type, barid, nthreads) \
     BAR_EXPAND(type, barid, ROUNDUP(nthreads, WARP_SIZE))
 
+template<typename T> inline __device__
+T vFetch(const volatile T* ptr) {
+  return *ptr;
+}
+
+#ifdef CUDA_HAS_HALF
+template<> inline __device__
+half vFetch<half>(const volatile half* ptr) {
+  half r;
+  r.x = ptr->x;
+  return r;
+}
+#endif
+
+template<typename T> inline __device__
+void vStore(volatile T* ptr, const T val) {
+  *ptr = val;
+}
+
+#ifdef CUDA_HAS_HALF
+template<> inline __device__
+void vStore<half>(volatile half* ptr, const half val) {
+  ptr->x = val.x;
+}
+#endif
+
 __device__ unsigned int spinct;
 
 // Spin wait until func evaluates to true
@@ -225,32 +251,6 @@ __device__ inline volatile T* AlignUp(volatile T * ptr, size_t align) {
   return reinterpret_cast<volatile T*>(ALIGNUP(ptrval, align));
 }
 
-template<typename T> inline __device__
-T vFetch(const volatile T* ptr) {
-  return *ptr;
-}
-
-#ifdef CUDA_HAS_HALF
-template<> inline __device__
-half vFetch<half>(const volatile half* ptr) {
-  half r;
-  r.x = ptr->x;
-  return r;
-}
-#endif
-
-template<typename T> inline __device__
-void vStore(volatile T* ptr, const T val) {
-  *ptr = val;
-}
-
-#ifdef CUDA_HAS_HALF
-template<> inline __device__
-void vStore<half>(volatile half* ptr, const half val) {
-  ptr->x = val.x;
-}
-#endif
-
 // Assumptions:
 // - there is exactly 1 block
 // - THREADS is the number of producer threads