diff options
author | Simon Layton <slayton58@gmail.com> | 2015-12-04 00:35:54 +0300 |
---|---|---|
committer | Simon Layton <slayton58@gmail.com> | 2015-12-04 21:28:36 +0300 |
commit | 41ce4ca9fc748186190e63823b4f1cf5c365b220 (patch) | |
tree | 4f2ea5b962240bcdc20c4d0a56982728c907f750 /src | |
parent | 27d32ac5d93f86bfb406551fda0ea3edeafdb199 (diff) |
Add int64 and uint64 types for all algorithms and tests
Diffstat (limited to 'src')
-rw-r--r-- | src/all_gather.cu | 6 | ||||
-rw-r--r-- | src/all_gather_test.cu | 2 | ||||
-rw-r--r-- | src/all_reduce.cu | 6 | ||||
-rw-r--r-- | src/all_reduce_test.cu | 2 | ||||
-rw-r--r-- | src/broadcast.cu | 4 | ||||
-rw-r--r-- | src/broadcast_test.cu | 2 | ||||
-rw-r--r-- | src/common_kernel.h | 20 | ||||
-rw-r--r-- | src/nccl.h | 4 | ||||
-rw-r--r-- | src/reduce.cu | 4 | ||||
-rw-r--r-- | src/reduce_scatter.cu | 6 | ||||
-rw-r--r-- | src/reduce_scatter_test.cu | 2 | ||||
-rw-r--r-- | src/reduce_test.cu | 2 | ||||
-rw-r--r-- | src/test_utilities.h | 26 |
13 files changed, 85 insertions, 1 deletions
diff --git a/src/all_gather.cu b/src/all_gather.cu index a83385f..0f90efd 100644 --- a/src/all_gather.cu +++ b/src/all_gather.cu @@ -477,6 +477,12 @@ public: case ncclDouble: return ncclAllGatherWithType<double>(sendbuff, recvbuff, count, comm, numUnroll, stream); + case ncclInt64: + return ncclAllGatherWithType<long long>(sendbuff, recvbuff, count, comm, + numUnroll, stream); + case ncclUint64: + return ncclAllGatherWithType<unsigned long long>(sendbuff, recvbuff, count, comm, + numUnroll, stream); } return ncclInvalidType; } diff --git a/src/all_gather_test.cu b/src/all_gather_test.cu index a928806..a9e1c1e 100644 --- a/src/all_gather_test.cu +++ b/src/all_gather_test.cu @@ -224,6 +224,8 @@ int main(int argc, char* argv[]) { #endif RunTests<float>(N / sizeof(float), ncclFloat, comms, dList); RunTests<double>(N / sizeof(double), ncclDouble, comms, dList); + RunTests<long long>(N / sizeof(long long), ncclInt64, comms, dList); + RunTests<unsigned long long>(N / sizeof(unsigned long long), ncclUint64, comms, dList); printf("\n"); diff --git a/src/all_reduce.cu b/src/all_reduce.cu index cf84de0..670d45c 100644 --- a/src/all_reduce.cu +++ b/src/all_reduce.cu @@ -489,6 +489,12 @@ public: case ncclDouble: return ncclAllReduceWithType<double>(sendbuff, recvbuff, count, op, comm, stream); + case ncclInt64: + return ncclAllReduceWithType<long long>(sendbuff, recvbuff, count, op, + comm, stream); + case ncclUint64: + return ncclAllReduceWithType<unsigned long long int>(sendbuff, recvbuff, count, op, + comm, stream); } return ncclInvalidType; diff --git a/src/all_reduce_test.cu b/src/all_reduce_test.cu index aa18697..f46bd48 100644 --- a/src/all_reduce_test.cu +++ b/src/all_reduce_test.cu @@ -287,6 +287,8 @@ int main(int argc, char* argv[]) { #endif RunTests<float>(N / sizeof(float), ncclFloat, comms, dList); RunTests<double>(N / sizeof(double), ncclDouble, comms, dList); + RunTests<long long>(N / sizeof(long long), ncclInt64, comms, dList); + RunTests<unsigned long long>(N / sizeof(unsigned long long), ncclUint64, comms, dList); printf("\n"); diff --git a/src/broadcast.cu b/src/broadcast.cu index cde9c9e..c3e4c20 100644 --- a/src/broadcast.cu +++ b/src/broadcast.cu @@ -396,6 +396,10 @@ public: return ncclBcastWithType<float>(buff, count, root, comm, numUnroll, stream); case ncclDouble: return ncclBcastWithType<double>(buff, count, root, comm, numUnroll, stream); + case ncclInt64: + return ncclBcastWithType<long long>(buff, count, root, comm, numUnroll, stream); + case ncclUint64: + return ncclBcastWithType<unsigned long long>(buff, count, root, comm, numUnroll, stream); } return ncclInvalidType; } diff --git a/src/broadcast_test.cu b/src/broadcast_test.cu index 344ca7f..9c85a1f 100644 --- a/src/broadcast_test.cu +++ b/src/broadcast_test.cu @@ -224,6 +224,8 @@ int main(int argc, char* argv[]) { #endif RunTests<float>(N / sizeof(float), ncclFloat, comms, dList); RunTests<double>(N / sizeof(double), ncclDouble, comms, dList); + RunTests<long long>(N / sizeof(long long), ncclInt64, comms, dList); + RunTests<unsigned long long>(N / sizeof(unsigned long long), ncclUint64, comms, dList); printf("\n"); diff --git a/src/common_kernel.h b/src/common_kernel.h index 5b6770a..e30bf5c 100644 --- a/src/common_kernel.h +++ b/src/common_kernel.h @@ -174,6 +174,26 @@ struct MULTI<FUNC, double> { } }; +template<class FUNC> +struct MULTI<FUNC, unsigned long long> { + static_assert(sizeof(PackType) == sizeof(unsigned long long), + "PackType must be the same size as unsigned long long."); + __device__ PackType operator()(const PackType x, const PackType y) const { + unsigned long long rv = FUNC()(x, y); + return rv; + } +}; + +template<class FUNC> +struct MULTI<FUNC, long long> { + static_assert(sizeof(PackType) == sizeof(long long), + "PackType must be the same size as long long."); + __device__ PackType operator()(const PackType x, const PackType y) const { + long long rv = FUNC()((long long)x, (long long)y); + return rv; + } +}; + template<typename T, bool FETCHTWO> __device__ inline void FetchOneOrTwo64b(PackType& s0, const volatile T * __restrict__ const src0, PackType& s1, @@ -117,7 +117,9 @@ typedef enum { ncclChar = 0, #endif ncclFloat = 3, ncclDouble = 4, - nccl_NUM_TYPES = 5 } ncclDataType_t; + ncclInt64 = 5, + ncclUint64 = 6, + nccl_NUM_TYPES = 7 } ncclDataType_t; /* Reduces data arrays of length count in sendbuff into recvbuf using op operation. * recvbuf may be NULL on all calls except for root device. diff --git a/src/reduce.cu b/src/reduce.cu index 2863e2a..6752d24 100644 --- a/src/reduce.cu +++ b/src/reduce.cu @@ -393,6 +393,10 @@ public: return ncclReduceWithType<float>(sendbuff, recvbuff, count, op, root, comm, stream); case ncclDouble: return ncclReduceWithType<double>(sendbuff, recvbuff, count, op, root, comm, stream); + case ncclInt64: + return ncclReduceWithType<long long>(sendbuff, recvbuff, count, op, root, comm, stream); + case ncclUint64: + return ncclReduceWithType<unsigned long long>(sendbuff, recvbuff, count, op, root, comm, stream); } return ncclInvalidType; } diff --git a/src/reduce_scatter.cu b/src/reduce_scatter.cu index 3419caa..e1860c5 100644 --- a/src/reduce_scatter.cu +++ b/src/reduce_scatter.cu @@ -474,6 +474,12 @@ public: case ncclDouble: return ncclReduceScatterWithType<double>(sendbuff, recvbuff, recvcount, op, comm, stream); + case ncclInt64: + return ncclReduceScatterWithType<long long>(sendbuff, recvbuff, recvcount, + op, comm, stream); + case ncclUint64: + return ncclReduceScatterWithType<unsigned long long>(sendbuff, recvbuff, recvcount, + op, comm, stream); } return ncclInvalidType; } diff --git a/src/reduce_scatter_test.cu b/src/reduce_scatter_test.cu index c1c87be..da205d5 100644 --- a/src/reduce_scatter_test.cu +++ b/src/reduce_scatter_test.cu @@ -271,6 +271,8 @@ int main(int argc, char* argv[]) { #endif RunTests<float>(N / sizeof(float), ncclFloat, comms, dList); RunTests<double>(N / sizeof(double), ncclDouble, comms, dList); + RunTests<long long>(N / sizeof(long long), ncclInt64, comms, dList); + RunTests<unsigned long long>(N / sizeof(unsigned long long), ncclUint64, comms, dList); printf("\n"); diff --git a/src/reduce_test.cu b/src/reduce_test.cu index fc06225..ce17e32 100644 --- a/src/reduce_test.cu +++ b/src/reduce_test.cu @@ -285,6 +285,8 @@ int main(int argc, char* argv[]) { #endif RunTests<float>(N / sizeof(float), ncclFloat, comms, dList); RunTests<double>(N / sizeof(double), ncclDouble, comms, dList); + RunTests<long long>(N / sizeof(long long), ncclInt64, comms, dList); + RunTests<unsigned long long>(N / sizeof(unsigned long long), ncclUint64, comms, dList); printf("\n"); diff --git a/src/test_utilities.h b/src/test_utilities.h index ecf760c..a5d3661 100644 --- a/src/test_utilities.h +++ b/src/test_utilities.h @@ -89,6 +89,12 @@ void GenerateRandom<double>(curandGenerator_t generator, double * const dest, CURAND_CHK(curandGenerateUniformDouble(generator, dest, N)); } +template<> +void GenerateRandom<unsigned long long>(curandGenerator_t generator, unsigned long long * const dest, + const int N) { + CURAND_CHK(curandGenerateLongLong(generator, dest, N)); +} + template<typename T> void Randomize(T* const dest, const int N, const int randomSeed) { @@ -100,6 +106,24 @@ void Randomize(T* const dest, const int N, const int randomSeed) { CUDACHECK(cudaDeviceSynchronize()); } +template<> +void Randomize(unsigned long long* const dest, const int N, const int randomSeed) { + curandGenerator_t gen; + CURAND_CHK(curandCreateGenerator(&gen, CURAND_RNG_QUASI_SOBOL64)); + GenerateRandom<unsigned long long>(gen, dest, N); + CURAND_CHK(curandDestroyGenerator(gen)); + CUDACHECK(cudaDeviceSynchronize()); +} + +template<> +void Randomize(long long* const dest, const int N, const int randomSeed) { + curandGenerator_t gen; + CURAND_CHK(curandCreateGenerator(&gen, CURAND_RNG_QUASI_SOBOL64)); + GenerateRandom<unsigned long long>(gen, (unsigned long long *)dest, N); + CURAND_CHK(curandDestroyGenerator(gen)); + CUDACHECK(cudaDeviceSynchronize()); +} + #ifdef CUDA_HAS_HALF __global__ void halve(const float * src, half* dest, int N) { for(int tid = threadIdx.x + blockIdx.x*blockDim.x; @@ -268,6 +292,8 @@ std::string TypeName(const ncclDataType_t type) { #endif case ncclFloat: return "float"; case ncclDouble: return "double"; + case ncclInt64: return "int64"; + case ncclUint64: return "uint64"; default: return "unknown"; } } |