diff options
author | Sylvain Jeaugey <sjeaugey@nvidia.com> | 2016-09-20 21:48:34 +0300 |
---|---|---|
committer | Sylvain Jeaugey <sjeaugey@nvidia.com> | 2016-09-22 21:58:28 +0300 |
commit | 6c77476cc1aeb38a7666550813baaedde3dd32d1 (patch) | |
tree | 19e0db13491c733fecc267a85ece1ae24d14cdbf | |
parent | cabd6848e4c07e73f6db2cf74e3db0c1b7191fa9 (diff) |
Make tests check for deltas and report bandwidth
-rw-r--r-- | test/include/test_utilities.h | 152 | ||||
-rw-r--r-- | test/single/all_gather_test.cu | 19 | ||||
-rw-r--r-- | test/single/all_reduce_test.cu | 22 | ||||
-rw-r--r-- | test/single/broadcast_test.cu | 17 | ||||
-rw-r--r-- | test/single/reduce_scatter_test.cu | 20 | ||||
-rw-r--r-- | test/single/reduce_test.cu | 20 |
6 files changed, 236 insertions, 14 deletions
diff --git a/test/include/test_utilities.h b/test/include/test_utilities.h index 52d7fd0..c194205 100644 --- a/test/include/test_utilities.h +++ b/test/include/test_utilities.h @@ -9,6 +9,7 @@ #define SRC_TEST_UTILITIES_H_ #include <curand.h> +#include <cerrno> #include <string> #define CUDACHECK(cmd) do { \ @@ -135,6 +136,27 @@ void Randomize<half>(half* const dest, const int N, const int randomSeed) { } #endif +void makeRandom(void* ptr, int n, ncclDataType_t type, int seed) { + if (type == ncclChar) + Randomize<char>((char*)ptr, n, seed); + else if (type == ncclInt) + Randomize<int>((int*)ptr, n, seed); +#ifdef CUDA_HAS_HALF + else if (type == ncclHalf) + Randomize<half>((half*)ptr, n, seed); +#endif + else if (type == ncclFloat) + Randomize<float>((float*)ptr, n, seed); + else if (type == ncclDouble) + Randomize<double>((double*)ptr, n, seed); + else if (type == ncclInt64) + Randomize<long long>((long long*)ptr, n, seed); + else if (type == ncclUint64) + Randomize<unsigned long long>((unsigned long long*)ptr, n, seed); + + return; +} + template<typename T, int OP> __global__ static void accumKern(T* acum, const T* contrib, int N) { int tid = threadIdx.x + blockIdx.x*blockDim.x; @@ -201,21 +223,43 @@ void accumKern<half, ncclMin>(half* acum, const half* contrib, int N) { #endif template<typename T> +void accVecType(void* out, void* in, int n, ncclRedOp_t op) { + switch(op) { + case ncclSum: accumKern<T, ncclSum> <<<256,256>>>((T*)out, (T*)in, n); break; + case ncclProd: accumKern<T, ncclProd><<<256,256>>>((T*)out, (T*)in, n); break; + case ncclMax: accumKern<T, ncclMax> <<<256,256>>>((T*)out, (T*)in, n); break; + case ncclMin: accumKern<T, ncclMin> <<<256,256>>>((T*)out, (T*)in, n); break; + default: + printf("Unknown reduction operation.\n"); + exit(EXIT_FAILURE); + } +} + +template<typename T> void Accumulate(T* dest, const T* contrib, int N, ncclRedOp_t op) { T* devdest; CUDACHECK(cudaHostRegister(dest, N*sizeof(T), 0)); CUDACHECK(cudaHostGetDevicePointer(&devdest, dest, 0)); - switch(op) { - case ncclSum: accumKern<T, ncclSum> <<<256,256>>>(devdest, contrib, N); break; - case ncclProd: accumKern<T, ncclProd><<<256,256>>>(devdest, contrib, N); break; - case ncclMax: accumKern<T, ncclMax> <<<256,256>>>(devdest, contrib, N); break; - case ncclMin: accumKern<T, ncclMin> <<<256,256>>>(devdest, contrib, N); break; + accVecType<T>((void*)devdest, (void*)contrib, N, op); + CUDACHECK(cudaHostUnregister(dest)); +} + +void accVec(void* out, void* in, int n, ncclDataType_t type, ncclRedOp_t op) { + switch (type) { + case ncclChar: accVecType<char> (out, in, n, op); break; + case ncclInt: accVecType<int> (out, in, n, op); break; +#ifdef CUDA_HAS_HALF + case ncclHalf: accVecType<half> (out, in, n, op); break; +#endif + case ncclFloat: accVecType<float> (out, in, n, op); break; + case ncclDouble: accVecType<double> (out, in, n, op); break; + case ncclInt64: accVecType<long long> (out, in, n, op); break; + case ncclUint64: accVecType<unsigned long long> (out, in, n, op); break; default: - printf("Unknown reduction operation.\n"); + printf("Unknown reduction type.\n"); exit(EXIT_FAILURE); } - CUDACHECK(cudaHostUnregister(dest)); } template<typename T> __device__ @@ -270,6 +314,22 @@ double CheckDelta(const T* results, const T* expected, int N) { return maxerr; } +void maxDiff(double* max, void* first, void* second, int n, ncclDataType_t type, cudaStream_t s) { + switch (type) { + case ncclChar: deltaKern<char, 512> <<<1,512,0,s>>>((char*)first, (char*)second, n, max); break; + case ncclInt: deltaKern<int, 512> <<<1,512,0,s>>>((int*)first, (int*)second, n, max); break; +#ifdef CUDA_HAS_HALF + case ncclHalf: deltaKern<half, 512> <<<1,512,0,s>>>((half*)first, (half*)second, n, max); break; +#endif + case ncclFloat: deltaKern<float, 512> <<<1,512,0,s>>>((float*)first, (float*)second, n, max); break; + case ncclDouble: deltaKern<double, 512> <<<1,512,0,s>>>((double*)first, (double*)second, n, max); break; + case ncclInt64: deltaKern<long long, 512> <<<1,512,0,s>>>((long long*)first, (long long*)second, n, max); break; + case ncclUint64: deltaKern<unsigned long long, 512><<<1,512,0,s>>>((unsigned long long*)first, (unsigned long long*)second, n, max); break; + default: + printf("Unknown reduction type.\n"); + exit(EXIT_FAILURE); + } +} std::string TypeName(const ncclDataType_t type) { switch (type) { @@ -296,5 +356,83 @@ std::string OperationName(const ncclRedOp_t op) { } } +ncclDataType_t strToType(const char* s) { + if (strcmp(s, "char") == 0) + return ncclChar; + if (strcmp(s, "int") == 0) + return ncclInt; +#ifdef CUDA_HAS_HALF + if (strcmp(s, "half") == 0) + return ncclHalf; +#endif + if (strcmp(s, "float") == 0) + return ncclFloat; + if (strcmp(s, "double") == 0) + return ncclDouble; + if (strcmp(s, "int64") == 0) + return ncclInt64; + if (strcmp(s, "uint64") == 0) + return ncclUint64; + + return nccl_NUM_TYPES; +} + +size_t wordSize(ncclDataType_t type) { + switch(type) { + case ncclChar: return sizeof(char); + case ncclInt: return sizeof(int); +#ifdef CUDA_HAS_HALF + case ncclHalf: return sizeof(short); +#endif + case ncclFloat: return sizeof(float); + case ncclDouble: return sizeof(double); + case ncclInt64: return sizeof(long long); + case ncclUint64: return sizeof(unsigned long long); + } + + return 0; +} + +double deltaMaxValue(ncclDataType_t type, bool is_reduction) { + if (is_reduction) { + switch(type) { +#ifdef CUDA_HAS_HALF + case ncclHalf: return 5e-2; +#endif + case ncclFloat: return 1e-5; + case ncclDouble: return 1e-12; + } + } + return 1e-200; +} + +ncclRedOp_t strToOp(const char* s) { + if (strcmp(s, "sum") == 0) + return ncclSum; + if (strcmp(s, "prod") == 0) + return ncclProd; + if (strcmp(s, "max") == 0) + return ncclMax; + if (strcmp(s, "min") == 0) + return ncclMin; + + return nccl_NUM_OPS; +} + +int strToPosInt(const char* s) { + errno = 0; + long temp = strtol(s, NULL, 10); + if (errno != 0 || temp > INT_MAX || temp < 0) + return 0; + return (int)temp; +} + +int strToNonNeg(const char* s) { + errno = 0; + long temp = strtol(s, NULL, 10); + if (errno != 0 || temp > INT_MAX || temp < 0) + return -1; + return (int)temp; +} #endif // SRC_TEST_UTILITIES_H_ diff --git a/test/single/all_gather_test.cu b/test/single/all_gather_test.cu index 11496e1..ba3841f 100644 --- a/test/single/all_gather_test.cu +++ b/test/single/all_gather_test.cu @@ -13,6 +13,9 @@ #include "nccl.h" #include "test_utilities.h" +int errors = 0; +double min_bw = 10000.0; +bool is_reduction = false; template<typename T> void RunTest(T** sendbuff, T** recvbuff, const int N, const ncclDataType_t type, @@ -84,6 +87,9 @@ void RunTest(T** sendbuff, T** recvbuff, const int N, const ncclDataType_t type, printf(" %7.3f %5.2f %5.2f %7.0le\n", elapsedSec * 1.0E3, algbw, busbw, maxDelta); + + if (maxDelta > deltaMaxValue(type, is_reduction)) errors++; + if (busbw < min_bw) min_bw = busbw; } for (int i = 0; i < nDev; ++i) { @@ -197,7 +203,7 @@ int main(int argc, char* argv[]) { RunTests<char>(N / sizeof(char), ncclChar, comms, dList); RunTests<int>(N / sizeof(int), ncclInt, comms, dList); -#if CUDART_VERSION >= 7050 +#ifdef CUDA_HAS_HALF RunTests<half>(N / sizeof(half), ncclHalf, comms, dList); #endif RunTests<float>(N / sizeof(float), ncclFloat, comms, dList); @@ -211,6 +217,15 @@ int main(int argc, char* argv[]) { ncclCommDestroy(comms[i]); free(comms); - exit(EXIT_SUCCESS); + char* str = getenv("NCCL_TESTS_MIN_BW"); + double check_min_bw = str ? atof(str) : -1; + + printf(" Out of bounds values : %d %s\n", errors, errors ? "FAILED" : "OK"); + printf(" Min bus bandwidth : %g %s\n", min_bw, check_min_bw == -1 ? "" : (min_bw < check_min_bw ? "FAILED" : "OK")); + printf("\n"); + if (errors || min_bw < check_min_bw) + exit(EXIT_FAILURE); + else + exit(EXIT_SUCCESS); } diff --git a/test/single/all_reduce_test.cu b/test/single/all_reduce_test.cu index cebc198..642be80 100644 --- a/test/single/all_reduce_test.cu +++ b/test/single/all_reduce_test.cu @@ -15,6 +15,9 @@ #include <nvToolsExt.h> int csv = false; +int errors = 0; +double min_bw = 10000.0; +bool is_reduction = true; template<typename T> void RunTest(T** sendbuff, T** recvbuff, const int N, const ncclDataType_t type, @@ -95,6 +98,9 @@ void RunTest(T** sendbuff, T** recvbuff, const int N, const ncclDataType_t type, printf((csv)?"%f,%f,%f,%le,":" %7.3f %5.2f %5.2f %7.0le", elapsedSec * 1.0E3, algbw, busbw, maxDelta); + if (maxDelta > deltaMaxValue(type, is_reduction)) errors++; + if (busbw < min_bw) min_bw = busbw; + nvtxRangePop(); } @@ -138,6 +144,9 @@ void RunTest(T** sendbuff, T** recvbuff, const int N, const ncclDataType_t type, printf((csv)?"%f,%f,%f,%le,":" %7.3f %5.2f %5.2f %7.0le\n", elapsedSec * 1.0E3, algbw, busbw, maxDelta); + if (maxDelta > deltaMaxValue(type, is_reduction)) errors++; + if (busbw < min_bw) min_bw = busbw; + nvtxRangePop(); } @@ -260,7 +269,7 @@ int main(int argc, char* argv[]) { RunTests<char>(N / sizeof(char), ncclChar, comms, dList); RunTests<int>(N / sizeof(int), ncclInt, comms, dList); -#if CUDART_VERSION >= 7050 +#ifdef CUDA_HAS_HALF RunTests<half>(N / sizeof(half), ncclHalf, comms, dList); #endif RunTests<float>(N / sizeof(float), ncclFloat, comms, dList); @@ -274,6 +283,15 @@ int main(int argc, char* argv[]) { ncclCommDestroy(comms[i]); free(comms); - exit(EXIT_SUCCESS); + char* str = getenv("NCCL_TESTS_MIN_BW"); + double check_min_bw = str ? atof(str) : -1; + + printf(" Out of bounds values : %d %s\n", errors, errors ? "FAILED" : "OK"); + printf(" Min bus bandwidth : %g %s\n", min_bw, check_min_bw == -1 ? "" : (min_bw < check_min_bw ? "FAILED" : "OK")); + printf("\n"); + if (errors || min_bw < check_min_bw) + exit(EXIT_FAILURE); + else + exit(EXIT_SUCCESS); } diff --git a/test/single/broadcast_test.cu b/test/single/broadcast_test.cu index 4955f07..30afebd 100644 --- a/test/single/broadcast_test.cu +++ b/test/single/broadcast_test.cu @@ -13,6 +13,9 @@ #include "nccl.h" #include "test_utilities.h" +int errors = 0; +double min_bw = 10000.0; +bool is_reduction = false; template<typename T> void RunTest(T** buff, const int N, const ncclDataType_t type, const int root, @@ -86,6 +89,9 @@ void RunTest(T** buff, const int N, const ncclDataType_t type, const int root, printf(" %7.3f %5.2f %5.2f %7.0le\n", elapsedSec * 1.0E3, algbw, busbw, maxDelta); + + if (maxDelta > deltaMaxValue(type, is_reduction)) errors++; + if (busbw < min_bw) min_bw = busbw; } for(int i=0; i < nDev; ++i) { @@ -211,6 +217,15 @@ int main(int argc, char* argv[]) { ncclCommDestroy(comms[i]); free(comms); - exit(EXIT_SUCCESS); + char* str = getenv("NCCL_TESTS_MIN_BW"); + double check_min_bw = str ? atof(str) : -1; + + printf(" Out of bounds values : %d %s\n", errors, errors ? "FAILED" : "OK"); + printf(" Min bus bandwidth : %g %s\n", min_bw, check_min_bw == -1 ? "" : (min_bw < check_min_bw ? "FAILED" : "OK")); + printf("\n"); + if (errors || min_bw < check_min_bw) + exit(EXIT_FAILURE); + else + exit(EXIT_SUCCESS); } diff --git a/test/single/reduce_scatter_test.cu b/test/single/reduce_scatter_test.cu index e6a56fe..81f3004 100644 --- a/test/single/reduce_scatter_test.cu +++ b/test/single/reduce_scatter_test.cu @@ -13,6 +13,9 @@ #include "nccl.h" #include "test_utilities.h" +int errors = 0; +double min_bw = 10000.0; +bool is_reduction = true; template<typename T> void RunTest(T** sendbuff, T** recvbuff, const int N, const ncclDataType_t type, @@ -90,6 +93,9 @@ void RunTest(T** sendbuff, T** recvbuff, const int N, const ncclDataType_t type, printf(" %7.3f %5.2f %5.2f %7.0le", elapsedSec * 1.0E3, algbw, busbw, maxDelta); + + if (maxDelta > deltaMaxValue(type, is_reduction)) errors++; + if (busbw < min_bw) min_bw = busbw; } { @@ -126,6 +132,9 @@ void RunTest(T** sendbuff, T** recvbuff, const int N, const ncclDataType_t type, printf(" %7.3f %5.2f %5.2f %7.0le\n", elapsedSec * 1.0E3, algbw, busbw, maxDelta); + + if (maxDelta > deltaMaxValue(type, is_reduction)) errors++; + if (busbw < min_bw) min_bw = busbw; } for (int i = 0; i < nDev; ++i) { @@ -258,6 +267,15 @@ int main(int argc, char* argv[]) { ncclCommDestroy(comms[i]); free(comms); - exit(EXIT_SUCCESS); + char* str = getenv("NCCL_TESTS_MIN_BW"); + double check_min_bw = str ? atof(str) : -1; + + printf(" Out of bounds values : %d %s\n", errors, errors ? "FAILED" : "OK"); + printf(" Min bus bandwidth : %g %s\n", min_bw, check_min_bw == -1 ? "" : (min_bw < check_min_bw ? "FAILED" : "OK")); + printf("\n"); + if (errors || min_bw < check_min_bw) + exit(EXIT_FAILURE); + else + exit(EXIT_SUCCESS); } diff --git a/test/single/reduce_test.cu b/test/single/reduce_test.cu index dbe99c0..aa0d20f 100644 --- a/test/single/reduce_test.cu +++ b/test/single/reduce_test.cu @@ -15,6 +15,9 @@ #include <nvToolsExt.h> int csv = false; +int errors = 0; +double min_bw = 10000.0; +bool is_reduction = true; template<typename T> void RunTest(T** sendbuff, T** recvbuff, const int N, const ncclDataType_t type, @@ -94,6 +97,9 @@ void RunTest(T** sendbuff, T** recvbuff, const int N, const ncclDataType_t type, printf((csv)?"%f,%f,%f,%le,":" %7.3f %5.2f %5.2f %7.0le", elapsedSec * 1.0E3, algbw, busbw, maxDelta); + if (maxDelta > deltaMaxValue(type, is_reduction)) errors++; + if (busbw < min_bw) min_bw = busbw; + nvtxRangePop(); } @@ -133,6 +139,9 @@ void RunTest(T** sendbuff, T** recvbuff, const int N, const ncclDataType_t type, printf((csv)?"%f,%f,%f,%le,":" %7.3f %5.2f %5.2f %7.0le\n", elapsedSec * 1.0E3, algbw, busbw, maxDelta); + if (maxDelta > deltaMaxValue(type, is_reduction)) errors++; + if (busbw < min_bw) min_bw = busbw; + nvtxRangePop(); } @@ -272,6 +281,15 @@ int main(int argc, char* argv[]) { ncclCommDestroy(comms[i]); free(comms); - exit(EXIT_SUCCESS); + char* str = getenv("NCCL_TESTS_MIN_BW"); + double check_min_bw = str ? atof(str) : -1; + + printf(" Out of bounds values : %d %s\n", errors, errors ? "FAILED" : "OK"); + printf(" Min bus bandwidth : %g %s\n", min_bw, check_min_bw == -1 ? "" : (min_bw < check_min_bw ? "FAILED" : "OK")); + printf("\n"); + if (errors || min_bw < check_min_bw) + exit(EXIT_FAILURE); + else + exit(EXIT_SUCCESS); } |