
github.com/marian-nmt/nccl.git
author     Nathan Luehr <nluehr@nvidia.com>       2016-01-22 03:30:05 +0300
committer  Przemek Tredak <ptredak@nvidia.com>    2016-01-29 22:09:05 +0300
commit     27583533805dde8fd5e4514d25531b8347fd985b (patch)
tree       1adb7db3bc2efdd11f85a7515f5b8c9b1e8d6cf7 /test
parent     fe1a9567155c8965425f884c552496ebd960ff42 (diff)
Added NCCL error checking to tests.
Also cleaned up makefile so that tests and lib are not built unnecessarily.

Change-Id: Ia0c596cc2213628de2f066be97615c09bb1bb262
Reviewed-on: http://git-master/r/999627
Reviewed-by: Przemek Tredak <ptredak@nvidia.com>
Tested-by: Przemek Tredak <ptredak@nvidia.com>
Diffstat (limited to 'test')
-rw-r--r--  test/include/test_utilities.h      | 25
-rw-r--r--  test/mpi/mpi_test.cu               | 18
-rw-r--r--  test/single/all_gather_test.cu     | 20
-rw-r--r--  test/single/all_reduce_test.cu     | 20
-rw-r--r--  test/single/broadcast_test.cu      | 16
-rw-r--r--  test/single/reduce_scatter_test.cu | 24
-rw-r--r--  test/single/reduce_test.cu         | 22
7 files changed, 73 insertions, 72 deletions
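
With the header change below, every NCCL call in the single-process tests can be wrapped the same way CUDA calls already are: NCCLCHECK evaluates the call once and, on any result other than ncclSuccess, prints the file, line, and ncclGetErrorString() before exiting. A minimal sketch of that pattern outside the test harness follows; the device count, element count, and buffer names are illustrative and not taken from this diff.

#include <cstdio>
#include <cstdlib>
#include <cuda_runtime.h>
#include "nccl.h"
#include "test_utilities.h"   // CUDACHECK and NCCLCHECK from the header changed below

int main() {
  const int nDev = 2;               // illustrative device count
  const int N = 1024;               // illustrative element count
  int devList[nDev] = {0, 1};

  ncclComm_t comms[nDev];
  NCCLCHECK(ncclCommInitAll(comms, nDev, devList));

  float* sendbuff[nDev];
  float* recvbuff[nDev];
  cudaStream_t streams[nDev];
  for (int i = 0; i < nDev; ++i) {
    CUDACHECK(cudaSetDevice(devList[i]));
    CUDACHECK(cudaMalloc((void**)&sendbuff[i], N * sizeof(float)));
    CUDACHECK(cudaMalloc((void**)&recvbuff[i], N * sizeof(float)));
    CUDACHECK(cudaStreamCreate(&streams[i]));
  }

  // A failing collective now reports file:line and the NCCL error string
  // instead of being silently ignored.
  for (int i = 0; i < nDev; ++i) {
    CUDACHECK(cudaSetDevice(devList[i]));
    NCCLCHECK(ncclAllReduce((const void*)sendbuff[i], (void*)recvbuff[i], N,
                            ncclFloat, ncclSum, comms[i], streams[i]));
  }
  for (int i = 0; i < nDev; ++i) {
    CUDACHECK(cudaSetDevice(devList[i]));
    CUDACHECK(cudaStreamSynchronize(streams[i]));
  }

  for (int i = 0; i < nDev; ++i)
    ncclCommDestroy(comms[i]);   // returns void in this NCCL version, so no NCCLCHECK
  return 0;
}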
diff --git a/test/include/test_utilities.h b/test/include/test_utilities.h
index c929a9e..fb34d19 100644
--- a/test/include/test_utilities.h
+++ b/test/include/test_utilities.h
@@ -32,14 +32,23 @@
#include <curand.h>
-#define CUDACHECK(cmd) do { \
- cudaError_t e = cmd; \
- if( e != cudaSuccess ) { \
- printf("Cuda failure %s:%d '%s'\n", \
- __FILE__,__LINE__,cudaGetErrorString(e)); \
- exit(EXIT_FAILURE); \
- } \
-} while(false)
+#define CUDACHECK(cmd) do { \
+ cudaError_t e = cmd; \
+ if( e != cudaSuccess ) { \
+ printf("Cuda failure %s:%d '%s'\n", \
+ __FILE__,__LINE__,cudaGetErrorString(e)); \
+ exit(EXIT_FAILURE); \
+ } \
+} while(0)
+
+#define NCCLCHECK(cmd) do { \
+ ncclResult_t r = cmd; \
+ if (r!= ncclSuccess) { \
+ printf("NCCL failure %s:%d '%s'\n", \
+ __FILE__,__LINE__,ncclGetErrorString(r)); \
+ exit(EXIT_FAILURE); \
+ } \
+} while(0)
template<typename T>
void Randomize(T* const dest, const int N, const int randomSeed);
diff --git a/test/mpi/mpi_test.cu b/test/mpi/mpi_test.cu
index 87465e5..54ebbce 100644
--- a/test/mpi/mpi_test.cu
+++ b/test/mpi/mpi_test.cu
@@ -32,15 +32,7 @@
#include "nccl.h"
#include "mpi.h"
-
-#define CUDACHECK(cmd) do { \
- cudaError_t e = cmd; \
- if( e != cudaSuccess ) { \
- printf("Cuda failure %s:%d '%s'\n", \
- __FILE__,__LINE__,cudaGetErrorString(e)); \
- exit(EXIT_FAILURE); \
- } \
-} while(false)
+#include "test_utilities.h"
#define SIZE 128
#define NITERS 1
@@ -48,7 +40,7 @@
int main(int argc, char *argv[]) {
ncclUniqueId commId;
int size, rank;
- int ret;
+ ncclResult_t ret;
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &size);
@@ -66,11 +58,11 @@ int main(int argc, char *argv[]) {
// NCCL Communicator creation
ncclComm_t comm;
- ncclGetUniqueId(&commId);
+ NCCLCHECK(ncclGetUniqueId(&commId));
MPI_Bcast(&commId, NCCL_UNIQUE_ID_BYTES, MPI_CHAR, 0, MPI_COMM_WORLD);
ret = ncclCommInitRank(&comm, size, commId, rank);
if (ret != ncclSuccess) {
- printf("NCCL Init failed : %d\n", ret);
+ printf("NCCL Init failed (%d) '%s'\n", ret, ncclGetErrorString(ret));
exit(1);
}
@@ -93,7 +85,7 @@ int main(int argc, char *argv[]) {
// Run allreduce
int errors = 0;
for (int i=0; i<NITERS; i++) {
- ncclAllReduce((const void*)dptr, (void*)(dptr+SIZE), SIZE, ncclInt, ncclSum, comm, stream);
+ NCCLCHECK(ncclAllReduce((const void*)dptr, (void*)(dptr+SIZE), SIZE, ncclInt, ncclSum, comm, stream));
}
// Check results
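
The MPI test now includes the shared test_utilities.h instead of carrying its own copy of CUDACHECK, and keeps an explicit check only around ncclCommInitRank so the failure can be reported with ncclGetErrorString() before exiting. A condensed sketch of that bootstrap is below; buffer allocation and the result verification are elided, so this is illustrative rather than the full mpi_test.cu.

#include <cstdio>
#include <cstdlib>
#include "mpi.h"
#include "nccl.h"
#include "test_utilities.h"   // NCCLCHECK / CUDACHECK from the shared header

int main(int argc, char* argv[]) {
  int size, rank;
  MPI_Init(&argc, &argv);
  MPI_Comm_size(MPI_COMM_WORLD, &size);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);

  // Every rank generates an id; the broadcast then overwrites it with rank 0's copy.
  ncclUniqueId commId;
  NCCLCHECK(ncclGetUniqueId(&commId));
  MPI_Bcast(&commId, NCCL_UNIQUE_ID_BYTES, MPI_CHAR, 0, MPI_COMM_WORLD);

  // Kept as an explicit check (rather than NCCLCHECK) so init failures report the error string.
  ncclComm_t comm;
  ncclResult_t ret = ncclCommInitRank(&comm, size, commId, rank);
  if (ret != ncclSuccess) {
    printf("NCCL Init failed (%d) '%s'\n", ret, ncclGetErrorString(ret));
    exit(1);
  }

  /* ... allocate device buffers and run NCCLCHECK(ncclAllReduce(...)) as in the hunks above ... */

  ncclCommDestroy(comm);
  MPI_Finalize();
  return 0;
}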
diff --git a/test/single/all_gather_test.cu b/test/single/all_gather_test.cu
index a9e1c1e..0925b7e 100644
--- a/test/single/all_gather_test.cu
+++ b/test/single/all_gather_test.cu
@@ -1,5 +1,5 @@
/*************************************************************************
- * Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -41,7 +41,7 @@ void RunTest(T** sendbuff, T** recvbuff, const int N, const ncclDataType_t type,
ncclComm_t* const comms, const std::vector<int>& dList) {
// initialize data
int nDev = 0;
- ncclCommCount(comms[0], &nDev);
+ NCCLCHECK(ncclCommCount(comms[0], &nDev));
cudaStream_t* s = (cudaStream_t*)malloc(sizeof(cudaStream_t)*nDev);
T* buffer = (T*)malloc(nDev * N * sizeof(T));
T* result = (T*)malloc(nDev * N * sizeof(T));
@@ -61,8 +61,8 @@ void RunTest(T** sendbuff, T** recvbuff, const int N, const ncclDataType_t type,
// warm up GPU
for (int i = 0; i < nDev; ++i) {
CUDACHECK(cudaSetDevice(dList[i]));
- ncclAllGather((const void*)sendbuff[i], std::min(32 * 1024, N), type,
- (void*)recvbuff[i], comms[i], s[i]);
+ NCCLCHECK(ncclAllGather((const void*)sendbuff[i], std::min(32 * 1024, N), type,
+ (void*)recvbuff[i], comms[i], s[i]));
}
for (int i = 0; i < nDev; ++i) {
@@ -79,8 +79,8 @@ void RunTest(T** sendbuff, T** recvbuff, const int N, const ncclDataType_t type,
for (int i = 0; i < nDev; ++i) {
CUDACHECK(cudaSetDevice(dList[i]));
- ncclAllGather((const void*)sendbuff[i], n, type, (void*)recvbuff[i], comms[i],
- s[i]);
+ NCCLCHECK(ncclAllGather((const void*)sendbuff[i], n, type, (void*)recvbuff[i], comms[i],
+ s[i]));
}
for (int i = 0; i < nDev; ++i) {
@@ -121,7 +121,7 @@ template<typename T>
void RunTests(const int N, const ncclDataType_t type, ncclComm_t* const comms,
const std::vector<int>& dList) {
int nDev = 0;
- ncclCommCount(comms[0], &nDev);
+ NCCLCHECK(ncclCommCount(comms[0], &nDev));
T** sendbuff = (T**)malloc(nDev * sizeof(T*));
T** recvbuff = (T**)malloc(nDev * sizeof(T*));
@@ -199,15 +199,15 @@ int main(int argc, char* argv[]) {
}
ncclComm_t* comms = (ncclComm_t*)malloc(sizeof(ncclComm_t)*nDev);
- ncclCommInitAll(comms, nDev, dList.data());
+ NCCLCHECK(ncclCommInitAll(comms, nDev, dList.data()));
printf("# Using devices\n");
for (int g=0; g<nDev; ++g) {
int cudaDev;
int rank;
cudaDeviceProp prop;
- ncclCommCuDevice(comms[g], &cudaDev);
- ncclCommUserRank(comms[g], &rank);
+ NCCLCHECK(ncclCommCuDevice(comms[g], &cudaDev));
+ NCCLCHECK(ncclCommUserRank(comms[g], &rank));
CUDACHECK(cudaGetDeviceProperties(&prop, cudaDev));
printf("# Rank %2d uses device %2d [0x%02x] %s\n", rank, cudaDev,
prop.pciBusID, prop.name);
diff --git a/test/single/all_reduce_test.cu b/test/single/all_reduce_test.cu
index a2fcb3d..2e14335 100644
--- a/test/single/all_reduce_test.cu
+++ b/test/single/all_reduce_test.cu
@@ -48,7 +48,7 @@ void RunTest(T** sendbuff, T** recvbuff, const int N, const ncclDataType_t type,
memset(result, 0, N * sizeof(T));
int nDev = 0;
- ncclCommCount(comms[0], &nDev);
+ NCCLCHECK(ncclCommCount(comms[0], &nDev));
cudaStream_t* s = (cudaStream_t*)malloc(sizeof(cudaStream_t)*nDev);
for (int i = 0; i < nDev; ++i) {
@@ -66,7 +66,7 @@ void RunTest(T** sendbuff, T** recvbuff, const int N, const ncclDataType_t type,
// warm up GPU
for (int i = 0; i < nDev; ++i) {
CUDACHECK(cudaSetDevice(dList[i]));
- ncclAllReduce((const void*)sendbuff[i], (void*)recvbuff[i], std::min(N, 1024 * 1024), type, op, comms[i], s[i]);
+ NCCLCHECK(ncclAllReduce((const void*)sendbuff[i], (void*)recvbuff[i], std::min(N, 1024 * 1024), type, op, comms[i], s[i]));
}
for (int i = 0; i < nDev; ++i) {
@@ -87,8 +87,8 @@ void RunTest(T** sendbuff, T** recvbuff, const int N, const ncclDataType_t type,
//for (int i=0; i<100; i++) {
for (int i = 0; i < nDev; ++i) {
CUDACHECK(cudaSetDevice(dList[i]));
- ncclAllReduce((const void*)sendbuff[i], (void*)recvbuff[i], n, type, op,
- comms[i], s[i]);
+ NCCLCHECK(ncclAllReduce((const void*)sendbuff[i], (void*)recvbuff[i], n, type, op,
+ comms[i], s[i]));
}
//}
@@ -130,8 +130,8 @@ void RunTest(T** sendbuff, T** recvbuff, const int N, const ncclDataType_t type,
//for (int i=0; i<100; i++) {
for (int i = 0; i < nDev; ++i) {
CUDACHECK(cudaSetDevice(dList[i]));
- ncclAllReduce((const void*)sendbuff[i], (void*)sendbuff[i], n, type, op,
- comms[i], s[i]);
+ NCCLCHECK(ncclAllReduce((const void*)sendbuff[i], (void*)sendbuff[i], n, type, op,
+ comms[i], s[i]));
}
//}
@@ -176,7 +176,7 @@ template<typename T>
void RunTests(const int N, const ncclDataType_t type, ncclComm_t* comms,
const std::vector<int>& dList) {
int nDev = 0;
- ncclCommCount(comms[0], &nDev);
+ NCCLCHECK(ncclCommCount(comms[0], &nDev));
T** sendbuff = (T**)malloc(nDev * sizeof(T*));
T** recvbuff = (T**)malloc(nDev * sizeof(T*));
@@ -256,7 +256,7 @@ int main(int argc, char* argv[]) {
}
ncclComm_t* comms = (ncclComm_t*)malloc(sizeof(ncclComm_t)*nDev);
- ncclCommInitAll(comms, nDev, dList.data());
+ NCCLCHECK(ncclCommInitAll(comms, nDev, dList.data()));
if (!csv) {
printf("# Using devices\n");
@@ -264,8 +264,8 @@ int main(int argc, char* argv[]) {
int cudaDev;
int rank;
cudaDeviceProp prop;
- ncclCommCuDevice(comms[g], &cudaDev);
- ncclCommUserRank(comms[g], &rank);
+ NCCLCHECK(ncclCommCuDevice(comms[g], &cudaDev));
+ NCCLCHECK(ncclCommUserRank(comms[g], &rank));
CUDACHECK(cudaGetDeviceProperties(&prop, cudaDev));
printf("# Rank %2d uses device %2d [0x%02x] %s\n", rank, cudaDev,
prop.pciBusID, prop.name);
diff --git a/test/single/broadcast_test.cu b/test/single/broadcast_test.cu
index 9c85a1f..9801d04 100644
--- a/test/single/broadcast_test.cu
+++ b/test/single/broadcast_test.cu
@@ -1,5 +1,5 @@
/*************************************************************************
- * Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -41,7 +41,7 @@ void RunTest(T** buff, const int N, const ncclDataType_t type, const int root,
ncclComm_t* const comms, const std::vector<int>& dList) {
// initialize data
int nDev = 0;
- ncclCommCount(comms[0], &nDev);
+ NCCLCHECK(ncclCommCount(comms[0], &nDev));
cudaStream_t* s = (cudaStream_t*)malloc(sizeof(cudaStream_t)*nDev);
T* buffer = (T*)malloc(N * sizeof(T));
T* result = (T*)malloc(N * sizeof(T));
@@ -65,7 +65,7 @@ void RunTest(T** buff, const int N, const ncclDataType_t type, const int root,
// warm up GPU
for (int i = 0; i < nDev; ++i) {
CUDACHECK(cudaSetDevice(dList[i]));
- ncclBcast((void*)buff[i], std::min(32 * 1024, N), type, root, comms[i], s[i]);
+ NCCLCHECK(ncclBcast((void*)buff[i], std::min(32 * 1024, N), type, root, comms[i], s[i]));
}
for (int i = 0; i < nDev; ++i) {
@@ -83,7 +83,7 @@ void RunTest(T** buff, const int N, const ncclDataType_t type, const int root,
for (int i = 0; i < nDev; ++i) {
CUDACHECK(cudaSetDevice(dList[i]));
- ncclBcast((void*)buff[i], n, type, root, comms[i], s[i]);
+ NCCLCHECK(ncclBcast((void*)buff[i], n, type, root, comms[i], s[i]));
}
for (int i = 0; i < nDev; ++i) {
@@ -123,7 +123,7 @@ template<typename T>
void RunTests(const int N, const ncclDataType_t type, ncclComm_t* const comms,
const std::vector<int>& dList) {
int nDev = 0;
- ncclCommCount(comms[0], &nDev);
+ NCCLCHECK(ncclCommCount(comms[0], &nDev));
T** buff = (T**)malloc(nDev * sizeof(T*));
for (int i = 0; i < nDev; ++i) {
@@ -199,15 +199,15 @@ int main(int argc, char* argv[]) {
}
ncclComm_t* comms = (ncclComm_t*)malloc(sizeof(ncclComm_t)*nDev);;
- ncclCommInitAll(comms, nDev, dList.data());
+ NCCLCHECK(ncclCommInitAll(comms, nDev, dList.data()));
printf("# Using devices\n");
for (int g = 0; g < nDev; ++g) {
int cudaDev;
int rank;
cudaDeviceProp prop;
- ncclCommCuDevice(comms[g], &cudaDev);
- ncclCommUserRank(comms[g], &rank);
+ NCCLCHECK(ncclCommCuDevice(comms[g], &cudaDev));
+ NCCLCHECK(ncclCommUserRank(comms[g], &rank));
CUDACHECK(cudaGetDeviceProperties(&prop, cudaDev));
printf("# Rank %2d uses device %2d [0x%02x] %s\n", rank, cudaDev,
prop.pciBusID, prop.name);
diff --git a/test/single/reduce_scatter_test.cu b/test/single/reduce_scatter_test.cu
index da205d5..4fc3292 100644
--- a/test/single/reduce_scatter_test.cu
+++ b/test/single/reduce_scatter_test.cu
@@ -1,5 +1,5 @@
/*************************************************************************
- * Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -41,7 +41,7 @@ void RunTest(T** sendbuff, T** recvbuff, const int N, const ncclDataType_t type,
const ncclRedOp_t op, ncclComm_t* const comms, const std::vector<int>& dList) {
// initialize data
int nDev = 0;
- ncclCommCount(comms[0], &nDev);
+ NCCLCHECK(ncclCommCount(comms[0], &nDev));
cudaStream_t* s = (cudaStream_t*)malloc(sizeof(cudaStream_t)*nDev);
T* buffer = (T*)malloc(N * nDev * sizeof(T));
@@ -66,8 +66,8 @@ void RunTest(T** sendbuff, T** recvbuff, const int N, const ncclDataType_t type,
// warm up GPU
for (int i = 0; i < nDev; ++i) {
CUDACHECK(cudaSetDevice(dList[i]));
- ncclReduceScatter((const void*)sendbuff[i], (void*)recvbuff[i],
- std::min(N, 1024 * 1024), type, op, comms[i], s[i]);
+ NCCLCHECK(ncclReduceScatter((const void*)sendbuff[i], (void*)recvbuff[i],
+ std::min(N, 1024 * 1024), type, op, comms[i], s[i]));
}
for (int i = 0; i < nDev; ++i) {
@@ -86,8 +86,8 @@ void RunTest(T** sendbuff, T** recvbuff, const int N, const ncclDataType_t type,
for (int i = 0; i < nDev; ++i) {
CUDACHECK(cudaSetDevice(dList[i]));
- ncclReduceScatter((const void*)sendbuff[i], (void*)recvbuff[i], n, type,
- op, comms[i], s[i]);
+ NCCLCHECK(ncclReduceScatter((const void*)sendbuff[i], (void*)recvbuff[i], n, type,
+ op, comms[i], s[i]));
}
for (int i = 0; i < nDev; ++i) {
@@ -122,8 +122,8 @@ void RunTest(T** sendbuff, T** recvbuff, const int N, const ncclDataType_t type,
for (int i = 0; i < nDev; ++i) {
CUDACHECK(cudaSetDevice(dList[i]));
- ncclReduceScatter((const void*)sendbuff[i], (void*)sendbuff[i], n, type,
- op, comms[i], s[i]);
+ NCCLCHECK(ncclReduceScatter((const void*)sendbuff[i], (void*)sendbuff[i], n, type,
+ op, comms[i], s[i]));
}
for (int i = 0; i < nDev; ++i) {
@@ -163,7 +163,7 @@ template<typename T>
void RunTests(const int N, const ncclDataType_t type, ncclComm_t* const comms,
const std::vector<int>& dList) {
int nDev = 0;
- ncclCommCount(comms[0], &nDev);
+ NCCLCHECK(ncclCommCount(comms[0], &nDev));
T** sendbuff = (T**)malloc(nDev * sizeof(T*));
T** recvbuff = (T**)malloc(nDev * sizeof(T*));
@@ -243,15 +243,15 @@ int main(int argc, char* argv[]) {
}
ncclComm_t* comms = (ncclComm_t*)malloc(sizeof(ncclComm_t)*nDev);
- ncclCommInitAll(comms, nDev, dList.data());
+ NCCLCHECK(ncclCommInitAll(comms, nDev, dList.data()));
printf("# Using devices\n");
for (int g = 0; g < nDev; ++g) {
int cudaDev;
int rank;
cudaDeviceProp prop;
- ncclCommCuDevice(comms[g], &cudaDev);
- ncclCommUserRank(comms[g], &rank);
+ NCCLCHECK(ncclCommCuDevice(comms[g], &cudaDev));
+ NCCLCHECK(ncclCommUserRank(comms[g], &rank));
CUDACHECK(cudaGetDeviceProperties(&prop, cudaDev));
printf("# Rank %2d uses device %2d [0x%02x] %s\n", rank, cudaDev,
prop.pciBusID, prop.name);
diff --git a/test/single/reduce_test.cu b/test/single/reduce_test.cu
index 42b1e9b..9500c18 100644
--- a/test/single/reduce_test.cu
+++ b/test/single/reduce_test.cu
@@ -50,7 +50,7 @@ void RunTest(T** sendbuff, T** recvbuff, const int N, const ncclDataType_t type,
memset(result, 0, N * sizeof(T));
int nDev = 0;
- ncclCommCount(comms[0], &nDev);
+ NCCLCHECK(ncclCommCount(comms[0], &nDev));
cudaStream_t* s = (cudaStream_t*)malloc(sizeof(cudaStream_t)*nDev);
for (int i = 0; i < nDev; ++i) {
@@ -68,8 +68,8 @@ void RunTest(T** sendbuff, T** recvbuff, const int N, const ncclDataType_t type,
// warm up GPU
for (int i = 0; i < nDev; ++i) {
CUDACHECK(cudaSetDevice(dList[i]));
- ncclReduce((const void*)sendbuff[i], (void*)recvbuff[i], std::min(N, 1024 * 1024),
- type, op, root, comms[i], s[i]);
+ NCCLCHECK(ncclReduce((const void*)sendbuff[i], (void*)recvbuff[i], std::min(N, 1024 * 1024),
+ type, op, root, comms[i], s[i]));
}
for (int i = 0; i < nDev; ++i) {
@@ -90,8 +90,8 @@ void RunTest(T** sendbuff, T** recvbuff, const int N, const ncclDataType_t type,
//for (int i=0; i<100; i++) {
for (int i = 0; i < nDev; ++i) {
CUDACHECK(cudaSetDevice(dList[i]));
- ncclReduce((const void*)sendbuff[i], (void*)recvbuff[i], n, type, op,
- root, comms[i], s[i]);
+ NCCLCHECK(ncclReduce((const void*)sendbuff[i], (void*)recvbuff[i], n, type, op,
+ root, comms[i], s[i]));
}
//}
@@ -129,8 +129,8 @@ void RunTest(T** sendbuff, T** recvbuff, const int N, const ncclDataType_t type,
//for (int i=0; i<100; i++) {
for (int i = 0; i < nDev; ++i) {
CUDACHECK(cudaSetDevice(dList[i]));
- ncclReduce((const void*)sendbuff[i], (void*)sendbuff[i], n, type, op,
- root, comms[i], s[i]);
+ NCCLCHECK(ncclReduce((const void*)sendbuff[i], (void*)sendbuff[i], n, type, op,
+ root, comms[i], s[i]));
}
//}
@@ -171,7 +171,7 @@ template<typename T>
void RunTests(const int N, const ncclDataType_t type, ncclComm_t* const comms,
const std::vector<int>& dList) {
int nDev = 0;
- ncclCommCount(comms[0], &nDev);
+ NCCLCHECK(ncclCommCount(comms[0], &nDev));
T** sendbuff = (T**)malloc(nDev * sizeof(T*));
T** recvbuff = (T**)malloc(nDev * sizeof(T*));
@@ -253,7 +253,7 @@ int main(int argc, char* argv[]) {
}
ncclComm_t* comms = (ncclComm_t*)malloc(sizeof(ncclComm_t)*nDev);
- ncclCommInitAll(comms, nDev, dList.data());
+ NCCLCHECK(ncclCommInitAll(comms, nDev, dList.data()));
if (!csv) {
printf("# Using devices\n");
@@ -261,8 +261,8 @@ int main(int argc, char* argv[]) {
int cudaDev;
int rank;
cudaDeviceProp prop;
- ncclCommCuDevice(comms[g], &cudaDev);
- ncclCommUserRank(comms[g], &rank);
+ NCCLCHECK(ncclCommCuDevice(comms[g], &cudaDev));
+ NCCLCHECK(ncclCommUserRank(comms[g], &rank));
CUDACHECK(cudaGetDeviceProperties(&prop, cudaDev));
printf("# Rank %2d uses device %2d [0x%02x] %s\n", rank, cudaDev,
prop.pciBusID, prop.name);