Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/nccl.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Sylvain Jeaugey <sjeaugey@nvidia.com> 2016-12-02 02:16:35 +0300
committer: Sylvain Jeaugey <sjeaugey@nvidia.com> 2016-12-02 02:16:35 +0300
commit: 1093821c335437b399035f3ebf3b67a3e960de8f (patch)
tree: c1fa3aac5616168c93ed159d5886438195320d0f
parent: ddddfba1c0c4a17bce7377812a436c09dc53cf15 (diff)
Replace min BW by average BW in tests
-rw-r--r-- test/single/all_gather_test.cu 14
-rw-r--r-- test/single/all_reduce_test.cu 16
-rw-r--r-- test/single/broadcast_test.cu 14
-rw-r--r-- test/single/reduce_scatter_test.cu 16
-rw-r--r-- test/single/reduce_test.cu 16
5 files changed, 48 insertions, 28 deletions
diff --git a/test/single/all_gather_test.cu b/test/single/all_gather_test.cu
index ba3841f..40d2f31 100644
--- a/test/single/all_gather_test.cu
+++ b/test/single/all_gather_test.cu
@@ -14,7 +14,8 @@
#include "test_utilities.h"
int errors = 0;
-double min_bw = 10000.0;
+double avg_bw = 0.0;
+int avg_count = 0;
bool is_reduction = false;
template<typename T>
@@ -89,7 +90,9 @@ void RunTest(T** sendbuff, T** recvbuff, const int N, const ncclDataType_t type,
maxDelta);
if (maxDelta > deltaMaxValue(type, is_reduction)) errors++;
- if (busbw < min_bw) min_bw = busbw;
+ avg_bw += busbw;
+ avg_count++;
+
}
for (int i = 0; i < nDev; ++i) {
@@ -218,12 +221,13 @@ int main(int argc, char* argv[]) {
free(comms);
char* str = getenv("NCCL_TESTS_MIN_BW");
- double check_min_bw = str ? atof(str) : -1;
+ double check_avg_bw = str ? atof(str) : -1;
+ avg_bw /= avg_count;
printf(" Out of bounds values : %d %s\n", errors, errors ? "FAILED" : "OK");
- printf(" Min bus bandwidth : %g %s\n", min_bw, check_min_bw == -1 ? "" : (min_bw < check_min_bw ? "FAILED" : "OK"));
+ printf(" Avg bus bandwidth : %g %s\n", avg_bw, check_avg_bw == -1 ? "" : (avg_bw < check_avg_bw ? "FAILED" : "OK"));
printf("\n");
- if (errors || min_bw < check_min_bw)
+ if (errors || avg_bw < check_avg_bw)
exit(EXIT_FAILURE);
else
exit(EXIT_SUCCESS);
diff --git a/test/single/all_reduce_test.cu b/test/single/all_reduce_test.cu
index 642be80..1935a38 100644
--- a/test/single/all_reduce_test.cu
+++ b/test/single/all_reduce_test.cu
@@ -16,7 +16,8 @@
int csv = false;
int errors = 0;
-double min_bw = 10000.0;
+double avg_bw = 0.0;
+int avg_count = 0;
bool is_reduction = true;
template<typename T>
@@ -99,7 +100,8 @@ void RunTest(T** sendbuff, T** recvbuff, const int N, const ncclDataType_t type,
elapsedSec * 1.0E3, algbw, busbw, maxDelta);
if (maxDelta > deltaMaxValue(type, is_reduction)) errors++;
- if (busbw < min_bw) min_bw = busbw;
+ avg_bw += busbw;
+ avg_count++;
nvtxRangePop();
}
@@ -145,7 +147,8 @@ void RunTest(T** sendbuff, T** recvbuff, const int N, const ncclDataType_t type,
elapsedSec * 1.0E3, algbw, busbw, maxDelta);
if (maxDelta > deltaMaxValue(type, is_reduction)) errors++;
- if (busbw < min_bw) min_bw = busbw;
+ avg_bw += busbw;
+ avg_count++;
nvtxRangePop();
}
@@ -284,12 +287,13 @@ int main(int argc, char* argv[]) {
free(comms);
char* str = getenv("NCCL_TESTS_MIN_BW");
- double check_min_bw = str ? atof(str) : -1;
+ double check_avg_bw = str ? atof(str) : -1;
+ avg_bw /= avg_count;
printf(" Out of bounds values : %d %s\n", errors, errors ? "FAILED" : "OK");
- printf(" Min bus bandwidth : %g %s\n", min_bw, check_min_bw == -1 ? "" : (min_bw < check_min_bw ? "FAILED" : "OK"));
+ printf(" Avg bus bandwidth : %g %s\n", avg_bw, check_avg_bw == -1 ? "" : (avg_bw < check_avg_bw ? "FAILED" : "OK"));
printf("\n");
- if (errors || min_bw < check_min_bw)
+ if (errors || avg_bw < check_avg_bw)
exit(EXIT_FAILURE);
else
exit(EXIT_SUCCESS);
diff --git a/test/single/broadcast_test.cu b/test/single/broadcast_test.cu
index 30afebd..6b1e04f 100644
--- a/test/single/broadcast_test.cu
+++ b/test/single/broadcast_test.cu
@@ -14,7 +14,8 @@
#include "test_utilities.h"
int errors = 0;
-double min_bw = 10000.0;
+double avg_bw = 0.0;
+int avg_count = 0;
bool is_reduction = false;
template<typename T>
@@ -91,7 +92,9 @@ void RunTest(T** buff, const int N, const ncclDataType_t type, const int root,
maxDelta);
if (maxDelta > deltaMaxValue(type, is_reduction)) errors++;
- if (busbw < min_bw) min_bw = busbw;
+ avg_bw += busbw;
+ avg_count++;
+
}
for(int i=0; i < nDev; ++i) {
@@ -218,12 +221,13 @@ int main(int argc, char* argv[]) {
free(comms);
char* str = getenv("NCCL_TESTS_MIN_BW");
- double check_min_bw = str ? atof(str) : -1;
+ double check_avg_bw = str ? atof(str) : -1;
+ avg_bw /= avg_count;
printf(" Out of bounds values : %d %s\n", errors, errors ? "FAILED" : "OK");
- printf(" Min bus bandwidth : %g %s\n", min_bw, check_min_bw == -1 ? "" : (min_bw < check_min_bw ? "FAILED" : "OK"));
+ printf(" Avg bus bandwidth : %g %s\n", avg_bw, check_avg_bw == -1 ? "" : (avg_bw < check_avg_bw ? "FAILED" : "OK"));
printf("\n");
- if (errors || min_bw < check_min_bw)
+ if (errors || avg_bw < check_avg_bw)
exit(EXIT_FAILURE);
else
exit(EXIT_SUCCESS);
diff --git a/test/single/reduce_scatter_test.cu b/test/single/reduce_scatter_test.cu
index 81f3004..b702800 100644
--- a/test/single/reduce_scatter_test.cu
+++ b/test/single/reduce_scatter_test.cu
@@ -14,7 +14,8 @@
#include "test_utilities.h"
int errors = 0;
-double min_bw = 10000.0;
+double avg_bw = 0.0;
+int avg_count = 0;
bool is_reduction = true;
template<typename T>
@@ -95,7 +96,8 @@ void RunTest(T** sendbuff, T** recvbuff, const int N, const ncclDataType_t type,
maxDelta);
if (maxDelta > deltaMaxValue(type, is_reduction)) errors++;
- if (busbw < min_bw) min_bw = busbw;
+ avg_bw += busbw;
+ avg_count++;
}
{
@@ -134,7 +136,8 @@ void RunTest(T** sendbuff, T** recvbuff, const int N, const ncclDataType_t type,
maxDelta);
if (maxDelta > deltaMaxValue(type, is_reduction)) errors++;
- if (busbw < min_bw) min_bw = busbw;
+ avg_bw += busbw;
+ avg_count++;
}
for (int i = 0; i < nDev; ++i) {
@@ -268,12 +271,13 @@ int main(int argc, char* argv[]) {
free(comms);
char* str = getenv("NCCL_TESTS_MIN_BW");
- double check_min_bw = str ? atof(str) : -1;
+ double check_avg_bw = str ? atof(str) : -1;
+ avg_bw /= avg_count;
printf(" Out of bounds values : %d %s\n", errors, errors ? "FAILED" : "OK");
- printf(" Min bus bandwidth : %g %s\n", min_bw, check_min_bw == -1 ? "" : (min_bw < check_min_bw ? "FAILED" : "OK"));
+ printf(" Avg bus bandwidth : %g %s\n", avg_bw, check_avg_bw == -1 ? "" : (avg_bw < check_avg_bw ? "FAILED" : "OK"));
printf("\n");
- if (errors || min_bw < check_min_bw)
+ if (errors || avg_bw < check_avg_bw)
exit(EXIT_FAILURE);
else
exit(EXIT_SUCCESS);
diff --git a/test/single/reduce_test.cu b/test/single/reduce_test.cu
index aa0d20f..6abb49c 100644
--- a/test/single/reduce_test.cu
+++ b/test/single/reduce_test.cu
@@ -16,7 +16,8 @@
int csv = false;
int errors = 0;
-double min_bw = 10000.0;
+double avg_bw = 0.0;
+int avg_count = 0;
bool is_reduction = true;
template<typename T>
@@ -98,7 +99,8 @@ void RunTest(T** sendbuff, T** recvbuff, const int N, const ncclDataType_t type,
elapsedSec * 1.0E3, algbw, busbw, maxDelta);
if (maxDelta > deltaMaxValue(type, is_reduction)) errors++;
- if (busbw < min_bw) min_bw = busbw;
+ avg_bw += busbw;
+ avg_count++;
nvtxRangePop();
}
@@ -140,7 +142,8 @@ void RunTest(T** sendbuff, T** recvbuff, const int N, const ncclDataType_t type,
elapsedSec * 1.0E3, algbw, busbw, maxDelta);
if (maxDelta > deltaMaxValue(type, is_reduction)) errors++;
- if (busbw < min_bw) min_bw = busbw;
+ avg_bw += busbw;
+ avg_count++;
nvtxRangePop();
}
@@ -282,12 +285,13 @@ int main(int argc, char* argv[]) {
free(comms);
char* str = getenv("NCCL_TESTS_MIN_BW");
- double check_min_bw = str ? atof(str) : -1;
+ double check_avg_bw = str ? atof(str) : -1;
+ avg_bw /= avg_count;
printf(" Out of bounds values : %d %s\n", errors, errors ? "FAILED" : "OK");
- printf(" Min bus bandwidth : %g %s\n", min_bw, check_min_bw == -1 ? "" : (min_bw < check_min_bw ? "FAILED" : "OK"));
+ printf(" Avg bus bandwidth : %g %s\n", avg_bw, check_avg_bw == -1 ? "" : (avg_bw < check_avg_bw ? "FAILED" : "OK"));
printf("\n");
- if (errors || min_bw < check_min_bw)
+ if (errors || avg_bw < check_avg_bw)
exit(EXIT_FAILURE);
else
exit(EXIT_SUCCESS);