Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/nccl.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'test/single/broadcast_scan.cu')
-rw-r--r--test/single/broadcast_scan.cu232
1 files changed, 0 insertions, 232 deletions
diff --git a/test/single/broadcast_scan.cu b/test/single/broadcast_scan.cu
deleted file mode 100644
index ea11c7d..0000000
--- a/test/single/broadcast_scan.cu
+++ /dev/null
@@ -1,232 +0,0 @@
-/*************************************************************************
- * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of NVIDIA CORPORATION nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- ************************************************************************/
-
-#include <chrono>
-#include <cstdio>
-#include <cstdlib>
-#include <string>
-#include <float.h>
-
-#include "nccl.h"
-#include "test_utilities.h"
-#include <nvToolsExt.h>
-
-void showUsage(const char* bin) {
- printf("\n"
- "Usage: %s <type> <n_min> <n_max> [delta] [gpus] [gpu0 [gpu1 [...]]]\n"
- "Where:\n"
-#ifdef CUDA_HAS_HALF
- " type = [char|int|half|float|double|int64|uint64]\n"
-#else
- " type = [char|int|float|double|int64|uint64]\n"
-#endif
- " n_min > 0\n"
- " n_max >= n_min\n"
- " delta > 0\n\n", bin);
- return;
-}
-
-int main(int argc, char* argv[]) {
- int nvis = 0;
- CUDACHECK(cudaGetDeviceCount(&nvis));
- if (nvis == 0) {
- printf("No GPUs found\n");
- showUsage(argv[0]);
- exit(EXIT_FAILURE);
- }
-
- ncclDataType_t type;
- int n_min;
- int n_max;
- int delta;
- int gpus;
- int* list = NULL;
-
- if (argc < 4) {
- showUsage(argv[0]);
- exit(EXIT_FAILURE);
- }
-
- type = strToType(argv[1]);
- if (type == nccl_NUM_TYPES) {
- printf("Invalid <type> '%s'\n", argv[1]);
- showUsage(argv[0]);
- exit(EXIT_FAILURE);
- }
-
- n_min = strToPosInt(argv[2]);
- if (n_min < 1) {
- printf("Invalid <n_min> '%s'\n", argv[2]);
- showUsage(argv[0]);
- exit(EXIT_FAILURE);
- }
-
- n_max = strToPosInt(argv[3]);
- if (n_max < n_min) {
- printf("Invalid <n_max> '%s'\n", argv[3]);
- showUsage(argv[0]);
- exit(EXIT_FAILURE);
- }
-
- if (argc > 4) {
- delta = strToPosInt(argv[4]);
- if (delta < 1) {
- printf("Invalid <delta> '%s'\n", argv[4]);
- showUsage(argv[0]);
- exit(EXIT_FAILURE);
- }
- } else {
- delta = (n_max == n_min) ? 1 : (n_max - n_min+9) / 10;
- }
-
- if (argc > 5) {
- gpus = strToPosInt(argv[5]);
- if (gpus < 1) {
- printf("Invalid <gpus> '%s'\n", argv[5]);
- showUsage(argv[0]);
- exit(EXIT_FAILURE);
- }
- } else {
- gpus = nvis;
- }
-
- list = (int*)malloc(gpus*sizeof(int));
-
- if (argc > 6 && argc != 6+gpus) {
- printf("If given, GPU list must be fully specified.\n");
- showUsage(argv[0]);
- exit(EXIT_FAILURE);
- }
-
- for(int g=0; g<gpus; ++g) {
- if(argc > 6) {
- list[g] = strToNonNeg(argv[6+g]);
- if (list[g] < 0) {
- printf("Invalid GPU%d '%s'\n", g, argv[6+g]);
- showUsage(argv[0]);
- exit(EXIT_FAILURE);
- } else if (list[g] >= nvis) {
- printf("GPU%d (%d) exceeds visible devices (%d)\n", g, list[g], nvis);
- showUsage(argv[0]);
- exit(EXIT_FAILURE);
- }
- } else {
- list[g] = g % nvis;
- }
- }
-
- size_t word = wordSize(type);
- size_t max_size = n_max * word;
- void* refout;
- CUDACHECK(cudaMallocHost(&refout, max_size));
-
- void** io;
- double* localError;
- ncclComm_t* comm;
- cudaStream_t* stream;
-
- io = (void**)malloc(gpus*sizeof(void*));
- CUDACHECK(cudaMallocHost(&localError, gpus*sizeof(double)));
- comm = (ncclComm_t*)malloc(gpus*sizeof(ncclComm_t));
- stream = (cudaStream_t*)malloc(gpus*sizeof(cudaStream_t));
-
- for(int g=0; g<gpus; ++g) {
- char busid[32] = {0};
- CUDACHECK(cudaDeviceGetPCIBusId(busid, 32, list[g]));
- printf("# Rank %d using device %d [%s]\n", g, list[g], busid);
-
- CUDACHECK(cudaSetDevice(list[g]));
- CUDACHECK(cudaStreamCreate(&stream[g]));
- CUDACHECK(cudaMalloc(&io[g], max_size));
- if(g == 0) {
- makeRandom(io[g], n_max, type, 42+g);
- CUDACHECK(cudaMemcpy(refout, io[g], max_size, cudaMemcpyDeviceToHost));
- }
- }
-
- NCCLCHECK(ncclCommInitAll(comm, gpus, list));
-
- printf(" BYTES ERROR MSEC BW\n");
-
- for(int n=n_min; n<=n_max; n+=delta) {
- size_t bytes = word * n;
-
- for(int g=1; g<gpus; ++g) {
- CUDACHECK(cudaSetDevice(list[g]));
- CUDACHECK(cudaMemsetAsync(io[g], 0, bytes, stream[g]));
- }
- for(int g=0; g<gpus; ++g) {
- CUDACHECK(cudaSetDevice(list[g]));
- CUDACHECK(cudaStreamSynchronize(stream[0]));
- }
-
- auto start = std::chrono::high_resolution_clock::now();
- for(int g=0; g<gpus; ++g) {
- CUDACHECK(cudaSetDevice(list[g]));
- NCCLCHECK(ncclBcast(io[g], n, type, 0, comm[g], stream[g]));
- }
- for(int g=0; g<gpus; ++g) {
- CUDACHECK(cudaSetDevice(list[g]));
- CUDACHECK(cudaStreamSynchronize(stream[g]));
- }
- auto stop = std::chrono::high_resolution_clock::now();
- double ms = std::chrono::duration_cast<std::chrono::duration<double>>
- (stop - start).count() * 1000.0;
-
- for(int g=1; g<gpus; ++g) {
- CUDACHECK(cudaSetDevice(list[g]));
- maxDiff(localError+g, io[g], refout, n, type, stream[g]);
- }
- double maxError = 0.0;
- for(int g=1; g<gpus; ++g) {
- CUDACHECK(cudaSetDevice(list[g]));
- CUDACHECK(cudaStreamSynchronize(stream[g]));
- maxError = max(maxError, localError[g]);
- }
-
- double mb = (double)bytes * 1.e-6;
- double algbw = mb / ms;
- printf("%12lu %5.0le %10.3lf %6.2lf\n",
- n*word, maxError, ms, algbw);
- }
-
- for(int g=0; g<gpus; ++g) {
- CUDACHECK(cudaSetDevice(list[g]));
- CUDACHECK(cudaStreamDestroy(stream[g]));
- ncclCommDestroy(comm[g]);
- CUDACHECK(cudaFree(io[g]));
- }
-
- free(io);
- free(comm);
- free(stream);
- CUDACHECK(cudaFreeHost(refout));
- CUDACHECK(cudaFreeHost(localError));
- exit(EXIT_SUCCESS);
-}
-