Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/nccl.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'src/channel.cc')
-rw-r--r--src/channel.cc24
1 files changed, 18 insertions, 6 deletions
diff --git a/src/channel.cc b/src/channel.cc
index b053e5b..0a43e17 100644
--- a/src/channel.cc
+++ b/src/channel.cc
@@ -6,24 +6,32 @@
#include "channel.h"
#include "param.h"
+#include "graph.h"
-NCCL_PARAM(Buffsize, "BUFFSIZE", DEFAULT_BUFFER_SIZE_BYTES);
+#define DEFAULT_BUFFER_SIZE_BYTES (1LL << 22) /* 4MiB */
+#define DEFAULT_BUFFER_SIZE_BYTES_ARM (1LL << 20) /* 1MiB */
+
+NCCL_PARAM(Buffsize, "BUFFSIZE", -2);
ncclResult_t initChannel(struct ncclComm* comm, int channelid) {
struct ncclChannel* channel = comm->channels+channelid;
channel->id = channelid;
// Setup intermediate buffering
- channel->buffSize = ncclParamBuffsize();
+ int buffSize = ncclParamBuffsize();
+ int cpuArch, cpuVendor, cpuModel;
+ NCCLCHECK(ncclTopoCpuType(comm->topo, &cpuArch, &cpuVendor, &cpuModel));
+ channel->buffSize = buffSize != -2 ? buffSize :
+ cpuArch == NCCL_TOPO_CPU_ARCH_ARM ? DEFAULT_BUFFER_SIZE_BYTES_ARM : DEFAULT_BUFFER_SIZE_BYTES;
// Ring index to user rank table.
NCCLCHECK(ncclCudaCalloc(&channel->ring.devUserRanks, comm->nRanks));
NCCLCHECK(ncclCalloc(&channel->ring.userRanks, comm->nRanks));
// Communication structures with peers.
- NCCLCHECK(ncclCudaCalloc(&channel->devPeers, comm->nRanks));
- NCCLCHECK(ncclCalloc(&channel->peers, comm->nRanks));
- for (size_t i=0; i<comm->nRanks; ++i) {
+ NCCLCHECK(ncclCudaCalloc(&channel->devPeers, comm->nRanks+1)); // The extra one rank is for collnet root (i.e. network)
+ NCCLCHECK(ncclCalloc(&channel->peers, comm->nRanks+1));
+ for (size_t i=0; i<comm->nRanks+1; ++i) {
channel->peers[i].send.comm = comm;
channel->peers[i].recv.comm = comm;
}
@@ -42,9 +50,13 @@ ncclResult_t freeChannel(struct ncclChannel* channel, int nRanks) {
CUDACHECK(cudaFree(channel->ring.devUserRanks));
// Free transport proxy resources
- for (int r=0; r<nRanks; r++) {
+ // Note: free all send resources first due to CollNet arrangement
+ for (int r=0; r<nRanks+1; r++) {
struct ncclPeer* peer = channel->peers+r;
if (peer->send.transportResources) NCCLCHECK(peer->send.transportComm->free(peer->send.transportResources));
+ }
+ for (int r=0; r<nRanks+1; r++) {
+ struct ncclPeer* peer = channel->peers+r;
if (peer->recv.transportResources) NCCLCHECK(peer->recv.transportComm->free(peer->recv.transportResources));
}