blob: b053e5b947d0e5b0596afba1ee231874f7305cfa (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
|
/*************************************************************************
* Copyright (c) 2015-2019, NVIDIA CORPORATION. All rights reserved.
*
* See LICENSE.txt for license information
************************************************************************/
#include "channel.h"
#include "param.h"
NCCL_PARAM(Buffsize, "BUFFSIZE", DEFAULT_BUFFER_SIZE_BYTES);
ncclResult_t initChannel(struct ncclComm* comm, int channelid) {
struct ncclChannel* channel = comm->channels+channelid;
channel->id = channelid;
// Setup intermediate buffering
channel->buffSize = ncclParamBuffsize();
// Ring index to user rank table.
NCCLCHECK(ncclCudaCalloc(&channel->ring.devUserRanks, comm->nRanks));
NCCLCHECK(ncclCalloc(&channel->ring.userRanks, comm->nRanks));
// Communication structures with peers.
NCCLCHECK(ncclCudaCalloc(&channel->devPeers, comm->nRanks));
NCCLCHECK(ncclCalloc(&channel->peers, comm->nRanks));
for (size_t i=0; i<comm->nRanks; ++i) {
channel->peers[i].send.comm = comm;
channel->peers[i].recv.comm = comm;
}
// Per-channel operation list.
NCCLCHECK(ncclCudaHostAlloc((void**)&channel->collectives, (void**)&channel->devCollectives, sizeof(struct ncclColl)*NCCL_MAX_OPS));
return ncclSuccess;
}
ncclResult_t freeChannel(struct ncclChannel* channel, int nRanks) {
// Operation list
NCCLCHECK(ncclCudaHostFree(channel->collectives));
// Free Ring index to rank tables
free(channel->ring.userRanks);
CUDACHECK(cudaFree(channel->ring.devUserRanks));
// Free transport proxy resources
for (int r=0; r<nRanks; r++) {
struct ncclPeer* peer = channel->peers+r;
if (peer->send.transportResources) NCCLCHECK(peer->send.transportComm->free(peer->send.transportResources));
if (peer->recv.transportResources) NCCLCHECK(peer->recv.transportComm->free(peer->recv.transportResources));
}
// Free the peer structures.
CUDACHECK(cudaFree(channel->devPeers));
free(channel->peers);
return ncclSuccess;
}
|