1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
|
/*************************************************************************
* Copyright (c) 2016-2019, NVIDIA CORPORATION. All rights reserved.
*
* See LICENSE.txt for license information
************************************************************************/
#ifndef NCCL_TRANSPORT_H_
#define NCCL_TRANSPORT_H_
#include "devcomm.h"
#include "graph.h"
#include "nvmlwrap.h"
#include "core.h"
#define NTRANSPORTS 3
#define TRANSPORT_P2P 0
#define TRANSPORT_SHM 1
#define TRANSPORT_NET 2
extern struct ncclTransport ncclTransports[];
// Forward declarations
struct ncclRing;
struct ncclConnector;
struct ncclComm;
struct ncclPeerInfo {
int rank;
int cudaDev;
int gdrSupport;
uint64_t hostHash;
uint64_t pidHash;
dev_t shmDev;
int64_t busId;
};
#define CONNECT_SIZE 128
struct ncclConnect {
char data[CONNECT_SIZE];
};
enum ncclProxyOpState { ncclProxyOpNone, ncclProxyOpReady, ncclProxyOpProgress };
struct ncclProxyArgs;
typedef ncclResult_t (*proxyProgressFunc_t)(struct ncclProxyArgs*);
struct ncclProxyArgs {
proxyProgressFunc_t progress;
struct ncclChannel* channel;
struct ncclConnector* connector;
int sliceSteps;
int chunkSteps;
int nsteps;
uint64_t opCount;
int protocol;
ncclDataType_t dtype;
ncclRedOp_t redOp;
int state; // add component before this line -- it is left out during initialization
// Internal state
uint64_t head;
uint64_t tail;
uint64_t end;
void* requests[NCCL_STEPS];
int idle;
// Element linking
pthread_mutex_t mutex;
struct ncclProxyArgs* next;
struct ncclProxyArgs* nextPeer;
};
struct ncclProxyPool;
struct ncclProxyState {
pthread_cond_t cond;
pthread_mutex_t mutex;
bool stop;
struct ncclProxyArgs* ops;
struct ncclProxyArgs* pool;
struct ncclProxyPool* pools;
};
struct ncclTransportComm {
ncclResult_t (*setup)(struct ncclTopoSystem* topo, struct ncclTopoGraph* graph, struct ncclPeerInfo*, struct ncclPeerInfo*, struct ncclConnect*, struct ncclConnector*, int buffSize, int channelId);
ncclResult_t (*connect)(struct ncclConnect*, int nranks, int rank, struct ncclConnector*);
ncclResult_t (*free)(void*);
ncclResult_t (*proxy)(struct ncclProxyArgs*);
};
struct ncclTransport {
const char name[4];
ncclResult_t (*canConnect)(int*, struct ncclTopoSystem* topo, struct ncclTopoGraph* graph, struct ncclPeerInfo*, struct ncclPeerInfo*);
struct ncclTransportComm send;
struct ncclTransportComm recv;
};
#include <pthread.h>
typedef ncclResult_t (*threadFunc_t)(struct ncclProxyArgs*);
enum proxyMode {
proxyRing = 0,
proxyFrom = 1,
proxyTo = 2
};
ncclResult_t transportAllocateProxyArgs(struct ncclComm* comm, struct ncclProxyArgs** argsptr);
ncclResult_t transportSaveProxies(struct ncclProxyArgs* args, int pattern, int root, int nranks);
ncclResult_t transportStartProxy(struct ncclComm* comm);
ncclResult_t transportCreateProxy(struct ncclComm* comm);
ncclResult_t transportDestroyProxy(struct ncclComm* comm);
#include <unistd.h>
// Spin wait until func evaluates to true
template<typename FUNC>
inline void transportProxyWait(const FUNC& func) {
while (!func()) {
sched_yield();
}
}
#endif
|