Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/nccl.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'src/include/proxy.h')
-rw-r--r--src/include/proxy.h103
1 files changed, 103 insertions, 0 deletions
diff --git a/src/include/proxy.h b/src/include/proxy.h
new file mode 100644
index 0000000..9796baf
--- /dev/null
+++ b/src/include/proxy.h
@@ -0,0 +1,103 @@
+/*************************************************************************
+ * Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved.
+ *
+ * See LICENSE.txt for license information
+ ************************************************************************/
+
+#ifndef NCCL_PROXY_H_
+#define NCCL_PROXY_H_
+
+#include <pthread.h>
+
+enum ncclProxyOpState { ncclProxyOpNone, ncclProxyOpReady, ncclProxyOpProgress };
+
+struct ncclProxyArgs;
+typedef ncclResult_t (*proxyProgressFunc_t)(struct ncclProxyArgs*);
+
+struct ncclProxyArgs {
+ proxyProgressFunc_t progress;
+ struct ncclChannel* channel;
+ struct ncclConnector* connector;
+ size_t sendbytes;
+ size_t recvbytes;
+ int sliceSteps;
+ int chunkSteps;
+ int nsteps;
+ uint64_t opCount;
+ int protocol;
+ int segment; // Only for profiling
+ ncclDataType_t dtype;
+ ncclRedOp_t redOp;
+ int state; // add component before this line -- it is left out during initialization
+
+ // Internal state
+ uint64_t posted;
+ uint64_t received; // Only used by recv proxy to wait for flush.
+ uint64_t transmitted;
+ uint64_t done;
+ uint64_t end;
+ void* requests[NCCL_STEPS];
+ int idle;
+
+ // Element linking
+ pthread_mutex_t mutex;
+ struct ncclProxyArgs* next;
+ struct ncclProxyArgs* nextPeer;
+ struct ncclProxyArgs* nextGroup;
+ struct ncclProxyArgs** proxyAppendPtr;
+};
+
+struct ncclProxySharedBuffers {
+ int nslots;
+ int slotSize;
+ char* cudaBuff[2*MAXCHANNELS];
+ int* cudaUsed[2*MAXCHANNELS];
+ char* hostBuff[2*MAXCHANNELS];
+ int* hostUsed[2*MAXCHANNELS];
+ struct ncclProxyArgs* proxyAppend[2*MAXCHANNELS]; // Separate send and recv
+};
+
+struct ncclProxyPool;
+struct ncclProxyState {
+ pthread_cond_t cond;
+ pthread_mutex_t opsMutex;
+ pthread_mutex_t poolMutex;
+ bool stop;
+ struct ncclProxySharedBuffers* sharedBuffs;
+ struct ncclProxyArgs* ops;
+ struct ncclProxyArgs* nextOps;
+ struct ncclProxyArgs* nextOpsEnd;
+ struct ncclProxyArgs* pool;
+ struct ncclProxyPool* pools;
+};
+
+typedef ncclResult_t (*threadFunc_t)(struct ncclProxyArgs*);
+
+enum proxyMode {
+ proxyRing = 0,
+ proxyFrom = 1,
+ proxyTo = 2
+};
+
+ncclResult_t ncclProxySaveColl(struct ncclProxyArgs* args, int pattern, int root, int nranks);
+ncclResult_t ncclProxySaveP2p(struct ncclInfo* info, struct ncclChannel* channel, int segment);
+ncclResult_t ncclProxyStart(struct ncclComm* comm);
+ncclResult_t ncclProxyCreate(struct ncclComm* comm);
+ncclResult_t ncclProxyDestroy(struct ncclComm* comm);
+
+ncclResult_t ncclProxySharedBuffersInit(struct ncclComm* comm, int cuda, int* size, char** ptr);
+ncclResult_t ncclProxySharedBuffersAlloc(struct ncclComm* comm, int cuda, int type, int channel, int size, char** ptr);
+ncclResult_t ncclProxySharedBuffersFree(struct ncclComm* comm, int cuda, int type, int channel, int size, char* ptr);
+ncclResult_t ncclProxySharedBuffersDestroy(struct ncclComm* comm);
+
+#include <unistd.h>
+
+// Spin wait until func evaluates to true
+template<typename FUNC>
+inline void transportProxyWait(const FUNC& func) {
+ while (!func()) {
+ sched_yield();
+ }
+}
+
+#endif