1 files changed, 61 insertions, 45 deletions
diff --git a/src/include/nccl_net.h b/src/include/nccl_net.h
index d6ae9f8..8c016dc 100644
--- a/src/include/nccl_net.h
+++ b/src/include/nccl_net.h
@@ -1,5 +1,5 @@
 /*************************************************************************
- * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
  *
  * See LICENSE.txt for license information
  ************************************************************************/
@@ -8,30 +8,43 @@
 #define NCCL_NET_H_
 
 #include "nccl.h"
+#include <stdint.h>
 
 #define NCCL_NET_HANDLE_MAXSIZE 64
 
 #define NCCL_PTR_HOST 0x1
 #define NCCL_PTR_CUDA 0x2
 
+// Maximum number of requests per comm object
+#define NCCL_NET_MAX_REQUESTS 8
+
 typedef enum {NCCL_LOG_NONE=0, NCCL_LOG_VERSION=1, NCCL_LOG_WARN=2, NCCL_LOG_INFO=3, NCCL_LOG_ABORT=4, NCCL_LOG_TRACE=5} ncclDebugLogLevel;
-typedef enum {NCCL_INIT=1, NCCL_COLL=2, NCCL_P2P=4, NCCL_SHM=8, NCCL_NET=16, NCCL_GRAPH=32, NCCL_TUNING=64, NCCL_ALL=~0} ncclDebugLogSubSys;
+typedef enum {NCCL_INIT=1, NCCL_COLL=2, NCCL_P2P=4, NCCL_SHM=8, NCCL_NET=16, NCCL_GRAPH=32, NCCL_TUNING=64, NCCL_ENV=128, NCCL_ALL=~0} ncclDebugLogSubSys;
 
 typedef void (*ncclDebugLogger_t)(ncclDebugLogLevel level, unsigned long flags, const char *file, int line, const char *fmt, ...);
 
 typedef struct {
+  char* name;     // Used mostly for logging.
+  char* pciPath;  // Path to the PCI device in /sys.
+  uint64_t guid;  // Unique identifier for the NIC chip. Important for
+                  // cards with multiple PCI functions (physical or virtual).
+  int ptrSupport; // NCCL_PTR_HOST or NCCL_PTR_HOST|NCCL_PTR_CUDA
+  int speed;      // Port speed in Mbps.
+  int port;       // Port number.
+  int maxComms;   // Maximum number of comms we can create
+}ncclNetProperties_v4_t;
+
+typedef ncclNetProperties_v4_t ncclNetProperties_t;
+
+typedef struct {
   // Name of the network (mainly for logs)
   const char* name;
   // Initialize the network.
   ncclResult_t (*init)(ncclDebugLogger_t logFunction);
   // Return the number of adapters.
   ncclResult_t (*devices)(int* ndev);
-  // Return the device path in /sys. NCCL will call free on this path.
-  ncclResult_t (*pciPath)(int dev, char** path);
-  // Return whether this device supports host pointers and/or CUDA pointers
-  // as data from the current GPU. Supported types should be composed with
-  // NCCL_PTR_HOST and NCCL_PTR_CUDA.
-  ncclResult_t (*ptrSupport)(int dev, int* supportedTypes);
+  // Get various device properties.
+  ncclResult_t (*getProperties)(int dev, ncclNetProperties_v4_t* props);
   // Create a receiving object and provide a handle to connect to it. The
   // handle can be up to NCCL_NET_HANDLE_MAXSIZE bytes and will be exchanged
   // between ranks to create a connection.
@@ -40,15 +53,19 @@ typedef struct {
   ncclResult_t (*connect)(int dev, void* handle, void** sendComm);
   // Finalize connection establishment after remote peer has called connectHandle
   ncclResult_t (*accept)(void* listenComm, void** recvComm);
-  // Asynchronous send to a peer. Type is either NCCL_PTR_HOST or NCCL_PTR_CUDA.
+  // Register/Deregister memory. Comm can be either a sendComm or a recvComm.
+  // Type is either NCCL_PTR_HOST or NCCL_PTR_CUDA.
+  ncclResult_t (*regMr)(void* comm, void* data, int size, int type, void** mhandle);
+  ncclResult_t (*deregMr)(void* comm, void* mhandle);
+  // Asynchronous send to a peer.
   // May return request == NULL if the call cannot be performed (or would block)
-  ncclResult_t (*isend)(void* sendComm, void* data, int size, int type, void** request);
-  // Asynchronous recv from a peer. Type is either NCCL_PTR_HOST or NCCL_PTR_CUDA.
+  ncclResult_t (*isend)(void* sendComm, void* data, int size, void* mhandle, void** request);
+  // Asynchronous recv from a peer.
   // May return request == NULL if the call cannot be performed (or would block)
-  ncclResult_t (*irecv)(void* recvComm, void* data, int size, int type, void** request);
+  ncclResult_t (*irecv)(void* recvComm, void* data, int size, void* mhandle, void** request);
   // Perform a flush/fence to make sure all data received with NCCL_PTR_CUDA is
   // visible to the GPU
-  ncclResult_t (*flush)(void* recvComm, void* data, int size);
+  ncclResult_t (*iflush)(void* recvComm, void* data, int size, void* mhandle, void** request);
   // Test whether a request is complete. If size is not NULL, it returns the
   // number of bytes sent/received.
   ncclResult_t (*test)(void* request, int* done, int* size);
@@ -56,53 +73,52 @@ typedef struct {
   ncclResult_t (*closeSend)(void* sendComm);
   ncclResult_t (*closeRecv)(void* recvComm);
   ncclResult_t (*closeListen)(void* listenComm);
-} ncclNet_v1_t;
+} ncclNet_v4_t;
+
+typedef ncclNet_v4_t ncclNet_t;
+
+#define NCCL_PLUGIN_SYMBOL ncclNetPlugin_v4
 
 typedef struct {
-  // Name of the network (mainly for logs)
+  // Name of the collective network (mainly for logs)
   const char* name;
-  // Initialize the network.
+  // Initialize the collective network.
   ncclResult_t (*init)(ncclDebugLogger_t logFunction);
-  // Return the number of adapters.
+  // Return the number of adapters capable of doing collective operations.
+  // If ndev is set to 0, all other functions may be set to NULL.
   ncclResult_t (*devices)(int* ndev);
-  // Return the device path in /sys. NCCL will call free on this path.
-  ncclResult_t (*pciPath)(int dev, char** path);
-  // Return whether this device supports host pointers and/or CUDA pointers
-  // as data from the current GPU. Supported types should be composed with
-  // NCCL_PTR_HOST and NCCL_PTR_CUDA.
-  ncclResult_t (*ptrSupport)(int dev, int* supportedTypes);
+  // Get various device properties.
+  ncclResult_t (*getProperties)(int dev, ncclNetProperties_v4_t* props);
   // Create a receiving object and provide a handle to connect to it. The
   // handle can be up to NCCL_NET_HANDLE_MAXSIZE bytes and will be exchanged
-  // between ranks to create a connection.
+  // between ranks to create connections.
   ncclResult_t (*listen)(int dev, void* handle, void** listenComm);
-  // Connect to a handle and return a sending comm object for that peer.
-  ncclResult_t (*connect)(int dev, void* handle, void** sendComm);
-  // Finalize connection establishment after remote peer has called connectHandle
-  ncclResult_t (*accept)(void* listenComm, void** recvComm);
-  // Register/Deregister memory. Comm can be either a sendComm or a recvComm.
-  // Type is either NCCL_PTR_HOST or NCCL_PTR_CUDA.
-  ncclResult_t (*regMr)(void* comm, void* data, int size, int type, void** mhandle);
-  ncclResult_t (*deregMr)(void* comm, void* mhandle);
-  // Asynchronous send to a peer.
-  // May return request == NULL if the call cannot be performed (or would block)
-  ncclResult_t (*isend)(void* sendComm, void* data, int size, void* mhandle, void** request);
-  // Asynchronous recv from a peer.
-  // May return request == NULL if the call cannot be performed (or would block)
-  ncclResult_t (*irecv)(void* recvComm, void* data, int size, void* mhandle, void** request);
+  // Create a group for collective operations. The handles have been created
+  // using listen() above; rank is the caller's rank in the collective network.
+  ncclResult_t (*connect)(void* handles[], int nranks, int rank, void* listenComm, void** collComm);
+  // Indicate whether a reduction operation on a data type is supported by
+  // setting supported to 1 if it is, 0 otherwise.
+  ncclResult_t (*reduceSupport)(ncclDataType_t dataType, ncclRedOp_t redOp, int* supported);
+  // Register/Deregister memory. Type is either NCCL_PTR_HOST or NCCL_PTR_CUDA.
+  ncclResult_t (*regMr)(void* collComm, void* data, int size, int type, void** mhandle);
+  ncclResult_t (*deregMr)(void* collComm, void* mhandle);
+  // Performs an asynchronous allreduce operation on the collective group.
+  // May return request == NULL if the call cannot be performed (or would block).
+  ncclResult_t (*iallreduce)(void* collComm, void* sendData, void* recvData, int count,
+      ncclDataType_t dataType, ncclRedOp_t redOp, void* sendMhandle, void* recvMhandle, void** request);
   // Perform a flush/fence to make sure all data received with NCCL_PTR_CUDA is
   // visible to the GPU
-  ncclResult_t (*flush)(void* recvComm, void* data, int size, void* mhandle);
+  ncclResult_t (*iflush)(void* collComm, void* data, int size, void* mhandle, void** request);
   // Test whether a request is complete. If size is not NULL, it returns the
   // number of bytes sent/received.
   ncclResult_t (*test)(void* request, int* done, int* size);
-  // Close and free send/recv comm objects
-  ncclResult_t (*closeSend)(void* sendComm);
-  ncclResult_t (*closeRecv)(void* recvComm);
+  // Close and free collective comm objects
+  ncclResult_t (*closeColl)(void* collComm);
   ncclResult_t (*closeListen)(void* listenComm);
-} ncclNet_v2_t;
+} ncclCollNet_v4_t;
 
-typedef ncclNet_v2_t ncclNet_t;
+typedef ncclCollNet_v4_t ncclCollNet_t;
 
-#define NCCL_PLUGIN_SYMBOL ncclNetPlugin_v2
+#define NCCL_COLLNET_PLUGIN_SYMBOL ncclCollNetPlugin_v4
 
 #endif // end include guard