diff options
Diffstat (limited to 'src/include/nccl_net.h')
-rw-r--r-- | src/include/nccl_net.h | 106 |
1 file changed, 61 insertions, 45 deletions
diff --git a/src/include/nccl_net.h b/src/include/nccl_net.h
index d6ae9f8..8c016dc 100644
--- a/src/include/nccl_net.h
+++ b/src/include/nccl_net.h
@@ -1,5 +1,5 @@
 /*************************************************************************
- * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
  *
  * See LICENSE.txt for license information
  ************************************************************************/
@@ -8,30 +8,43 @@
 #define NCCL_NET_H_
 
 #include "nccl.h"
+#include <stdint.h>
 
 #define NCCL_NET_HANDLE_MAXSIZE 64
 
 #define NCCL_PTR_HOST 0x1
 #define NCCL_PTR_CUDA 0x2
 
+// Maximum number of requests per comm object
+#define NCCL_NET_MAX_REQUESTS 8
+
 typedef enum {NCCL_LOG_NONE=0, NCCL_LOG_VERSION=1, NCCL_LOG_WARN=2, NCCL_LOG_INFO=3, NCCL_LOG_ABORT=4, NCCL_LOG_TRACE=5} ncclDebugLogLevel;
-typedef enum {NCCL_INIT=1, NCCL_COLL=2, NCCL_P2P=4, NCCL_SHM=8, NCCL_NET=16, NCCL_GRAPH=32, NCCL_TUNING=64, NCCL_ALL=~0} ncclDebugLogSubSys;
+typedef enum {NCCL_INIT=1, NCCL_COLL=2, NCCL_P2P=4, NCCL_SHM=8, NCCL_NET=16, NCCL_GRAPH=32, NCCL_TUNING=64, NCCL_ENV=128, NCCL_ALL=~0} ncclDebugLogSubSys;
 
 typedef void (*ncclDebugLogger_t)(ncclDebugLogLevel level, unsigned long flags, const char *file, int line, const char *fmt, ...);
 
 typedef struct {
+  char* name;      // Used mostly for logging.
+  char* pciPath;   // Path to the PCI device in /sys.
+  uint64_t guid;   // Unique identifier for the NIC chip. Important for
+                   // cards with multiple PCI functions (Physical or virtual).
+  int ptrSupport;  // NCCL_PTR_HOST or NCCL_PTR_HOST|NCCL_PTR_CUDA
+  int speed;       // Port speed in Mbps.
+  int port;        // Port number.
+  int maxComms;    // Maximum number of comms we can create
+}ncclNetProperties_v4_t;
+
+typedef ncclNetProperties_v4_t ncclNetProperties_t;
+
+typedef struct {
   // Name of the network (mainly for logs)
   const char* name;
   // Initialize the network.
   ncclResult_t (*init)(ncclDebugLogger_t logFunction);
   // Return the number of adapters.
   ncclResult_t (*devices)(int* ndev);
-  // Return the device path in /sys. NCCL will call free on this path.
-  ncclResult_t (*pciPath)(int dev, char** path);
-  // Return whether this device supports host pointers and/or CUDA pointers
-  // as data from the current GPU. Supported types should be composed with
-  // NCCL_PTR_HOST and NCCL_PTR_CUDA.
-  ncclResult_t (*ptrSupport)(int dev, int* supportedTypes);
+  // Get various device properties.
+  ncclResult_t (*getProperties)(int dev, ncclNetProperties_v4_t* props);
   // Create a receiving object and provide a handle to connect to it. The
   // handle can be up to NCCL_NET_HANDLE_MAXSIZE bytes and will be exchanged
   // between ranks to create a connection.
@@ -40,15 +53,19 @@ typedef struct {
   ncclResult_t (*connect)(int dev, void* handle, void** sendComm);
   // Finalize connection establishment after remote peer has called connectHandle
   ncclResult_t (*accept)(void* listenComm, void** recvComm);
-  // Asynchronous send to a peer. Type is either NCCL_PTR_HOST or NCCL_PTR_CUDA.
+  // Register/Deregister memory. Comm can be either a sendComm or a recvComm.
+  // Type is either NCCL_PTR_HOST or NCCL_PTR_CUDA.
+  ncclResult_t (*regMr)(void* comm, void* data, int size, int type, void** mhandle);
+  ncclResult_t (*deregMr)(void* comm, void* mhandle);
+  // Asynchronous send to a peer.
   // May return request == NULL if the call cannot be performed (or would block)
-  ncclResult_t (*isend)(void* sendComm, void* data, int size, int type, void** request);
-  // Asynchronous recv from a peer. Type is either NCCL_PTR_HOST or NCCL_PTR_CUDA.
+  ncclResult_t (*isend)(void* sendComm, void* data, int size, void* mhandle, void** request);
+  // Asynchronous recv from a peer.
   // May return request == NULL if the call cannot be performed (or would block)
-  ncclResult_t (*irecv)(void* recvComm, void* data, int size, int type, void** request);
+  ncclResult_t (*irecv)(void* recvComm, void* data, int size, void* mhandle, void** request);
   // Perform a flush/fence to make sure all data received with NCCL_PTR_CUDA is
   // visible to the GPU
-  ncclResult_t (*flush)(void* recvComm, void* data, int size);
+  ncclResult_t (*iflush)(void* recvComm, void* data, int size, void* mhandle, void** request);
   // Test whether a request is complete. If size is not NULL, it returns the
   // number of bytes sent/received.
   ncclResult_t (*test)(void* request, int* done, int* size);
@@ -56,53 +73,52 @@ typedef struct {
   ncclResult_t (*closeSend)(void* sendComm);
   ncclResult_t (*closeRecv)(void* recvComm);
   ncclResult_t (*closeListen)(void* listenComm);
-} ncclNet_v1_t;
+} ncclNet_v4_t;
+
+typedef ncclNet_v4_t ncclNet_t;
+
+#define NCCL_PLUGIN_SYMBOL ncclNetPlugin_v4
 
 typedef struct {
-  // Name of the network (mainly for logs)
+  // Name of the collective network (mainly for logs)
   const char* name;
-  // Initialize the network.
+  // Initialize the collective network.
   ncclResult_t (*init)(ncclDebugLogger_t logFunction);
-  // Return the number of adapters.
+  // Return the number of adapters capable of doing collective operations.
+  // If ndev returns 0, all other functions might be set to NULL.
   ncclResult_t (*devices)(int* ndev);
-  // Return the device path in /sys. NCCL will call free on this path.
-  ncclResult_t (*pciPath)(int dev, char** path);
-  // Return whether this device supports host pointers and/or CUDA pointers
-  // as data from the current GPU. Supported types should be composed with
-  // NCCL_PTR_HOST and NCCL_PTR_CUDA.
-  ncclResult_t (*ptrSupport)(int dev, int* supportedTypes);
+  // Get various device properties.
+  ncclResult_t (*getProperties)(int dev, ncclNetProperties_v4_t* props);
   // Create a receiving object and provide a handle to connect to it. The
   // handle can be up to NCCL_NET_HANDLE_MAXSIZE bytes and will be exchanged
-  // between ranks to create a connection.
+  // between ranks to create connections.
   ncclResult_t (*listen)(int dev, void* handle, void** listenComm);
-  // Connect to a handle and return a sending comm object for that peer.
-  ncclResult_t (*connect)(int dev, void* handle, void** sendComm);
-  // Finalize connection establishment after remote peer has called connectHandle
-  ncclResult_t (*accept)(void* listenComm, void** recvComm);
-  // Register/Deregister memory. Comm can be either a sendComm or a recvComm.
-  // Type is either NCCL_PTR_HOST or NCCL_PTR_CUDA.
-  ncclResult_t (*regMr)(void* comm, void* data, int size, int type, void** mhandle);
-  ncclResult_t (*deregMr)(void* comm, void* mhandle);
-  // Asynchronous send to a peer.
-  // May return request == NULL if the call cannot be performed (or would block)
-  ncclResult_t (*isend)(void* sendComm, void* data, int size, void* mhandle, void** request);
-  // Asynchronous recv from a peer.
-  // May return request == NULL if the call cannot be performed (or would block)
-  ncclResult_t (*irecv)(void* recvComm, void* data, int size, void* mhandle, void** request);
+  // Create a group for collective operations. handles have been created
+  // using listen() above. rank indicates caller's rank in the collective network.
+  ncclResult_t (*connect)(void* handles[], int nranks, int rank, void* listenComm, void** collComm);
+  // Returns whether a reduction operation on a data type is supported.
+  // 1 for supported, 0 otherwise.
+  ncclResult_t (*reduceSupport)(ncclDataType_t dataType, ncclRedOp_t redOp, int* supported);
+  // Register/Deregister memory. Type is either NCCL_PTR_HOST or NCCL_PTR_CUDA.
+  ncclResult_t (*regMr)(void* collComm, void* data, int size, int type, void** mhandle);
+  ncclResult_t (*deregMr)(void* collComm, void* mhandle);
+  // Performs an asynchronous allreduce operation on the collective group.
+  // May return request == NULL if the call cannot be performed (or would block).
+  ncclResult_t (*iallreduce)(void* collComm, void* sendData, void* recvData, int count,
+      ncclDataType_t dataType, ncclRedOp_t redOp, void* sendMhandle, void* recvMhandle, void** request);
   // Perform a flush/fence to make sure all data received with NCCL_PTR_CUDA is
   // visible to the GPU
-  ncclResult_t (*flush)(void* recvComm, void* data, int size, void* mhandle);
+  ncclResult_t (*iflush)(void* collComm, void* data, int size, void* mhandle, void** request);
   // Test whether a request is complete. If size is not NULL, it returns the
   // number of bytes sent/received.
   ncclResult_t (*test)(void* request, int* done, int* size);
-  // Close and free send/recv comm objects
-  ncclResult_t (*closeSend)(void* sendComm);
-  ncclResult_t (*closeRecv)(void* recvComm);
+  // Close and free collective comm objects
+  ncclResult_t (*closeColl)(void* collComm);
   ncclResult_t (*closeListen)(void* listenComm);
-} ncclNet_v2_t;
+} ncclCollNet_v4_t;
 
-typedef ncclNet_v2_t ncclNet_t;
+typedef ncclCollNet_v4_t ncclCollNet_t;
 
-#define NCCL_PLUGIN_SYMBOL ncclNetPlugin_v2
+#define NCCL_COLLNET_PLUGIN_SYMBOL ncclCollNetPlugin_v4
 
 #endif // end include guard