diff options
Diffstat (limited to 'src/graph/topo.h')
-rw-r--r-- | src/graph/topo.h | 170 |
1 files changed, 102 insertions, 68 deletions
diff --git a/src/graph/topo.h b/src/graph/topo.h
index 6b8a2f9..a12bb2d 100644
--- a/src/graph/topo.h
+++ b/src/graph/topo.h
@@ -1,5 +1,5 @@
 /*************************************************************************
- * Copyright (c) 2016-2019, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved.
  *
  * See LICENSE.txt for license information
  ************************************************************************/
@@ -9,22 +9,26 @@
 
 #include "graph.h"
 #include "core.h"
-
-#define LOC_WIDTH 5000
-#define PASCAL_NVLINK_WIDTH 18
-#define VOLTA_NVLINK_WIDTH 21
-#define PCI_WIDTH 12 // PCI Gen3 x16
-#define QPI_WIDTH 8
-#define SKL_QPI_WIDTH 12
-#define P9_WIDTH 32
-#define NET_WIDTH 12 // 100Gbit
-
-// Intel CPU convert GPU P2P traffic into 64B PCI TLPs, to GPU
-// to GPU traffic consumed more PCI bandwidth.
+#include <sched.h>
+
+#define LOC_WIDTH 5000.0
+#define SM60_NVLINK_WIDTH 18.0
+#define SM70_NVLINK_WIDTH 21.0
+#define SM80_NVLINK_WIDTH 21.0
+#define SM86_NVLINK_WIDTH 12.0
+#define PCI_WIDTH 12.0 // PCI Gen3 x16
+#define QPI_WIDTH 6.0
+#define SKL_QPI_WIDTH 9.0
+#define P9_WIDTH 32.0
+#define ARM_WIDTH 6.0
+#define NET_WIDTH 12.0 // 100Gbit
+
+// Intel CPU convert GPU P2P traffic into 64B PCI TLPs, so GPU
+// to GPU traffic consumes more PCI bandwidth.
 #define INTEL_P2P(speed) (speed*9/12)
 #define INTEL_P2P_OVERHEAD(speed) (speed*12/9)
 
-#define NCCL_TOPO_NODE_TYPES 6
+#define NCCL_TOPO_NODE_TYPES 7
 #define GPU 0
 #define PCI 1
 #define NVS 2
@@ -33,37 +37,73 @@
 #define NET 5
 extern const char* topoNodeTypeStr[];
 
+// We want link types and path types to match as much as possible
 #define LINK_LOC 0
 #define LINK_NVL 1
-#define LINK_PCI 2
-#define LINK_QPI 3
-#define LINK_NET 4
+// Skipping 2 for PATH_NVB
+#define LINK_PCI 3
+// Skipping 4 for PATH_PXB
+// Skipping 5 for PATH_PHB
+#define LINK_SYS 6
+#define LINK_NET 7
 extern const char* topoLinkTypeStr[];
 
+#define PATH_LOC 0
+#define PATH_NVL 1
+#define PATH_NVB 2
+#define PATH_PIX 3
+#define PATH_PXB 4
+#define PATH_PHB 5
+#define PATH_SYS 6
+extern const char* topoPathTypeStr[];
+
 struct ncclTopoNode;
 struct ncclTopoLink {
   int type;
-  int width;
+  float width;
   struct ncclTopoNode* remNode;
 };
 #define NCCL_TOPO_MAX_LINKS 32
 #define NCCL_TOPO_MAX_HOPS (NCCL_TOPO_MAX_NODES*NCCL_TOPO_NODE_TYPES)
-#define SELECT_PATH 1
-#define SELECT_LAST 2
-
-#define NET_GDR_MASK 0x70000000
 
 struct ncclTopoLinkList {
   struct ncclTopoLink* list[NCCL_TOPO_MAX_HOPS];
   int count;
-  int width;
+  float width;
   int type;
 };
 
+#define NCCL_TOPO_CPU_INTEL_BDW 1
+#define NCCL_TOPO_CPU_INTEL_SKL 2
+
+#define NCCL_TOPO_UNDEF (-1)
+
 struct ncclTopoNode {
   int type;
   int64_t id;
-  int rank;
+  // Type specific data
+  union {
+    struct {
+      int dev; // NVML dev number
+      int rank;
+      int cudaCompCap;
+      int gdrSupport;
+    }gpu;
+    struct {
+      uint64_t asic;
+      int port;
+      float width;
+      int gdrSupport;
+      int collSupport;
+      int maxChannels;
+    }net;
+    struct {
+      int arch;
+      int vendor;
+      int model;
+      cpu_set_t affinity;
+    }cpu;
+  };
   int nlinks;
   struct ncclTopoLink links[NCCL_TOPO_MAX_LINKS];
   // Pre-computed paths to GPUs and NICs
@@ -79,60 +119,54 @@ struct ncclTopoNodeSet {
 
 struct ncclTopoSystem {
   struct ncclTopoNodeSet nodes[NCCL_TOPO_NODE_TYPES];
-  int maxSpeed;
-  int maxWidth;
-  int searchInitDone;
+  float maxWidth;
+  float totalWidth;
 };
 
-static ncclResult_t ncclTopoCreateNode(struct ncclTopoSystem* system, struct ncclTopoNode** node, int type, uint64_t id) {
+ncclResult_t ncclTopoGetNode(struct ncclTopoSystem* system, struct ncclTopoNode** node, int type, uint64_t id);
+ncclResult_t ncclTopoCreateNode(struct ncclTopoSystem* system, struct ncclTopoNode** node, int type, uint64_t id);
+ncclResult_t ncclTopoRemoveNode(struct ncclTopoSystem* system, int type, int id);
+ncclResult_t ncclTopoConnectNodes(struct ncclTopoNode* node, struct ncclTopoNode* remNode, int type, float width);
+ncclResult_t ncclTopoPrintPaths(struct ncclTopoSystem* system);
+ncclResult_t ncclTopoLoadSystem(const char* xmlTopoFile, struct ncclTopoSystem* system);
+
+ncclResult_t ncclTopoGetLocalNet(struct ncclTopoSystem* system, int rank, int64_t* id, int rr);
+
+ncclResult_t ncclTopoGetSystemFromXml(struct ncclXml* xml, struct ncclTopoSystem** topoSystem);
+ncclResult_t ncclTopoGetGraphFromXml(struct ncclXmlNode *xmlGraphs, struct ncclTopoSystem* system, struct ncclTopoGraph* graph, int* nChannels);
+ncclResult_t ncclTopoGetXmlFromGraphs(int ngraphs, struct ncclTopoGraph** graphs, struct ncclTopoSystem* system, struct ncclXml *xml);
+
+ncclResult_t ncclTopoGetCompCap(struct ncclTopoSystem* system, int* ccMin, int* ccMax);
+
+static ncclResult_t ncclTopoIdToIndex(struct ncclTopoSystem* system, int type, int64_t id, int* index) {
+  *index = -1;
   for (int i=0; i<system->nodes[type].count; i++) {
     if (system->nodes[type].nodes[i].id == id) {
-      *node = system->nodes[type].nodes+i;
+      *index = i;
       return ncclSuccess;
     }
   }
-  if (system->nodes[type].count == NCCL_TOPO_MAX_NODES) {
-    WARN("Error : tried to create too many nodes of type %d\n", type);
-    return ncclInternalError;
-  }
-  struct ncclTopoNode* n = system->nodes[type].nodes+system->nodes[type].count;
-  system->nodes[type].count++;
-  n->type = type;
-  n->id = id;
-  if (type == GPU) {
-    // Create link to itself (used in some corner cases)
-    n->nlinks=1;
-    n->links[0].type = LINK_LOC;
-    n->links[0].remNode = n;
-    n->links[0].width = LOC_WIDTH;
-  }
-  *node = n;
-  return ncclSuccess;
+  return ncclInternalError;
 }
 
-static ncclResult_t ncclTopoConnectNodes(struct ncclTopoNode* node, struct ncclTopoNode* remNode, int type, int width) {
-  // Aggregate links into higher width for NVLink
-  struct ncclTopoLink* link;
-  for (link = node->links; link->remNode; link++) {
-    if (link->remNode == remNode && link->type == type) break;
-  }
-  if (link->remNode == NULL) node->nlinks++;
-  link->type = type;
-  link->remNode = remNode;
-  link->width += width;
-
-  // Sort links in BW descending order
-  struct ncclTopoLink linkSave;
-  memcpy(&linkSave, link, sizeof(struct ncclTopoLink));
-  while (link != node->links) {
-    if ((link-1)->width >= linkSave.width) break;
-    memcpy(link, link-1, sizeof(struct ncclTopoLink));
-    link--;
+static ncclResult_t ncclTopoRankToIndex(struct ncclTopoSystem* system, int rank, int* index) {
+  *index = -1;
+  for (int i=0; i<system->nodes[GPU].count; i++) {
+    if (system->nodes[GPU].nodes[i].gpu.rank == rank) {
+      *index = i;
+      return ncclSuccess;
+    }
   }
-  memcpy(link, &linkSave, sizeof(struct ncclTopoLink));
-  return ncclSuccess;
+  return ncclInternalError;
 }
 
-ncclResult_t ncclTopoPrintPaths(struct ncclTopoSystem* system);
-
+// Returns NVLink speed in GB/s
+static float ncclTopoNVLinkSpeed(int cudaCompCap) {
+  return
+    cudaCompCap == 86 ? SM86_NVLINK_WIDTH :
+    cudaCompCap >= 80 ? SM80_NVLINK_WIDTH :
+    cudaCompCap >= 70 ? SM70_NVLINK_WIDTH :
+    cudaCompCap >= 60 ? SM60_NVLINK_WIDTH :
+    SM80_NVLINK_WIDTH;
+}
 #endif