diff options
Diffstat (limited to 'src/graph/topo.h')
-rw-r--r-- | src/graph/topo.h | 138 |
1 files changed, 138 insertions, 0 deletions
diff --git a/src/graph/topo.h b/src/graph/topo.h new file mode 100644 index 0000000..6b8a2f9 --- /dev/null +++ b/src/graph/topo.h @@ -0,0 +1,138 @@ +/************************************************************************* + * Copyright (c) 2016-2019, NVIDIA CORPORATION. All rights reserved. + * + * See LICENSE.txt for license information + ************************************************************************/ + +#ifndef NCCL_TOPO_H_ +#define NCCL_TOPO_H_ + +#include "graph.h" +#include "core.h" + +#define LOC_WIDTH 5000 +#define PASCAL_NVLINK_WIDTH 18 +#define VOLTA_NVLINK_WIDTH 21 +#define PCI_WIDTH 12 // PCI Gen3 x16 +#define QPI_WIDTH 8 +#define SKL_QPI_WIDTH 12 +#define P9_WIDTH 32 +#define NET_WIDTH 12 // 100Gbit + +// Intel CPU convert GPU P2P traffic into 64B PCI TLPs, to GPU +// to GPU traffic consumed more PCI bandwidth. +#define INTEL_P2P(speed) (speed*9/12) +#define INTEL_P2P_OVERHEAD(speed) (speed*12/9) + +#define NCCL_TOPO_NODE_TYPES 6 +#define GPU 0 +#define PCI 1 +#define NVS 2 +#define CPU 3 // Actually NUMA domains +#define NIC 4 +#define NET 5 +extern const char* topoNodeTypeStr[]; + +#define LINK_LOC 0 +#define LINK_NVL 1 +#define LINK_PCI 2 +#define LINK_QPI 3 +#define LINK_NET 4 +extern const char* topoLinkTypeStr[]; + +struct ncclTopoNode; +struct ncclTopoLink { + int type; + int width; + struct ncclTopoNode* remNode; +}; +#define NCCL_TOPO_MAX_LINKS 32 +#define NCCL_TOPO_MAX_HOPS (NCCL_TOPO_MAX_NODES*NCCL_TOPO_NODE_TYPES) +#define SELECT_PATH 1 +#define SELECT_LAST 2 + +#define NET_GDR_MASK 0x70000000 + +struct ncclTopoLinkList { + struct ncclTopoLink* list[NCCL_TOPO_MAX_HOPS]; + int count; + int width; + int type; +}; + +struct ncclTopoNode { + int type; + int64_t id; + int rank; + int nlinks; + struct ncclTopoLink links[NCCL_TOPO_MAX_LINKS]; + // Pre-computed paths to GPUs and NICs + struct ncclTopoLinkList* paths[NCCL_TOPO_NODE_TYPES]; + // Used during search + uint64_t used; +}; + +struct ncclTopoNodeSet { + int count; + struct ncclTopoNode nodes[NCCL_TOPO_MAX_NODES]; +}; + +struct ncclTopoSystem { + struct ncclTopoNodeSet nodes[NCCL_TOPO_NODE_TYPES]; + int maxSpeed; + int maxWidth; + int searchInitDone; +}; + +static ncclResult_t ncclTopoCreateNode(struct ncclTopoSystem* system, struct ncclTopoNode** node, int type, uint64_t id) { + for (int i=0; i<system->nodes[type].count; i++) { + if (system->nodes[type].nodes[i].id == id) { + *node = system->nodes[type].nodes+i; + return ncclSuccess; + } + } + if (system->nodes[type].count == NCCL_TOPO_MAX_NODES) { + WARN("Error : tried to create too many nodes of type %d\n", type); + return ncclInternalError; + } + struct ncclTopoNode* n = system->nodes[type].nodes+system->nodes[type].count; + system->nodes[type].count++; + n->type = type; + n->id = id; + if (type == GPU) { + // Create link to itself (used in some corner cases) + n->nlinks=1; + n->links[0].type = LINK_LOC; + n->links[0].remNode = n; + n->links[0].width = LOC_WIDTH; + } + *node = n; + return ncclSuccess; +} + +static ncclResult_t ncclTopoConnectNodes(struct ncclTopoNode* node, struct ncclTopoNode* remNode, int type, int width) { + // Aggregate links into higher width for NVLink + struct ncclTopoLink* link; + for (link = node->links; link->remNode; link++) { + if (link->remNode == remNode && link->type == type) break; + } + if (link->remNode == NULL) node->nlinks++; + link->type = type; + link->remNode = remNode; + link->width += width; + + // Sort links in BW descending order + struct ncclTopoLink linkSave; + memcpy(&linkSave, link, sizeof(struct ncclTopoLink)); + while (link != node->links) { + if ((link-1)->width >= linkSave.width) break; + memcpy(link, link-1, sizeof(struct ncclTopoLink)); + link--; + } + memcpy(link, &linkSave, sizeof(struct ncclTopoLink)); + return ncclSuccess; +} + +ncclResult_t ncclTopoPrintPaths(struct ncclTopoSystem* system); + +#endif |