Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/nccl.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
Diffstat (limited to 'src/graph/topo.h')
-rw-r--r-- src/graph/topo.h 170
1 files changed, 102 insertions, 68 deletions
diff --git a/src/graph/topo.h b/src/graph/topo.h
index 6b8a2f9..a12bb2d 100644
--- a/src/graph/topo.h
+++ b/src/graph/topo.h
@@ -1,5 +1,5 @@
/*************************************************************************
- * Copyright (c) 2016-2019, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved.
*
* See LICENSE.txt for license information
************************************************************************/
@@ -9,22 +9,26 @@
#include "graph.h"
#include "core.h"
-
-#define LOC_WIDTH 5000
-#define PASCAL_NVLINK_WIDTH 18
-#define VOLTA_NVLINK_WIDTH 21
-#define PCI_WIDTH 12 // PCI Gen3 x16
-#define QPI_WIDTH 8
-#define SKL_QPI_WIDTH 12
-#define P9_WIDTH 32
-#define NET_WIDTH 12 // 100Gbit
-
-// Intel CPU convert GPU P2P traffic into 64B PCI TLPs, to GPU
-// to GPU traffic consumed more PCI bandwidth.
+#include <sched.h>
+
+#define LOC_WIDTH 5000.0
+#define SM60_NVLINK_WIDTH 18.0
+#define SM70_NVLINK_WIDTH 21.0
+#define SM80_NVLINK_WIDTH 21.0
+#define SM86_NVLINK_WIDTH 12.0
+#define PCI_WIDTH 12.0 // PCI Gen3 x16
+#define QPI_WIDTH 6.0
+#define SKL_QPI_WIDTH 9.0
+#define P9_WIDTH 32.0
+#define ARM_WIDTH 6.0
+#define NET_WIDTH 12.0 // 100Gbit
+
+// Intel CPUs convert GPU P2P traffic into 64B PCI TLPs, so GPU
+// to GPU traffic consumes more PCI bandwidth.
#define INTEL_P2P(speed) (speed*9/12)
#define INTEL_P2P_OVERHEAD(speed) (speed*12/9)
-#define NCCL_TOPO_NODE_TYPES 6
+#define NCCL_TOPO_NODE_TYPES 7
#define GPU 0
#define PCI 1
#define NVS 2
@@ -33,37 +37,73 @@
#define NET 5
extern const char* topoNodeTypeStr[];
+// We want link types and path types to match as much as possible
#define LINK_LOC 0
#define LINK_NVL 1
-#define LINK_PCI 2
-#define LINK_QPI 3
-#define LINK_NET 4
+// Skipping 2 for PATH_NVB
+#define LINK_PCI 3
+// Skipping 4 for PATH_PXB
+// Skipping 5 for PATH_PHB
+#define LINK_SYS 6
+#define LINK_NET 7
extern const char* topoLinkTypeStr[];
+#define PATH_LOC 0
+#define PATH_NVL 1
+#define PATH_NVB 2
+#define PATH_PIX 3
+#define PATH_PXB 4
+#define PATH_PHB 5
+#define PATH_SYS 6
+extern const char* topoPathTypeStr[];
+
struct ncclTopoNode;
struct ncclTopoLink {
int type;
- int width;
+ float width;
struct ncclTopoNode* remNode;
};
#define NCCL_TOPO_MAX_LINKS 32
#define NCCL_TOPO_MAX_HOPS (NCCL_TOPO_MAX_NODES*NCCL_TOPO_NODE_TYPES)
-#define SELECT_PATH 1
-#define SELECT_LAST 2
-
-#define NET_GDR_MASK 0x70000000
struct ncclTopoLinkList {
struct ncclTopoLink* list[NCCL_TOPO_MAX_HOPS];
int count;
- int width;
+ float width;
int type;
};
+#define NCCL_TOPO_CPU_INTEL_BDW 1
+#define NCCL_TOPO_CPU_INTEL_SKL 2
+
+#define NCCL_TOPO_UNDEF (-1)
+
struct ncclTopoNode {
int type;
int64_t id;
- int rank;
+ // Type specific data
+ union {
+ struct {
+ int dev; // NVML dev number
+ int rank;
+ int cudaCompCap;
+ int gdrSupport;
+ }gpu;
+ struct {
+ uint64_t asic;
+ int port;
+ float width;
+ int gdrSupport;
+ int collSupport;
+ int maxChannels;
+ }net;
+ struct {
+ int arch;
+ int vendor;
+ int model;
+ cpu_set_t affinity;
+ }cpu;
+ };
int nlinks;
struct ncclTopoLink links[NCCL_TOPO_MAX_LINKS];
// Pre-computed paths to GPUs and NICs
@@ -79,60 +119,54 @@ struct ncclTopoNodeSet {
struct ncclTopoSystem {
struct ncclTopoNodeSet nodes[NCCL_TOPO_NODE_TYPES];
- int maxSpeed;
- int maxWidth;
- int searchInitDone;
+ float maxWidth;
+ float totalWidth;
};
-static ncclResult_t ncclTopoCreateNode(struct ncclTopoSystem* system, struct ncclTopoNode** node, int type, uint64_t id) {
+ncclResult_t ncclTopoGetNode(struct ncclTopoSystem* system, struct ncclTopoNode** node, int type, uint64_t id);
+ncclResult_t ncclTopoCreateNode(struct ncclTopoSystem* system, struct ncclTopoNode** node, int type, uint64_t id);
+ncclResult_t ncclTopoRemoveNode(struct ncclTopoSystem* system, int type, int id);
+ncclResult_t ncclTopoConnectNodes(struct ncclTopoNode* node, struct ncclTopoNode* remNode, int type, float width);
+ncclResult_t ncclTopoPrintPaths(struct ncclTopoSystem* system);
+ncclResult_t ncclTopoLoadSystem(const char* xmlTopoFile, struct ncclTopoSystem* system);
+
+ncclResult_t ncclTopoGetLocalNet(struct ncclTopoSystem* system, int rank, int64_t* id, int rr);
+
+ncclResult_t ncclTopoGetSystemFromXml(struct ncclXml* xml, struct ncclTopoSystem** topoSystem);
+ncclResult_t ncclTopoGetGraphFromXml(struct ncclXmlNode *xmlGraphs, struct ncclTopoSystem* system, struct ncclTopoGraph* graph, int* nChannels);
+ncclResult_t ncclTopoGetXmlFromGraphs(int ngraphs, struct ncclTopoGraph** graphs, struct ncclTopoSystem* system, struct ncclXml *xml);
+
+ncclResult_t ncclTopoGetCompCap(struct ncclTopoSystem* system, int* ccMin, int* ccMax);
+
+static ncclResult_t ncclTopoIdToIndex(struct ncclTopoSystem* system, int type, int64_t id, int* index) {
+ *index = -1;
for (int i=0; i<system->nodes[type].count; i++) {
if (system->nodes[type].nodes[i].id == id) {
- *node = system->nodes[type].nodes+i;
+ *index = i;
return ncclSuccess;
}
}
- if (system->nodes[type].count == NCCL_TOPO_MAX_NODES) {
- WARN("Error : tried to create too many nodes of type %d\n", type);
- return ncclInternalError;
- }
- struct ncclTopoNode* n = system->nodes[type].nodes+system->nodes[type].count;
- system->nodes[type].count++;
- n->type = type;
- n->id = id;
- if (type == GPU) {
- // Create link to itself (used in some corner cases)
- n->nlinks=1;
- n->links[0].type = LINK_LOC;
- n->links[0].remNode = n;
- n->links[0].width = LOC_WIDTH;
- }
- *node = n;
- return ncclSuccess;
+ return ncclInternalError;
}
-static ncclResult_t ncclTopoConnectNodes(struct ncclTopoNode* node, struct ncclTopoNode* remNode, int type, int width) {
- // Aggregate links into higher width for NVLink
- struct ncclTopoLink* link;
- for (link = node->links; link->remNode; link++) {
- if (link->remNode == remNode && link->type == type) break;
- }
- if (link->remNode == NULL) node->nlinks++;
- link->type = type;
- link->remNode = remNode;
- link->width += width;
-
- // Sort links in BW descending order
- struct ncclTopoLink linkSave;
- memcpy(&linkSave, link, sizeof(struct ncclTopoLink));
- while (link != node->links) {
- if ((link-1)->width >= linkSave.width) break;
- memcpy(link, link-1, sizeof(struct ncclTopoLink));
- link--;
+static ncclResult_t ncclTopoRankToIndex(struct ncclTopoSystem* system, int rank, int* index) {
+ *index = -1;
+ for (int i=0; i<system->nodes[GPU].count; i++) {
+ if (system->nodes[GPU].nodes[i].gpu.rank == rank) {
+ *index = i;
+ return ncclSuccess;
+ }
}
- memcpy(link, &linkSave, sizeof(struct ncclTopoLink));
- return ncclSuccess;
+ return ncclInternalError;
}
-ncclResult_t ncclTopoPrintPaths(struct ncclTopoSystem* system);
-
+// Returns NVLink speed in GB/s
+static float ncclTopoNVLinkSpeed(int cudaCompCap) {
+ return
+ cudaCompCap == 86 ? SM86_NVLINK_WIDTH :
+ cudaCompCap >= 80 ? SM80_NVLINK_WIDTH :
+ cudaCompCap >= 70 ? SM70_NVLINK_WIDTH :
+ cudaCompCap >= 60 ? SM60_NVLINK_WIDTH :
+ SM80_NVLINK_WIDTH;
+}
#endif