Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/nccl.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Luke Yeager <lyeager@nvidia.com> 2020-01-08 00:29:57 +0300
committer Luke Yeager <lyeager@nvidia.com> 2020-01-08 00:29:57 +0300
commit 7a18fe07847300fbe7fec8d5512b3b44d8bc1716 (patch)
tree 787fb8b1dd9df8cca71960c37bc60df55092dcbf
parent c7ba70ff90b357b40bf571ea3366d61e1249a0be (diff)
[topology] remove NET links when trimming system
This fixes a memory leak.
-rw-r--r-- src/graph/paths.cc 23
1 files changed, 23 insertions, 0 deletions
diff --git a/src/graph/paths.cc b/src/graph/paths.cc
index ce1772c..eba1964 100644
--- a/src/graph/paths.cc
+++ b/src/graph/paths.cc
@@ -179,11 +179,18 @@ static ncclResult_t addCpuStep(struct ncclTopoSystem* system, int c, int t1, int
// Remove/free paths for a given type.
//
// Walks every node type t in the system and releases two sets of path arrays:
//   1. on each node of type t, the array indexed by nodeType
//      (paths leading _to_ nodeType);
//   2. on each node of type nodeType, the array indexed by t
//      (paths leading _from_ nodeType).
// Every freed pointer is reset to NULL. Node counts are not modified here;
// the caller is responsible for adjusting system->nodes[nodeType].count.
//
// system:   topology whose nodes[] table is walked.
// nodeType: index into paths[] / nodes[] identifying the type to drop.
static void ncclTopoRemovePathType(struct ncclTopoSystem* system, int nodeType) {
for (int t=0; t<NCCL_TOPO_NODE_TYPES; t++) {
+ // Remove paths _to_ the given type: each node of type t drops its paths[nodeType] array
for (int n=0; n<system->nodes[t].count; n++) {
struct ncclTopoNode* node = system->nodes[t].nodes+n;
free(node->paths[nodeType]);
node->paths[nodeType] = NULL; // cleared so a later free() of this slot is a no-op
}
+ // Remove paths _from_ the given type: each node of nodeType drops its paths[t] array
+ for (int n=0; n<system->nodes[nodeType].count; n++) {
+ struct ncclTopoNode* node = system->nodes[nodeType].nodes+n;
+ free(node->paths[t]); // when t == nodeType this slot was already set to NULL by the loop above
+ node->paths[t] = NULL;
+ }
}
}
@@ -309,6 +316,22 @@ ncclResult_t ncclTopoTrimSystem(struct ncclTopoSystem* system, struct ncclComm*
// Trim network
ncclTopoRemovePathType(system, NET);
system->nodes[NET].count = 0;
+ for (int t=0; t<NCCL_TOPO_NODE_TYPES; t++) {
+ for (int n=0; n<system->nodes[t].count; n++) {
+ struct ncclTopoNode* node = system->nodes[t].nodes+n;
+ for (int l=0; l<node->nlinks; l++) {
+ struct ncclTopoLink* link = &(node->links[l]);
+ if (link->remNode->type == NET) {
+ // Remove the link
+ for (int i=l; i<(node->nlinks-1); i++) {
+ memcpy(&(node->links[i]), &(node->links[i+1]), sizeof(ncclTopoLink));
+ }
+ node->nlinks--;
+ l--; // revisit the same value of "l" for the next iteration, since we edited the list in the middle of the loop
+ }
+ }
+ }
+ }
}
free(domains);
free(ids);