Welcome to mirror list, hosted at ThFree Co, Russian Federation.

topo.h « include « src - github.com/marian-nmt/nccl.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: 24d7e9d28e16615a9cf21b365c247f4fa37798ca (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
/*************************************************************************
 * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved.
 *
 * See LICENSE.txt for license information
 ************************************************************************/

#ifndef NCCL_TOPO_H_
#define NCCL_TOPO_H_

#include "nccl.h"
#include <ctype.h>

#define MAXPATHSIZE 1024

static ncclResult_t getCudaPath(int cudaDev, char** path) {
  char busId[16];
  CUDACHECK(cudaDeviceGetPCIBusId(busId, 16, cudaDev));
  for (int i=0; i<16; i++) busId[i] = tolower(busId[i]);
  char busPath[] = "/sys/class/pci_bus/0000:00/device";
  memcpy(busPath+sizeof("/sys/class/pci_bus/")-1, busId, sizeof("0000:00")-1);
  char* cudaRpath = realpath(busPath, NULL);
  char pathname[MAXPATHSIZE];
  strncpy(pathname, cudaRpath, MAXPATHSIZE);
  strncpy(pathname+strlen(pathname), "/", MAXPATHSIZE-strlen(pathname));
  strncpy(pathname+strlen(pathname), busId, MAXPATHSIZE-strlen(pathname));
  free(cudaRpath);
  *path = realpath(pathname, NULL);
  if (*path == NULL) {
    WARN("Could not find real path of %s", pathname);
    return ncclSystemError;
  }
  return ncclSuccess;
}

static ncclResult_t getMlxPath(char* ibName, char** path) {
  char devicepath[MAXPATHSIZE];
  snprintf(devicepath, MAXPATHSIZE, "/sys/class/infiniband/%s/device", ibName);
  *path = realpath(devicepath, NULL);
  if (*path == NULL) {
    WARN("Could not find real path of %s", devicepath);
    return ncclSystemError;
  }
  return ncclSuccess;
}

static ncclResult_t getSockPath(char* ifName, char** path) {
  char devicepath[MAXPATHSIZE];
  snprintf(devicepath, MAXPATHSIZE, "/sys/class/net/%s/device", ifName);
  *path = realpath(devicepath, NULL);
  if (*path == NULL) {
    INFO(NET|INIT, "Could not find real path of %s", devicepath);
    return ncclSystemError;
  }
  return ncclSuccess;
}

enum ncclIbPathDist {
  PATH_PIX = 0,
  PATH_PXB = 1,
  PATH_PHB = 2,
  PATH_SOC = 3
};

static const char* pathDists[] = { "PIX", "PXB", "PHB", "SOC" };

static int pciDistance(char* path1, char* path2) {
  int score = 0;
  int depth = 0;
  int same = 1;
  for (int i=0; i<strlen(path1); i++) {
    if (path1[i] != path2[i]) same = 0;
    if (path1[i] == '/') {
      depth++;
      if (same == 1) score++;
    }
  }
  if (score == 3) return PATH_SOC;
  if (score == 4) return PATH_PHB;
  if (score == depth-1) return PATH_PIX;
  return PATH_PXB;
}

#endif