diff options
author | Hirochika Asai <panda@jar.jp> | 2019-07-10 00:45:41 +0300 |
---|---|---|
committer | Sylvain Jeaugey <sjeaugey@nvidia.com> | 2019-07-10 00:45:41 +0300 |
commit | 0b192d2299146e64a096aee16f8b8f7638d2d9d4 (patch) | |
tree | 0bdf2912ebb9e65e0cad723c274f86becb1bd411 | |
parent | 8e04d80382cc8bee10b70652dfdaf4e9a06f53dc (diff) |
Add the exact matching modifier support "=" to the NCCL_IB_HCA variable (#236)
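Perform exact matching when the value of the NCCL_IB_HCA variable begins with the prefix "=". Without the prefix, entries are matched as name prefixes, so specifying mlx5_X also selects HCAs named mlx5_X[0-9]+; with the prefix, only the HCA named exactly mlx5_X is selected (for example, "=mlx5_1" matches mlx5_1 but not mlx5_10).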
-rw-r--r-- | src/include/socket.h | 3 | ||||
-rw-r--r-- | src/include/utils.h | 2 | ||||
-rw-r--r-- | src/misc/utils.cc | 14 | ||||
-rw-r--r-- | src/transport/net_ib.cc | 3 |
4 files changed, 13 insertions, 9 deletions
diff --git a/src/include/socket.h b/src/include/socket.h index 8197a65..68ce235 100644 --- a/src/include/socket.h +++ b/src/include/socket.h @@ -66,6 +66,7 @@ static int findInterfaces(const char* prefixList, char* names, union socketAddre #endif struct netIf userIfs[MAX_IFS]; bool searchNot = prefixList && prefixList[0] == '^'; + bool searchExact = prefixList && prefixList[0] == '='; int nUserIfs = parseStringList(prefixList, userIfs, MAX_IFS); int found = 0; @@ -92,7 +93,7 @@ static int findInterfaces(const char* prefixList, char* names, union socketAddre } // check against user specified interfaces - if (!(matchIfList(interface->ifa_name, -1, userIfs, nUserIfs) ^ searchNot)) { + if (!(matchIfList(interface->ifa_name, -1, userIfs, nUserIfs, searchExact) ^ searchNot)) { continue; } diff --git a/src/include/utils.h b/src/include/utils.h index 29b72ad..93e72c8 100644 --- a/src/include/utils.h +++ b/src/include/utils.h @@ -20,6 +20,6 @@ struct netIf { }; int parseStringList(const char* string, struct netIf* ifList, int maxList); -bool matchIfList(const char* string, int port, struct netIf* ifList, int listSize); +bool matchIfList(const char* string, int port, struct netIf* ifList, int listSize, bool matchExact); #endif diff --git a/src/misc/utils.cc b/src/misc/utils.cc index 5e884ae..5093755 100644 --- a/src/misc/utils.cc +++ b/src/misc/utils.cc @@ -147,8 +147,8 @@ int parseStringList(const char* string, struct netIf* ifList, int maxList) { if (!string) return 0; const char* ptr = string; - // Ignore "^" prefix, will be detected outside of this function - if (ptr[0] == '^') ptr++; + // Ignore "^" or "=" prefix, will be detected outside of this function + if (ptr[0] == '^' || ptr[0] == '=') ptr++; int ifNum = 0; int ifC = 0; @@ -177,8 +177,10 @@ int parseStringList(const char* string, struct netIf* ifList, int maxList) { return ifNum; } -static bool matchPrefix(const char* string, const char* prefix) { - return (strncmp(string, prefix, strlen(prefix)) == 0); +static 
bool matchIf(const char* string, const char* ref, bool matchExact) { + // Make sure to include '\0' in the exact case + int matchLen = matchExact ? strlen(string) + 1 : strlen(ref); + return strncmp(string, ref, matchLen) == 0; } static bool matchPort(const int port1, const int port2) { @@ -189,12 +191,12 @@ static bool matchPort(const int port1, const int port2) { } -bool matchIfList(const char* string, int port, struct netIf* ifList, int listSize) { +bool matchIfList(const char* string, int port, struct netIf* ifList, int listSize, bool matchExact) { // Make an exception for the case where no user list is defined if (listSize == 0) return true; for (int i=0; i<listSize; i++) { - if (matchPrefix(string, ifList[i].prefix) + if (matchIf(string, ifList[i].prefix, matchExact) && matchPort(port, ifList[i].port)) { return true; } diff --git a/src/transport/net_ib.cc b/src/transport/net_ib.cc index de72f89..43d8e35 100644 --- a/src/transport/net_ib.cc +++ b/src/transport/net_ib.cc @@ -107,6 +107,7 @@ ncclResult_t ncclIbInit(ncclDebugLogger_t logFunction) { char* userIbEnv = getenv("NCCL_IB_HCA"); struct netIf userIfs[MAX_IB_DEVS]; bool searchNot = userIbEnv && userIbEnv[0] == '^'; + bool searchExact = userIbEnv && userIbEnv[0] == '='; int nUserIfs = parseStringList(userIbEnv, userIfs, MAX_IB_DEVS); if (ncclSuccess != wrap_ibv_get_device_list(&devices, &nIbDevs)) return ncclInternalError; @@ -136,7 +137,7 @@ ncclResult_t ncclIbInit(ncclDebugLogger_t logFunction) { && portAttr.link_layer != IBV_LINK_LAYER_ETHERNET) continue; // check against user specified HCAs/ports - if (! (matchIfList(devices[d]->name, port, userIfs, nUserIfs) ^ searchNot)) { + if (! (matchIfList(devices[d]->name, port, userIfs, nUserIfs, searchExact) ^ searchNot)) { continue; } TRACE(NCCL_INIT|NCCL_NET,"NET/IB: [%d] %s:%d/%s ", d, devices[d]->name, port, |