Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/nccl.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Sylvain Jeaugey <sjeaugey@nvidia.com> 2016-10-13 20:28:59 +0300
committer Sylvain Jeaugey <sjeaugey@nvidia.com> 2016-10-13 20:28:59 +0300
commit bf7d1514f779e685f8b0a9ab7cdbc9856cdd9370 (patch)
tree 0bd5854a8444a94d3b02d79374ad497faec9254c
parent 8bb06c94bec4b20ba91d85dcd344ed4d53c68770 (diff)
NVML (libwrap) : import the needed definitions
-rw-r--r-- src/libwrap.cu 40
-rw-r--r-- src/libwrap.h 35
2 files changed, 45 insertions, 30 deletions
diff --git a/src/libwrap.cu b/src/libwrap.cu
index 5cfa546..1ac19a6 100644
--- a/src/libwrap.cu
+++ b/src/libwrap.cu
@@ -8,16 +8,15 @@
#include <dlfcn.h>
#include "core.h"
-typedef enum { SUCCESS = 0 } RetCode;
int symbolsLoaded = 0;
-static RetCode (*nvmlInternalInit)(void);
-static RetCode (*nvmlInternalShutdown)(void);
-static RetCode (*nvmlInternalDeviceGetHandleByPciBusId)(const char* pciBusId, nvmlDevice_t* device);
-static RetCode (*nvmlInternalDeviceGetIndex)(nvmlDevice_t device, unsigned* index);
-static RetCode (*nvmlInternalDeviceSetCpuAffinity)(nvmlDevice_t device);
-static RetCode (*nvmlInternalDeviceClearCpuAffinity)(nvmlDevice_t device);
-static const char* (*nvmlInternalErrorString)(RetCode r);
+static nvmlReturn_t (*nvmlInternalInit)(void);
+static nvmlReturn_t (*nvmlInternalShutdown)(void);
+static nvmlReturn_t (*nvmlInternalDeviceGetHandleByPciBusId)(const char* pciBusId, nvmlDevice_t* device);
+static nvmlReturn_t (*nvmlInternalDeviceGetIndex)(nvmlDevice_t device, unsigned* index);
+static nvmlReturn_t (*nvmlInternalDeviceSetCpuAffinity)(nvmlDevice_t device);
+static nvmlReturn_t (*nvmlInternalDeviceClearCpuAffinity)(nvmlDevice_t device);
+static const char* (*nvmlInternalErrorString)(nvmlReturn_t r);
ncclResult_t wrapSymbols(void) {
@@ -76,8 +75,8 @@ ncclResult_t wrapNvmlInit(void) {
WARN("lib wrapper not initialized.");
return ncclLibWrapperNotSet;
}
- RetCode ret = nvmlInternalInit();
- if (ret != SUCCESS) {
+ nvmlReturn_t ret = nvmlInternalInit();
+ if (ret != NVML_SUCCESS) {
WARN("nvmlInit() failed: %s",
nvmlInternalErrorString(ret));
return ncclSystemError;
@@ -90,8 +89,8 @@ ncclResult_t wrapNvmlShutdown(void) {
WARN("lib wrapper not initialized.");
return ncclLibWrapperNotSet;
}
- RetCode ret = nvmlInternalShutdown();
- if (ret != SUCCESS) {
+ nvmlReturn_t ret = nvmlInternalShutdown();
+ if (ret != NVML_SUCCESS) {
WARN("nvmlShutdown() failed: %s ",
nvmlInternalErrorString(ret));
return ncclSystemError;
@@ -104,8 +103,8 @@ ncclResult_t wrapNvmlDeviceGetHandleByPciBusId(const char* pciBusId, nvmlDevice_
WARN("lib wrapper not initialized.");
return ncclLibWrapperNotSet;
}
- RetCode ret = nvmlInternalDeviceGetHandleByPciBusId(pciBusId, device);
- if (ret != SUCCESS) {
+ nvmlReturn_t ret = nvmlInternalDeviceGetHandleByPciBusId(pciBusId, device);
+ if (ret != NVML_SUCCESS) {
WARN("nvmlDeviceGetHandleByPciBusId() failed: %s ",
nvmlInternalErrorString(ret));
return ncclSystemError;
@@ -118,8 +117,8 @@ ncclResult_t wrapNvmlDeviceGetIndex(nvmlDevice_t device, unsigned* index) {
WARN("lib wrapper not initialized.");
return ncclLibWrapperNotSet;
}
- RetCode ret = nvmlInternalDeviceGetIndex(device, index);
- if (ret != SUCCESS) {
+ nvmlReturn_t ret = nvmlInternalDeviceGetIndex(device, index);
+ if (ret != NVML_SUCCESS) {
WARN("nvmlDeviceGetIndex() failed: %s ",
nvmlInternalErrorString(ret));
return ncclSystemError;
@@ -132,8 +131,8 @@ ncclResult_t wrapNvmlDeviceSetCpuAffinity(nvmlDevice_t device) {
WARN("lib wrapper not initialized.");
return ncclLibWrapperNotSet;
}
- RetCode ret = nvmlInternalDeviceSetCpuAffinity(device);
- if (ret != SUCCESS) {
+ nvmlReturn_t ret = nvmlInternalDeviceSetCpuAffinity(device);
+ if (ret != NVML_SUCCESS) {
WARN("nvmlDeviceSetCpuAffinity() failed: %s ",
nvmlInternalErrorString(ret));
return ncclSystemError;
@@ -146,12 +145,11 @@ ncclResult_t wrapNvmlDeviceClearCpuAffinity(nvmlDevice_t device) {
WARN("lib wrapper not initialized.");
return ncclLibWrapperNotSet;
}
- RetCode ret = nvmlInternalDeviceClearCpuAffinity(device);
- if (ret != SUCCESS) {
+ nvmlReturn_t ret = nvmlInternalDeviceClearCpuAffinity(device);
+ if (ret != NVML_SUCCESS) {
WARN("nvmlDeviceClearCpuAffinity() failed: %s ",
nvmlInternalErrorString(ret));
return ncclSystemError;
}
return ncclSuccess;
}
-
diff --git a/src/libwrap.h b/src/libwrap.h
index 9397392..cdce480 100644
--- a/src/libwrap.h
+++ b/src/libwrap.h
@@ -1,5 +1,5 @@
/*************************************************************************
- * Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
*
* See LICENSE.txt for license information
************************************************************************/
@@ -12,16 +12,34 @@
#include "core.h"
+/* Extracted from nvml.h */
typedef struct nvmlDevice_st* nvmlDevice_t;
-/**
- * Generic enable/disable enum.
- */
-typedef enum nvmlEnableState_enum
+typedef enum nvmlReturn_enum
{
- NVML_FEATURE_DISABLED = 0, //!< Feature disabled
- NVML_FEATURE_ENABLED = 1 //!< Feature enabled
-} nvmlEnableState_t;
+ NVML_SUCCESS = 0, //!< The operation was successful
+ NVML_ERROR_UNINITIALIZED = 1, //!< NVML was not first initialized with nvmlInit()
+ NVML_ERROR_INVALID_ARGUMENT = 2, //!< A supplied argument is invalid
+ NVML_ERROR_NOT_SUPPORTED = 3, //!< The requested operation is not available on target device
+ NVML_ERROR_NO_PERMISSION = 4, //!< The current user does not have permission for operation
+ NVML_ERROR_ALREADY_INITIALIZED = 5, //!< Deprecated: Multiple initializations are now allowed through ref counting
+ NVML_ERROR_NOT_FOUND = 6, //!< A query to find an object was unsuccessful
+ NVML_ERROR_INSUFFICIENT_SIZE = 7, //!< An input argument is not large enough
+ NVML_ERROR_INSUFFICIENT_POWER = 8, //!< A device's external power cables are not properly attached
+ NVML_ERROR_DRIVER_NOT_LOADED = 9, //!< NVIDIA driver is not loaded
+ NVML_ERROR_TIMEOUT = 10, //!< User provided timeout passed
+ NVML_ERROR_IRQ_ISSUE = 11, //!< NVIDIA Kernel detected an interrupt issue with a GPU
+ NVML_ERROR_LIBRARY_NOT_FOUND = 12, //!< NVML Shared Library couldn't be found or loaded
+ NVML_ERROR_FUNCTION_NOT_FOUND = 13, //!< Local version of NVML doesn't implement this function
+ NVML_ERROR_CORRUPTED_INFOROM = 14, //!< infoROM is corrupted
+ NVML_ERROR_GPU_IS_LOST = 15, //!< The GPU has fallen off the bus or has otherwise become inaccessible
+ NVML_ERROR_RESET_REQUIRED = 16, //!< The GPU requires a reset before it can be used again
+ NVML_ERROR_OPERATING_SYSTEM = 17, //!< The GPU control device has been blocked by the operating system/cgroups
+ NVML_ERROR_LIB_RM_VERSION_MISMATCH = 18, //!< RM detects a driver/library version mismatch
+ NVML_ERROR_IN_USE = 19, //!< An operation cannot be performed because the GPU is currently in use
+ NVML_ERROR_UNKNOWN = 999 //!< An internal driver error occurred
+} nvmlReturn_t;
+/* End of nvml.h */
ncclResult_t wrapSymbols(void);
@@ -31,7 +49,6 @@ ncclResult_t wrapNvmlDeviceGetHandleByPciBusId(const char* pciBusId, nvmlDevice_
ncclResult_t wrapNvmlDeviceGetIndex(nvmlDevice_t device, unsigned* index);
ncclResult_t wrapNvmlDeviceSetCpuAffinity(nvmlDevice_t device);
ncclResult_t wrapNvmlDeviceClearCpuAffinity(nvmlDevice_t device);
-ncclResult_t wrapNvmlDeviceGetHandleByIndex(unsigned int index, nvmlDevice_t *device);
#endif // End include guard