Update CUEW to latest version

This brings separate initialization for libcuda and libnvrtc, which fixes Cycles nvrtc compilation not working on build machines without CUDA hardware available. Differential Revision: https://developer.blender.org/D3045
author: Brecht Van Lommel <brechtvanlommel@gmail.com> 2018-02-06 16:54:18 +0300
committer: Brecht Van Lommel <brechtvanlommel@gmail.com> 2018-02-07 13:53:01 +0300
commit: 1dafe759edc3bc9a89f99a894d1c1fb3a14f44a9 (patch)
tree: d4862fceb7c38873de5ce581d9cdbfdc0cb59423 /extern/cuew/src/cuew.c
parent: 38d35603f224e92c7f4dfe1802b0ebfceebec46a (diff)
1 files changed, 111 insertions, 35 deletions
diff --git a/extern/cuew/src/cuew.c b/extern/cuew/src/cuew.c
index b68dc597049..329dfbad3aa 100644
--- a/extern/cuew/src/cuew.c
+++ b/extern/cuew/src/cuew.c
@@ -207,12 +207,17 @@ tcuEventSynchronize *cuEventSynchronize;
 tcuEventDestroy_v2 *cuEventDestroy_v2;
 tcuEventElapsedTime *cuEventElapsedTime;
 tcuStreamWaitValue32 *cuStreamWaitValue32;
+tcuStreamWaitValue64 *cuStreamWaitValue64;
 tcuStreamWriteValue32 *cuStreamWriteValue32;
+tcuStreamWriteValue64 *cuStreamWriteValue64;
 tcuStreamBatchMemOp *cuStreamBatchMemOp;
 tcuFuncGetAttribute *cuFuncGetAttribute;
+tcuFuncSetAttribute *cuFuncSetAttribute;
 tcuFuncSetCacheConfig *cuFuncSetCacheConfig;
 tcuFuncSetSharedMemConfig *cuFuncSetSharedMemConfig;
 tcuLaunchKernel *cuLaunchKernel;
+tcuLaunchCooperativeKernel *cuLaunchCooperativeKernel;
+tcuLaunchCooperativeKernelMultiDevice *cuLaunchCooperativeKernelMultiDevice;
 tcuFuncSetBlockShape *cuFuncSetBlockShape;
 tcuFuncSetSharedSize *cuFuncSetSharedSize;
 tcuParamSetSize *cuParamSetSize;
@@ -265,9 +270,9 @@ tcuSurfObjectCreate *cuSurfObjectCreate;
 tcuSurfObjectDestroy *cuSurfObjectDestroy;
 tcuSurfObjectGetResourceDesc *cuSurfObjectGetResourceDesc;
 tcuDeviceCanAccessPeer *cuDeviceCanAccessPeer;
-tcuDeviceGetP2PAttribute *cuDeviceGetP2PAttribute;
 tcuCtxEnablePeerAccess *cuCtxEnablePeerAccess;
 tcuCtxDisablePeerAccess *cuCtxDisablePeerAccess;
+tcuDeviceGetP2PAttribute *cuDeviceGetP2PAttribute;
 tcuGraphicsUnregisterResource *cuGraphicsUnregisterResource;
 tcuGraphicsSubResourceGetMappedArray *cuGraphicsSubResourceGetMappedArray;
 tcuGraphicsResourceGetMappedMipmappedArray *cuGraphicsResourceGetMappedMipmappedArray;
@@ -315,34 +320,25 @@ static DynamicLibrary dynamic_library_open_find(const char **paths) {
   return NULL;
 }
 
-static void cuewExit(void) {
-  if(cuda_lib != NULL) {
+/* Implementation function. */
+static void cuewCudaExit(void) {
+  if (cuda_lib != NULL) {
     /*  Ignore errors. */
     dynamic_library_close(cuda_lib);
     cuda_lib = NULL;
   }
 }
 
-/* Implementation function. */
-int cuewInit(void) {
+static int cuewCudaInit(void) {
   /* Library paths. */
 #ifdef _WIN32
   /* Expected in c:/windows/system or similar, no path needed. */
   const char *cuda_paths[] = {"nvcuda.dll", NULL};
-  const char *nvrtc_paths[] = {"nvrtc64_80.dll", "nvrtc64_90.dll", "nvrtc64_91.dll", NULL};
 #elif defined(__APPLE__)
   /* Default installation path. */
   const char *cuda_paths[] = {"/usr/local/cuda/lib/libcuda.dylib", NULL};
-  const char *nvrtc_paths[] = {"/usr/local/cuda/lib/libnvrtc.dylib", NULL};
 #else
   const char *cuda_paths[] = {"libcuda.so", NULL};
-  const char *nvrtc_paths[] = {"libnvrtc.so",
-#  if defined(__x86_64__) || defined(_M_X64)
-                               "/usr/local/cuda/lib64/libnvrtc.so",
-#else
-                               "/usr/local/cuda/lib/libnvrtc.so",
-#endif
-                               NULL};
 #endif
   static int initialized = 0;
   static int result = 0;
@@ -354,7 +350,7 @@ int cuewInit(void) {
 
   initialized = 1;
 
-  error = atexit(cuewExit);
+  error = atexit(cuewCudaExit);
   if (error) {
     result = CUEW_ERROR_ATEXIT_FAILED;
     return result;
@@ -362,9 +358,7 @@ int cuewInit(void) {
 
   /* Load library. */
   cuda_lib = dynamic_library_open_find(cuda_paths);
-  nvrtc_lib = dynamic_library_open_find(nvrtc_paths);
 
-  /* CUDA library is mandatory to have, while nvrtc might be missing. */
   if (cuda_lib == NULL) {
     result = CUEW_ERROR_OPEN_FAILED;
     return result;
@@ -521,12 +515,17 @@ int cuewInit(void) {
   CUDA_LIBRARY_FIND(cuEventDestroy_v2);
   CUDA_LIBRARY_FIND(cuEventElapsedTime);
   CUDA_LIBRARY_FIND(cuStreamWaitValue32);
+  CUDA_LIBRARY_FIND(cuStreamWaitValue64);
   CUDA_LIBRARY_FIND(cuStreamWriteValue32);
+  CUDA_LIBRARY_FIND(cuStreamWriteValue64);
   CUDA_LIBRARY_FIND(cuStreamBatchMemOp);
   CUDA_LIBRARY_FIND(cuFuncGetAttribute);
+  CUDA_LIBRARY_FIND(cuFuncSetAttribute);
   CUDA_LIBRARY_FIND(cuFuncSetCacheConfig);
   CUDA_LIBRARY_FIND(cuFuncSetSharedMemConfig);
   CUDA_LIBRARY_FIND(cuLaunchKernel);
+  CUDA_LIBRARY_FIND(cuLaunchCooperativeKernel);
+  CUDA_LIBRARY_FIND(cuLaunchCooperativeKernelMultiDevice);
   CUDA_LIBRARY_FIND(cuFuncSetBlockShape);
   CUDA_LIBRARY_FIND(cuFuncSetSharedSize);
   CUDA_LIBRARY_FIND(cuParamSetSize);
@@ -579,9 +578,9 @@ int cuewInit(void) {
   CUDA_LIBRARY_FIND(cuSurfObjectDestroy);
   CUDA_LIBRARY_FIND(cuSurfObjectGetResourceDesc);
   CUDA_LIBRARY_FIND(cuDeviceCanAccessPeer);
-  CUDA_LIBRARY_FIND(cuDeviceGetP2PAttribute);
   CUDA_LIBRARY_FIND(cuCtxEnablePeerAccess);
   CUDA_LIBRARY_FIND(cuCtxDisablePeerAccess);
+  CUDA_LIBRARY_FIND(cuDeviceGetP2PAttribute);
   CUDA_LIBRARY_FIND(cuGraphicsUnregisterResource);
   CUDA_LIBRARY_FIND(cuGraphicsSubResourceGetMappedArray);
   CUDA_LIBRARY_FIND(cuGraphicsResourceGetMappedMipmappedArray);
@@ -604,27 +603,99 @@ int cuewInit(void) {
   CUDA_LIBRARY_FIND(cuGLMapBufferObjectAsync_v2);
   CUDA_LIBRARY_FIND(cuGLUnmapBufferObjectAsync);
 
+  result = CUEW_SUCCESS;
+  return result;
+}
 
+static void cuewExitNvrtc(void) {
   if (nvrtc_lib != NULL) {
-    NVRTC_LIBRARY_FIND(nvrtcGetErrorString);
-    NVRTC_LIBRARY_FIND(nvrtcVersion);
-    NVRTC_LIBRARY_FIND(nvrtcCreateProgram);
-    NVRTC_LIBRARY_FIND(nvrtcDestroyProgram);
-    NVRTC_LIBRARY_FIND(nvrtcCompileProgram);
-    NVRTC_LIBRARY_FIND(nvrtcGetPTXSize);
-    NVRTC_LIBRARY_FIND(nvrtcGetPTX);
-    NVRTC_LIBRARY_FIND(nvrtcGetProgramLogSize);
-    NVRTC_LIBRARY_FIND(nvrtcGetProgramLog);
-    NVRTC_LIBRARY_FIND(nvrtcAddNameExpression);
-    NVRTC_LIBRARY_FIND(nvrtcGetLoweredName);
+    /*  Ignore errors. */
+    dynamic_library_close(nvrtc_lib);
+    nvrtc_lib = NULL;
   }
+}
+
+static int cuewNvrtcInit(void) {
+  /* Library paths. */
+#ifdef _WIN32
+  /* Expected in c:/windows/system or similar, no path needed. */
+  const char *nvrtc_paths[] = {"nvrtc64_80.dll", "nvrtc64_90.dll", "nvrtc64_91.dll", NULL};
+#elif defined(__APPLE__)
+  /* Default installation path. */
+  const char *nvrtc_paths[] = {"/usr/local/cuda/lib/libnvrtc.dylib", NULL};
+#else
+  const char *nvrtc_paths[] = {"libnvrtc.so",
+#  if defined(__x86_64__) || defined(_M_X64)
+                               "/usr/local/cuda/lib64/libnvrtc.so",
+#else
+                               "/usr/local/cuda/lib/libnvrtc.so",
+#endif
+                               NULL};
+#endif
+  static int initialized = 0;
+  static int result = 0;
+  int error;
+
+  if (initialized) {
+    return result;
+  }
+
+  initialized = 1;
+
+  error = atexit(cuewExitNvrtc);
+  if (error) {
+    result = CUEW_ERROR_ATEXIT_FAILED;
+    return result;
+  }
+
+  /* Load library. */
+  nvrtc_lib = dynamic_library_open_find(nvrtc_paths);
+
+  if (nvrtc_lib == NULL) {
+    result = CUEW_ERROR_OPEN_FAILED;
+    return result;
+  }
+
+  NVRTC_LIBRARY_FIND(nvrtcGetErrorString);
+  NVRTC_LIBRARY_FIND(nvrtcVersion);
+  NVRTC_LIBRARY_FIND(nvrtcCreateProgram);
+  NVRTC_LIBRARY_FIND(nvrtcDestroyProgram);
+  NVRTC_LIBRARY_FIND(nvrtcCompileProgram);
+  NVRTC_LIBRARY_FIND(nvrtcGetPTXSize);
+  NVRTC_LIBRARY_FIND(nvrtcGetPTX);
+  NVRTC_LIBRARY_FIND(nvrtcGetProgramLogSize);
+  NVRTC_LIBRARY_FIND(nvrtcGetProgramLog);
+  NVRTC_LIBRARY_FIND(nvrtcAddNameExpression);
+  NVRTC_LIBRARY_FIND(nvrtcGetLoweredName);
 
   result = CUEW_SUCCESS;
   return result;
 }
 
+
+int cuewInit(cuuint32_t flags) {
+	int result = CUEW_SUCCESS;
+
+	if (flags & CUEW_INIT_CUDA) {
+		result = cuewCudaInit();
+		if (result != CUEW_SUCCESS) {
+			return result;
+		}
+	}
+
+	if (flags & CUEW_INIT_NVRTC) {
+		result = cuewNvrtcInit();
+		if (result != CUEW_SUCCESS) {
+			return result;
+		}
+	}
+
+	return result;
+}
+
+
 const char *cuewErrorString(CUresult result) {
-  switch(result) {
+  switch (result) {
     case CUDA_SUCCESS: return "No errors";
     case CUDA_ERROR_INVALID_VALUE: return "Invalid value";
     case CUDA_ERROR_OUT_OF_MEMORY: return "Out of memory";
@@ -655,6 +726,7 @@ const char *cuewErrorString(CUresult result) {
     case CUDA_ERROR_INVALID_PTX: return "Invalid ptx";
     case CUDA_ERROR_INVALID_GRAPHICS_CONTEXT: return "Invalid graphics context";
     case CUDA_ERROR_NVLINK_UNCORRECTABLE: return "Nvlink uncorrectable";
+    case CUDA_ERROR_JIT_COMPILER_NOT_FOUND: return "Jit compiler not found";
     case CUDA_ERROR_INVALID_SOURCE: return "Invalid source";
     case CUDA_ERROR_FILE_NOT_FOUND: return "File not found";
     case CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND: return "Link to a shared object failed to resolve";
@@ -681,6 +753,7 @@ const char *cuewErrorString(CUresult result) {
     case CUDA_ERROR_INVALID_ADDRESS_SPACE: return "Invalid address space";
     case CUDA_ERROR_INVALID_PC: return "Invalid pc";
     case CUDA_ERROR_LAUNCH_FAILED: return "Launch failed";
+    case CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE: return "Cooperative launch too large";
     case CUDA_ERROR_NOT_PERMITTED: return "Not permitted";
     case CUDA_ERROR_NOT_SUPPORTED: return "Not supported";
     case CUDA_ERROR_UNKNOWN: return "Unknown error";
@@ -738,14 +811,16 @@ const char *cuewCompilerPath(void) {
 
   if (binpath) {
     path_join(binpath, executable, sizeof(nvcc), nvcc);
-    if (path_exists(nvcc))
+    if (path_exists(nvcc)) {
       return nvcc;
+    }
   }
 
   for (i = 0; defaultpaths[i]; ++i) {
     path_join(defaultpaths[i], executable, sizeof(nvcc), nvcc);
-    if (path_exists(nvcc))
+    if (path_exists(nvcc)) {
       return nvcc;
+    }
   }
 
 #ifndef _WIN32
@@ -756,9 +831,9 @@ const char *cuewCompilerPath(void) {
       int len = fread(buffer, 1, sizeof(buffer) - 1, handle);
       buffer[len] = '\0';
       pclose(handle);
-
-      if (buffer[0])
+      if (buffer[0]) {
         return "nvcc";
+      }
     }
   }
 #endif
@@ -785,8 +860,9 @@ int cuewCompilerVersion(void) {
   char output[65536] = "\0";
   char command[65536] = "\0";
 
-  if (path == NULL)
+  if (path == NULL) {
     return 0;
+  }
 
   /* get --version output */
   strncpy(command, path, sizeof(command));
author	Brecht Van Lommel <brechtvanlommel@gmail.com>	2018-02-06 16:54:18 +0300
committer	Brecht Van Lommel <brechtvanlommel@gmail.com>	2018-02-07 13:53:01 +0300
commit	1dafe759edc3bc9a89f99a894d1c1fb3a14f44a9 (patch)
tree	d4862fceb7c38873de5ce581d9cdbfdc0cb59423 /extern/cuew/src/cuew.c
parent	38d35603f224e92c7f4dfe1802b0ebfceebec46a (diff)