diff options
Diffstat (limited to 'extern/cuew/src/cuew.c')
-rw-r--r-- | extern/cuew/src/cuew.c | 146 |
1 files changed, 111 insertions, 35 deletions
diff --git a/extern/cuew/src/cuew.c b/extern/cuew/src/cuew.c index b68dc597049..329dfbad3aa 100644 --- a/extern/cuew/src/cuew.c +++ b/extern/cuew/src/cuew.c @@ -207,12 +207,17 @@ tcuEventSynchronize *cuEventSynchronize; tcuEventDestroy_v2 *cuEventDestroy_v2; tcuEventElapsedTime *cuEventElapsedTime; tcuStreamWaitValue32 *cuStreamWaitValue32; +tcuStreamWaitValue64 *cuStreamWaitValue64; tcuStreamWriteValue32 *cuStreamWriteValue32; +tcuStreamWriteValue64 *cuStreamWriteValue64; tcuStreamBatchMemOp *cuStreamBatchMemOp; tcuFuncGetAttribute *cuFuncGetAttribute; +tcuFuncSetAttribute *cuFuncSetAttribute; tcuFuncSetCacheConfig *cuFuncSetCacheConfig; tcuFuncSetSharedMemConfig *cuFuncSetSharedMemConfig; tcuLaunchKernel *cuLaunchKernel; +tcuLaunchCooperativeKernel *cuLaunchCooperativeKernel; +tcuLaunchCooperativeKernelMultiDevice *cuLaunchCooperativeKernelMultiDevice; tcuFuncSetBlockShape *cuFuncSetBlockShape; tcuFuncSetSharedSize *cuFuncSetSharedSize; tcuParamSetSize *cuParamSetSize; @@ -265,9 +270,9 @@ tcuSurfObjectCreate *cuSurfObjectCreate; tcuSurfObjectDestroy *cuSurfObjectDestroy; tcuSurfObjectGetResourceDesc *cuSurfObjectGetResourceDesc; tcuDeviceCanAccessPeer *cuDeviceCanAccessPeer; -tcuDeviceGetP2PAttribute *cuDeviceGetP2PAttribute; tcuCtxEnablePeerAccess *cuCtxEnablePeerAccess; tcuCtxDisablePeerAccess *cuCtxDisablePeerAccess; +tcuDeviceGetP2PAttribute *cuDeviceGetP2PAttribute; tcuGraphicsUnregisterResource *cuGraphicsUnregisterResource; tcuGraphicsSubResourceGetMappedArray *cuGraphicsSubResourceGetMappedArray; tcuGraphicsResourceGetMappedMipmappedArray *cuGraphicsResourceGetMappedMipmappedArray; @@ -315,34 +320,25 @@ static DynamicLibrary dynamic_library_open_find(const char **paths) { return NULL; } -static void cuewExit(void) { - if(cuda_lib != NULL) { +/* Implementation function. */ +static void cuewCudaExit(void) { + if (cuda_lib != NULL) { /* Ignore errors. */ dynamic_library_close(cuda_lib); cuda_lib = NULL; } } -/* Implementation function. */ -int cuewInit(void) { +static int cuewCudaInit(void) { /* Library paths. */ #ifdef _WIN32 /* Expected in c:/windows/system or similar, no path needed. */ const char *cuda_paths[] = {"nvcuda.dll", NULL}; - const char *nvrtc_paths[] = {"nvrtc64_80.dll", "nvrtc64_90.dll", "nvrtc64_91.dll", NULL}; #elif defined(__APPLE__) /* Default installation path. */ const char *cuda_paths[] = {"/usr/local/cuda/lib/libcuda.dylib", NULL}; - const char *nvrtc_paths[] = {"/usr/local/cuda/lib/libnvrtc.dylib", NULL}; #else const char *cuda_paths[] = {"libcuda.so", NULL}; - const char *nvrtc_paths[] = {"libnvrtc.so", -# if defined(__x86_64__) || defined(_M_X64) - "/usr/local/cuda/lib64/libnvrtc.so", -#else - "/usr/local/cuda/lib/libnvrtc.so", -#endif - NULL}; #endif static int initialized = 0; static int result = 0; @@ -354,7 +350,7 @@ int cuewInit(void) { initialized = 1; - error = atexit(cuewExit); + error = atexit(cuewCudaExit); if (error) { result = CUEW_ERROR_ATEXIT_FAILED; return result; @@ -362,9 +358,7 @@ int cuewInit(void) { /* Load library. */ cuda_lib = dynamic_library_open_find(cuda_paths); - nvrtc_lib = dynamic_library_open_find(nvrtc_paths); - /* CUDA library is mandatory to have, while nvrtc might be missing. */ if (cuda_lib == NULL) { result = CUEW_ERROR_OPEN_FAILED; return result; @@ -521,12 +515,17 @@ int cuewInit(void) { CUDA_LIBRARY_FIND(cuEventDestroy_v2); CUDA_LIBRARY_FIND(cuEventElapsedTime); CUDA_LIBRARY_FIND(cuStreamWaitValue32); + CUDA_LIBRARY_FIND(cuStreamWaitValue64); CUDA_LIBRARY_FIND(cuStreamWriteValue32); + CUDA_LIBRARY_FIND(cuStreamWriteValue64); CUDA_LIBRARY_FIND(cuStreamBatchMemOp); CUDA_LIBRARY_FIND(cuFuncGetAttribute); + CUDA_LIBRARY_FIND(cuFuncSetAttribute); CUDA_LIBRARY_FIND(cuFuncSetCacheConfig); CUDA_LIBRARY_FIND(cuFuncSetSharedMemConfig); CUDA_LIBRARY_FIND(cuLaunchKernel); + CUDA_LIBRARY_FIND(cuLaunchCooperativeKernel); + CUDA_LIBRARY_FIND(cuLaunchCooperativeKernelMultiDevice); CUDA_LIBRARY_FIND(cuFuncSetBlockShape); CUDA_LIBRARY_FIND(cuFuncSetSharedSize); CUDA_LIBRARY_FIND(cuParamSetSize); @@ -579,9 +578,9 @@ int cuewInit(void) { CUDA_LIBRARY_FIND(cuSurfObjectDestroy); CUDA_LIBRARY_FIND(cuSurfObjectGetResourceDesc); CUDA_LIBRARY_FIND(cuDeviceCanAccessPeer); - CUDA_LIBRARY_FIND(cuDeviceGetP2PAttribute); CUDA_LIBRARY_FIND(cuCtxEnablePeerAccess); CUDA_LIBRARY_FIND(cuCtxDisablePeerAccess); + CUDA_LIBRARY_FIND(cuDeviceGetP2PAttribute); CUDA_LIBRARY_FIND(cuGraphicsUnregisterResource); CUDA_LIBRARY_FIND(cuGraphicsSubResourceGetMappedArray); CUDA_LIBRARY_FIND(cuGraphicsResourceGetMappedMipmappedArray); @@ -604,27 +603,99 @@ int cuewInit(void) { CUDA_LIBRARY_FIND(cuGLMapBufferObjectAsync_v2); CUDA_LIBRARY_FIND(cuGLUnmapBufferObjectAsync); + result = CUEW_SUCCESS; + return result; +} +static void cuewExitNvrtc(void) { if (nvrtc_lib != NULL) { - NVRTC_LIBRARY_FIND(nvrtcGetErrorString); - NVRTC_LIBRARY_FIND(nvrtcVersion); - NVRTC_LIBRARY_FIND(nvrtcCreateProgram); - NVRTC_LIBRARY_FIND(nvrtcDestroyProgram); - NVRTC_LIBRARY_FIND(nvrtcCompileProgram); - NVRTC_LIBRARY_FIND(nvrtcGetPTXSize); - NVRTC_LIBRARY_FIND(nvrtcGetPTX); - NVRTC_LIBRARY_FIND(nvrtcGetProgramLogSize); - NVRTC_LIBRARY_FIND(nvrtcGetProgramLog); - NVRTC_LIBRARY_FIND(nvrtcAddNameExpression); - NVRTC_LIBRARY_FIND(nvrtcGetLoweredName); + /* Ignore errors. */ + dynamic_library_close(nvrtc_lib); + nvrtc_lib = NULL; } +} + +static int cuewNvrtcInit(void) { + /* Library paths. */ +#ifdef _WIN32 + /* Expected in c:/windows/system or similar, no path needed. */ + const char *nvrtc_paths[] = {"nvrtc64_80.dll", "nvrtc64_90.dll", "nvrtc64_91.dll", NULL}; +#elif defined(__APPLE__) + /* Default installation path. */ + const char *nvrtc_paths[] = {"/usr/local/cuda/lib/libnvrtc.dylib", NULL}; +#else + const char *nvrtc_paths[] = {"libnvrtc.so", +# if defined(__x86_64__) || defined(_M_X64) + "/usr/local/cuda/lib64/libnvrtc.so", +#else + "/usr/local/cuda/lib/libnvrtc.so", +#endif + NULL}; +#endif + static int initialized = 0; + static int result = 0; + int error; + + if (initialized) { + return result; + } + + initialized = 1; + + error = atexit(cuewExitNvrtc); + if (error) { + result = CUEW_ERROR_ATEXIT_FAILED; + return result; + } + + /* Load library. */ + nvrtc_lib = dynamic_library_open_find(nvrtc_paths); + + if (nvrtc_lib == NULL) { + result = CUEW_ERROR_OPEN_FAILED; + return result; + } + + NVRTC_LIBRARY_FIND(nvrtcGetErrorString); + NVRTC_LIBRARY_FIND(nvrtcVersion); + NVRTC_LIBRARY_FIND(nvrtcCreateProgram); + NVRTC_LIBRARY_FIND(nvrtcDestroyProgram); + NVRTC_LIBRARY_FIND(nvrtcCompileProgram); + NVRTC_LIBRARY_FIND(nvrtcGetPTXSize); + NVRTC_LIBRARY_FIND(nvrtcGetPTX); + NVRTC_LIBRARY_FIND(nvrtcGetProgramLogSize); + NVRTC_LIBRARY_FIND(nvrtcGetProgramLog); + NVRTC_LIBRARY_FIND(nvrtcAddNameExpression); + NVRTC_LIBRARY_FIND(nvrtcGetLoweredName); result = CUEW_SUCCESS; return result; } + +int cuewInit(cuuint32_t flags) { + int result = CUEW_SUCCESS; + + if (flags & CUEW_INIT_CUDA) { + result = cuewCudaInit(); + if (result != CUEW_SUCCESS) { + return result; + } + } + + if (flags & CUEW_INIT_NVRTC) { + result = cuewNvrtcInit(); + if (result != CUEW_SUCCESS) { + return result; + } + } + + return result; +} + + const char *cuewErrorString(CUresult result) { - switch(result) { + switch (result) { case CUDA_SUCCESS: return "No errors"; case CUDA_ERROR_INVALID_VALUE: return "Invalid value"; case CUDA_ERROR_OUT_OF_MEMORY: return "Out of memory"; @@ -655,6 +726,7 @@ const char *cuewErrorString(CUresult result) { case CUDA_ERROR_INVALID_PTX: return "Invalid ptx"; case CUDA_ERROR_INVALID_GRAPHICS_CONTEXT: return "Invalid graphics context"; case CUDA_ERROR_NVLINK_UNCORRECTABLE: return "Nvlink uncorrectable"; + case CUDA_ERROR_JIT_COMPILER_NOT_FOUND: return "Jit compiler not found"; case CUDA_ERROR_INVALID_SOURCE: return "Invalid source"; case CUDA_ERROR_FILE_NOT_FOUND: return "File not found"; case CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND: return "Link to a shared object failed to resolve"; @@ -681,6 +753,7 @@ const char *cuewErrorString(CUresult result) { case CUDA_ERROR_INVALID_ADDRESS_SPACE: return "Invalid address space"; case CUDA_ERROR_INVALID_PC: return "Invalid pc"; case CUDA_ERROR_LAUNCH_FAILED: return "Launch failed"; + case CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE: return "Cooperative launch too large"; case CUDA_ERROR_NOT_PERMITTED: return "Not permitted"; case CUDA_ERROR_NOT_SUPPORTED: return "Not supported"; case CUDA_ERROR_UNKNOWN: return "Unknown error"; @@ -738,14 +811,16 @@ const char *cuewCompilerPath(void) { if (binpath) { path_join(binpath, executable, sizeof(nvcc), nvcc); - if (path_exists(nvcc)) + if (path_exists(nvcc)) { return nvcc; + } } for (i = 0; defaultpaths[i]; ++i) { path_join(defaultpaths[i], executable, sizeof(nvcc), nvcc); - if (path_exists(nvcc)) + if (path_exists(nvcc)) { return nvcc; + } } #ifndef _WIN32 @@ -756,9 +831,9 @@ const char *cuewCompilerPath(void) { int len = fread(buffer, 1, sizeof(buffer) - 1, handle); buffer[len] = '\0'; pclose(handle); - - if (buffer[0]) + if (buffer[0]) { return "nvcc"; + } } } #endif @@ -785,8 +860,9 @@ int cuewCompilerVersion(void) { char output[65536] = "\0"; char command[65536] = "\0"; - if (path == NULL) + if (path == NULL) { return 0; + } /* get --version output */ strncpy(command, path, sizeof(command)); |