From 6046a8652273f336c44f63d22a8ec74ad5214832 Mon Sep 17 00:00:00 2001 From: Sergey Sharybin Date: Thu, 31 Dec 2015 16:02:24 +0500 Subject: Update CUDA wrangler to latest upstream Brings support of NVRTC bindings and also makes it easier to tweak libraries paths and use multiple alternative names for libraries. --- extern/cuew/auto/cuew_gen.py | 80 ++++++++++++++++++++++++++++++++++---------- extern/cuew/include/cuew.h | 34 ++++++++++++++++++- extern/cuew/src/cuew.c | 80 ++++++++++++++++++++++++++++++++++++-------- 3 files changed, 161 insertions(+), 33 deletions(-) (limited to 'extern') diff --git a/extern/cuew/auto/cuew_gen.py b/extern/cuew/auto/cuew_gen.py index 75e5bf876f4..6cc48e4f809 100644 --- a/extern/cuew/auto/cuew_gen.py +++ b/extern/cuew/auto/cuew_gen.py @@ -49,7 +49,7 @@ COPYRIGHT = """/* * See the License for the specific language governing permissions and * limitations under the License */""" -FILES = ["cuda.h", "cudaGL.h"] +FILES = ["cuda.h", "cudaGL.h", 'nvrtc.h'] TYPEDEFS = [] FUNC_TYPEDEFS = [] @@ -89,7 +89,10 @@ class FuncDefVisitor(c_ast.NodeVisitor): self.indent -= 1 return "union {\n" + union + (" " * self.indent) + "}" elif isinstance(node, c_ast.Enum): - return 'enum ' + node.name + if node.name is not None: + return 'enum ' + node.name + else: + return 'enum ' elif isinstance(node, c_ast.TypeDecl): return self._get_ident_type(node.type) else: @@ -268,7 +271,9 @@ def parse_files(): token = line.split() if token[0] not in ("__cuda_cuda_h__", "CUDA_CB", - "CUDAAPI"): + "CUDAAPI", + "CUDAGL_H", + "__NVRTC_H__"): DEFINES.append(token) for line in lines: @@ -403,7 +408,7 @@ def print_dl_wrapper(): typedef HMODULE DynamicLibrary; -# define dynamic_library_open(path) LoadLibrary(path) +# define dynamic_library_open(path) LoadLibraryA(path) # define dynamic_library_close(lib) FreeLibrary(lib) # define dynamic_library_find(lib, symbol) GetProcAddress(lib, symbol) #else @@ -419,23 +424,44 @@ typedef void* DynamicLibrary; def print_dl_helper_macro(): - print("""#define %s_LIBRARY_FIND_CHECKED(name) \\ + print("""#define _LIBRARY_FIND_CHECKED(lib, name) \\ name = (t##name *)dynamic_library_find(lib, #name); \\ assert(name); -#define %s_LIBRARY_FIND(name) \\ +#define _LIBRARY_FIND(lib, name) \\ name = (t##name *)dynamic_library_find(lib, #name); -static DynamicLibrary lib;""" % (REAL_LIB, REAL_LIB)) +#define %s_LIBRARY_FIND_CHECKED(name) \\ + _LIBRARY_FIND_CHECKED(cuda_lib, name) +#define %s_LIBRARY_FIND(name) _LIBRARY_FIND(cuda_lib, name) + +#define NVRTC_LIBRARY_FIND_CHECKED(name) \\ + _LIBRARY_FIND_CHECKED(nvrtc_lib, name) +#define NVRTC_LIBRARY_FIND(name) _LIBRARY_FIND(nvrtc_lib, name) + +static DynamicLibrary cuda_lib; +static DynamicLibrary nvrtc_lib;""" % (REAL_LIB, REAL_LIB)) print("") -def print_dl_close(): - print("""static void %sExit(void) { - if(lib != NULL) { +def print_dl_helpers(): + print("""static DynamicLibrary dynamic_library_open_find(const char **paths) { + int i = 0; + while (paths[i] != NULL) { + DynamicLibrary lib = dynamic_library_open(paths[i]); + if (lib != NULL) { + return lib; + } + ++i; + } + return NULL; +} + +static void %sExit(void) { + if(cuda_lib != NULL) { /* Ignore errors. */ - dynamic_library_close(lib); - lib = NULL; + dynamic_library_close(cuda_lib); + cuda_lib = NULL; } }""" % (LIB.lower())) print("") @@ -445,12 +471,21 @@ def print_lib_path(): # TODO(sergey): get rid of hardcoded libraries. print("""#ifdef _WIN32 /* Expected in c:/windows/system or similar, no path needed. */ - const char *path = "nvcuda.dll"; + const char *cuda_paths[] = {"nvcuda.dll", NULL}; + const char *nvrtc_paths[] = {"nvrtc.dll", NULL}; #elif defined(__APPLE__) /* Default installation path. */ - const char *path = "/usr/local/cuda/lib/libcuda.dylib"; + const char *cuda_paths[] = {"/usr/local/cuda/lib/libcuda.dylib", NULL}; + const char *nvrtc_paths[] = {"/usr/local/cuda/lib/libnvrtc.dylib", NULL}; #else - const char *path = "libcuda.so"; + const char *cuda_paths[] = {"libcuda.so", NULL}; + const char *nvrtc_paths[] = {"libnvrtc.so", +# if defined(__x86_64__) || defined(_M_X64) + "/usr/local/cuda/lib64/libnvrtc.so", +#else + "/usr/local/cuda/lib/libnvrtc.so", +#endif + NULL}; #endif""") @@ -472,9 +507,11 @@ def print_init_guard(): } /* Load library. */ - lib = dynamic_library_open(path); + cuda_lib = dynamic_library_open_find(cuda_paths); + nvrtc_lib = dynamic_library_open_find(nvrtc_paths); - if (lib == NULL) { + /* CUDA library is mandatory to have, while nvrtc might be missing. */ + if (cuda_lib == NULL) { result = CUEW_ERROR_OPEN_FAILED; return result; }""") @@ -509,10 +546,17 @@ def print_dl_init(): print(" /* Fetch all function pointers. */") for symbol in SYMBOLS: if symbol: + if not symbol.startswith('nvrtc'): print(" %s_LIBRARY_FIND(%s);" % (REAL_LIB, symbol)) else: print("") + print(" if (nvrtc_lib != NULL) {") + for symbol in SYMBOLS: + if symbol and symbol.startswith('nvrtc'): + print(" NVRTC_LIBRARY_FIND(%s);" % (symbol)) + print(" }") + print("") print(" result = CUEW_SUCCESS;") print(" return result;") @@ -549,7 +593,7 @@ def print_implementation(): print("") print("") - print_dl_close() + print_dl_helpers() print("/* Implementation function. */") print_dl_init() diff --git a/extern/cuew/include/cuew.h b/extern/cuew/include/cuew.h index 1b12e5b4463..47b19b4b3a5 100644 --- a/extern/cuew/include/cuew.h +++ b/extern/cuew/include/cuew.h @@ -51,7 +51,6 @@ extern "C" { #define CU_LAUNCH_PARAM_BUFFER_POINTER ((void*)0x01) #define CU_LAUNCH_PARAM_BUFFER_SIZE ((void*)0x02) #define CU_PARAM_TR_DEFAULT -1 -#define CUDAGL_H /* Functions which changed 3.1 -> 3.2 for 64 bit stuff, * the cuda library has both the old ones for compatibility and new @@ -728,6 +727,19 @@ typedef enum CUGLmap_flags_enum { CU_GL_MAP_RESOURCE_FLAGS_WRITE_DISCARD = 0x02, } CUGLmap_flags; +typedef enum { + NVRTC_SUCCESS = 0, + NVRTC_ERROR_OUT_OF_MEMORY = 1, + NVRTC_ERROR_PROGRAM_CREATION_FAILURE = 2, + NVRTC_ERROR_INVALID_INPUT = 3, + NVRTC_ERROR_INVALID_PROGRAM = 4, + NVRTC_ERROR_INVALID_OPTION = 5, + NVRTC_ERROR_COMPILATION = 6, + NVRTC_ERROR_BUILTIN_OPERATION_FAILURE = 7, +} nvrtcResult; + +typedef struct _nvrtcProgram* nvrtcProgram; + #ifdef _WIN32 # define CUDAAPI __stdcall # define CUDA_CB __stdcall @@ -947,6 +959,16 @@ typedef CUresult CUDAAPI tcuGLSetBufferObjectMapFlags(GLuint buffer, unsigned Fl typedef CUresult CUDAAPI tcuGLMapBufferObjectAsync_v2(CUdeviceptr* dptr, size_t* size, GLuint buffer, CUstream hStream); typedef CUresult CUDAAPI tcuGLUnmapBufferObjectAsync(GLuint buffer, CUstream hStream); +typedef const char* CUDAAPI tnvrtcGetErrorString(nvrtcResult result); +typedef nvrtcResult CUDAAPI tnvrtcVersion(int* major, int* minor); +typedef nvrtcResult CUDAAPI tnvrtcCreateProgram(nvrtcProgram* prog, const char* src, const char* name, int numHeaders, const char* headers, const char* includeNames); +typedef nvrtcResult CUDAAPI tnvrtcDestroyProgram(nvrtcProgram* prog); +typedef nvrtcResult CUDAAPI tnvrtcCompileProgram(nvrtcProgram prog, int numOptions, const char* options); +typedef nvrtcResult CUDAAPI tnvrtcGetPTXSize(nvrtcProgram prog, size_t* ptxSizeRet); +typedef nvrtcResult CUDAAPI tnvrtcGetPTX(nvrtcProgram prog, char* ptx); +typedef nvrtcResult CUDAAPI tnvrtcGetProgramLogSize(nvrtcProgram prog, size_t* logSizeRet); +typedef nvrtcResult CUDAAPI tnvrtcGetProgramLog(nvrtcProgram prog, char* log); + /* Function declarations. */ extern tcuGetErrorString *cuGetErrorString; @@ -1159,6 +1181,16 @@ extern tcuGLSetBufferObjectMapFlags *cuGLSetBufferObjectMapFlags; extern tcuGLMapBufferObjectAsync_v2 *cuGLMapBufferObjectAsync_v2; extern tcuGLUnmapBufferObjectAsync *cuGLUnmapBufferObjectAsync; +extern tnvrtcGetErrorString *nvrtcGetErrorString; +extern tnvrtcVersion *nvrtcVersion; +extern tnvrtcCreateProgram *nvrtcCreateProgram; +extern tnvrtcDestroyProgram *nvrtcDestroyProgram; +extern tnvrtcCompileProgram *nvrtcCompileProgram; +extern tnvrtcGetPTXSize *nvrtcGetPTXSize; +extern tnvrtcGetPTX *nvrtcGetPTX; +extern tnvrtcGetProgramLogSize *nvrtcGetProgramLogSize; +extern tnvrtcGetProgramLog *nvrtcGetProgramLog; + enum { CUEW_SUCCESS = 0, diff --git a/extern/cuew/src/cuew.c b/extern/cuew/src/cuew.c index 70ac349ea0d..ff454baabab 100644 --- a/extern/cuew/src/cuew.c +++ b/extern/cuew/src/cuew.c @@ -15,9 +15,7 @@ */ #ifdef _MSC_VER -# if _MSC_VER < 1900 -# define snprintf _snprintf -# endif +# define snprintf _snprintf # define popen _popen # define pclose _pclose # define _CRT_SECURE_NO_WARNINGS @@ -51,14 +49,23 @@ typedef void* DynamicLibrary; # define dynamic_library_find(lib, symbol) dlsym(lib, symbol) #endif -#define CUDA_LIBRARY_FIND_CHECKED(name) \ +#define _LIBRARY_FIND_CHECKED(lib, name) \ name = (t##name *)dynamic_library_find(lib, #name); \ assert(name); -#define CUDA_LIBRARY_FIND(name) \ +#define _LIBRARY_FIND(lib, name) \ name = (t##name *)dynamic_library_find(lib, #name); -static DynamicLibrary lib; +#define CUDA_LIBRARY_FIND_CHECKED(name) \ + _LIBRARY_FIND_CHECKED(cuda_lib, name) +#define CUDA_LIBRARY_FIND(name) _LIBRARY_FIND(cuda_lib, name) + +#define NVRTC_LIBRARY_FIND_CHECKED(name) \ + _LIBRARY_FIND_CHECKED(nvrtc_lib, name) +#define NVRTC_LIBRARY_FIND(name) _LIBRARY_FIND(nvrtc_lib, name) + +static DynamicLibrary cuda_lib; +static DynamicLibrary nvrtc_lib; /* Function definitions. */ tcuGetErrorString *cuGetErrorString; @@ -271,12 +278,34 @@ tcuGLSetBufferObjectMapFlags *cuGLSetBufferObjectMapFlags; tcuGLMapBufferObjectAsync_v2 *cuGLMapBufferObjectAsync_v2; tcuGLUnmapBufferObjectAsync *cuGLUnmapBufferObjectAsync; +tnvrtcGetErrorString *nvrtcGetErrorString; +tnvrtcVersion *nvrtcVersion; +tnvrtcCreateProgram *nvrtcCreateProgram; +tnvrtcDestroyProgram *nvrtcDestroyProgram; +tnvrtcCompileProgram *nvrtcCompileProgram; +tnvrtcGetPTXSize *nvrtcGetPTXSize; +tnvrtcGetPTX *nvrtcGetPTX; +tnvrtcGetProgramLogSize *nvrtcGetProgramLogSize; +tnvrtcGetProgramLog *nvrtcGetProgramLog; + + +static DynamicLibrary dynamic_library_open_find(const char **paths) { + int i = 0; + while (paths[i] != NULL) { + DynamicLibrary lib = dynamic_library_open(paths[i]); + if (lib != NULL) { + return lib; + } + ++i; + } + return NULL; +} static void cuewExit(void) { - if(lib != NULL) { + if(cuda_lib != NULL) { /* Ignore errors. */ - dynamic_library_close(lib); - lib = NULL; + dynamic_library_close(cuda_lib); + cuda_lib = NULL; } } @@ -285,12 +314,21 @@ int cuewInit(void) { /* Library paths. */ #ifdef _WIN32 /* Expected in c:/windows/system or similar, no path needed. */ - const char *path = "nvcuda.dll"; + const char *cuda_paths[] = {"nvcuda.dll", NULL}; + const char *nvrtc_paths[] = {"nvrtc.dll", NULL}; #elif defined(__APPLE__) /* Default installation path. */ - const char *path = "/usr/local/cuda/lib/libcuda.dylib"; + const char *cuda_paths[] = {"/usr/local/cuda/lib/libcuda.dylib", NULL}; + const char *nvrtc_paths[] = {"/usr/local/cuda/lib/libnvrtc.dylib", NULL}; +#else + const char *cuda_paths[] = {"libcuda.so", NULL}; + const char *nvrtc_paths[] = {"libnvrtc.so", +# if defined(__x86_64__) || defined(_M_X64) + "/usr/local/cuda/lib64/libnvrtc.so", #else - const char *path = "libcuda.so"; + "/usr/local/cuda/lib/libnvrtc.so", +#endif + NULL}; #endif static int initialized = 0; static int result = 0; @@ -309,9 +347,11 @@ int cuewInit(void) { } /* Load library. */ - lib = dynamic_library_open(path); + cuda_lib = dynamic_library_open_find(cuda_paths); + nvrtc_lib = dynamic_library_open_find(nvrtc_paths); - if (lib == NULL) { + /* CUDA library is mandatory to have, while nvrtc might be missing. */ + if (cuda_lib == NULL) { result = CUEW_ERROR_OPEN_FAILED; return result; } @@ -541,6 +581,18 @@ int cuewInit(void) { CUDA_LIBRARY_FIND(cuGLUnmapBufferObjectAsync); + if (nvrtc_lib != NULL) { + NVRTC_LIBRARY_FIND(nvrtcGetErrorString); + NVRTC_LIBRARY_FIND(nvrtcVersion); + NVRTC_LIBRARY_FIND(nvrtcCreateProgram); + NVRTC_LIBRARY_FIND(nvrtcDestroyProgram); + NVRTC_LIBRARY_FIND(nvrtcCompileProgram); + NVRTC_LIBRARY_FIND(nvrtcGetPTXSize); + NVRTC_LIBRARY_FIND(nvrtcGetPTX); + NVRTC_LIBRARY_FIND(nvrtcGetProgramLogSize); + NVRTC_LIBRARY_FIND(nvrtcGetProgramLog); + } + result = CUEW_SUCCESS; return result; } -- cgit v1.2.3