Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSergey Sharybin <sergey.vfx@gmail.com>2014-08-05 11:57:50 +0400
committerSergey Sharybin <sergey.vfx@gmail.com>2014-08-05 11:57:50 +0400
commit77b7e1fe9abb882b7bd1d60f5273e03f079d8a54 (patch)
tree5b23af40e4995e79e584f3ea700f809e7ffbf276 /extern/cuew/auto
parenta3fac84c733f2bf0837dd2719199ee9b76bb7b36 (diff)
Deduplicate CUDA and OpenCL wranglers
For now it was mainly about OpenCL wrangler being duplicated between Cycles and Compositor, but with OpenSubdiv work those wranglers were gonna to be duplicated just once again. This commit makes it so Cycles and Compositor uses wranglers from this repositories: - https://github.com/CudaWrangler/cuew - https://github.com/OpenCLWrangler/clew This repositories are based on the wranglers we used before and they'll be likely continued maintaining by us plus some more players in the market. Pretty much straightforward change with some tricks in the CMake/SCons to make this libs being passed to the linker after all other libraries in order to make OpenSubdiv linked against those wranglers in the future. For those who're worrying about Cycles being less standalone, it's not truth, it's rather more flexible now and in the future different wranglers might be used in Cycles. For now it'll just mean those libs would need to be put into Cycles repository together with some other libs from Blender such as mikkspace. This is mainly platform maintenance commit, should not be any changes to the user space. Reviewers: juicyfruit, dingto, campbellbarton Reviewed By: juicyfruit, dingto, campbellbarton Differential Revision: https://developer.blender.org/D707
Diffstat (limited to 'extern/cuew/auto')
-rw-r--r--extern/cuew/auto/cuda_errors.py35
-rw-r--r--extern/cuew/auto/cuda_extra.py125
-rw-r--r--extern/cuew/auto/cuew_gen.py591
-rwxr-xr-xextern/cuew/auto/cuew_gen.sh10
-rw-r--r--extern/cuew/auto/stdlib.h3
5 files changed, 764 insertions, 0 deletions
diff --git a/extern/cuew/auto/cuda_errors.py b/extern/cuew/auto/cuda_errors.py
new file mode 100644
index 00000000000..464b7765234
--- /dev/null
+++ b/extern/cuew/auto/cuda_errors.py
@@ -0,0 +1,35 @@
+CUDA_ERRORS={
+'CUDA_SUCCESS': "No errors",
+'CUDA_ERROR_INVALID_VALUE': "Invalid value",
+'CUDA_ERROR_OUT_OF_MEMORY': "Out of memory",
+'CUDA_ERROR_NOT_INITIALIZED': "Driver not initialized",
+'CUDA_ERROR_DEINITIALIZED': "Driver deinitialized",
+'CUDA_ERROR_NO_DEVICE': "No CUDA-capable device available",
+'CUDA_ERROR_INVALID_DEVICE': "Invalid device",
+'CUDA_ERROR_INVALID_IMAGE': "Invalid kernel image",
+'CUDA_ERROR_INVALID_CONTEXT': "Invalid context",
+'CUDA_ERROR_CONTEXT_ALREADY_CURRENT': "Context already current",
+'CUDA_ERROR_MAP_FAILED': "Map failed",
+'CUDA_ERROR_UNMAP_FAILED': "Unmap failed",
+'CUDA_ERROR_ARRAY_IS_MAPPED': "Array is mapped",
+'CUDA_ERROR_ALREADY_MAPPED': "Already mapped",
+'CUDA_ERROR_NO_BINARY_FOR_GPU': "No binary for GPU",
+'CUDA_ERROR_ALREADY_ACQUIRED': "Already acquired",
+'CUDA_ERROR_NOT_MAPPED': "Not mapped",
+'CUDA_ERROR_NOT_MAPPED_AS_ARRAY': "Mapped resource not available for access as an array",
+'CUDA_ERROR_NOT_MAPPED_AS_POINTER': "Mapped resource not available for access as a pointer",
+'CUDA_ERROR_ECC_UNCORRECTABLE': "Uncorrectable ECC error detected",
+'CUDA_ERROR_UNSUPPORTED_LIMIT': "CUlimit not supported by device",
+'CUDA_ERROR_INVALID_SOURCE': "Invalid source",
+'CUDA_ERROR_FILE_NOT_FOUND': "File not found",
+'CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND': "Link to a shared object failed to resolve",
+'CUDA_ERROR_SHARED_OBJECT_INIT_FAILED': "Shared object initialization failed",
+'CUDA_ERROR_INVALID_HANDLE': "Invalid handle",
+'CUDA_ERROR_NOT_FOUND': "Not found",
+'CUDA_ERROR_NOT_READY': "CUDA not ready",
+'CUDA_ERROR_LAUNCH_FAILED': "Launch failed",
+'CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES': "Launch exceeded resources",
+'CUDA_ERROR_LAUNCH_TIMEOUT': "Launch exceeded timeout",
+'CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING': "Launch with incompatible texturing",
+'CUDA_ERROR_UNKNOWN': "Unknown error",
+}
diff --git a/extern/cuew/auto/cuda_extra.py b/extern/cuew/auto/cuda_extra.py
new file mode 100644
index 00000000000..fd4f466df83
--- /dev/null
+++ b/extern/cuew/auto/cuda_extra.py
@@ -0,0 +1,125 @@
+extra_code = """
+/* Join path1 and path2 with the platform path separator into result.
+ *
+ * result must have room for maxlen bytes. The output is always
+ * NUL-terminated and gets truncated when it does not fit. Nothing is
+ * written when maxlen is not positive.
+ */
+static void path_join(const char *path1,
+                      const char *path2,
+                      int maxlen,
+                      char *result) {
+#if defined(WIN32) || defined(_WIN32)
+  const char separator = '\\\\';
+#else
+  const char separator = '/';
+#endif
+  int n;
+
+  if (maxlen <= 0) {
+    /* No room even for the terminator. */
+    return;
+  }
+
+  n = snprintf(result, maxlen, "%s%c%s", path1, separator, path2);
+  /* Any negative value is a failure, n >= maxlen is a truncation:
+   * in both cases terminate the buffer explicitly. */
+  if (n >= 0 && n < maxlen) {
+    result[n] = '\\0';
+  }
+  else {
+    result[maxlen - 1] = '\\0';
+  }
+}
+
+/* Return 1 when stat() succeeds for path, 0 otherwise. */
+static int path_exists(const char *path) {
+  struct stat info;
+  return (stat(path, &info) == 0) ? 1 : 0;
+}
+
+/* Locate the nvcc compiler.
+ *
+ * The CUDA_BIN_PATH environment variable is tried first, then a list
+ * of default directories, and finally (non-Windows only) the output
+ * of "which nvcc" is checked.
+ *
+ * Returns a pointer to a static buffer with the joined path, the
+ * string "nvcc" when only the "which" probe gave output, or NULL when
+ * nothing was found.
+ */
+const char *cuewCompilerPath(void) {
+#ifdef _WIN32
+  const char *search_dirs[] = {"C:/CUDA/bin", NULL};
+  const char *nvcc_name = "nvcc.exe";
+#else
+  const char *search_dirs[] = {
+    "/Developer/NVIDIA/CUDA-5.0/bin",
+    "/usr/local/cuda-5.0/bin",
+    "/usr/local/cuda/bin",
+    "/Developer/NVIDIA/CUDA-6.0/bin",
+    "/usr/local/cuda-6.0/bin",
+    "/Developer/NVIDIA/CUDA-5.5/bin",
+    "/usr/local/cuda-5.5/bin",
+    NULL};
+  const char *nvcc_name = "nvcc";
+#endif
+  static char nvcc_path[65536];
+  const char **dir;
+  const char *env_dir = getenv("CUDA_BIN_PATH");
+
+  /* Explicitly configured location takes precedence. */
+  if (env_dir != NULL) {
+    path_join(env_dir, nvcc_name, sizeof(nvcc_path), nvcc_path);
+    if (path_exists(nvcc_path)) {
+      return nvcc_path;
+    }
+  }
+
+  /* Walk the NULL-terminated list of default directories. */
+  for (dir = search_dirs; *dir != NULL; ++dir) {
+    path_join(*dir, nvcc_name, sizeof(nvcc_path), nvcc_path);
+    if (path_exists(nvcc_path)) {
+      return nvcc_path;
+    }
+  }
+
+#ifndef _WIN32
+  {
+    FILE *which = popen("which nvcc", "r");
+    if (which != NULL) {
+      char reply[4096] = {0};
+      int nread = fread(reply, 1, sizeof(reply) - 1, which);
+      reply[nread] = '\\0';
+      pclose(which);
+
+      /* Any output at all is taken as "nvcc is reachable". */
+      if (reply[0] != '\\0') {
+        return "nvcc";
+      }
+    }
+  }
+#endif
+
+  return NULL;
+}
+
+/* Query the version of the compiler returned by cuewCompilerPath().
+ *
+ * Runs the compiler with " --version" appended and looks for the
+ * "Cuda compilation tools, release " marker in its output.
+ *
+ * Returns 10 * major + minor (e.g. 65 for release 6.5), or 0 when no
+ * compiler was found, it could not be run, or its output could not be
+ * parsed.
+ */
+int cuewCompilerVersion(void) {
+  const char *path = cuewCompilerPath();
+  const char *marker = "Cuda compilation tools, release ";
+  FILE *pipe;
+  int major, minor, n;
+  size_t output_len = 0;
+  char *versionstr;
+  char buf[128];
+  char output[65536] = "\\0";
+  char command[65536] = "\\0";
+
+  if (path == NULL)
+    return 0;
+
+  /* get --version output */
+  n = snprintf(command, sizeof(command), "%s --version", path);
+  if (n < 0 || (size_t)n >= sizeof(command)) {
+    /* The command did not fit, running a truncated one is pointless. */
+    fprintf(stderr, "CUDA: compiler path is too long\\n");
+    return 0;
+  }
+  pipe = popen(command, "r");
+  if (!pipe) {
+    fprintf(stderr, "CUDA: failed to run compiler to retrieve version\\n");
+    return 0;
+  }
+
+  /* Read until fgets() fails (end of file or read error). Output which
+   * does not fit is dropped, always keeping one byte for the
+   * terminator. */
+  while (fgets(buf, sizeof(buf), pipe) != NULL) {
+    size_t room = sizeof(output) - output_len - 1;
+    if (room > 0) {
+      strncat(output, buf, room);
+      output_len += strlen(output + output_len);
+    }
+  }
+
+  pclose(pipe);
+
+  /* parse version number */
+  versionstr = strstr(output, marker);
+  if (versionstr == NULL) {
+    fprintf(stderr, "CUDA: failed to find version number in:\\n\\n%s\\n", output);
+    return 0;
+  }
+  versionstr += strlen(marker);
+
+  if (sscanf(versionstr, "%d.%d", &major, &minor) < 2) {
+    fprintf(stderr, "CUDA: failed to parse version number from:\\n\\n%s\\n", output);
+    return 0;
+  }
+
+  return 10 * major + minor;
+}
+"""
diff --git a/extern/cuew/auto/cuew_gen.py b/extern/cuew/auto/cuew_gen.py
new file mode 100644
index 00000000000..4cdc361e6f0
--- /dev/null
+++ b/extern/cuew/auto/cuew_gen.py
@@ -0,0 +1,591 @@
+#!/usr/bin/env python3
+#
+# Copyright 2014 Blender Foundation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License
+
+# This script generates either header or implementation file from
+# a CUDA header files.
+#
+# Usage: cuew hdr|impl [/path/to/cuda/includes]
+# - hdr means header file will be generated and printed to stdout.
+# - impl means implementation file will be generated and printed to stdout.
+# - /path/to/cuda/includes is a path to a folder with cuda.h and cudaGL.h
+# for which wrangler will be generated.
+
+import os
+import sys
+from cuda_errors import CUDA_ERRORS
+from pycparser import c_parser, c_ast, parse_file
+from subprocess import Popen, PIPE
+
+# Default folder with the CUDA headers; replaced by the optional
+# second command line argument.
+INCLUDE_DIR = "/usr/include"
+# Name of the wrangler: used for the header guard, the version defines
+# and (lower-cased) as prefix of the generated functions.
+LIB = "CUEW"
+# Prefix of the generated <REAL_LIB>_LIBRARY_FIND helper macros.
+REAL_LIB = "CUDA"
+# Version numbers written to the generated header.
+VERSION_MAJOR = "1"
+VERSION_MINOR = "2"
+# License comment printed at the top of every generated file.
+COPYRIGHT = """/*
+ * Copyright 2011-2014 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License
+ */"""
+# Headers, relative to INCLUDE_DIR, which are parsed by parse_files().
+FILES = ["cuda.h", "cudaGL.h"]
+
+# Results collected while parsing, consumed by the print_* functions.
+# Typedef statements, in the order they were visited.
+TYPEDEFS = []
+# Function type typedefs ("t<symbol>"); '' separates the input files.
+FUNC_TYPEDEFS = []
+# Names of the declared functions; '' separates the input files.
+SYMBOLS = []
+# Tokens of the "#define" lines which start at the first column.
+DEFINES = []
+# Tokens of the indented "#define <name> <name>_v2" lines.
+DEFINES_V2 = []
+# Names of the visited enumerators starting with "CUDA_ERROR_".
+ERRORS = []
+
+
+class FuncDefVisitor(c_ast.NodeVisitor):
+ indent = 0
+ prev_complex = False
+ dummy_typedefs = ['size_t', 'CUdeviceptr']
+
+ def _get_quals_string(self, node):
+ if node.quals:
+ return ' '.join(node.quals) + ' '
+ return ''
+
+ def _get_ident_type(self, node):
+ if isinstance(node, c_ast.PtrDecl):
+ return self._get_ident_type(node.type.type) + '*'
+ if isinstance(node, c_ast.ArrayDecl):
+ return self._get_ident_type(node.type)
+ elif isinstance(node, c_ast.Struct):
+ if node.name:
+ return 'struct ' + node.name
+ else:
+ self.indent += 1
+ struct = self._stringify_struct(node)
+ self.indent -= 1
+ return "struct {\n" + \
+ struct + (" " * self.indent) + "}"
+ elif isinstance(node, c_ast.Union):
+ self.indent += 1
+ union = self._stringify_struct(node)
+ self.indent -= 1
+ return "union {\n" + union + (" " * self.indent) + "}"
+ elif isinstance(node, c_ast.Enum):
+ return 'enum ' + node.name
+ elif isinstance(node, c_ast.TypeDecl):
+ return self._get_ident_type(node.type)
+ else:
+ return node.names[0]
+
+ def _stringify_param(self, param):
+ param_type = param.type
+ result = self._get_quals_string(param)
+ result += self._get_ident_type(param_type)
+ if param.name:
+ result += ' ' + param.name
+ if isinstance(param_type, c_ast.ArrayDecl):
+ # TODO(sergey): Workaround to deal with the
+ # preprocessed file where array size got
+ # substituded.
+ dim = param_type.dim.value
+ if param.name == "reserved" and dim == "64":
+ dim = "CU_IPC_HANDLE_SIZE"
+ result += '[' + dim + ']'
+ return result
+
+ def _stringify_params(self, params):
+ result = []
+ for param in params:
+ result.append(self._stringify_param(param))
+ return ', '.join(result)
+
+ def _stringify_struct(self, node):
+ result = ""
+ children = node.children()
+ for child in children:
+ member = self._stringify_param(child[1])
+ result += (" " * self.indent) + member + ";\n"
+ return result
+
+ def _stringify_enum(self, node):
+ result = ""
+ children = node.children()
+ for child in children:
+ if isinstance(child[1], c_ast.EnumeratorList):
+ enumerators = child[1].enumerators
+ for enumerator in enumerators:
+ result += (" " * self.indent) + enumerator.name
+ if enumerator.value:
+ result += " = " + enumerator.value.value
+ result += ",\n"
+ if enumerator.name.startswith("CUDA_ERROR_"):
+ ERRORS.append(enumerator.name)
+ return result
+
+ def visit_Decl(self, node):
+ if node.type.__class__.__name__ == 'FuncDecl':
+ if isinstance(node.type, c_ast.FuncDecl):
+ func_decl = node.type
+ func_decl_type = func_decl.type
+
+ typedef = 'typedef '
+ symbol_name = None
+
+ if isinstance(func_decl_type, c_ast.TypeDecl):
+ symbol_name = func_decl_type.declname
+ typedef += self._get_quals_string(func_decl_type)
+ typedef += self._get_ident_type(func_decl_type.type)
+ typedef += ' CUDAAPI'
+ typedef += ' t' + symbol_name
+ elif isinstance(func_decl_type, c_ast.PtrDecl):
+ ptr_type = func_decl_type.type
+ symbol_name = ptr_type.declname
+ typedef += self._get_quals_string(ptr_type)
+ typedef += self._get_ident_type(func_decl_type)
+ typedef += ' CUDAAPI'
+ typedef += ' t' + symbol_name
+
+ typedef += '(' + \
+ self._stringify_params(func_decl.args.params) + \
+ ');'
+
+ SYMBOLS.append(symbol_name)
+ FUNC_TYPEDEFS.append(typedef)
+
+ def visit_Typedef(self, node):
+ if node.name in self.dummy_typedefs:
+ return
+
+ complex = False
+ type = self._get_ident_type(node.type)
+ quals = self._get_quals_string(node)
+
+ if isinstance(node.type.type, c_ast.Struct):
+ self.indent += 1
+ struct = self._stringify_struct(node.type.type)
+ self.indent -= 1
+ typedef = quals + type + " {\n" + struct + "} " + node.name
+ complex = True
+ elif isinstance(node.type.type, c_ast.Enum):
+ self.indent += 1
+ enum = self._stringify_enum(node.type.type)
+ self.indent -= 1
+ typedef = quals + type + " {\n" + enum + "} " + node.name
+ complex = True
+ else:
+ typedef = quals + type + " " + node.name
+ if complex or self.prev_complex:
+ typedef = "\ntypedef " + typedef + ";"
+ else:
+ typedef = "typedef " + typedef + ";"
+
+ TYPEDEFS.append(typedef)
+
+ self.prev_complex = complex
+
+
+def get_latest_cpp():
+ path_prefix = "/usr/bin"
+ for cpp_version in ["9", "8", "7", "6", "5", "4"]:
+ test_cpp = os.path.join(path_prefix, "cpp-4." + cpp_version)
+ if os.path.exists(test_cpp):
+ return test_cpp
+ return None
+
+
+def preprocess_file(filename, cpp_path):
+ args = [cpp_path, "-I./"]
+ if filename.endswith("GL.h"):
+ args.append("-DCUDAAPI= ")
+ args.append(filename)
+
+ try:
+ pipe = Popen(args,
+ stdout=PIPE,
+ universal_newlines=True)
+ text = pipe.communicate()[0]
+ except OSError as e:
+ raise RuntimeError("Unable to invoke 'cpp'. " +
+ 'Make sure its path was passed correctly\n' +
+ ('Original error: %s' % e))
+
+ return text
+
+
+def parse_files():
+ parser = c_parser.CParser()
+ cpp_path = get_latest_cpp()
+
+ for filename in FILES:
+ filepath = os.path.join(INCLUDE_DIR, filename)
+ dummy_typedefs = {}
+ text = preprocess_file(filepath, cpp_path)
+
+ if filepath.endswith("GL.h"):
+ dummy_typedefs = {
+ "CUresult": "int",
+ "CUgraphicsResource": "void *",
+ "CUdevice": "void *",
+ "CUcontext": "void *",
+ "CUdeviceptr": "void *",
+ "CUstream": "void *"
+ }
+
+ text = "typedef int GLint;\n" + text
+ text = "typedef unsigned int GLuint;\n" + text
+ text = "typedef unsigned int GLenum;\n" + text
+ text = "typedef long size_t;\n" + text
+
+ for typedef in sorted(dummy_typedefs):
+ text = "typedef " + dummy_typedefs[typedef] + " " + \
+ typedef + ";\n" + text
+
+ ast = parser.parse(text, filepath)
+
+ with open(filepath) as f:
+ lines = f.readlines()
+ for line in lines:
+ if line.startswith("#define"):
+ line = line[8:-1]
+ token = line.split()
+ if token[0] not in ("__cuda_cuda_h__",
+ "CUDA_CB",
+ "CUDAAPI"):
+ DEFINES.append(token)
+
+ for line in lines:
+ # TODO(sergey): Use better matching rule for _v2 symbols.
+ if line[0].isspace() and line.lstrip().startswith("#define"):
+ line = line[12:-1]
+ token = line.split()
+ if len(token) == 2 and token[1].endswith("_v2"):
+ DEFINES_V2.append(token)
+
+ v = FuncDefVisitor()
+ for typedef in dummy_typedefs:
+ v.dummy_typedefs.append(typedef)
+ v.visit(ast)
+
+ FUNC_TYPEDEFS.append('')
+ SYMBOLS.append('')
+
+
+def print_copyright():
+ print(COPYRIGHT)
+ print("")
+
+
+def open_header_guard():
+ print("#ifndef __%s_H__" % (LIB))
+ print("#define __%s_H__" % (LIB))
+ print("")
+ print("#ifdef __cplusplus")
+ print("extern \"C\" {")
+ print("#endif")
+ print("")
+
+
+def close_header_guard():
+ print("")
+ print("#ifdef __cplusplus")
+ print("}")
+ print("#endif")
+ print("")
+ print("#endif /* __%s_H__ */" % (LIB))
+
+
+def print_header():
+ print_copyright()
+ open_header_guard()
+
+ # Fot size_t.
+ print("#include <stdlib.h>")
+ print("")
+
+ print("/* Defines. */")
+ print("#define %s_VERSION_MAJOR %s" % (LIB, VERSION_MAJOR))
+ print("#define %s_VERSION_MINOR %s" % (LIB, VERSION_MINOR))
+ print("")
+ for define in DEFINES:
+ print('#define %s' % (' '.join(define)))
+ print("")
+
+ print("""/* Functions which changed 3.1 -> 3.2 for 64 bit stuff,
+ * the cuda library has both the old ones for compatibility and new
+ * ones with _v2 postfix,
+ */""")
+ for define in DEFINES_V2:
+ print('#define %s' % (' '.join(define)))
+ print("")
+
+ print("/* Types. */")
+
+ # We handle this specially because of the file is
+ # getting preprocessed.
+ print("""#if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64)
+typedef unsigned long long CUdeviceptr;
+#else
+typedef unsigned int CUdeviceptr;
+#endif
+""")
+
+ for typedef in TYPEDEFS:
+ print('%s' % (typedef))
+
+ # TDO(sergey): This is only specific to CUDA wrapper.
+ print("""
+#ifdef _WIN32
+# define CUDAAPI __stdcall
+# define CUDA_CB __stdcall
+#else
+# define CUDAAPI
+# define CUDA_CB
+#endif
+""")
+
+ print("/* Function types. */")
+ for func_typedef in FUNC_TYPEDEFS:
+ print('%s' % (func_typedef))
+ print("")
+
+ print("/* Function declarations. */")
+ for symbol in SYMBOLS:
+ if symbol:
+ print('extern t%s *%s;' % (symbol, symbol))
+ else:
+ print("")
+
+ print("")
+ print("enum {")
+ print(" CUEW_SUCCESS = 0,")
+ print(" CUEW_ERROR_OPEN_FAILED = -1,")
+ print(" CUEW_ERROR_ATEXIT_FAILED = -2,")
+ print("};")
+ print("")
+ print("int %sInit(void);" % (LIB.lower()))
+ # TODO(sergey): Get rid of hardcoded CUresult.
+ print("const char *%sErrorString(CUresult result);" % (LIB.lower()))
+ print("const char *cuewCompilerPath(void);")
+ print("int cuewCompilerVersion(void);")
+
+ close_header_guard()
+
+
+def print_dl_wrapper():
+ print("""#ifdef _WIN32
+# define WIN32_LEAN_AND_MEAN
+# define VC_EXTRALEAN
+# include <windows.h>
+
+/* Utility macros. */
+
+typedef HMODULE DynamicLibrary;
+
+# define dynamic_library_open(path) LoadLibrary(path)
+# define dynamic_library_close(lib) FreeLibrary(lib)
+# define dynamic_library_find(lib, symbol) GetProcAddress(lib, symbol)
+#else
+# include <dlfcn.h>
+
+typedef void* DynamicLibrary;
+
+# define dynamic_library_open(path) dlopen(path, RTLD_NOW)
+# define dynamic_library_close(lib) dlclose(lib)
+# define dynamic_library_find(lib, symbol) dlsym(lib, symbol)
+#endif
+""")
+
+
+def print_dl_helper_macro():
+ print("""#define %s_LIBRARY_FIND_CHECKED(name) \\
+ name = (t##name *)dynamic_library_find(lib, #name);
+
+#define %s_LIBRARY_FIND(name) \\
+ name = (t##name *)dynamic_library_find(lib, #name); \\
+ assert(name);
+
+static DynamicLibrary lib;""" % (REAL_LIB, REAL_LIB))
+ print("")
+
+
+def print_dl_close():
+ print("""static void %sExit(void) {
+ if(lib != NULL) {
+ /* Ignore errors. */
+ dynamic_library_close(lib);
+ lib = NULL;
+ }
+}""" % (LIB.lower()))
+ print("")
+
+
+def print_lib_path():
+ # TODO(sergey): get rid of hardcoded libraries.
+ print("""#ifdef _WIN32
+ /* Expected in c:/windows/system or similar, no path needed. */
+ const char *path = "nvcuda.dll";
+#elif defined(__APPLE__)
+ /* Default installation path. */
+ const char *path = "/usr/local/cuda/lib/libcuda.dylib";
+#else
+ const char *path = "libcuda.so";
+#endif""")
+
+
+def print_init_guard():
+ print(""" static int initialized = 0;
+ static int result = 0;
+ int error, driver_version;
+
+ if (initialized) {
+ return result;
+ }
+
+ initialized = 1;
+
+ error = atexit(cuewExit);
+ if (error) {
+ result = CUEW_ERROR_ATEXIT_FAILED;
+ return result;
+ }
+
+ /* Load library. */
+ lib = dynamic_library_open(path);
+
+ if (lib == NULL) {
+ result = CUEW_ERROR_OPEN_FAILED;
+ return result;
+ }""")
+ print("")
+
+
+def print_driver_version_guard():
+ # TODO(sergey): Currently it's hardcoded for CUDA only.
+ print(""" /* Detect driver version. */
+ driver_version = 1000;
+
+ %s_LIBRARY_FIND_CHECKED(cuDriverGetVersion);
+ if (cuDriverGetVersion) {
+ cuDriverGetVersion(&driver_version);
+ }
+
+ /* We require version 4.0. */
+ if (driver_version < 4000) {
+ result = CUEW_ERROR_OPEN_FAILED;
+ return result;
+ }""" % (REAL_LIB))
+
+
+def print_dl_init():
+ print("int %sInit(void) {" % (LIB.lower()))
+
+ print(" /* Library paths. */")
+ print_lib_path()
+ print_init_guard()
+ print_driver_version_guard()
+
+ print(" /* Fetch all function pointers. */")
+ for symbol in SYMBOLS:
+ if symbol:
+ print(" %s_LIBRARY_FIND(%s);" % (REAL_LIB, symbol))
+ else:
+ print("")
+
+ print("")
+ print(" result = CUEW_SUCCESS;")
+ print(" return result;")
+
+ print("}")
+
+
+def print_implementation():
+ print_copyright()
+
+ # TODO(sergey): Get rid of hardcoded header.
+ print("""#ifdef _MSC_VER
+# define snprintf _snprintf
+# define popen _popen
+# define pclose _pclose
+# define _CRT_SECURE_NO_WARNINGS
+#endif
+""")
+ print("#include <cuew.h>")
+ print("#include <assert.h>")
+ print("#include <stdio.h>")
+ print("#include <string.h>")
+ print("#include <sys/stat.h>")
+ print("")
+
+ print_dl_wrapper()
+ print_dl_helper_macro()
+
+ print("/* Function definitions. */")
+ for symbol in SYMBOLS:
+ if symbol:
+ print('t%s *%s;' % (symbol, symbol))
+ else:
+ print("")
+ print("")
+
+ print_dl_close()
+
+ print("/* Implementation function. */")
+ print_dl_init()
+
+ print("")
+ # TODO(sergey): Get rid of hardcoded CUresult.
+ print("const char *%sErrorString(CUresult result) {" % (LIB.lower()))
+ print(" switch(result) {")
+ print(" case CUDA_SUCCESS: return \"No errors\";")
+
+ for error in ERRORS:
+ if error in CUDA_ERRORS:
+ str = CUDA_ERRORS[error]
+ else:
+ str = error[11:]
+ print(" case %s: return \"%s\";" % (error, str))
+
+ print(" default: return \"Unknown CUDA error value\";")
+ print(" }")
+ print("}")
+
+ from cuda_extra import extra_code
+ print(extra_code)
+
+if __name__ == "__main__":
+
+ if len(sys.argv) != 2 and len(sys.argv) != 3:
+ print("Usage: %s hdr|impl [/path/to/cuda/toolkit/include]" %
+ (sys.argv[0]))
+ exit(1)
+
+ if len(sys.argv) == 3:
+ INCLUDE_DIR = sys.argv[2]
+
+ parse_files()
+
+ if sys.argv[1] == "hdr":
+ print_header()
+ elif sys.argv[1] == "impl":
+ print_implementation()
+ else:
+ print("Unknown command %s" % (sys.argv[1]))
+ exit(1)
diff --git a/extern/cuew/auto/cuew_gen.sh b/extern/cuew/auto/cuew_gen.sh
new file mode 100755
index 00000000000..b44987b801d
--- /dev/null
+++ b/extern/cuew/auto/cuew_gen.sh
@@ -0,0 +1,10 @@
+#!/bin/sh
+
+# This script invokes cuew_gen.py and updates the
+# header and source files in the repository.
+# All arguments are forwarded to cuew_gen.py.
+
+# Stop at the first failing command instead of going on to
+# regenerate the second file.
+set -e
+
+# Quote every expansion, so paths and arguments containing
+# spaces are passed through unchanged.
+SCRIPT=`realpath -s "$0"`
+DIR=`dirname "$SCRIPT"`
+
+python "${DIR}/cuew_gen.py" hdr "$@" > "$DIR/../include/cuew.h"
+python "${DIR}/cuew_gen.py" impl "$@" > "$DIR/../src/cuew.c"
diff --git a/extern/cuew/auto/stdlib.h b/extern/cuew/auto/stdlib.h
new file mode 100644
index 00000000000..75976c8574f
--- /dev/null
+++ b/extern/cuew/auto/stdlib.h
@@ -0,0 +1,3 @@
+/* This stub is needed to work around issues with parsing the system
+ * headers: it only provides a declaration of size_t.
+ * NOTE(review): presumably picked up in place of the real <stdlib.h>
+ * through the include path used by the generator -- confirm. */
+
+typedef long size_t;