diff options
Diffstat (limited to 'extern/cuew')
-rw-r--r-- | extern/cuew/CMakeLists.txt | 40 | ||||
-rw-r--r-- | extern/cuew/LICENSE | 174 | ||||
-rw-r--r-- | extern/cuew/README | 12 | ||||
-rw-r--r-- | extern/cuew/SConscript | 35 | ||||
-rw-r--r-- | extern/cuew/auto/cuda_errors.py | 35 | ||||
-rw-r--r-- | extern/cuew/auto/cuda_extra.py | 125 | ||||
-rw-r--r-- | extern/cuew/auto/cuew_gen.py | 591 | ||||
-rwxr-xr-x | extern/cuew/auto/cuew_gen.sh | 10 | ||||
-rw-r--r-- | extern/cuew/auto/stdlib.h | 3 | ||||
-rw-r--r-- | extern/cuew/include/cuew.h | 1138 | ||||
-rw-r--r-- | extern/cuew/src/cuew.c | 710 |
11 files changed, 2873 insertions, 0 deletions
diff --git a/extern/cuew/CMakeLists.txt b/extern/cuew/CMakeLists.txt new file mode 100644 index 00000000000..284fbbc6aca --- /dev/null +++ b/extern/cuew/CMakeLists.txt @@ -0,0 +1,40 @@ +# ***** BEGIN GPL LICENSE BLOCK ***** +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +# +# The Original Code is Copyright (C) 2006, Blender Foundation +# All rights reserved. +# +# The Original Code is: all of this file. +# +# Contributor(s): Jacques Beaurain. +# +# ***** END GPL LICENSE BLOCK ***** + +set(INC + . + include +) + +set(INC_SYS + +) + +set(SRC + include/cuew.h + src/cuew.c +) + +blender_add_lib(extern_cuew "${SRC}" "${INC}" "${INC_SYS}") diff --git a/extern/cuew/LICENSE b/extern/cuew/LICENSE new file mode 100644 index 00000000000..c7533090bbe --- /dev/null +++ b/extern/cuew/LICENSE @@ -0,0 +1,174 @@ + + Modified Apache 2.0 License + + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. 
+ + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor + and its affiliates, except as required to comply with Section 4(c) of + the License and to reproduce the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
diff --git a/extern/cuew/README b/extern/cuew/README new file mode 100644 index 00000000000..3c43b7278d9 --- /dev/null +++ b/extern/cuew/README @@ -0,0 +1,12 @@ +The CUDA Extension Wrangler Library (CUEW) is a cross-platform open-source +C/C++ extension loading library. CUEW provides efficient run-time mechanisms +for determining which CUDA functions and extensions are supported +on the target platform. + +CUDA core and extension functionality is exposed in a single header file. +CUEW has been tested on a variety of operating systems, including Windows, +Linux, and Mac OS X. + +LICENSE + +CUEW library is released under the Apache 2.0 license. diff --git a/extern/cuew/SConscript b/extern/cuew/SConscript new file mode 100644 index 00000000000..9c12c71133c --- /dev/null +++ b/extern/cuew/SConscript @@ -0,0 +1,35 @@ +#!/usr/bin/env python +# +# ***** BEGIN GPL LICENSE BLOCK ***** +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +# +# The Original Code is Copyright (C) 2006, Blender Foundation +# All rights reserved. +# +# The Original Code is: all of this file. +# +# Contributor(s): Nathan Letwory. 
+# +# ***** END GPL LICENSE BLOCK ***** + +Import ('env') + +sources = env.Glob('src/cuew.c') + +incs = 'include' +defs = [] + +env.BlenderLib ('extern_cuew', sources, Split(incs), defines=defs, libtype=['system'], priority = [0]) diff --git a/extern/cuew/auto/cuda_errors.py b/extern/cuew/auto/cuda_errors.py new file mode 100644 index 00000000000..464b7765234 --- /dev/null +++ b/extern/cuew/auto/cuda_errors.py @@ -0,0 +1,35 @@ +CUDA_ERRORS={ +'CUDA_SUCCESS': "No errors", +'CUDA_ERROR_INVALID_VALUE': "Invalid value", +'CUDA_ERROR_OUT_OF_MEMORY': "Out of memory", +'CUDA_ERROR_NOT_INITIALIZED': "Driver not initialized", +'CUDA_ERROR_DEINITIALIZED': "Driver deinitialized", +'CUDA_ERROR_NO_DEVICE': "No CUDA-capable device available", +'CUDA_ERROR_INVALID_DEVICE': "Invalid device", +'CUDA_ERROR_INVALID_IMAGE': "Invalid kernel image", +'CUDA_ERROR_INVALID_CONTEXT': "Invalid context", +'CUDA_ERROR_CONTEXT_ALREADY_CURRENT': "Context already current", +'CUDA_ERROR_MAP_FAILED': "Map failed", +'CUDA_ERROR_UNMAP_FAILED': "Unmap failed", +'CUDA_ERROR_ARRAY_IS_MAPPED': "Array is mapped", +'CUDA_ERROR_ALREADY_MAPPED': "Already mapped", +'CUDA_ERROR_NO_BINARY_FOR_GPU': "No binary for GPU", +'CUDA_ERROR_ALREADY_ACQUIRED': "Already acquired", +'CUDA_ERROR_NOT_MAPPED': "Not mapped", +'CUDA_ERROR_NOT_MAPPED_AS_ARRAY': "Mapped resource not available for access as an array", +'CUDA_ERROR_NOT_MAPPED_AS_POINTER': "Mapped resource not available for access as a pointer", +'CUDA_ERROR_ECC_UNCORRECTABLE': "Uncorrectable ECC error detected", +'CUDA_ERROR_UNSUPPORTED_LIMIT': "CUlimit not supported by device", +'CUDA_ERROR_INVALID_SOURCE': "Invalid source", +'CUDA_ERROR_FILE_NOT_FOUND': "File not found", +'CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND': "Link to a shared object failed to resolve", +'CUDA_ERROR_SHARED_OBJECT_INIT_FAILED': "Shared object initialization failed", +'CUDA_ERROR_INVALID_HANDLE': "Invalid handle", +'CUDA_ERROR_NOT_FOUND': "Not found", +'CUDA_ERROR_NOT_READY': "CUDA not 
ready", +'CUDA_ERROR_LAUNCH_FAILED': "Launch failed", +'CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES': "Launch exceeded resources", +'CUDA_ERROR_LAUNCH_TIMEOUT': "Launch exceeded timeout", +'CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING': "Launch with incompatible texturing", +'CUDA_ERROR_UNKNOWN': "Unknown error", +} diff --git a/extern/cuew/auto/cuda_extra.py b/extern/cuew/auto/cuda_extra.py new file mode 100644 index 00000000000..fd4f466df83 --- /dev/null +++ b/extern/cuew/auto/cuda_extra.py @@ -0,0 +1,125 @@ +extra_code = """ +static void path_join(const char *path1, + const char *path2, + int maxlen, + char *result) { +#if defined(WIN32) || defined(_WIN32) + const char separator = '\\\\'; +#else + const char separator = '/'; +#endif + int n = snprintf(result, maxlen, "%s%c%s", path1, separator, path2); + if (n != -1 && n < maxlen) { + result[n] = '\\0'; + } + else { + result[maxlen - 1] = '\\0'; + } +} + +static int path_exists(const char *path) { + struct stat st; + if (stat(path, &st)) { + return 0; + } + return 1; +} + +const char *cuewCompilerPath(void) { +#ifdef _WIN32 + const char *defaultpaths[] = {"C:/CUDA/bin", NULL}; + const char *executable = "nvcc.exe"; +#else + const char *defaultpaths[] = { + "/Developer/NVIDIA/CUDA-5.0/bin", + "/usr/local/cuda-5.0/bin", + "/usr/local/cuda/bin", + "/Developer/NVIDIA/CUDA-6.0/bin", + "/usr/local/cuda-6.0/bin", + "/Developer/NVIDIA/CUDA-5.5/bin", + "/usr/local/cuda-5.5/bin", + NULL}; + const char *executable = "nvcc"; +#endif + int i; + + const char *binpath = getenv("CUDA_BIN_PATH"); + + static char nvcc[65536]; + + if (binpath) { + path_join(binpath, executable, sizeof(nvcc), nvcc); + if (path_exists(nvcc)) + return nvcc; + } + + for (i = 0; defaultpaths[i]; ++i) { + path_join(defaultpaths[i], executable, sizeof(nvcc), nvcc); + if (path_exists(nvcc)) + return nvcc; + } + +#ifndef _WIN32 + { + FILE *handle = popen("which nvcc", "r"); + if (handle) { + char buffer[4096] = {0}; + int len = fread(buffer, 1, sizeof(buffer) - 1, 
handle); + buffer[len] = '\\0'; + pclose(handle); + + if (buffer[0]) + return "nvcc"; + } + } +#endif + + return NULL; +} + +int cuewCompilerVersion(void) { + const char *path = cuewCompilerPath(); + const char *marker = "Cuda compilation tools, release "; + FILE *pipe; + int major, minor; + char *versionstr; + char buf[128]; + char output[65536] = "\\0"; + char command[65536] = "\\0"; + + if (path == NULL) + return 0; + + /* get --version output */ + strncpy(command, path, sizeof(command)); + strncat(command, " --version", sizeof(command) - strlen(path)); + pipe = popen(command, "r"); + if (!pipe) { + fprintf(stderr, "CUDA: failed to run compiler to retrieve version"); + return 0; + } + + while (!feof(pipe)) { + if (fgets(buf, sizeof(buf), pipe) != NULL) { + strncat(output, buf, sizeof(output) - strlen(output)); + } + } + + pclose(pipe); + + /* parse version number */ + versionstr = strstr(output, marker); + if (versionstr == NULL) { + fprintf(stderr, "CUDA: failed to find version number in:\\n\\n%s\\n", output); + return 0; + } + versionstr += strlen(marker); + + if (sscanf(versionstr, "%d.%d", &major, &minor) < 2) { + fprintf(stderr, "CUDA: failed to parse version number from:\\n\\n%s\\n", output); + return 0; + } + + return 10 * major + minor; +} +""" diff --git a/extern/cuew/auto/cuew_gen.py b/extern/cuew/auto/cuew_gen.py new file mode 100644 index 00000000000..4cdc361e6f0 --- /dev/null +++ b/extern/cuew/auto/cuew_gen.py @@ -0,0 +1,591 @@ +#!/usr/bin/env python3 +# +# Copyright 2014 Blender Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License + +# This script generates either header or implementation file from +# a CUDA header files. +# +# Usage: cuew hdr|impl [/path/to/cuda/includes] +# - hdr means header file will be generated and printed to stdout. +# - impl means implementation file will be generated and printed to stdout. +# - /path/to/cuda/includes is a path to a folder with cuda.h and cudaGL.h +# for which wrangler will be generated. + +import os +import sys +from cuda_errors import CUDA_ERRORS +from pycparser import c_parser, c_ast, parse_file +from subprocess import Popen, PIPE + +INCLUDE_DIR = "/usr/include" +LIB = "CUEW" +REAL_LIB = "CUDA" +VERSION_MAJOR = "1" +VERSION_MINOR = "2" +COPYRIGHT = """/* + * Copyright 2011-2014 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License + */""" +FILES = ["cuda.h", "cudaGL.h"] + +TYPEDEFS = [] +FUNC_TYPEDEFS = [] +SYMBOLS = [] +DEFINES = [] +DEFINES_V2 = [] +ERRORS = [] + + +class FuncDefVisitor(c_ast.NodeVisitor): + indent = 0 + prev_complex = False + dummy_typedefs = ['size_t', 'CUdeviceptr'] + + def _get_quals_string(self, node): + if node.quals: + return ' '.join(node.quals) + ' ' + return '' + + def _get_ident_type(self, node): + if isinstance(node, c_ast.PtrDecl): + return self._get_ident_type(node.type.type) + '*' + if isinstance(node, c_ast.ArrayDecl): + return self._get_ident_type(node.type) + elif isinstance(node, c_ast.Struct): + if node.name: + return 'struct ' + node.name + else: + self.indent += 1 + struct = self._stringify_struct(node) + self.indent -= 1 + return "struct {\n" + \ + struct + (" " * self.indent) + "}" + elif isinstance(node, c_ast.Union): + self.indent += 1 + union = self._stringify_struct(node) + self.indent -= 1 + return "union {\n" + union + (" " * self.indent) + "}" + elif isinstance(node, c_ast.Enum): + return 'enum ' + node.name + elif isinstance(node, c_ast.TypeDecl): + return self._get_ident_type(node.type) + else: + return node.names[0] + + def _stringify_param(self, param): + param_type = param.type + result = self._get_quals_string(param) + result += self._get_ident_type(param_type) + if param.name: + result += ' ' + param.name + if isinstance(param_type, c_ast.ArrayDecl): + # TODO(sergey): Workaround to deal with the + # preprocessed file where array size got + # substituded. 
+ dim = param_type.dim.value + if param.name == "reserved" and dim == "64": + dim = "CU_IPC_HANDLE_SIZE" + result += '[' + dim + ']' + return result + + def _stringify_params(self, params): + result = [] + for param in params: + result.append(self._stringify_param(param)) + return ', '.join(result) + + def _stringify_struct(self, node): + result = "" + children = node.children() + for child in children: + member = self._stringify_param(child[1]) + result += (" " * self.indent) + member + ";\n" + return result + + def _stringify_enum(self, node): + result = "" + children = node.children() + for child in children: + if isinstance(child[1], c_ast.EnumeratorList): + enumerators = child[1].enumerators + for enumerator in enumerators: + result += (" " * self.indent) + enumerator.name + if enumerator.value: + result += " = " + enumerator.value.value + result += ",\n" + if enumerator.name.startswith("CUDA_ERROR_"): + ERRORS.append(enumerator.name) + return result + + def visit_Decl(self, node): + if node.type.__class__.__name__ == 'FuncDecl': + if isinstance(node.type, c_ast.FuncDecl): + func_decl = node.type + func_decl_type = func_decl.type + + typedef = 'typedef ' + symbol_name = None + + if isinstance(func_decl_type, c_ast.TypeDecl): + symbol_name = func_decl_type.declname + typedef += self._get_quals_string(func_decl_type) + typedef += self._get_ident_type(func_decl_type.type) + typedef += ' CUDAAPI' + typedef += ' t' + symbol_name + elif isinstance(func_decl_type, c_ast.PtrDecl): + ptr_type = func_decl_type.type + symbol_name = ptr_type.declname + typedef += self._get_quals_string(ptr_type) + typedef += self._get_ident_type(func_decl_type) + typedef += ' CUDAAPI' + typedef += ' t' + symbol_name + + typedef += '(' + \ + self._stringify_params(func_decl.args.params) + \ + ');' + + SYMBOLS.append(symbol_name) + FUNC_TYPEDEFS.append(typedef) + + def visit_Typedef(self, node): + if node.name in self.dummy_typedefs: + return + + complex = False + type = 
self._get_ident_type(node.type) + quals = self._get_quals_string(node) + + if isinstance(node.type.type, c_ast.Struct): + self.indent += 1 + struct = self._stringify_struct(node.type.type) + self.indent -= 1 + typedef = quals + type + " {\n" + struct + "} " + node.name + complex = True + elif isinstance(node.type.type, c_ast.Enum): + self.indent += 1 + enum = self._stringify_enum(node.type.type) + self.indent -= 1 + typedef = quals + type + " {\n" + enum + "} " + node.name + complex = True + else: + typedef = quals + type + " " + node.name + if complex or self.prev_complex: + typedef = "\ntypedef " + typedef + ";" + else: + typedef = "typedef " + typedef + ";" + + TYPEDEFS.append(typedef) + + self.prev_complex = complex + + +def get_latest_cpp(): + path_prefix = "/usr/bin" + for cpp_version in ["9", "8", "7", "6", "5", "4"]: + test_cpp = os.path.join(path_prefix, "cpp-4." + cpp_version) + if os.path.exists(test_cpp): + return test_cpp + return None + + +def preprocess_file(filename, cpp_path): + args = [cpp_path, "-I./"] + if filename.endswith("GL.h"): + args.append("-DCUDAAPI= ") + args.append(filename) + + try: + pipe = Popen(args, + stdout=PIPE, + universal_newlines=True) + text = pipe.communicate()[0] + except OSError as e: + raise RuntimeError("Unable to invoke 'cpp'. 
" + + 'Make sure its path was passed correctly\n' + + ('Original error: %s' % e)) + + return text + + +def parse_files(): + parser = c_parser.CParser() + cpp_path = get_latest_cpp() + + for filename in FILES: + filepath = os.path.join(INCLUDE_DIR, filename) + dummy_typedefs = {} + text = preprocess_file(filepath, cpp_path) + + if filepath.endswith("GL.h"): + dummy_typedefs = { + "CUresult": "int", + "CUgraphicsResource": "void *", + "CUdevice": "void *", + "CUcontext": "void *", + "CUdeviceptr": "void *", + "CUstream": "void *" + } + + text = "typedef int GLint;\n" + text + text = "typedef unsigned int GLuint;\n" + text + text = "typedef unsigned int GLenum;\n" + text + text = "typedef long size_t;\n" + text + + for typedef in sorted(dummy_typedefs): + text = "typedef " + dummy_typedefs[typedef] + " " + \ + typedef + ";\n" + text + + ast = parser.parse(text, filepath) + + with open(filepath) as f: + lines = f.readlines() + for line in lines: + if line.startswith("#define"): + line = line[8:-1] + token = line.split() + if token[0] not in ("__cuda_cuda_h__", + "CUDA_CB", + "CUDAAPI"): + DEFINES.append(token) + + for line in lines: + # TODO(sergey): Use better matching rule for _v2 symbols. 
+ if line[0].isspace() and line.lstrip().startswith("#define"): + line = line[12:-1] + token = line.split() + if len(token) == 2 and token[1].endswith("_v2"): + DEFINES_V2.append(token) + + v = FuncDefVisitor() + for typedef in dummy_typedefs: + v.dummy_typedefs.append(typedef) + v.visit(ast) + + FUNC_TYPEDEFS.append('') + SYMBOLS.append('') + + +def print_copyright(): + print(COPYRIGHT) + print("") + + +def open_header_guard(): + print("#ifndef __%s_H__" % (LIB)) + print("#define __%s_H__" % (LIB)) + print("") + print("#ifdef __cplusplus") + print("extern \"C\" {") + print("#endif") + print("") + + +def close_header_guard(): + print("") + print("#ifdef __cplusplus") + print("}") + print("#endif") + print("") + print("#endif /* __%s_H__ */" % (LIB)) + + +def print_header(): + print_copyright() + open_header_guard() + + # Fot size_t. + print("#include <stdlib.h>") + print("") + + print("/* Defines. */") + print("#define %s_VERSION_MAJOR %s" % (LIB, VERSION_MAJOR)) + print("#define %s_VERSION_MINOR %s" % (LIB, VERSION_MINOR)) + print("") + for define in DEFINES: + print('#define %s' % (' '.join(define))) + print("") + + print("""/* Functions which changed 3.1 -> 3.2 for 64 bit stuff, + * the cuda library has both the old ones for compatibility and new + * ones with _v2 postfix, + */""") + for define in DEFINES_V2: + print('#define %s' % (' '.join(define))) + print("") + + print("/* Types. */") + + # We handle this specially because of the file is + # getting preprocessed. + print("""#if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64) +typedef unsigned long long CUdeviceptr; +#else +typedef unsigned int CUdeviceptr; +#endif +""") + + for typedef in TYPEDEFS: + print('%s' % (typedef)) + + # TDO(sergey): This is only specific to CUDA wrapper. + print(""" +#ifdef _WIN32 +# define CUDAAPI __stdcall +# define CUDA_CB __stdcall +#else +# define CUDAAPI +# define CUDA_CB +#endif +""") + + print("/* Function types. 
*/") + for func_typedef in FUNC_TYPEDEFS: + print('%s' % (func_typedef)) + print("") + + print("/* Function declarations. */") + for symbol in SYMBOLS: + if symbol: + print('extern t%s *%s;' % (symbol, symbol)) + else: + print("") + + print("") + print("enum {") + print(" CUEW_SUCCESS = 0,") + print(" CUEW_ERROR_OPEN_FAILED = -1,") + print(" CUEW_ERROR_ATEXIT_FAILED = -2,") + print("};") + print("") + print("int %sInit(void);" % (LIB.lower())) + # TODO(sergey): Get rid of hardcoded CUresult. + print("const char *%sErrorString(CUresult result);" % (LIB.lower())) + print("const char *cuewCompilerPath(void);") + print("int cuewCompilerVersion(void);") + + close_header_guard()
+
+
+def print_dl_wrapper():
+    """Print the platform shim for dynamic loading.
+
+    Emits the DynamicLibrary handle typedef and the dynamic_library_open /
+    dynamic_library_close / dynamic_library_find macros, mapped onto the
+    Win32 API under _WIN32 and onto dlopen/dlclose/dlsym otherwise.
+    """
+    # LoadLibraryA is spelled out because the generated code passes a narrow
+    # `const char *` path; the plain LoadLibrary macro resolves to the wide
+    # LoadLibraryW variant when UNICODE is defined.
+    print("""#ifdef _WIN32
+#  define WIN32_LEAN_AND_MEAN
+#  define VC_EXTRALEAN
+#  include <windows.h>
+
+/* Utility macros. */
+
+typedef HMODULE DynamicLibrary;
+
+#  define dynamic_library_open(path) LoadLibraryA(path)
+#  define dynamic_library_close(lib) FreeLibrary(lib)
+#  define dynamic_library_find(lib, symbol) GetProcAddress(lib, symbol)
+#else
+#  include <dlfcn.h>
+
+typedef void* DynamicLibrary;
+
+#  define dynamic_library_open(path) dlopen(path, RTLD_NOW)
+#  define dynamic_library_close(lib) dlclose(lib)
+#  define dynamic_library_find(lib, symbol) dlsym(lib, symbol)
+#endif
+""")
+
+
+def print_dl_helper_macro():
+    """Print the symbol lookup macros and the static library handle.
+
+    <REAL_LIB>_LIBRARY_FIND_CHECKED only assigns the looked-up pointer, so
+    the caller can test it; <REAL_LIB>_LIBRARY_FIND additionally asserts
+    that the symbol was found.
+    """
+    print("""#define %s_LIBRARY_FIND_CHECKED(name) \\
+        name = (t##name *)dynamic_library_find(lib, #name);
+
+#define %s_LIBRARY_FIND(name) \\
+        name = (t##name *)dynamic_library_find(lib, #name); \\
+        assert(name);
+
+static DynamicLibrary lib;""" % (REAL_LIB, REAL_LIB))
+    print("")
+
+
+def print_dl_close():
+    """Print the static <lib>Exit() function that closes the library handle
+    (if one is open) and resets it to NULL."""
+    print("""static void %sExit(void) {
+  if(lib != NULL) {
+    /* Ignore errors. */
+    dynamic_library_close(lib);
+    lib = NULL;
+  }
+}""" % (LIB.lower()))
+    print("")
+
+
+def print_lib_path():
+    """Print the per-platform declaration of `path`, the library name or
+    location handed to dynamic_library_open()."""
+    # TODO(sergey): get rid of hardcoded libraries.
+    print("""#ifdef _WIN32
+  /* Expected in c:/windows/system or similar, no path needed. */
+  const char *path = "nvcuda.dll";
+#elif defined(__APPLE__)
+  /* Default installation path. */
+  const char *path = "/usr/local/cuda/lib/libcuda.dylib";
+#else
+  const char *path = "libcuda.so";
+#endif""")
+
+
+def print_init_guard():
+    """Print the first part of the init function body.
+
+    The emitted code returns the cached result on repeated calls, registers
+    the exit handler with atexit() and opens the library, bailing out with
+    CUEW_ERROR_ATEXIT_FAILED / CUEW_ERROR_OPEN_FAILED on failure.
+    """
+    # The handler name is derived from LIB exactly as in print_dl_close(),
+    # so the atexit() call always refers to the function emitted there.
+    print("""  static int initialized = 0;
+  static int result = 0;
+  int error, driver_version;
+
+  if (initialized) {
+    return result;
+  }
+
+  initialized = 1;
+
+  error = atexit(%sExit);
+  if (error) {
+    result = CUEW_ERROR_ATEXIT_FAILED;
+    return result;
+  }
+
+  /* Load library. */
+  lib = dynamic_library_open(path);
+
+  if (lib == NULL) {
+    result = CUEW_ERROR_OPEN_FAILED;
+    return result;
+  }""" % (LIB.lower()))
+    print("")
+
+
+def print_driver_version_guard():
+    """Print the driver version check.
+
+    The emitted code looks up cuDriverGetVersion without asserting, falls
+    back to version 1000 when the symbol is missing, and fails with
+    CUEW_ERROR_OPEN_FAILED for anything below 4000.
+    """
+    # TODO(sergey): Currently it's hardcoded for CUDA only.
+    print("""  /* Detect driver version. */
+  driver_version = 1000;
+
+  %s_LIBRARY_FIND_CHECKED(cuDriverGetVersion);
+  if (cuDriverGetVersion) {
+    cuDriverGetVersion(&driver_version);
+  }
+
+  /* We require version 4.0. */
+  if (driver_version < 4000) {
+    result = CUEW_ERROR_OPEN_FAILED;
+    return result;
+  }""" % (REAL_LIB))
+
+
+def print_dl_init():
+    """Print the complete <lib>Init() function: library path, init guard,
+    driver version check and one asserted lookup per entry of SYMBOLS
+    (empty entries become blank lines)."""
+    print("int %sInit(void) {" % (LIB.lower()))
+
+    print("  /* Library paths. */")
+    print_lib_path()
+    print_init_guard()
+    print_driver_version_guard()
+
+    print("  /* Fetch all function pointers. */")
+    for symbol in SYMBOLS:
+        if symbol:
+            print("  %s_LIBRARY_FIND(%s);" % (REAL_LIB, symbol))
+        else:
+            print("")
+
+    print("")
+    print("  result = CUEW_SUCCESS;")
+    print("  return result;")
+
+    print("}")
+
+
+def print_implementation():
+    """Print the whole implementation file to stdout: copyright, MSVC
+    compatibility defines, includes, loader shim, function pointer
+    definitions, exit/init functions, the error-string function and the
+    extra code imported from cuda_extra."""
+    print_copyright()
+
+    # TODO(sergey): Get rid of hardcoded header.
+    print("""#ifdef _MSC_VER
+#  define snprintf _snprintf
+#  define popen _popen
+#  define pclose _pclose
+#  define _CRT_SECURE_NO_WARNINGS
+#endif
+""")
+    print("#include <cuew.h>")
+    print("#include <assert.h>")
+    print("#include <stdio.h>")
+    print("#include <string.h>")
+    print("#include <sys/stat.h>")
+    print("")
+
+    print_dl_wrapper()
+    print_dl_helper_macro()
+
+    print("/* Function definitions. */")
+    for symbol in SYMBOLS:
+        if symbol:
+            print('t%s *%s;' % (symbol, symbol))
+        else:
+            print("")
+    print("")
+
+    print_dl_close()
+
+    print("/* Implementation function. */")
+    print_dl_init()
+
+    print("")
+    # TODO(sergey): Get rid of hardcoded CUresult.
+    print("const char *%sErrorString(CUresult result) {" % (LIB.lower()))
+    print("  switch(result) {")
+    print("    case CUDA_SUCCESS: return \"No errors\";")
+
+    for error in ERRORS:
+        if error in CUDA_ERRORS:
+            message = CUDA_ERRORS[error]
+        else:
+            # No explicit message: fall back to the enumerator name with
+            # its first 11 characters removed (presumably the
+            # "CUDA_ERROR_" prefix -- confirm against ERRORS).
+            message = error[11:]
+        print("    case %s: return \"%s\";" % (error, message))
+
+    print("    default: return \"Unknown CUDA error value\";")
+    print("  }")
+    print("}")
+
+    from cuda_extra import extra_code
+    print(extra_code)
+
+if __name__ == "__main__":
+
+    # Diagnostics go to stderr: stdout carries the generated file and is
+    # redirected into it by cuew_gen.sh.
+    if len(sys.argv) != 2 and len(sys.argv) != 3:
+        sys.stderr.write(
+            "Usage: %s hdr|impl [/path/to/cuda/toolkit/include]\n" %
+            (sys.argv[0]))
+        sys.exit(1)
+
+    if len(sys.argv) == 3:
+        INCLUDE_DIR = sys.argv[2]
+
+    parse_files()
+
+    if sys.argv[1] == "hdr":
+        print_header()
+    elif sys.argv[1] == "impl":
+        print_implementation()
+    else:
+        sys.stderr.write("Unknown command %s\n" % (sys.argv[1]))
+        sys.exit(1)
diff --git a/extern/cuew/auto/cuew_gen.sh b/extern/cuew/auto/cuew_gen.sh
new file mode 100755
index 00000000000..b44987b801d
--- /dev/null
+++ b/extern/cuew/auto/cuew_gen.sh
@@ -0,0 +1,10 @@
+#!/bin/sh
+
+# This script invokes cuew_gen.py and updates the
+# header and source files in the repository.
+
+SCRIPT=$(realpath -s "$0")  # every expansion is quoted so paths and arguments containing spaces survive
+DIR=$(dirname "$SCRIPT")
+
+python "${DIR}/cuew_gen.py" hdr "$@" > "${DIR}/../include/cuew.h"
+python "${DIR}/cuew_gen.py" impl "$@" > "${DIR}/../src/cuew.c"
diff --git a/extern/cuew/auto/stdlib.h b/extern/cuew/auto/stdlib.h
new file mode 100644
index 00000000000..75976c8574f
--- /dev/null
+++ b/extern/cuew/auto/stdlib.h
@@ -0,0 +1,3 @@
+/* This file is needed to workaround issue with parsing system headers. */
+
+typedef long size_t;
diff --git a/extern/cuew/include/cuew.h b/extern/cuew/include/cuew.h
new file mode 100644
index 00000000000..fd03311ad41
--- /dev/null
+++ b/extern/cuew/include/cuew.h
@@ -0,0 +1,1138 @@
+/*
+ * Copyright 2011-2014 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License
+ */
+
+#ifndef __CUEW_H__
+#define __CUEW_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdlib.h>
+
+/* Defines. 
*/
+#define CUEW_VERSION_MAJOR 1
+#define CUEW_VERSION_MINOR 2
+
+#define CUDA_VERSION 6000
+#define CU_IPC_HANDLE_SIZE 64
+#define CU_MEMHOSTALLOC_PORTABLE 0x01
+#define CU_MEMHOSTALLOC_DEVICEMAP 0x02
+#define CU_MEMHOSTALLOC_WRITECOMBINED 0x04
+#define CU_MEMHOSTREGISTER_PORTABLE 0x01
+#define CU_MEMHOSTREGISTER_DEVICEMAP 0x02
+#define CUDA_ARRAY3D_LAYERED 0x01
+#define CUDA_ARRAY3D_2DARRAY 0x01
+#define CUDA_ARRAY3D_SURFACE_LDST 0x02
+#define CUDA_ARRAY3D_CUBEMAP 0x04
+#define CUDA_ARRAY3D_TEXTURE_GATHER 0x08
+#define CUDA_ARRAY3D_DEPTH_TEXTURE 0x10
+#define CU_TRSA_OVERRIDE_FORMAT 0x01
+#define CU_TRSF_READ_AS_INTEGER 0x01
+#define CU_TRSF_NORMALIZED_COORDINATES 0x02
+#define CU_TRSF_SRGB 0x10
+#define CU_LAUNCH_PARAM_END ((void*)0x00)
+#define CU_LAUNCH_PARAM_BUFFER_POINTER ((void*)0x01)
+#define CU_LAUNCH_PARAM_BUFFER_SIZE ((void*)0x02)
+/* Parenthesised so the negative literal stays one operand wherever the
+ * macro is expanded. */
+#define CU_PARAM_TR_DEFAULT (-1)
+#define CUDAGL_H
+
+/* Functions which changed 3.1 -> 3.2 for 64 bit stuff:
+ * the cuda library has both the old ones for compatibility and new
+ * ones with a _v2 suffix. The plain names below are mapped onto the
+ * _v2 symbols.
+ */
+#define cuDeviceTotalMem cuDeviceTotalMem_v2
+#define cuCtxCreate cuCtxCreate_v2
+#define cuModuleGetGlobal cuModuleGetGlobal_v2
+#define cuMemGetInfo cuMemGetInfo_v2
+#define cuMemAlloc cuMemAlloc_v2
+#define cuMemAllocPitch cuMemAllocPitch_v2
+#define cuMemFree cuMemFree_v2
+#define cuMemGetAddressRange cuMemGetAddressRange_v2
+#define cuMemAllocHost cuMemAllocHost_v2
+#define cuMemHostGetDevicePointer cuMemHostGetDevicePointer_v2
+#define cuMemcpyHtoD cuMemcpyHtoD_v2
+#define cuMemcpyDtoH cuMemcpyDtoH_v2
+#define cuMemcpyDtoD cuMemcpyDtoD_v2
+#define cuMemcpyDtoA cuMemcpyDtoA_v2
+#define cuMemcpyAtoD cuMemcpyAtoD_v2
+#define cuMemcpyHtoA cuMemcpyHtoA_v2
+#define cuMemcpyAtoH cuMemcpyAtoH_v2
+#define cuMemcpyAtoA cuMemcpyAtoA_v2
+#define cuMemcpyHtoAAsync cuMemcpyHtoAAsync_v2
+#define cuMemcpyAtoHAsync cuMemcpyAtoHAsync_v2
+#define cuMemcpy2D cuMemcpy2D_v2
+#define cuMemcpy2DUnaligned cuMemcpy2DUnaligned_v2
+#define cuMemcpy3D cuMemcpy3D_v2
+#define cuMemcpyHtoDAsync cuMemcpyHtoDAsync_v2
+#define cuMemcpyDtoHAsync cuMemcpyDtoHAsync_v2
+#define cuMemcpyDtoDAsync cuMemcpyDtoDAsync_v2
+#define cuMemcpy2DAsync cuMemcpy2DAsync_v2
+#define cuMemcpy3DAsync cuMemcpy3DAsync_v2
+#define cuMemsetD8 cuMemsetD8_v2
+#define cuMemsetD16 cuMemsetD16_v2
+#define cuMemsetD32 cuMemsetD32_v2
+#define cuMemsetD2D8 cuMemsetD2D8_v2
+#define cuMemsetD2D16 cuMemsetD2D16_v2
+#define cuMemsetD2D32 cuMemsetD2D32_v2
+#define cuArrayCreate cuArrayCreate_v2
+#define cuArrayGetDescriptor cuArrayGetDescriptor_v2
+#define cuArray3DCreate cuArray3DCreate_v2
+#define cuArray3DGetDescriptor cuArray3DGetDescriptor_v2
+#define cuTexRefSetAddress cuTexRefSetAddress_v2
+#define cuTexRefGetAddress cuTexRefGetAddress_v2
+#define cuGraphicsResourceGetMappedPointer cuGraphicsResourceGetMappedPointer_v2
+#define cuCtxDestroy cuCtxDestroy_v2
+#define cuCtxPopCurrent cuCtxPopCurrent_v2
+#define cuCtxPushCurrent cuCtxPushCurrent_v2
+#define cuStreamDestroy cuStreamDestroy_v2
+#define cuEventDestroy cuEventDestroy_v2
+#define cuTexRefSetAddress2D cuTexRefSetAddress2D_v2
+#define cuGLCtxCreate cuGLCtxCreate_v2
+#define cuGLMapBufferObject cuGLMapBufferObject_v2
+#define cuGLMapBufferObjectAsync cuGLMapBufferObjectAsync_v2
+
+/* Types. 
*/
+/* Device pointers are 64-bit only when one of the x86-64 macros below is
+ * defined; every other target gets the 32-bit variant. */
+#if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64)
+typedef unsigned long long CUdeviceptr;
+#else
+typedef unsigned int CUdeviceptr;
+#endif
+
+typedef int CUdevice;
+typedef struct CUctx_st* CUcontext;
+typedef struct CUmod_st* CUmodule;
+typedef struct CUfunc_st* CUfunction;
+typedef struct CUarray_st* CUarray;
+typedef struct CUmipmappedArray_st* CUmipmappedArray;
+typedef struct CUtexref_st* CUtexref;
+typedef struct CUsurfref_st* CUsurfref;
+typedef struct CUevent_st* CUevent;
+typedef struct CUstream_st* CUstream;
+typedef struct CUgraphicsResource_st* CUgraphicsResource;
+/* Texture and surface object handles are 64-bit in the CUDA driver API;
+ * a narrower type would be overrun when the driver writes a handle through
+ * a pointer to it. */
+typedef unsigned long long CUtexObject;
+typedef unsigned long long CUsurfObject;
+
+typedef struct CUuuid_st {
+  char bytes[16];
+} CUuuid;
+
+typedef struct CUipcEventHandle_st {
+  char reserved[CU_IPC_HANDLE_SIZE];
+} CUipcEventHandle;
+
+typedef struct CUipcMemHandle_st {
+  char reserved[CU_IPC_HANDLE_SIZE];
+} CUipcMemHandle;
+
+typedef enum CUipcMem_flags_enum {
+  CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS = 0x1,
+} CUipcMem_flags;
+
+typedef enum CUmemAttach_flags_enum {
+  CU_MEM_ATTACH_GLOBAL = 0x1,
+  CU_MEM_ATTACH_HOST = 0x2,
+  CU_MEM_ATTACH_SINGLE = 0x4,
+} CUmemAttach_flags;
+
+typedef enum CUctx_flags_enum {
+  CU_CTX_SCHED_AUTO = 0x00,
+  CU_CTX_SCHED_SPIN = 0x01,
+  CU_CTX_SCHED_YIELD = 0x02,
+  CU_CTX_SCHED_BLOCKING_SYNC = 0x04,
+  CU_CTX_BLOCKING_SYNC = 0x04,
+  CU_CTX_SCHED_MASK = 0x07,
+  CU_CTX_MAP_HOST = 0x08,
+  CU_CTX_LMEM_RESIZE_TO_MAX = 0x10,
+  CU_CTX_FLAGS_MASK = 0x1f,
+} CUctx_flags;
+
+typedef enum CUstream_flags_enum {
+  CU_STREAM_DEFAULT = 0x0,
+  CU_STREAM_NON_BLOCKING = 0x1,
+} CUstream_flags;
+
+typedef enum CUevent_flags_enum {
+  CU_EVENT_DEFAULT = 0x0,
+  CU_EVENT_BLOCKING_SYNC = 0x1,
+  CU_EVENT_DISABLE_TIMING = 0x2,
+  CU_EVENT_INTERPROCESS = 0x4,
+} CUevent_flags;
+
+typedef enum CUarray_format_enum {
+  CU_AD_FORMAT_UNSIGNED_INT8 = 0x01,
+  CU_AD_FORMAT_UNSIGNED_INT16 = 0x02,
+  CU_AD_FORMAT_UNSIGNED_INT32 = 0x03,
+  CU_AD_FORMAT_SIGNED_INT8 = 0x08,
+  CU_AD_FORMAT_SIGNED_INT16 = 0x09,
+  CU_AD_FORMAT_SIGNED_INT32 = 0x0a,
+  CU_AD_FORMAT_HALF = 0x10,
+  CU_AD_FORMAT_FLOAT = 0x20,
+} CUarray_format;
+
+typedef enum CUaddress_mode_enum {
+  CU_TR_ADDRESS_MODE_WRAP = 0,
+  CU_TR_ADDRESS_MODE_CLAMP = 1,
+  CU_TR_ADDRESS_MODE_MIRROR = 2,
+  CU_TR_ADDRESS_MODE_BORDER = 3,
+} CUaddress_mode;
+
+typedef enum CUfilter_mode_enum {
+  CU_TR_FILTER_MODE_POINT = 0,
+  CU_TR_FILTER_MODE_LINEAR = 1,
+} CUfilter_mode;
+
+typedef enum CUdevice_attribute_enum {
+  CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 1,
+  CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X = 2,
+  CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y = 3,
+  CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z = 4,
+  CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X = 5,
+  CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y = 6,
+  CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z = 7,
+  CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK = 8,
+  CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK = 8,
+  CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY = 9,
+  CU_DEVICE_ATTRIBUTE_WARP_SIZE = 10,
+  CU_DEVICE_ATTRIBUTE_MAX_PITCH = 11,
+  CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK = 12,
+  CU_DEVICE_ATTRIBUTE_REGISTERS_PER_BLOCK = 12,
+  CU_DEVICE_ATTRIBUTE_CLOCK_RATE = 13,
+  CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT = 14,
+  CU_DEVICE_ATTRIBUTE_GPU_OVERLAP = 15,
+  CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT = 16,
+  CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT = 17,
+  CU_DEVICE_ATTRIBUTE_INTEGRATED = 18,
+  CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY = 19,
+  CU_DEVICE_ATTRIBUTE_COMPUTE_MODE = 20,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH = 21,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH = 22,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT = 23,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH = 24,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT = 25,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH = 26,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH = 27,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT = 28,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS = 29,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_WIDTH = 27,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_HEIGHT = 28,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES = 29,
+  CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT = 30,
+  CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS = 31,
+  CU_DEVICE_ATTRIBUTE_ECC_ENABLED = 32,
+  CU_DEVICE_ATTRIBUTE_PCI_BUS_ID = 33,
+  CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID = 34,
+  CU_DEVICE_ATTRIBUTE_TCC_DRIVER = 35,
+  CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE = 36,
+  CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH = 37,
+  CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE = 38,
+  CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR = 39,
+  CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT = 40,
+  CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING = 41,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH = 42,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS = 43,
+  CU_DEVICE_ATTRIBUTE_CAN_TEX2D_GATHER = 44,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_WIDTH = 45,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_HEIGHT = 46,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE = 47,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE = 48,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE = 49,
+  CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID = 50,
+  CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT = 51,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH = 52,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH = 53,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS = 54,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH = 55,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH = 56,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT = 57,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH = 58,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT = 59,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH = 60,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH = 61,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS = 62,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH = 63,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT = 64,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS = 65,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH = 66,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH = 67,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS = 68,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH = 69,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH = 70,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT = 71,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH = 72,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH = 73,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT = 74,
+  CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR = 75,
+  CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR = 76,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH = 77,
+  CU_DEVICE_ATTRIBUTE_STREAM_PRIORITIES_SUPPORTED = 78,
+  CU_DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED = 79,
+  CU_DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED = 80,
+  CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR = 81,
+  CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR = 82,
+  CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY = 83,
+  CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD = 84,
+  CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID = 85,
+  CU_DEVICE_ATTRIBUTE_MAX,
+} CUdevice_attribute;
+
+typedef struct CUdevprop_st {
+  int maxThreadsPerBlock;
+  int maxThreadsDim[3];
+  int maxGridSize[3];
+  int sharedMemPerBlock;
+  int totalConstantMemory;
+  int SIMDWidth;
+  int memPitch;
+  int regsPerBlock;
+  int clockRate;
+  int textureAlign;
+} CUdevprop;
+
+typedef enum CUpointer_attribute_enum {
+  CU_POINTER_ATTRIBUTE_CONTEXT = 1,
+  CU_POINTER_ATTRIBUTE_MEMORY_TYPE = 2,
+  CU_POINTER_ATTRIBUTE_DEVICE_POINTER = 3,
+  CU_POINTER_ATTRIBUTE_HOST_POINTER = 4,
+  CU_POINTER_ATTRIBUTE_P2P_TOKENS = 5,
+  CU_POINTER_ATTRIBUTE_SYNC_MEMOPS = 6,
+  CU_POINTER_ATTRIBUTE_BUFFER_ID = 7,
+  CU_POINTER_ATTRIBUTE_IS_MANAGED = 8,
+} CUpointer_attribute;
+
+typedef enum CUfunction_attribute_enum {
+  CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 0,
+  CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES = 1,
+  CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES = 2,
+  CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES = 3,
+  CU_FUNC_ATTRIBUTE_NUM_REGS = 4,
+  CU_FUNC_ATTRIBUTE_PTX_VERSION = 5,
+  CU_FUNC_ATTRIBUTE_BINARY_VERSION = 6,
+  CU_FUNC_ATTRIBUTE_CACHE_MODE_CA = 7,
+  CU_FUNC_ATTRIBUTE_MAX,
+} CUfunction_attribute;
+
+typedef enum CUfunc_cache_enum {
+  CU_FUNC_CACHE_PREFER_NONE = 0x00,
+  CU_FUNC_CACHE_PREFER_SHARED = 0x01,
+  CU_FUNC_CACHE_PREFER_L1 = 0x02,
+  CU_FUNC_CACHE_PREFER_EQUAL = 0x03,
+} CUfunc_cache;
+
+typedef enum CUsharedconfig_enum {
+  CU_SHARED_MEM_CONFIG_DEFAULT_BANK_SIZE = 0x00,
+  CU_SHARED_MEM_CONFIG_FOUR_BYTE_BANK_SIZE = 0x01,
+  CU_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE = 0x02,
+} CUsharedconfig;
+
+typedef enum CUmemorytype_enum {
+  CU_MEMORYTYPE_HOST = 0x01,
+  CU_MEMORYTYPE_DEVICE = 0x02,
+  CU_MEMORYTYPE_ARRAY = 0x03,
+  CU_MEMORYTYPE_UNIFIED = 0x04,
+} CUmemorytype;
+
+typedef enum CUcomputemode_enum {
+  CU_COMPUTEMODE_DEFAULT = 0,
+  CU_COMPUTEMODE_EXCLUSIVE = 1,
+  CU_COMPUTEMODE_PROHIBITED = 2,
+  CU_COMPUTEMODE_EXCLUSIVE_PROCESS = 3,
+} CUcomputemode;
+
+typedef enum CUjit_option_enum {
+  CU_JIT_MAX_REGISTERS = 0,
+  CU_JIT_THREADS_PER_BLOCK,
+  CU_JIT_WALL_TIME,
+  CU_JIT_INFO_LOG_BUFFER,
+  CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES,
+  CU_JIT_ERROR_LOG_BUFFER,
+  CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES,
+  CU_JIT_OPTIMIZATION_LEVEL,
+  CU_JIT_TARGET_FROM_CUCONTEXT,
+  CU_JIT_TARGET,
+  CU_JIT_FALLBACK_STRATEGY,
+  CU_JIT_GENERATE_DEBUG_INFO,
+  CU_JIT_LOG_VERBOSE,
+  CU_JIT_GENERATE_LINE_INFO,
+  CU_JIT_CACHE_MODE,
+  CU_JIT_NUM_OPTIONS,
+} CUjit_option;
+
+typedef enum CUjit_target_enum {
+  CU_TARGET_COMPUTE_10 = 10,
+  CU_TARGET_COMPUTE_11 = 11,
+  CU_TARGET_COMPUTE_12 = 12,
+  CU_TARGET_COMPUTE_13 = 13,
+  CU_TARGET_COMPUTE_20 = 20,
+  CU_TARGET_COMPUTE_21 = 21,
+  CU_TARGET_COMPUTE_30 = 30,
+  CU_TARGET_COMPUTE_32 = 32,
+  CU_TARGET_COMPUTE_35 = 35,
+  CU_TARGET_COMPUTE_50 = 50,
+} CUjit_target;
+
+typedef enum CUjit_fallback_enum {
+  CU_PREFER_PTX = 0,
+  CU_PREFER_BINARY,
+} CUjit_fallback;
+
+typedef enum CUjit_cacheMode_enum {
+  CU_JIT_CACHE_OPTION_NONE = 0,
+  CU_JIT_CACHE_OPTION_CG,
+  CU_JIT_CACHE_OPTION_CA,
+} CUjit_cacheMode;
+
+typedef enum CUjitInputType_enum {
+  CU_JIT_INPUT_CUBIN = 0,
+  CU_JIT_INPUT_PTX,
+  CU_JIT_INPUT_FATBINARY,
+  CU_JIT_INPUT_OBJECT,
+  CU_JIT_INPUT_LIBRARY,
+  CU_JIT_NUM_INPUT_TYPES,
+} CUjitInputType;
+
+typedef struct CUlinkState_st* CUlinkState;
+
+typedef enum CUgraphicsRegisterFlags_enum {
+  CU_GRAPHICS_REGISTER_FLAGS_NONE = 0x00,
+  CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY = 0x01,
+  CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD = 0x02,
+  CU_GRAPHICS_REGISTER_FLAGS_SURFACE_LDST = 0x04,
+  CU_GRAPHICS_REGISTER_FLAGS_TEXTURE_GATHER = 0x08,
+} CUgraphicsRegisterFlags;
+
+typedef enum CUgraphicsMapResourceFlags_enum {
+  CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE = 0x00,
+  CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY = 0x01,
+  CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD = 0x02,
+} CUgraphicsMapResourceFlags;
+
+typedef enum CUarray_cubemap_face_enum {
+  CU_CUBEMAP_FACE_POSITIVE_X = 0x00,
+  CU_CUBEMAP_FACE_NEGATIVE_X = 0x01,
+  CU_CUBEMAP_FACE_POSITIVE_Y = 0x02,
+  CU_CUBEMAP_FACE_NEGATIVE_Y = 0x03,
+  CU_CUBEMAP_FACE_POSITIVE_Z = 0x04,
+  CU_CUBEMAP_FACE_NEGATIVE_Z = 0x05,
+} CUarray_cubemap_face;
+
+typedef enum CUlimit_enum {
+  CU_LIMIT_STACK_SIZE = 0x00,
+  CU_LIMIT_PRINTF_FIFO_SIZE = 0x01,
+  CU_LIMIT_MALLOC_HEAP_SIZE = 0x02,
+  CU_LIMIT_DEV_RUNTIME_SYNC_DEPTH = 0x03,
+  CU_LIMIT_DEV_RUNTIME_PENDING_LAUNCH_COUNT = 0x04,
+  CU_LIMIT_MAX,
+} CUlimit;
+
+typedef enum CUresourcetype_enum {
+  CU_RESOURCE_TYPE_ARRAY = 0x00,
+  CU_RESOURCE_TYPE_MIPMAPPED_ARRAY = 0x01,
+  CU_RESOURCE_TYPE_LINEAR = 0x02,
+  CU_RESOURCE_TYPE_PITCH2D = 0x03,
+} CUresourcetype;
+
+typedef enum cudaError_enum {
+  CUDA_SUCCESS = 0,
+  CUDA_ERROR_INVALID_VALUE = 1,
+  CUDA_ERROR_OUT_OF_MEMORY = 2,
+  CUDA_ERROR_NOT_INITIALIZED = 3,
+  CUDA_ERROR_DEINITIALIZED = 4,
+  CUDA_ERROR_PROFILER_DISABLED = 5,
+  CUDA_ERROR_PROFILER_NOT_INITIALIZED = 6,
+  CUDA_ERROR_PROFILER_ALREADY_STARTED = 7,
+  CUDA_ERROR_PROFILER_ALREADY_STOPPED = 8,
+  CUDA_ERROR_NO_DEVICE = 100,
+  CUDA_ERROR_INVALID_DEVICE = 101,
+  CUDA_ERROR_INVALID_IMAGE = 200,
+  CUDA_ERROR_INVALID_CONTEXT = 201,
+  CUDA_ERROR_CONTEXT_ALREADY_CURRENT = 202,
+  CUDA_ERROR_MAP_FAILED = 205,
+  CUDA_ERROR_UNMAP_FAILED = 206,
+  CUDA_ERROR_ARRAY_IS_MAPPED = 207,
+  CUDA_ERROR_ALREADY_MAPPED = 208,
+  CUDA_ERROR_NO_BINARY_FOR_GPU = 209,
+  CUDA_ERROR_ALREADY_ACQUIRED = 210,
+  CUDA_ERROR_NOT_MAPPED = 211,
+  CUDA_ERROR_NOT_MAPPED_AS_ARRAY = 212,
+  CUDA_ERROR_NOT_MAPPED_AS_POINTER = 213,
+  CUDA_ERROR_ECC_UNCORRECTABLE = 214,
+  CUDA_ERROR_UNSUPPORTED_LIMIT = 215,
+  CUDA_ERROR_CONTEXT_ALREADY_IN_USE = 216,
+  CUDA_ERROR_PEER_ACCESS_UNSUPPORTED = 217,
+  CUDA_ERROR_INVALID_PTX = 218,
+  CUDA_ERROR_INVALID_SOURCE = 300,
+  CUDA_ERROR_FILE_NOT_FOUND = 301,
+  CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND = 302,
+  CUDA_ERROR_SHARED_OBJECT_INIT_FAILED = 303,
+  CUDA_ERROR_OPERATING_SYSTEM = 304,
+  CUDA_ERROR_INVALID_HANDLE = 400,
+  CUDA_ERROR_NOT_FOUND = 500,
+  CUDA_ERROR_NOT_READY = 600,
+  CUDA_ERROR_ILLEGAL_ADDRESS = 700,
+  CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES = 701,
+  CUDA_ERROR_LAUNCH_TIMEOUT = 702,
+  CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING = 703,
+  CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED = 704,
+  CUDA_ERROR_PEER_ACCESS_NOT_ENABLED = 705,
+  CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE = 708,
+  CUDA_ERROR_CONTEXT_IS_DESTROYED = 709,
+  CUDA_ERROR_ASSERT = 710,
+  CUDA_ERROR_TOO_MANY_PEERS = 711,
+  CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED = 712,
+  CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED = 713,
+  CUDA_ERROR_HARDWARE_STACK_ERROR = 714,
+  CUDA_ERROR_ILLEGAL_INSTRUCTION = 715,
+  CUDA_ERROR_MISALIGNED_ADDRESS = 716,
+  CUDA_ERROR_INVALID_ADDRESS_SPACE = 717,
+  CUDA_ERROR_INVALID_PC = 718,
+  CUDA_ERROR_LAUNCH_FAILED = 719,
+  CUDA_ERROR_NOT_PERMITTED = 800,
+  CUDA_ERROR_NOT_SUPPORTED = 801,
+  CUDA_ERROR_UNKNOWN = 999,
+} CUresult;
+
+/* NOTE(review): the driver API declares the stream callback as a
+ * function-pointer type; it is kept as an opaque pointer here. */
+typedef void* CUstreamCallback;
+
+typedef struct CUDA_MEMCPY2D_st {
+  size_t srcXInBytes;
+  size_t srcY;
+  CUmemorytype srcMemoryType;
+  const void* srcHost;
+  CUdeviceptr srcDevice;
+  CUarray srcArray;
+  size_t srcPitch;
+  size_t dstXInBytes;
+  size_t dstY;
+  CUmemorytype dstMemoryType;
+  void* dstHost;
+  CUdeviceptr dstDevice;
+  CUarray dstArray;
+  size_t dstPitch;
+  size_t WidthInBytes;
+  size_t Height;
+} CUDA_MEMCPY2D;
+
+typedef struct CUDA_MEMCPY3D_st {
+  size_t srcXInBytes;
+  size_t srcY;
+  size_t srcZ;
+  size_t srcLOD;
+  CUmemorytype srcMemoryType;
+  const void* srcHost;
+  CUdeviceptr srcDevice;
+  CUarray srcArray;
+  void* reserved0;
+  size_t srcPitch;
+  size_t srcHeight;
+  size_t dstXInBytes;
+  size_t dstY;
+  size_t dstZ;
+  size_t dstLOD;
+  CUmemorytype dstMemoryType;
+  void* dstHost;
+  CUdeviceptr dstDevice;
+  CUarray dstArray;
+  void* reserved1;
+  size_t dstPitch;
+  size_t dstHeight;
+  size_t WidthInBytes;
+  size_t Height;
+  size_t Depth;
+} CUDA_MEMCPY3D;
+
+typedef struct CUDA_MEMCPY3D_PEER_st {
+  size_t srcXInBytes;
+  size_t srcY;
+  size_t srcZ;
+  size_t srcLOD;
+  CUmemorytype srcMemoryType;
+  const void* srcHost;
+  CUdeviceptr srcDevice;
+  CUarray srcArray;
+  CUcontext srcContext;
+  size_t srcPitch;
+  size_t srcHeight;
+  size_t dstXInBytes;
+  size_t dstY;
+  size_t dstZ;
+  size_t dstLOD;
+  CUmemorytype dstMemoryType;
+  void* dstHost;
+  CUdeviceptr dstDevice;
+  CUarray dstArray;
+  CUcontext dstContext;
+  size_t dstPitch;
+  size_t dstHeight;
+  size_t WidthInBytes;
+  size_t Height;
+  size_t Depth;
+} CUDA_MEMCPY3D_PEER;
+
+typedef struct CUDA_ARRAY_DESCRIPTOR_st {
+  size_t Width;
+  size_t Height;
+  CUarray_format Format;
+  unsigned NumChannels;
+} CUDA_ARRAY_DESCRIPTOR;
+
+typedef struct CUDA_ARRAY3D_DESCRIPTOR_st {
+  size_t Width;
+  size_t Height;
+  size_t Depth;
+  CUarray_format Format;
+  unsigned NumChannels;
+  unsigned Flags;
+} CUDA_ARRAY3D_DESCRIPTOR;
+
+typedef struct CUDA_RESOURCE_DESC_st {
+  CUresourcetype resType;
+  union {
+    struct {
+      CUarray hArray;
+    } array;
+    struct {
+      CUmipmappedArray hMipmappedArray;
+    } mipmap;
+    struct {
+      CUdeviceptr devPtr;
+      CUarray_format format;
+      unsigned numChannels;
+      size_t sizeInBytes;
+    } linear;
+    struct {
+      CUdeviceptr devPtr;
+      CUarray_format format;
+      unsigned numChannels;
+      size_t width;
+      size_t height;
+      size_t pitchInBytes;
+    } pitch2D;
+    struct {
+      int reserved[32];
+    } reserved;
+  } res;
+  unsigned flags;
+} CUDA_RESOURCE_DESC;
+
+typedef struct CUDA_TEXTURE_DESC_st {
+  CUaddress_mode addressMode[3];
+  CUfilter_mode filterMode;
+  unsigned flags;
+  unsigned maxAnisotropy;
+  CUfilter_mode mipmapFilterMode;
+  float mipmapLevelBias;
+  float minMipmapLevelClamp;
+  float maxMipmapLevelClamp;
+  int reserved[16];
+} CUDA_TEXTURE_DESC;
+
+typedef enum CUresourceViewFormat_enum {
+  CU_RES_VIEW_FORMAT_NONE = 0x00,
+  CU_RES_VIEW_FORMAT_UINT_1X8 = 0x01,
+  CU_RES_VIEW_FORMAT_UINT_2X8 = 0x02,
+  CU_RES_VIEW_FORMAT_UINT_4X8 = 0x03,
+  CU_RES_VIEW_FORMAT_SINT_1X8 = 0x04,
+  CU_RES_VIEW_FORMAT_SINT_2X8 = 0x05,
+  CU_RES_VIEW_FORMAT_SINT_4X8 = 0x06,
+  CU_RES_VIEW_FORMAT_UINT_1X16 = 0x07,
+  CU_RES_VIEW_FORMAT_UINT_2X16 = 0x08,
+  CU_RES_VIEW_FORMAT_UINT_4X16 = 0x09,
+  CU_RES_VIEW_FORMAT_SINT_1X16 = 0x0a,
+  CU_RES_VIEW_FORMAT_SINT_2X16 = 0x0b,
+  CU_RES_VIEW_FORMAT_SINT_4X16 = 0x0c,
+  CU_RES_VIEW_FORMAT_UINT_1X32 = 0x0d,
+  CU_RES_VIEW_FORMAT_UINT_2X32 = 0x0e,
+  CU_RES_VIEW_FORMAT_UINT_4X32 = 0x0f,
+  CU_RES_VIEW_FORMAT_SINT_1X32 = 0x10,
+  CU_RES_VIEW_FORMAT_SINT_2X32 = 0x11,
+  CU_RES_VIEW_FORMAT_SINT_4X32 = 0x12,
+  CU_RES_VIEW_FORMAT_FLOAT_1X16 = 0x13,
+  CU_RES_VIEW_FORMAT_FLOAT_2X16 = 0x14,
+  CU_RES_VIEW_FORMAT_FLOAT_4X16 = 0x15,
+  CU_RES_VIEW_FORMAT_FLOAT_1X32 = 0x16,
+  CU_RES_VIEW_FORMAT_FLOAT_2X32 = 0x17,
+  CU_RES_VIEW_FORMAT_FLOAT_4X32 = 0x18,
+  CU_RES_VIEW_FORMAT_UNSIGNED_BC1 = 0x19,
+  CU_RES_VIEW_FORMAT_UNSIGNED_BC2 = 0x1a,
+  CU_RES_VIEW_FORMAT_UNSIGNED_BC3 = 0x1b,
+  CU_RES_VIEW_FORMAT_UNSIGNED_BC4 = 0x1c,
+  CU_RES_VIEW_FORMAT_SIGNED_BC4 = 0x1d,
+  CU_RES_VIEW_FORMAT_UNSIGNED_BC5 = 0x1e,
+  CU_RES_VIEW_FORMAT_SIGNED_BC5 = 0x1f,
+  CU_RES_VIEW_FORMAT_UNSIGNED_BC6H = 0x20,
+  CU_RES_VIEW_FORMAT_SIGNED_BC6H = 0x21,
+  CU_RES_VIEW_FORMAT_UNSIGNED_BC7 = 0x22,
+} CUresourceViewFormat;
+
+typedef struct CUDA_RESOURCE_VIEW_DESC_st {
+  CUresourceViewFormat format;
+  size_t width;
+  size_t height;
+  size_t depth;
+  unsigned firstMipmapLevel;
+  unsigned lastMipmapLevel;
+  unsigned firstLayer;
+  unsigned lastLayer;
+  unsigned reserved[16];
+} CUDA_RESOURCE_VIEW_DESC;
+
+typedef struct CUDA_POINTER_ATTRIBUTE_P2P_TOKENS_st {
+  /* 64-bit in the CUDA driver API; the struct layout must match what the
+   * driver writes. */
+  unsigned long long p2pToken;
+  unsigned vaSpaceToken;
+} CUDA_POINTER_ATTRIBUTE_P2P_TOKENS;
+typedef unsigned GLenum;
+typedef unsigned GLuint;
+typedef int GLint;
+
+typedef enum CUGLDeviceList_enum {
+  CU_GL_DEVICE_LIST_ALL = 0x01,
+  CU_GL_DEVICE_LIST_CURRENT_FRAME = 0x02,
+  CU_GL_DEVICE_LIST_NEXT_FRAME = 0x03,
+} CUGLDeviceList;
+
+typedef enum CUGLmap_flags_enum {
+  CU_GL_MAP_RESOURCE_FLAGS_NONE = 0x00,
+  CU_GL_MAP_RESOURCE_FLAGS_READ_ONLY = 0x01,
+  CU_GL_MAP_RESOURCE_FLAGS_WRITE_DISCARD = 0x02,
+} CUGLmap_flags;
+
+#ifdef _WIN32
+#  define CUDAAPI __stdcall
+#  define CUDA_CB __stdcall
+#else
+#  define CUDAAPI
+#  define CUDA_CB
+#endif
+
+/* Function types. 
*/
+/* Each t<name> typedef is the function type of the library symbol <name>;
+ * the loader casts the looked-up address to (t<name> *). Out-parameters
+ * that return a pointer carry the extra level of indirection documented by
+ * the CUDA driver API (const char** pStr, void** pp, void** cubinOut,
+ * void** optionValues), and the 8/16-bit memset values use their
+ * documented unsigned char / unsigned short types. */
+typedef CUresult CUDAAPI tcuGetErrorString(CUresult error, const char** pStr);
+typedef CUresult CUDAAPI tcuGetErrorName(CUresult error, const char** pStr);
+typedef CUresult CUDAAPI tcuInit(unsigned Flags);
+typedef CUresult CUDAAPI tcuDriverGetVersion(int* driverVersion);
+typedef CUresult CUDAAPI tcuDeviceGet(CUdevice* device, int ordinal);
+typedef CUresult CUDAAPI tcuDeviceGetCount(int* count);
+typedef CUresult CUDAAPI tcuDeviceGetName(char* name, int len, CUdevice dev);
+typedef CUresult CUDAAPI tcuDeviceTotalMem_v2(size_t* bytes, CUdevice dev);
+typedef CUresult CUDAAPI tcuDeviceGetAttribute(int* pi, CUdevice_attribute attrib, CUdevice dev);
+typedef CUresult CUDAAPI tcuDeviceGetProperties(CUdevprop* prop, CUdevice dev);
+typedef CUresult CUDAAPI tcuDeviceComputeCapability(int* major, int* minor, CUdevice dev);
+typedef CUresult CUDAAPI tcuCtxCreate_v2(CUcontext* pctx, unsigned flags, CUdevice dev);
+typedef CUresult CUDAAPI tcuCtxDestroy_v2(CUcontext ctx);
+typedef CUresult CUDAAPI tcuCtxPushCurrent_v2(CUcontext ctx);
+typedef CUresult CUDAAPI tcuCtxPopCurrent_v2(CUcontext* pctx);
+typedef CUresult CUDAAPI tcuCtxSetCurrent(CUcontext ctx);
+typedef CUresult CUDAAPI tcuCtxGetCurrent(CUcontext* pctx);
+typedef CUresult CUDAAPI tcuCtxGetDevice(CUdevice* device);
+typedef CUresult CUDAAPI tcuCtxSynchronize(void);
+typedef CUresult CUDAAPI tcuCtxSetLimit(CUlimit limit, size_t value);
+typedef CUresult CUDAAPI tcuCtxGetLimit(size_t* pvalue, CUlimit limit);
+typedef CUresult CUDAAPI tcuCtxGetCacheConfig(CUfunc_cache* pconfig);
+typedef CUresult CUDAAPI tcuCtxSetCacheConfig(CUfunc_cache config);
+typedef CUresult CUDAAPI tcuCtxGetSharedMemConfig(CUsharedconfig* pConfig);
+typedef CUresult CUDAAPI tcuCtxSetSharedMemConfig(CUsharedconfig config);
+typedef CUresult CUDAAPI tcuCtxGetApiVersion(CUcontext ctx, unsigned* version);
+typedef CUresult CUDAAPI tcuCtxGetStreamPriorityRange(int* leastPriority, int* greatestPriority);
+typedef CUresult CUDAAPI tcuCtxAttach(CUcontext* pctx, unsigned flags);
+typedef CUresult CUDAAPI tcuCtxDetach(CUcontext ctx);
+typedef CUresult CUDAAPI tcuModuleLoad(CUmodule* module, const char* fname);
+typedef CUresult CUDAAPI tcuModuleLoadData(CUmodule* module, const void* image);
+typedef CUresult CUDAAPI tcuModuleLoadDataEx(CUmodule* module, const void* image, unsigned numOptions, CUjit_option* options, void** optionValues);
+typedef CUresult CUDAAPI tcuModuleLoadFatBinary(CUmodule* module, const void* fatCubin);
+typedef CUresult CUDAAPI tcuModuleUnload(CUmodule hmod);
+typedef CUresult CUDAAPI tcuModuleGetFunction(CUfunction* hfunc, CUmodule hmod, const char* name);
+typedef CUresult CUDAAPI tcuModuleGetGlobal_v2(CUdeviceptr* dptr, size_t* bytes, CUmodule hmod, const char* name);
+typedef CUresult CUDAAPI tcuModuleGetTexRef(CUtexref* pTexRef, CUmodule hmod, const char* name);
+typedef CUresult CUDAAPI tcuModuleGetSurfRef(CUsurfref* pSurfRef, CUmodule hmod, const char* name);
+typedef CUresult CUDAAPI tcuLinkCreate(unsigned numOptions, CUjit_option* options, void** optionValues, CUlinkState* stateOut);
+typedef CUresult CUDAAPI tcuLinkAddData(CUlinkState state, CUjitInputType type, void* data, size_t size, const char* name, unsigned numOptions, CUjit_option* options, void** optionValues);
+typedef CUresult CUDAAPI tcuLinkAddFile(CUlinkState state, CUjitInputType type, const char* path, unsigned numOptions, CUjit_option* options, void** optionValues);
+typedef CUresult CUDAAPI tcuLinkComplete(CUlinkState state, void** cubinOut, size_t* sizeOut);
+typedef CUresult CUDAAPI tcuLinkDestroy(CUlinkState state);
+typedef CUresult CUDAAPI tcuMemGetInfo_v2(size_t* free, size_t* total);
+typedef CUresult CUDAAPI tcuMemAlloc_v2(CUdeviceptr* dptr, size_t bytesize);
+typedef CUresult CUDAAPI tcuMemAllocPitch_v2(CUdeviceptr* dptr, size_t* pPitch, size_t WidthInBytes, size_t Height, unsigned ElementSizeBytes);
+typedef CUresult CUDAAPI tcuMemFree_v2(CUdeviceptr dptr);
+typedef CUresult CUDAAPI tcuMemGetAddressRange_v2(CUdeviceptr* pbase, size_t* psize, CUdeviceptr dptr);
+typedef CUresult CUDAAPI tcuMemAllocHost_v2(void** pp, size_t bytesize);
+typedef CUresult CUDAAPI tcuMemFreeHost(void* p);
+typedef CUresult CUDAAPI tcuMemHostAlloc(void** pp, size_t bytesize, unsigned Flags);
+typedef CUresult CUDAAPI tcuMemHostGetDevicePointer_v2(CUdeviceptr* pdptr, void* p, unsigned Flags);
+typedef CUresult CUDAAPI tcuMemHostGetFlags(unsigned* pFlags, void* p);
+typedef CUresult CUDAAPI tcuMemAllocManaged(CUdeviceptr* dptr, size_t bytesize, unsigned flags);
+typedef CUresult CUDAAPI tcuDeviceGetByPCIBusId(CUdevice* dev, const char* pciBusId);
+typedef CUresult CUDAAPI tcuDeviceGetPCIBusId(char* pciBusId, int len, CUdevice dev);
+typedef CUresult CUDAAPI tcuIpcGetEventHandle(CUipcEventHandle* pHandle, CUevent event);
+typedef CUresult CUDAAPI tcuIpcOpenEventHandle(CUevent* phEvent, CUipcEventHandle handle);
+typedef CUresult CUDAAPI tcuIpcGetMemHandle(CUipcMemHandle* pHandle, CUdeviceptr dptr);
+typedef CUresult CUDAAPI tcuIpcOpenMemHandle(CUdeviceptr* pdptr, CUipcMemHandle handle, unsigned Flags);
+typedef CUresult CUDAAPI tcuIpcCloseMemHandle(CUdeviceptr dptr);
+typedef CUresult CUDAAPI tcuMemHostRegister(void* p, size_t bytesize, unsigned Flags);
+typedef CUresult CUDAAPI tcuMemHostUnregister(void* p);
+typedef CUresult CUDAAPI tcuMemcpy(CUdeviceptr dst, CUdeviceptr src, size_t ByteCount);
+typedef CUresult CUDAAPI tcuMemcpyPeer(CUdeviceptr dstDevice, CUcontext dstContext, CUdeviceptr srcDevice, CUcontext srcContext, size_t ByteCount);
+typedef CUresult CUDAAPI tcuMemcpyHtoD_v2(CUdeviceptr dstDevice, const void* srcHost, size_t ByteCount);
+typedef CUresult CUDAAPI tcuMemcpyDtoH_v2(void* dstHost, CUdeviceptr srcDevice, size_t ByteCount);
+typedef CUresult CUDAAPI tcuMemcpyDtoD_v2(CUdeviceptr dstDevice, CUdeviceptr srcDevice, size_t ByteCount);
+typedef CUresult CUDAAPI tcuMemcpyDtoA_v2(CUarray dstArray, size_t dstOffset, CUdeviceptr srcDevice, size_t ByteCount);
+typedef CUresult CUDAAPI tcuMemcpyAtoD_v2(CUdeviceptr dstDevice, CUarray srcArray, size_t srcOffset, size_t ByteCount);
+typedef CUresult CUDAAPI tcuMemcpyHtoA_v2(CUarray dstArray, size_t dstOffset, const void* srcHost, size_t ByteCount);
+typedef CUresult CUDAAPI tcuMemcpyAtoH_v2(void* dstHost, CUarray srcArray, size_t srcOffset, size_t ByteCount);
+typedef CUresult CUDAAPI tcuMemcpyAtoA_v2(CUarray dstArray, size_t dstOffset, CUarray srcArray, size_t srcOffset, size_t ByteCount);
+typedef CUresult CUDAAPI tcuMemcpy2D_v2(const CUDA_MEMCPY2D* pCopy);
+typedef CUresult CUDAAPI tcuMemcpy2DUnaligned_v2(const CUDA_MEMCPY2D* pCopy);
+typedef CUresult CUDAAPI tcuMemcpy3D_v2(const CUDA_MEMCPY3D* pCopy);
+typedef CUresult CUDAAPI tcuMemcpy3DPeer(const CUDA_MEMCPY3D_PEER* pCopy);
+typedef CUresult CUDAAPI tcuMemcpyAsync(CUdeviceptr dst, CUdeviceptr src, size_t ByteCount, CUstream hStream);
+typedef CUresult CUDAAPI tcuMemcpyPeerAsync(CUdeviceptr dstDevice, CUcontext dstContext, CUdeviceptr srcDevice, CUcontext srcContext, size_t ByteCount, CUstream hStream);
+typedef CUresult CUDAAPI tcuMemcpyHtoDAsync_v2(CUdeviceptr dstDevice, const void* srcHost, size_t ByteCount, CUstream hStream);
+typedef CUresult CUDAAPI tcuMemcpyDtoHAsync_v2(void* dstHost, CUdeviceptr srcDevice, size_t ByteCount, CUstream hStream);
+typedef CUresult CUDAAPI tcuMemcpyDtoDAsync_v2(CUdeviceptr dstDevice, CUdeviceptr srcDevice, size_t ByteCount, CUstream hStream);
+typedef CUresult CUDAAPI tcuMemcpyHtoAAsync_v2(CUarray dstArray, size_t dstOffset, const void* srcHost, size_t ByteCount, CUstream hStream);
+typedef CUresult CUDAAPI tcuMemcpyAtoHAsync_v2(void* dstHost, CUarray srcArray, size_t srcOffset, size_t ByteCount, CUstream hStream);
+typedef CUresult CUDAAPI tcuMemcpy2DAsync_v2(const CUDA_MEMCPY2D* pCopy, CUstream hStream);
+typedef CUresult CUDAAPI tcuMemcpy3DAsync_v2(const CUDA_MEMCPY3D* pCopy, CUstream hStream);
+typedef CUresult CUDAAPI tcuMemcpy3DPeerAsync(const CUDA_MEMCPY3D_PEER* pCopy, CUstream hStream);
+typedef CUresult CUDAAPI tcuMemsetD8_v2(CUdeviceptr dstDevice, unsigned char uc, size_t N);
+typedef CUresult CUDAAPI tcuMemsetD16_v2(CUdeviceptr dstDevice, unsigned short us, size_t N);
+typedef CUresult CUDAAPI tcuMemsetD32_v2(CUdeviceptr dstDevice, unsigned ui, size_t N);
+typedef CUresult CUDAAPI tcuMemsetD2D8_v2(CUdeviceptr dstDevice, size_t dstPitch, unsigned char uc, size_t Width, size_t Height);
+typedef CUresult CUDAAPI tcuMemsetD2D16_v2(CUdeviceptr dstDevice, size_t dstPitch, unsigned short us, size_t Width, size_t Height);
+typedef CUresult CUDAAPI tcuMemsetD2D32_v2(CUdeviceptr dstDevice, size_t dstPitch, unsigned ui, size_t Width, size_t Height);
+typedef CUresult CUDAAPI tcuMemsetD8Async(CUdeviceptr dstDevice, unsigned char uc, size_t N, CUstream hStream);
+typedef CUresult CUDAAPI tcuMemsetD16Async(CUdeviceptr dstDevice, unsigned short us, size_t N, CUstream hStream);
+typedef CUresult CUDAAPI tcuMemsetD32Async(CUdeviceptr dstDevice, unsigned ui, size_t N, CUstream hStream);
+typedef CUresult CUDAAPI tcuMemsetD2D8Async(CUdeviceptr dstDevice, size_t dstPitch, unsigned char uc, size_t Width, size_t Height, CUstream hStream);
+typedef CUresult CUDAAPI tcuMemsetD2D16Async(CUdeviceptr dstDevice, size_t dstPitch, unsigned short us, size_t Width, size_t Height, CUstream hStream);
+typedef CUresult CUDAAPI tcuMemsetD2D32Async(CUdeviceptr dstDevice, size_t dstPitch, unsigned ui, size_t Width, size_t Height, CUstream hStream);
+typedef CUresult CUDAAPI tcuArrayCreate_v2(CUarray* pHandle, const CUDA_ARRAY_DESCRIPTOR* pAllocateArray);
+typedef CUresult CUDAAPI tcuArrayGetDescriptor_v2(CUDA_ARRAY_DESCRIPTOR* pArrayDescriptor, CUarray hArray);
+typedef CUresult CUDAAPI tcuArrayDestroy(CUarray hArray);
+typedef CUresult CUDAAPI tcuArray3DCreate_v2(CUarray* pHandle, const CUDA_ARRAY3D_DESCRIPTOR* pAllocateArray);
+typedef CUresult CUDAAPI tcuArray3DGetDescriptor_v2(CUDA_ARRAY3D_DESCRIPTOR* pArrayDescriptor, CUarray hArray);
+typedef CUresult CUDAAPI tcuMipmappedArrayCreate(CUmipmappedArray* 
pHandle, const CUDA_ARRAY3D_DESCRIPTOR* pMipmappedArrayDesc, unsigned numMipmapLevels); +typedef CUresult CUDAAPI tcuMipmappedArrayGetLevel(CUarray* pLevelArray, CUmipmappedArray hMipmappedArray, unsigned level); +typedef CUresult CUDAAPI tcuMipmappedArrayDestroy(CUmipmappedArray hMipmappedArray); +typedef CUresult CUDAAPI tcuPointerGetAttribute(void* data, CUpointer_attribute attribute, CUdeviceptr ptr); +typedef CUresult CUDAAPI tcuPointerSetAttribute(const void* value, CUpointer_attribute attribute, CUdeviceptr ptr); +typedef CUresult CUDAAPI tcuStreamCreate(CUstream* phStream, unsigned Flags); +typedef CUresult CUDAAPI tcuStreamCreateWithPriority(CUstream* phStream, unsigned flags, int priority); +typedef CUresult CUDAAPI tcuStreamGetPriority(CUstream hStream, int* priority); +typedef CUresult CUDAAPI tcuStreamGetFlags(CUstream hStream, unsigned* flags); +typedef CUresult CUDAAPI tcuStreamWaitEvent(CUstream hStream, CUevent hEvent, unsigned Flags); +typedef CUresult CUDAAPI tcuStreamAddCallback(CUstream hStream, CUstreamCallback callback, void* userData, unsigned flags); +typedef CUresult CUDAAPI tcuStreamAttachMemAsync(CUstream hStream, CUdeviceptr dptr, size_t length, unsigned flags); +typedef CUresult CUDAAPI tcuStreamQuery(CUstream hStream); +typedef CUresult CUDAAPI tcuStreamSynchronize(CUstream hStream); +typedef CUresult CUDAAPI tcuStreamDestroy_v2(CUstream hStream); +typedef CUresult CUDAAPI tcuEventCreate(CUevent* phEvent, unsigned Flags); +typedef CUresult CUDAAPI tcuEventRecord(CUevent hEvent, CUstream hStream); +typedef CUresult CUDAAPI tcuEventQuery(CUevent hEvent); +typedef CUresult CUDAAPI tcuEventSynchronize(CUevent hEvent); +typedef CUresult CUDAAPI tcuEventDestroy_v2(CUevent hEvent); +typedef CUresult CUDAAPI tcuEventElapsedTime(float* pMilliseconds, CUevent hStart, CUevent hEnd); +typedef CUresult CUDAAPI tcuFuncGetAttribute(int* pi, CUfunction_attribute attrib, CUfunction hfunc); +typedef CUresult CUDAAPI tcuFuncSetCacheConfig(CUfunction 
hfunc, CUfunc_cache config); +typedef CUresult CUDAAPI tcuFuncSetSharedMemConfig(CUfunction hfunc, CUsharedconfig config); +typedef CUresult CUDAAPI tcuLaunchKernel(CUfunction f, unsigned gridDimX, unsigned gridDimY, unsigned gridDimZ, unsigned blockDimX, unsigned blockDimY, unsigned blockDimZ, unsigned sharedMemBytes, CUstream hStream, void* kernelParams, void* extra); +typedef CUresult CUDAAPI tcuFuncSetBlockShape(CUfunction hfunc, int x, int y, int z); +typedef CUresult CUDAAPI tcuFuncSetSharedSize(CUfunction hfunc, unsigned bytes); +typedef CUresult CUDAAPI tcuParamSetSize(CUfunction hfunc, unsigned numbytes); +typedef CUresult CUDAAPI tcuParamSeti(CUfunction hfunc, int offset, unsigned value); +typedef CUresult CUDAAPI tcuParamSetf(CUfunction hfunc, int offset, float value); +typedef CUresult CUDAAPI tcuParamSetv(CUfunction hfunc, int offset, void* ptr, unsigned numbytes); +typedef CUresult CUDAAPI tcuLaunch(CUfunction f); +typedef CUresult CUDAAPI tcuLaunchGrid(CUfunction f, int grid_width, int grid_height); +typedef CUresult CUDAAPI tcuLaunchGridAsync(CUfunction f, int grid_width, int grid_height, CUstream hStream); +typedef CUresult CUDAAPI tcuParamSetTexRef(CUfunction hfunc, int texunit, CUtexref hTexRef); +typedef CUresult CUDAAPI tcuTexRefSetArray(CUtexref hTexRef, CUarray hArray, unsigned Flags); +typedef CUresult CUDAAPI tcuTexRefSetMipmappedArray(CUtexref hTexRef, CUmipmappedArray hMipmappedArray, unsigned Flags); +typedef CUresult CUDAAPI tcuTexRefSetAddress_v2(size_t* ByteOffset, CUtexref hTexRef, CUdeviceptr dptr, size_t bytes); +typedef CUresult CUDAAPI tcuTexRefSetAddress2D_v3(CUtexref hTexRef, const CUDA_ARRAY_DESCRIPTOR* desc, CUdeviceptr dptr, size_t Pitch); +typedef CUresult CUDAAPI tcuTexRefSetFormat(CUtexref hTexRef, CUarray_format fmt, int NumPackedComponents); +typedef CUresult CUDAAPI tcuTexRefSetAddressMode(CUtexref hTexRef, int dim, CUaddress_mode am); +typedef CUresult CUDAAPI tcuTexRefSetFilterMode(CUtexref hTexRef, CUfilter_mode 
fm); +typedef CUresult CUDAAPI tcuTexRefSetMipmapFilterMode(CUtexref hTexRef, CUfilter_mode fm); +typedef CUresult CUDAAPI tcuTexRefSetMipmapLevelBias(CUtexref hTexRef, float bias); +typedef CUresult CUDAAPI tcuTexRefSetMipmapLevelClamp(CUtexref hTexRef, float minMipmapLevelClamp, float maxMipmapLevelClamp); +typedef CUresult CUDAAPI tcuTexRefSetMaxAnisotropy(CUtexref hTexRef, unsigned maxAniso); +typedef CUresult CUDAAPI tcuTexRefSetFlags(CUtexref hTexRef, unsigned Flags); +typedef CUresult CUDAAPI tcuTexRefGetAddress_v2(CUdeviceptr* pdptr, CUtexref hTexRef); +typedef CUresult CUDAAPI tcuTexRefGetArray(CUarray* phArray, CUtexref hTexRef); +typedef CUresult CUDAAPI tcuTexRefGetMipmappedArray(CUmipmappedArray* phMipmappedArray, CUtexref hTexRef); +typedef CUresult CUDAAPI tcuTexRefGetAddressMode(CUaddress_mode* pam, CUtexref hTexRef, int dim); +typedef CUresult CUDAAPI tcuTexRefGetFilterMode(CUfilter_mode* pfm, CUtexref hTexRef); +typedef CUresult CUDAAPI tcuTexRefGetFormat(CUarray_format* pFormat, int* pNumChannels, CUtexref hTexRef); +typedef CUresult CUDAAPI tcuTexRefGetMipmapFilterMode(CUfilter_mode* pfm, CUtexref hTexRef); +typedef CUresult CUDAAPI tcuTexRefGetMipmapLevelBias(float* pbias, CUtexref hTexRef); +typedef CUresult CUDAAPI tcuTexRefGetMipmapLevelClamp(float* pminMipmapLevelClamp, float* pmaxMipmapLevelClamp, CUtexref hTexRef); +typedef CUresult CUDAAPI tcuTexRefGetMaxAnisotropy(int* pmaxAniso, CUtexref hTexRef); +typedef CUresult CUDAAPI tcuTexRefGetFlags(unsigned* pFlags, CUtexref hTexRef); +typedef CUresult CUDAAPI tcuTexRefCreate(CUtexref* pTexRef); +typedef CUresult CUDAAPI tcuTexRefDestroy(CUtexref hTexRef); +typedef CUresult CUDAAPI tcuSurfRefSetArray(CUsurfref hSurfRef, CUarray hArray, unsigned Flags); +typedef CUresult CUDAAPI tcuSurfRefGetArray(CUarray* phArray, CUsurfref hSurfRef); +typedef CUresult CUDAAPI tcuTexObjectCreate(CUtexObject* pTexObject, const CUDA_RESOURCE_DESC* pResDesc, const CUDA_TEXTURE_DESC* pTexDesc, const 
CUDA_RESOURCE_VIEW_DESC* pResViewDesc); +typedef CUresult CUDAAPI tcuTexObjectDestroy(CUtexObject texObject); +typedef CUresult CUDAAPI tcuTexObjectGetResourceDesc(CUDA_RESOURCE_DESC* pResDesc, CUtexObject texObject); +typedef CUresult CUDAAPI tcuTexObjectGetTextureDesc(CUDA_TEXTURE_DESC* pTexDesc, CUtexObject texObject); +typedef CUresult CUDAAPI tcuTexObjectGetResourceViewDesc(CUDA_RESOURCE_VIEW_DESC* pResViewDesc, CUtexObject texObject); +typedef CUresult CUDAAPI tcuSurfObjectCreate(CUsurfObject* pSurfObject, const CUDA_RESOURCE_DESC* pResDesc); +typedef CUresult CUDAAPI tcuSurfObjectDestroy(CUsurfObject surfObject); +typedef CUresult CUDAAPI tcuSurfObjectGetResourceDesc(CUDA_RESOURCE_DESC* pResDesc, CUsurfObject surfObject); +typedef CUresult CUDAAPI tcuDeviceCanAccessPeer(int* canAccessPeer, CUdevice dev, CUdevice peerDev); +typedef CUresult CUDAAPI tcuCtxEnablePeerAccess(CUcontext peerContext, unsigned Flags); +typedef CUresult CUDAAPI tcuCtxDisablePeerAccess(CUcontext peerContext); +typedef CUresult CUDAAPI tcuGraphicsUnregisterResource(CUgraphicsResource resource); +typedef CUresult CUDAAPI tcuGraphicsSubResourceGetMappedArray(CUarray* pArray, CUgraphicsResource resource, unsigned arrayIndex, unsigned mipLevel); +typedef CUresult CUDAAPI tcuGraphicsResourceGetMappedMipmappedArray(CUmipmappedArray* pMipmappedArray, CUgraphicsResource resource); +typedef CUresult CUDAAPI tcuGraphicsResourceGetMappedPointer_v2(CUdeviceptr* pDevPtr, size_t* pSize, CUgraphicsResource resource); +typedef CUresult CUDAAPI tcuGraphicsResourceSetMapFlags(CUgraphicsResource resource, unsigned flags); +typedef CUresult CUDAAPI tcuGraphicsMapResources(unsigned count, CUgraphicsResource* resources, CUstream hStream); +typedef CUresult CUDAAPI tcuGraphicsUnmapResources(unsigned count, CUgraphicsResource* resources, CUstream hStream); +typedef CUresult CUDAAPI tcuGetExportTable(const void* ppExportTable, const CUuuid* pExportTableId); + +typedef CUresult CUDAAPI 
tcuGraphicsGLRegisterBuffer(CUgraphicsResource* pCudaResource, GLuint buffer, unsigned Flags); +typedef CUresult CUDAAPI tcuGraphicsGLRegisterImage(CUgraphicsResource* pCudaResource, GLuint image, GLenum target, unsigned Flags); +typedef CUresult CUDAAPI tcuGLGetDevices(unsigned* pCudaDeviceCount, CUdevice* pCudaDevices, unsigned cudaDeviceCount, CUGLDeviceList deviceList); +typedef CUresult CUDAAPI tcuGLCtxCreate_v2(CUcontext* pCtx, unsigned Flags, CUdevice device); +typedef CUresult CUDAAPI tcuGLInit(void); +typedef CUresult CUDAAPI tcuGLRegisterBufferObject(GLuint buffer); +typedef CUresult CUDAAPI tcuGLMapBufferObject_v2(CUdeviceptr* dptr, size_t* size, GLuint buffer); +typedef CUresult CUDAAPI tcuGLUnmapBufferObject(GLuint buffer); +typedef CUresult CUDAAPI tcuGLUnregisterBufferObject(GLuint buffer); +typedef CUresult CUDAAPI tcuGLSetBufferObjectMapFlags(GLuint buffer, unsigned Flags); +typedef CUresult CUDAAPI tcuGLMapBufferObjectAsync_v2(CUdeviceptr* dptr, size_t* size, GLuint buffer, CUstream hStream); +typedef CUresult CUDAAPI tcuGLUnmapBufferObjectAsync(GLuint buffer, CUstream hStream); + + +/* Function declarations. 
*/ +extern tcuGetErrorString *cuGetErrorString; +extern tcuGetErrorName *cuGetErrorName; +extern tcuInit *cuInit; +extern tcuDriverGetVersion *cuDriverGetVersion; +extern tcuDeviceGet *cuDeviceGet; +extern tcuDeviceGetCount *cuDeviceGetCount; +extern tcuDeviceGetName *cuDeviceGetName; +extern tcuDeviceTotalMem_v2 *cuDeviceTotalMem_v2; +extern tcuDeviceGetAttribute *cuDeviceGetAttribute; +extern tcuDeviceGetProperties *cuDeviceGetProperties; +extern tcuDeviceComputeCapability *cuDeviceComputeCapability; +extern tcuCtxCreate_v2 *cuCtxCreate_v2; +extern tcuCtxDestroy_v2 *cuCtxDestroy_v2; +extern tcuCtxPushCurrent_v2 *cuCtxPushCurrent_v2; +extern tcuCtxPopCurrent_v2 *cuCtxPopCurrent_v2; +extern tcuCtxSetCurrent *cuCtxSetCurrent; +extern tcuCtxGetCurrent *cuCtxGetCurrent; +extern tcuCtxGetDevice *cuCtxGetDevice; +extern tcuCtxSynchronize *cuCtxSynchronize; +extern tcuCtxSetLimit *cuCtxSetLimit; +extern tcuCtxGetLimit *cuCtxGetLimit; +extern tcuCtxGetCacheConfig *cuCtxGetCacheConfig; +extern tcuCtxSetCacheConfig *cuCtxSetCacheConfig; +extern tcuCtxGetSharedMemConfig *cuCtxGetSharedMemConfig; +extern tcuCtxSetSharedMemConfig *cuCtxSetSharedMemConfig; +extern tcuCtxGetApiVersion *cuCtxGetApiVersion; +extern tcuCtxGetStreamPriorityRange *cuCtxGetStreamPriorityRange; +extern tcuCtxAttach *cuCtxAttach; +extern tcuCtxDetach *cuCtxDetach; +extern tcuModuleLoad *cuModuleLoad; +extern tcuModuleLoadData *cuModuleLoadData; +extern tcuModuleLoadDataEx *cuModuleLoadDataEx; +extern tcuModuleLoadFatBinary *cuModuleLoadFatBinary; +extern tcuModuleUnload *cuModuleUnload; +extern tcuModuleGetFunction *cuModuleGetFunction; +extern tcuModuleGetGlobal_v2 *cuModuleGetGlobal_v2; +extern tcuModuleGetTexRef *cuModuleGetTexRef; +extern tcuModuleGetSurfRef *cuModuleGetSurfRef; +extern tcuLinkCreate *cuLinkCreate; +extern tcuLinkAddData *cuLinkAddData; +extern tcuLinkAddFile *cuLinkAddFile; +extern tcuLinkComplete *cuLinkComplete; +extern tcuLinkDestroy *cuLinkDestroy; +extern tcuMemGetInfo_v2 
*cuMemGetInfo_v2; +extern tcuMemAlloc_v2 *cuMemAlloc_v2; +extern tcuMemAllocPitch_v2 *cuMemAllocPitch_v2; +extern tcuMemFree_v2 *cuMemFree_v2; +extern tcuMemGetAddressRange_v2 *cuMemGetAddressRange_v2; +extern tcuMemAllocHost_v2 *cuMemAllocHost_v2; +extern tcuMemFreeHost *cuMemFreeHost; +extern tcuMemHostAlloc *cuMemHostAlloc; +extern tcuMemHostGetDevicePointer_v2 *cuMemHostGetDevicePointer_v2; +extern tcuMemHostGetFlags *cuMemHostGetFlags; +extern tcuMemAllocManaged *cuMemAllocManaged; +extern tcuDeviceGetByPCIBusId *cuDeviceGetByPCIBusId; +extern tcuDeviceGetPCIBusId *cuDeviceGetPCIBusId; +extern tcuIpcGetEventHandle *cuIpcGetEventHandle; +extern tcuIpcOpenEventHandle *cuIpcOpenEventHandle; +extern tcuIpcGetMemHandle *cuIpcGetMemHandle; +extern tcuIpcOpenMemHandle *cuIpcOpenMemHandle; +extern tcuIpcCloseMemHandle *cuIpcCloseMemHandle; +extern tcuMemHostRegister *cuMemHostRegister; +extern tcuMemHostUnregister *cuMemHostUnregister; +extern tcuMemcpy *cuMemcpy; +extern tcuMemcpyPeer *cuMemcpyPeer; +extern tcuMemcpyHtoD_v2 *cuMemcpyHtoD_v2; +extern tcuMemcpyDtoH_v2 *cuMemcpyDtoH_v2; +extern tcuMemcpyDtoD_v2 *cuMemcpyDtoD_v2; +extern tcuMemcpyDtoA_v2 *cuMemcpyDtoA_v2; +extern tcuMemcpyAtoD_v2 *cuMemcpyAtoD_v2; +extern tcuMemcpyHtoA_v2 *cuMemcpyHtoA_v2; +extern tcuMemcpyAtoH_v2 *cuMemcpyAtoH_v2; +extern tcuMemcpyAtoA_v2 *cuMemcpyAtoA_v2; +extern tcuMemcpy2D_v2 *cuMemcpy2D_v2; +extern tcuMemcpy2DUnaligned_v2 *cuMemcpy2DUnaligned_v2; +extern tcuMemcpy3D_v2 *cuMemcpy3D_v2; +extern tcuMemcpy3DPeer *cuMemcpy3DPeer; +extern tcuMemcpyAsync *cuMemcpyAsync; +extern tcuMemcpyPeerAsync *cuMemcpyPeerAsync; +extern tcuMemcpyHtoDAsync_v2 *cuMemcpyHtoDAsync_v2; +extern tcuMemcpyDtoHAsync_v2 *cuMemcpyDtoHAsync_v2; +extern tcuMemcpyDtoDAsync_v2 *cuMemcpyDtoDAsync_v2; +extern tcuMemcpyHtoAAsync_v2 *cuMemcpyHtoAAsync_v2; +extern tcuMemcpyAtoHAsync_v2 *cuMemcpyAtoHAsync_v2; +extern tcuMemcpy2DAsync_v2 *cuMemcpy2DAsync_v2; +extern tcuMemcpy3DAsync_v2 *cuMemcpy3DAsync_v2; +extern 
tcuMemcpy3DPeerAsync *cuMemcpy3DPeerAsync; +extern tcuMemsetD8_v2 *cuMemsetD8_v2; +extern tcuMemsetD16_v2 *cuMemsetD16_v2; +extern tcuMemsetD32_v2 *cuMemsetD32_v2; +extern tcuMemsetD2D8_v2 *cuMemsetD2D8_v2; +extern tcuMemsetD2D16_v2 *cuMemsetD2D16_v2; +extern tcuMemsetD2D32_v2 *cuMemsetD2D32_v2; +extern tcuMemsetD8Async *cuMemsetD8Async; +extern tcuMemsetD16Async *cuMemsetD16Async; +extern tcuMemsetD32Async *cuMemsetD32Async; +extern tcuMemsetD2D8Async *cuMemsetD2D8Async; +extern tcuMemsetD2D16Async *cuMemsetD2D16Async; +extern tcuMemsetD2D32Async *cuMemsetD2D32Async; +extern tcuArrayCreate_v2 *cuArrayCreate_v2; +extern tcuArrayGetDescriptor_v2 *cuArrayGetDescriptor_v2; +extern tcuArrayDestroy *cuArrayDestroy; +extern tcuArray3DCreate_v2 *cuArray3DCreate_v2; +extern tcuArray3DGetDescriptor_v2 *cuArray3DGetDescriptor_v2; +extern tcuMipmappedArrayCreate *cuMipmappedArrayCreate; +extern tcuMipmappedArrayGetLevel *cuMipmappedArrayGetLevel; +extern tcuMipmappedArrayDestroy *cuMipmappedArrayDestroy; +extern tcuPointerGetAttribute *cuPointerGetAttribute; +extern tcuPointerSetAttribute *cuPointerSetAttribute; +extern tcuStreamCreate *cuStreamCreate; +extern tcuStreamCreateWithPriority *cuStreamCreateWithPriority; +extern tcuStreamGetPriority *cuStreamGetPriority; +extern tcuStreamGetFlags *cuStreamGetFlags; +extern tcuStreamWaitEvent *cuStreamWaitEvent; +extern tcuStreamAddCallback *cuStreamAddCallback; +extern tcuStreamAttachMemAsync *cuStreamAttachMemAsync; +extern tcuStreamQuery *cuStreamQuery; +extern tcuStreamSynchronize *cuStreamSynchronize; +extern tcuStreamDestroy_v2 *cuStreamDestroy_v2; +extern tcuEventCreate *cuEventCreate; +extern tcuEventRecord *cuEventRecord; +extern tcuEventQuery *cuEventQuery; +extern tcuEventSynchronize *cuEventSynchronize; +extern tcuEventDestroy_v2 *cuEventDestroy_v2; +extern tcuEventElapsedTime *cuEventElapsedTime; +extern tcuFuncGetAttribute *cuFuncGetAttribute; +extern tcuFuncSetCacheConfig *cuFuncSetCacheConfig; +extern 
tcuFuncSetSharedMemConfig *cuFuncSetSharedMemConfig; +extern tcuLaunchKernel *cuLaunchKernel; +extern tcuFuncSetBlockShape *cuFuncSetBlockShape; +extern tcuFuncSetSharedSize *cuFuncSetSharedSize; +extern tcuParamSetSize *cuParamSetSize; +extern tcuParamSeti *cuParamSeti; +extern tcuParamSetf *cuParamSetf; +extern tcuParamSetv *cuParamSetv; +extern tcuLaunch *cuLaunch; +extern tcuLaunchGrid *cuLaunchGrid; +extern tcuLaunchGridAsync *cuLaunchGridAsync; +extern tcuParamSetTexRef *cuParamSetTexRef; +extern tcuTexRefSetArray *cuTexRefSetArray; +extern tcuTexRefSetMipmappedArray *cuTexRefSetMipmappedArray; +extern tcuTexRefSetAddress_v2 *cuTexRefSetAddress_v2; +extern tcuTexRefSetAddress2D_v3 *cuTexRefSetAddress2D_v3; +extern tcuTexRefSetFormat *cuTexRefSetFormat; +extern tcuTexRefSetAddressMode *cuTexRefSetAddressMode; +extern tcuTexRefSetFilterMode *cuTexRefSetFilterMode; +extern tcuTexRefSetMipmapFilterMode *cuTexRefSetMipmapFilterMode; +extern tcuTexRefSetMipmapLevelBias *cuTexRefSetMipmapLevelBias; +extern tcuTexRefSetMipmapLevelClamp *cuTexRefSetMipmapLevelClamp; +extern tcuTexRefSetMaxAnisotropy *cuTexRefSetMaxAnisotropy; +extern tcuTexRefSetFlags *cuTexRefSetFlags; +extern tcuTexRefGetAddress_v2 *cuTexRefGetAddress_v2; +extern tcuTexRefGetArray *cuTexRefGetArray; +extern tcuTexRefGetMipmappedArray *cuTexRefGetMipmappedArray; +extern tcuTexRefGetAddressMode *cuTexRefGetAddressMode; +extern tcuTexRefGetFilterMode *cuTexRefGetFilterMode; +extern tcuTexRefGetFormat *cuTexRefGetFormat; +extern tcuTexRefGetMipmapFilterMode *cuTexRefGetMipmapFilterMode; +extern tcuTexRefGetMipmapLevelBias *cuTexRefGetMipmapLevelBias; +extern tcuTexRefGetMipmapLevelClamp *cuTexRefGetMipmapLevelClamp; +extern tcuTexRefGetMaxAnisotropy *cuTexRefGetMaxAnisotropy; +extern tcuTexRefGetFlags *cuTexRefGetFlags; +extern tcuTexRefCreate *cuTexRefCreate; +extern tcuTexRefDestroy *cuTexRefDestroy; +extern tcuSurfRefSetArray *cuSurfRefSetArray; +extern tcuSurfRefGetArray *cuSurfRefGetArray; +extern 
tcuTexObjectCreate *cuTexObjectCreate; +extern tcuTexObjectDestroy *cuTexObjectDestroy; +extern tcuTexObjectGetResourceDesc *cuTexObjectGetResourceDesc; +extern tcuTexObjectGetTextureDesc *cuTexObjectGetTextureDesc; +extern tcuTexObjectGetResourceViewDesc *cuTexObjectGetResourceViewDesc; +extern tcuSurfObjectCreate *cuSurfObjectCreate; +extern tcuSurfObjectDestroy *cuSurfObjectDestroy; +extern tcuSurfObjectGetResourceDesc *cuSurfObjectGetResourceDesc; +extern tcuDeviceCanAccessPeer *cuDeviceCanAccessPeer; +extern tcuCtxEnablePeerAccess *cuCtxEnablePeerAccess; +extern tcuCtxDisablePeerAccess *cuCtxDisablePeerAccess; +extern tcuGraphicsUnregisterResource *cuGraphicsUnregisterResource; +extern tcuGraphicsSubResourceGetMappedArray *cuGraphicsSubResourceGetMappedArray; +extern tcuGraphicsResourceGetMappedMipmappedArray *cuGraphicsResourceGetMappedMipmappedArray; +extern tcuGraphicsResourceGetMappedPointer_v2 *cuGraphicsResourceGetMappedPointer_v2; +extern tcuGraphicsResourceSetMapFlags *cuGraphicsResourceSetMapFlags; +extern tcuGraphicsMapResources *cuGraphicsMapResources; +extern tcuGraphicsUnmapResources *cuGraphicsUnmapResources; +extern tcuGetExportTable *cuGetExportTable; + +extern tcuGraphicsGLRegisterBuffer *cuGraphicsGLRegisterBuffer; +extern tcuGraphicsGLRegisterImage *cuGraphicsGLRegisterImage; +extern tcuGLGetDevices *cuGLGetDevices; +extern tcuGLCtxCreate_v2 *cuGLCtxCreate_v2; +extern tcuGLInit *cuGLInit; +extern tcuGLRegisterBufferObject *cuGLRegisterBufferObject; +extern tcuGLMapBufferObject_v2 *cuGLMapBufferObject_v2; +extern tcuGLUnmapBufferObject *cuGLUnmapBufferObject; +extern tcuGLUnregisterBufferObject *cuGLUnregisterBufferObject; +extern tcuGLSetBufferObjectMapFlags *cuGLSetBufferObjectMapFlags; +extern tcuGLMapBufferObjectAsync_v2 *cuGLMapBufferObjectAsync_v2; +extern tcuGLUnmapBufferObjectAsync *cuGLUnmapBufferObjectAsync; + +/* Status codes returned by cuewInit(). */ +enum { + CUEW_SUCCESS = 0, /* No error. */ + CUEW_ERROR_OPEN_FAILED = -1, /* The driver library could not be opened, or it reports a version older than 4.0. */ + CUEW_ERROR_ATEXIT_FAILED = -2, /* The library-unload handler could not be registered with atexit(). */ +}; +/* Open the CUDA driver library and resolve the cu* function pointers declared above. Only the first call does the work; later calls return the status cached from that first call. */ +int cuewInit(void); 
+const char *cuewErrorString(CUresult result); +const char *cuewCompilerPath(void); +int cuewCompilerVersion(void); + +#ifdef __cplusplus +} +#endif + +#endif /* __CUEW_H__ */ diff --git a/extern/cuew/src/cuew.c b/extern/cuew/src/cuew.c new file mode 100644 index 00000000000..35ffca3991f --- /dev/null +++ b/extern/cuew/src/cuew.c @@ -0,0 +1,710 @@ +/* + * Copyright 2011-2014 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License + */ + +#ifdef _MSC_VER +# define snprintf _snprintf +# define popen _popen +# define pclose _pclose +# define _CRT_SECURE_NO_WARNINGS +#endif + +#include <cuew.h> +#include <assert.h> +#include <stdio.h> +#include <string.h> +#include <sys/stat.h> + +#ifdef _WIN32 +# define WIN32_LEAN_AND_MEAN +# define VC_EXTRALEAN +# include <windows.h> + +/* Utility macros. 
*/ + +typedef HMODULE DynamicLibrary; + +/* The path is a narrow (char) string, so call the ANSI entry point explicitly: plain LoadLibrary resolves to LoadLibraryW when UNICODE is defined. */ +# define dynamic_library_open(path) LoadLibraryA(path) +# define dynamic_library_close(lib) FreeLibrary(lib) +# define dynamic_library_find(lib, symbol) GetProcAddress(lib, symbol) +#else +# include <dlfcn.h> + +typedef void* DynamicLibrary; + +# define dynamic_library_open(path) dlopen(path, RTLD_NOW) +# define dynamic_library_close(lib) dlclose(lib) +# define dynamic_library_find(lib, symbol) dlsym(lib, symbol) +#endif + +/* Look up the symbol called `name` in `lib` and store it in the global function pointer of the same name. The result may be NULL; the caller is expected to test it. Wrapped in do/while(0) so the macro behaves as a single statement. */ +#define CUDA_LIBRARY_FIND_CHECKED(name) \ + do { \ + name = (t##name *)dynamic_library_find(lib, #name); \ + } while (0) + +/* Same lookup, but a missing symbol trips assert(); nothing is checked when NDEBUG is defined. */ +#define CUDA_LIBRARY_FIND(name) \ + do { \ + name = (t##name *)dynamic_library_find(lib, #name); \ + assert(name); \ + } while (0) + +/* Handle of the driver library opened by cuewInit(); NULL while nothing is loaded. */ +static DynamicLibrary lib; + +/* Function definitions. */ +tcuGetErrorString *cuGetErrorString; +tcuGetErrorName *cuGetErrorName; +tcuInit *cuInit; +tcuDriverGetVersion *cuDriverGetVersion; +tcuDeviceGet *cuDeviceGet; +tcuDeviceGetCount *cuDeviceGetCount; +tcuDeviceGetName *cuDeviceGetName; +tcuDeviceTotalMem_v2 *cuDeviceTotalMem_v2; +tcuDeviceGetAttribute *cuDeviceGetAttribute; +tcuDeviceGetProperties *cuDeviceGetProperties; +tcuDeviceComputeCapability *cuDeviceComputeCapability; +tcuCtxCreate_v2 *cuCtxCreate_v2; +tcuCtxDestroy_v2 *cuCtxDestroy_v2; +tcuCtxPushCurrent_v2 *cuCtxPushCurrent_v2; +tcuCtxPopCurrent_v2 *cuCtxPopCurrent_v2; +tcuCtxSetCurrent *cuCtxSetCurrent; +tcuCtxGetCurrent *cuCtxGetCurrent; +tcuCtxGetDevice *cuCtxGetDevice; +tcuCtxSynchronize *cuCtxSynchronize; +tcuCtxSetLimit *cuCtxSetLimit; +tcuCtxGetLimit *cuCtxGetLimit; +tcuCtxGetCacheConfig *cuCtxGetCacheConfig; +tcuCtxSetCacheConfig *cuCtxSetCacheConfig; +tcuCtxGetSharedMemConfig *cuCtxGetSharedMemConfig; +tcuCtxSetSharedMemConfig *cuCtxSetSharedMemConfig; +tcuCtxGetApiVersion *cuCtxGetApiVersion; +tcuCtxGetStreamPriorityRange *cuCtxGetStreamPriorityRange; +tcuCtxAttach *cuCtxAttach; +tcuCtxDetach *cuCtxDetach; +tcuModuleLoad *cuModuleLoad; +tcuModuleLoadData *cuModuleLoadData; +tcuModuleLoadDataEx *cuModuleLoadDataEx; 
+tcuModuleLoadFatBinary *cuModuleLoadFatBinary; +tcuModuleUnload *cuModuleUnload; +tcuModuleGetFunction *cuModuleGetFunction; +tcuModuleGetGlobal_v2 *cuModuleGetGlobal_v2; +tcuModuleGetTexRef *cuModuleGetTexRef; +tcuModuleGetSurfRef *cuModuleGetSurfRef; +tcuLinkCreate *cuLinkCreate; +tcuLinkAddData *cuLinkAddData; +tcuLinkAddFile *cuLinkAddFile; +tcuLinkComplete *cuLinkComplete; +tcuLinkDestroy *cuLinkDestroy; +tcuMemGetInfo_v2 *cuMemGetInfo_v2; +tcuMemAlloc_v2 *cuMemAlloc_v2; +tcuMemAllocPitch_v2 *cuMemAllocPitch_v2; +tcuMemFree_v2 *cuMemFree_v2; +tcuMemGetAddressRange_v2 *cuMemGetAddressRange_v2; +tcuMemAllocHost_v2 *cuMemAllocHost_v2; +tcuMemFreeHost *cuMemFreeHost; +tcuMemHostAlloc *cuMemHostAlloc; +tcuMemHostGetDevicePointer_v2 *cuMemHostGetDevicePointer_v2; +tcuMemHostGetFlags *cuMemHostGetFlags; +tcuMemAllocManaged *cuMemAllocManaged; +tcuDeviceGetByPCIBusId *cuDeviceGetByPCIBusId; +tcuDeviceGetPCIBusId *cuDeviceGetPCIBusId; +tcuIpcGetEventHandle *cuIpcGetEventHandle; +tcuIpcOpenEventHandle *cuIpcOpenEventHandle; +tcuIpcGetMemHandle *cuIpcGetMemHandle; +tcuIpcOpenMemHandle *cuIpcOpenMemHandle; +tcuIpcCloseMemHandle *cuIpcCloseMemHandle; +tcuMemHostRegister *cuMemHostRegister; +tcuMemHostUnregister *cuMemHostUnregister; +tcuMemcpy *cuMemcpy; +tcuMemcpyPeer *cuMemcpyPeer; +tcuMemcpyHtoD_v2 *cuMemcpyHtoD_v2; +tcuMemcpyDtoH_v2 *cuMemcpyDtoH_v2; +tcuMemcpyDtoD_v2 *cuMemcpyDtoD_v2; +tcuMemcpyDtoA_v2 *cuMemcpyDtoA_v2; +tcuMemcpyAtoD_v2 *cuMemcpyAtoD_v2; +tcuMemcpyHtoA_v2 *cuMemcpyHtoA_v2; +tcuMemcpyAtoH_v2 *cuMemcpyAtoH_v2; +tcuMemcpyAtoA_v2 *cuMemcpyAtoA_v2; +tcuMemcpy2D_v2 *cuMemcpy2D_v2; +tcuMemcpy2DUnaligned_v2 *cuMemcpy2DUnaligned_v2; +tcuMemcpy3D_v2 *cuMemcpy3D_v2; +tcuMemcpy3DPeer *cuMemcpy3DPeer; +tcuMemcpyAsync *cuMemcpyAsync; +tcuMemcpyPeerAsync *cuMemcpyPeerAsync; +tcuMemcpyHtoDAsync_v2 *cuMemcpyHtoDAsync_v2; +tcuMemcpyDtoHAsync_v2 *cuMemcpyDtoHAsync_v2; +tcuMemcpyDtoDAsync_v2 *cuMemcpyDtoDAsync_v2; +tcuMemcpyHtoAAsync_v2 *cuMemcpyHtoAAsync_v2; 
+tcuMemcpyAtoHAsync_v2 *cuMemcpyAtoHAsync_v2; +tcuMemcpy2DAsync_v2 *cuMemcpy2DAsync_v2; +tcuMemcpy3DAsync_v2 *cuMemcpy3DAsync_v2; +tcuMemcpy3DPeerAsync *cuMemcpy3DPeerAsync; +tcuMemsetD8_v2 *cuMemsetD8_v2; +tcuMemsetD16_v2 *cuMemsetD16_v2; +tcuMemsetD32_v2 *cuMemsetD32_v2; +tcuMemsetD2D8_v2 *cuMemsetD2D8_v2; +tcuMemsetD2D16_v2 *cuMemsetD2D16_v2; +tcuMemsetD2D32_v2 *cuMemsetD2D32_v2; +tcuMemsetD8Async *cuMemsetD8Async; +tcuMemsetD16Async *cuMemsetD16Async; +tcuMemsetD32Async *cuMemsetD32Async; +tcuMemsetD2D8Async *cuMemsetD2D8Async; +tcuMemsetD2D16Async *cuMemsetD2D16Async; +tcuMemsetD2D32Async *cuMemsetD2D32Async; +tcuArrayCreate_v2 *cuArrayCreate_v2; +tcuArrayGetDescriptor_v2 *cuArrayGetDescriptor_v2; +tcuArrayDestroy *cuArrayDestroy; +tcuArray3DCreate_v2 *cuArray3DCreate_v2; +tcuArray3DGetDescriptor_v2 *cuArray3DGetDescriptor_v2; +tcuMipmappedArrayCreate *cuMipmappedArrayCreate; +tcuMipmappedArrayGetLevel *cuMipmappedArrayGetLevel; +tcuMipmappedArrayDestroy *cuMipmappedArrayDestroy; +tcuPointerGetAttribute *cuPointerGetAttribute; +tcuPointerSetAttribute *cuPointerSetAttribute; +tcuStreamCreate *cuStreamCreate; +tcuStreamCreateWithPriority *cuStreamCreateWithPriority; +tcuStreamGetPriority *cuStreamGetPriority; +tcuStreamGetFlags *cuStreamGetFlags; +tcuStreamWaitEvent *cuStreamWaitEvent; +tcuStreamAddCallback *cuStreamAddCallback; +tcuStreamAttachMemAsync *cuStreamAttachMemAsync; +tcuStreamQuery *cuStreamQuery; +tcuStreamSynchronize *cuStreamSynchronize; +tcuStreamDestroy_v2 *cuStreamDestroy_v2; +tcuEventCreate *cuEventCreate; +tcuEventRecord *cuEventRecord; +tcuEventQuery *cuEventQuery; +tcuEventSynchronize *cuEventSynchronize; +tcuEventDestroy_v2 *cuEventDestroy_v2; +tcuEventElapsedTime *cuEventElapsedTime; +tcuFuncGetAttribute *cuFuncGetAttribute; +tcuFuncSetCacheConfig *cuFuncSetCacheConfig; +tcuFuncSetSharedMemConfig *cuFuncSetSharedMemConfig; +tcuLaunchKernel *cuLaunchKernel; +tcuFuncSetBlockShape *cuFuncSetBlockShape; +tcuFuncSetSharedSize 
*cuFuncSetSharedSize; +tcuParamSetSize *cuParamSetSize; +tcuParamSeti *cuParamSeti; +tcuParamSetf *cuParamSetf; +tcuParamSetv *cuParamSetv; +tcuLaunch *cuLaunch; +tcuLaunchGrid *cuLaunchGrid; +tcuLaunchGridAsync *cuLaunchGridAsync; +tcuParamSetTexRef *cuParamSetTexRef; +tcuTexRefSetArray *cuTexRefSetArray; +tcuTexRefSetMipmappedArray *cuTexRefSetMipmappedArray; +tcuTexRefSetAddress_v2 *cuTexRefSetAddress_v2; +tcuTexRefSetAddress2D_v3 *cuTexRefSetAddress2D_v3; +tcuTexRefSetFormat *cuTexRefSetFormat; +tcuTexRefSetAddressMode *cuTexRefSetAddressMode; +tcuTexRefSetFilterMode *cuTexRefSetFilterMode; +tcuTexRefSetMipmapFilterMode *cuTexRefSetMipmapFilterMode; +tcuTexRefSetMipmapLevelBias *cuTexRefSetMipmapLevelBias; +tcuTexRefSetMipmapLevelClamp *cuTexRefSetMipmapLevelClamp; +tcuTexRefSetMaxAnisotropy *cuTexRefSetMaxAnisotropy; +tcuTexRefSetFlags *cuTexRefSetFlags; +tcuTexRefGetAddress_v2 *cuTexRefGetAddress_v2; +tcuTexRefGetArray *cuTexRefGetArray; +tcuTexRefGetMipmappedArray *cuTexRefGetMipmappedArray; +tcuTexRefGetAddressMode *cuTexRefGetAddressMode; +tcuTexRefGetFilterMode *cuTexRefGetFilterMode; +tcuTexRefGetFormat *cuTexRefGetFormat; +tcuTexRefGetMipmapFilterMode *cuTexRefGetMipmapFilterMode; +tcuTexRefGetMipmapLevelBias *cuTexRefGetMipmapLevelBias; +tcuTexRefGetMipmapLevelClamp *cuTexRefGetMipmapLevelClamp; +tcuTexRefGetMaxAnisotropy *cuTexRefGetMaxAnisotropy; +tcuTexRefGetFlags *cuTexRefGetFlags; +tcuTexRefCreate *cuTexRefCreate; +tcuTexRefDestroy *cuTexRefDestroy; +tcuSurfRefSetArray *cuSurfRefSetArray; +tcuSurfRefGetArray *cuSurfRefGetArray; +tcuTexObjectCreate *cuTexObjectCreate; +tcuTexObjectDestroy *cuTexObjectDestroy; +tcuTexObjectGetResourceDesc *cuTexObjectGetResourceDesc; +tcuTexObjectGetTextureDesc *cuTexObjectGetTextureDesc; +tcuTexObjectGetResourceViewDesc *cuTexObjectGetResourceViewDesc; +tcuSurfObjectCreate *cuSurfObjectCreate; +tcuSurfObjectDestroy *cuSurfObjectDestroy; +tcuSurfObjectGetResourceDesc *cuSurfObjectGetResourceDesc; 
+tcuDeviceCanAccessPeer *cuDeviceCanAccessPeer; +tcuCtxEnablePeerAccess *cuCtxEnablePeerAccess; +tcuCtxDisablePeerAccess *cuCtxDisablePeerAccess; +tcuGraphicsUnregisterResource *cuGraphicsUnregisterResource; +tcuGraphicsSubResourceGetMappedArray *cuGraphicsSubResourceGetMappedArray; +tcuGraphicsResourceGetMappedMipmappedArray *cuGraphicsResourceGetMappedMipmappedArray; +tcuGraphicsResourceGetMappedPointer_v2 *cuGraphicsResourceGetMappedPointer_v2; +tcuGraphicsResourceSetMapFlags *cuGraphicsResourceSetMapFlags; +tcuGraphicsMapResources *cuGraphicsMapResources; +tcuGraphicsUnmapResources *cuGraphicsUnmapResources; +tcuGetExportTable *cuGetExportTable; + +tcuGraphicsGLRegisterBuffer *cuGraphicsGLRegisterBuffer; +tcuGraphicsGLRegisterImage *cuGraphicsGLRegisterImage; +tcuGLGetDevices *cuGLGetDevices; +tcuGLCtxCreate_v2 *cuGLCtxCreate_v2; +tcuGLInit *cuGLInit; +tcuGLRegisterBufferObject *cuGLRegisterBufferObject; +tcuGLMapBufferObject_v2 *cuGLMapBufferObject_v2; +tcuGLUnmapBufferObject *cuGLUnmapBufferObject; +tcuGLUnregisterBufferObject *cuGLUnregisterBufferObject; +tcuGLSetBufferObjectMapFlags *cuGLSetBufferObjectMapFlags; +tcuGLMapBufferObjectAsync_v2 *cuGLMapBufferObjectAsync_v2; +tcuGLUnmapBufferObjectAsync *cuGLUnmapBufferObjectAsync; + +/* Exit handler: release the driver library if it is still loaded. */ +static void cuewExit(void) { + if (lib == NULL) { + return; + } + /* The result of closing the library is deliberately not checked. */ + dynamic_library_close(lib); + lib = NULL; +} + +/* Implementation function. */ +int cuewInit(void) { + /* Library paths. */ +#ifdef _WIN32 + /* Expected in c:/windows/system or similar, no path needed. */ + const char *path = "nvcuda.dll"; +#elif defined(__APPLE__) + /* Default installation path. 
*/ + const char *path = "/usr/local/cuda/lib/libcuda.dylib"; +#else + const char *path = "libcuda.so"; +#endif + static int initialized = 0; + static int result = 0; + int error, driver_version; + + if (initialized) { + return result; + } + + initialized = 1; + + error = atexit(cuewExit); + if (error) { + result = CUEW_ERROR_ATEXIT_FAILED; + return result; + } + + /* Load library. */ + lib = dynamic_library_open(path); + + if (lib == NULL) { + result = CUEW_ERROR_OPEN_FAILED; + return result; + } + + /* Detect driver version. */ + driver_version = 1000; + + CUDA_LIBRARY_FIND_CHECKED(cuDriverGetVersion); + if (cuDriverGetVersion) { + cuDriverGetVersion(&driver_version); + } + + /* We require version 4.0. */ + if (driver_version < 4000) { + result = CUEW_ERROR_OPEN_FAILED; + return result; + } + /* Fetch all function pointers. */ + CUDA_LIBRARY_FIND(cuGetErrorString); + CUDA_LIBRARY_FIND(cuGetErrorName); + CUDA_LIBRARY_FIND(cuInit); + CUDA_LIBRARY_FIND(cuDriverGetVersion); + CUDA_LIBRARY_FIND(cuDeviceGet); + CUDA_LIBRARY_FIND(cuDeviceGetCount); + CUDA_LIBRARY_FIND(cuDeviceGetName); + CUDA_LIBRARY_FIND(cuDeviceTotalMem_v2); + CUDA_LIBRARY_FIND(cuDeviceGetAttribute); + CUDA_LIBRARY_FIND(cuDeviceGetProperties); + CUDA_LIBRARY_FIND(cuDeviceComputeCapability); + CUDA_LIBRARY_FIND(cuCtxCreate_v2); + CUDA_LIBRARY_FIND(cuCtxDestroy_v2); + CUDA_LIBRARY_FIND(cuCtxPushCurrent_v2); + CUDA_LIBRARY_FIND(cuCtxPopCurrent_v2); + CUDA_LIBRARY_FIND(cuCtxSetCurrent); + CUDA_LIBRARY_FIND(cuCtxGetCurrent); + CUDA_LIBRARY_FIND(cuCtxGetDevice); + CUDA_LIBRARY_FIND(cuCtxSynchronize); + CUDA_LIBRARY_FIND(cuCtxSetLimit); + CUDA_LIBRARY_FIND(cuCtxGetLimit); + CUDA_LIBRARY_FIND(cuCtxGetCacheConfig); + CUDA_LIBRARY_FIND(cuCtxSetCacheConfig); + CUDA_LIBRARY_FIND(cuCtxGetSharedMemConfig); + CUDA_LIBRARY_FIND(cuCtxSetSharedMemConfig); + CUDA_LIBRARY_FIND(cuCtxGetApiVersion); + CUDA_LIBRARY_FIND(cuCtxGetStreamPriorityRange); + CUDA_LIBRARY_FIND(cuCtxAttach); + CUDA_LIBRARY_FIND(cuCtxDetach); + 
CUDA_LIBRARY_FIND(cuModuleLoad); + CUDA_LIBRARY_FIND(cuModuleLoadData); + CUDA_LIBRARY_FIND(cuModuleLoadDataEx); + CUDA_LIBRARY_FIND(cuModuleLoadFatBinary); + CUDA_LIBRARY_FIND(cuModuleUnload); + CUDA_LIBRARY_FIND(cuModuleGetFunction); + CUDA_LIBRARY_FIND(cuModuleGetGlobal_v2); + CUDA_LIBRARY_FIND(cuModuleGetTexRef); + CUDA_LIBRARY_FIND(cuModuleGetSurfRef); + CUDA_LIBRARY_FIND(cuLinkCreate); + CUDA_LIBRARY_FIND(cuLinkAddData); + CUDA_LIBRARY_FIND(cuLinkAddFile); + CUDA_LIBRARY_FIND(cuLinkComplete); + CUDA_LIBRARY_FIND(cuLinkDestroy); + CUDA_LIBRARY_FIND(cuMemGetInfo_v2); + CUDA_LIBRARY_FIND(cuMemAlloc_v2); + CUDA_LIBRARY_FIND(cuMemAllocPitch_v2); + CUDA_LIBRARY_FIND(cuMemFree_v2); + CUDA_LIBRARY_FIND(cuMemGetAddressRange_v2); + CUDA_LIBRARY_FIND(cuMemAllocHost_v2); + CUDA_LIBRARY_FIND(cuMemFreeHost); + CUDA_LIBRARY_FIND(cuMemHostAlloc); + CUDA_LIBRARY_FIND(cuMemHostGetDevicePointer_v2); + CUDA_LIBRARY_FIND(cuMemHostGetFlags); + CUDA_LIBRARY_FIND(cuMemAllocManaged); + CUDA_LIBRARY_FIND(cuDeviceGetByPCIBusId); + CUDA_LIBRARY_FIND(cuDeviceGetPCIBusId); + CUDA_LIBRARY_FIND(cuIpcGetEventHandle); + CUDA_LIBRARY_FIND(cuIpcOpenEventHandle); + CUDA_LIBRARY_FIND(cuIpcGetMemHandle); + CUDA_LIBRARY_FIND(cuIpcOpenMemHandle); + CUDA_LIBRARY_FIND(cuIpcCloseMemHandle); + CUDA_LIBRARY_FIND(cuMemHostRegister); + CUDA_LIBRARY_FIND(cuMemHostUnregister); + CUDA_LIBRARY_FIND(cuMemcpy); + CUDA_LIBRARY_FIND(cuMemcpyPeer); + CUDA_LIBRARY_FIND(cuMemcpyHtoD_v2); + CUDA_LIBRARY_FIND(cuMemcpyDtoH_v2); + CUDA_LIBRARY_FIND(cuMemcpyDtoD_v2); + CUDA_LIBRARY_FIND(cuMemcpyDtoA_v2); + CUDA_LIBRARY_FIND(cuMemcpyAtoD_v2); + CUDA_LIBRARY_FIND(cuMemcpyHtoA_v2); + CUDA_LIBRARY_FIND(cuMemcpyAtoH_v2); + CUDA_LIBRARY_FIND(cuMemcpyAtoA_v2); + CUDA_LIBRARY_FIND(cuMemcpy2D_v2); + CUDA_LIBRARY_FIND(cuMemcpy2DUnaligned_v2); + CUDA_LIBRARY_FIND(cuMemcpy3D_v2); + CUDA_LIBRARY_FIND(cuMemcpy3DPeer); + CUDA_LIBRARY_FIND(cuMemcpyAsync); + CUDA_LIBRARY_FIND(cuMemcpyPeerAsync); + CUDA_LIBRARY_FIND(cuMemcpyHtoDAsync_v2); 
+ CUDA_LIBRARY_FIND(cuMemcpyDtoHAsync_v2); + CUDA_LIBRARY_FIND(cuMemcpyDtoDAsync_v2); + CUDA_LIBRARY_FIND(cuMemcpyHtoAAsync_v2); + CUDA_LIBRARY_FIND(cuMemcpyAtoHAsync_v2); + CUDA_LIBRARY_FIND(cuMemcpy2DAsync_v2); + CUDA_LIBRARY_FIND(cuMemcpy3DAsync_v2); + CUDA_LIBRARY_FIND(cuMemcpy3DPeerAsync); + CUDA_LIBRARY_FIND(cuMemsetD8_v2); + CUDA_LIBRARY_FIND(cuMemsetD16_v2); + CUDA_LIBRARY_FIND(cuMemsetD32_v2); + CUDA_LIBRARY_FIND(cuMemsetD2D8_v2); + CUDA_LIBRARY_FIND(cuMemsetD2D16_v2); + CUDA_LIBRARY_FIND(cuMemsetD2D32_v2); + CUDA_LIBRARY_FIND(cuMemsetD8Async); + CUDA_LIBRARY_FIND(cuMemsetD16Async); + CUDA_LIBRARY_FIND(cuMemsetD32Async); + CUDA_LIBRARY_FIND(cuMemsetD2D8Async); + CUDA_LIBRARY_FIND(cuMemsetD2D16Async); + CUDA_LIBRARY_FIND(cuMemsetD2D32Async); + CUDA_LIBRARY_FIND(cuArrayCreate_v2); + CUDA_LIBRARY_FIND(cuArrayGetDescriptor_v2); + CUDA_LIBRARY_FIND(cuArrayDestroy); + CUDA_LIBRARY_FIND(cuArray3DCreate_v2); + CUDA_LIBRARY_FIND(cuArray3DGetDescriptor_v2); + CUDA_LIBRARY_FIND(cuMipmappedArrayCreate); + CUDA_LIBRARY_FIND(cuMipmappedArrayGetLevel); + CUDA_LIBRARY_FIND(cuMipmappedArrayDestroy); + CUDA_LIBRARY_FIND(cuPointerGetAttribute); + CUDA_LIBRARY_FIND(cuPointerSetAttribute); + CUDA_LIBRARY_FIND(cuStreamCreate); + CUDA_LIBRARY_FIND(cuStreamCreateWithPriority); + CUDA_LIBRARY_FIND(cuStreamGetPriority); + CUDA_LIBRARY_FIND(cuStreamGetFlags); + CUDA_LIBRARY_FIND(cuStreamWaitEvent); + CUDA_LIBRARY_FIND(cuStreamAddCallback); + CUDA_LIBRARY_FIND(cuStreamAttachMemAsync); + CUDA_LIBRARY_FIND(cuStreamQuery); + CUDA_LIBRARY_FIND(cuStreamSynchronize); + CUDA_LIBRARY_FIND(cuStreamDestroy_v2); + CUDA_LIBRARY_FIND(cuEventCreate); + CUDA_LIBRARY_FIND(cuEventRecord); + CUDA_LIBRARY_FIND(cuEventQuery); + CUDA_LIBRARY_FIND(cuEventSynchronize); + CUDA_LIBRARY_FIND(cuEventDestroy_v2); + CUDA_LIBRARY_FIND(cuEventElapsedTime); + CUDA_LIBRARY_FIND(cuFuncGetAttribute); + CUDA_LIBRARY_FIND(cuFuncSetCacheConfig); + CUDA_LIBRARY_FIND(cuFuncSetSharedMemConfig); + 
CUDA_LIBRARY_FIND(cuLaunchKernel); + CUDA_LIBRARY_FIND(cuFuncSetBlockShape); + CUDA_LIBRARY_FIND(cuFuncSetSharedSize); + CUDA_LIBRARY_FIND(cuParamSetSize); + CUDA_LIBRARY_FIND(cuParamSeti); + CUDA_LIBRARY_FIND(cuParamSetf); + CUDA_LIBRARY_FIND(cuParamSetv); + CUDA_LIBRARY_FIND(cuLaunch); + CUDA_LIBRARY_FIND(cuLaunchGrid); + CUDA_LIBRARY_FIND(cuLaunchGridAsync); + CUDA_LIBRARY_FIND(cuParamSetTexRef); + CUDA_LIBRARY_FIND(cuTexRefSetArray); + CUDA_LIBRARY_FIND(cuTexRefSetMipmappedArray); + CUDA_LIBRARY_FIND(cuTexRefSetAddress_v2); + CUDA_LIBRARY_FIND(cuTexRefSetAddress2D_v3); + CUDA_LIBRARY_FIND(cuTexRefSetFormat); + CUDA_LIBRARY_FIND(cuTexRefSetAddressMode); + CUDA_LIBRARY_FIND(cuTexRefSetFilterMode); + CUDA_LIBRARY_FIND(cuTexRefSetMipmapFilterMode); + CUDA_LIBRARY_FIND(cuTexRefSetMipmapLevelBias); + CUDA_LIBRARY_FIND(cuTexRefSetMipmapLevelClamp); + CUDA_LIBRARY_FIND(cuTexRefSetMaxAnisotropy); + CUDA_LIBRARY_FIND(cuTexRefSetFlags); + CUDA_LIBRARY_FIND(cuTexRefGetAddress_v2); + CUDA_LIBRARY_FIND(cuTexRefGetArray); + CUDA_LIBRARY_FIND(cuTexRefGetMipmappedArray); + CUDA_LIBRARY_FIND(cuTexRefGetAddressMode); + CUDA_LIBRARY_FIND(cuTexRefGetFilterMode); + CUDA_LIBRARY_FIND(cuTexRefGetFormat); + CUDA_LIBRARY_FIND(cuTexRefGetMipmapFilterMode); + CUDA_LIBRARY_FIND(cuTexRefGetMipmapLevelBias); + CUDA_LIBRARY_FIND(cuTexRefGetMipmapLevelClamp); + CUDA_LIBRARY_FIND(cuTexRefGetMaxAnisotropy); + CUDA_LIBRARY_FIND(cuTexRefGetFlags); + CUDA_LIBRARY_FIND(cuTexRefCreate); + CUDA_LIBRARY_FIND(cuTexRefDestroy); + CUDA_LIBRARY_FIND(cuSurfRefSetArray); + CUDA_LIBRARY_FIND(cuSurfRefGetArray); + CUDA_LIBRARY_FIND(cuTexObjectCreate); + CUDA_LIBRARY_FIND(cuTexObjectDestroy); + CUDA_LIBRARY_FIND(cuTexObjectGetResourceDesc); + CUDA_LIBRARY_FIND(cuTexObjectGetTextureDesc); + CUDA_LIBRARY_FIND(cuTexObjectGetResourceViewDesc); + CUDA_LIBRARY_FIND(cuSurfObjectCreate); + CUDA_LIBRARY_FIND(cuSurfObjectDestroy); + CUDA_LIBRARY_FIND(cuSurfObjectGetResourceDesc); + 
CUDA_LIBRARY_FIND(cuDeviceCanAccessPeer); + CUDA_LIBRARY_FIND(cuCtxEnablePeerAccess); + CUDA_LIBRARY_FIND(cuCtxDisablePeerAccess); + CUDA_LIBRARY_FIND(cuGraphicsUnregisterResource); + CUDA_LIBRARY_FIND(cuGraphicsSubResourceGetMappedArray); + CUDA_LIBRARY_FIND(cuGraphicsResourceGetMappedMipmappedArray); + CUDA_LIBRARY_FIND(cuGraphicsResourceGetMappedPointer_v2); + CUDA_LIBRARY_FIND(cuGraphicsResourceSetMapFlags); + CUDA_LIBRARY_FIND(cuGraphicsMapResources); + CUDA_LIBRARY_FIND(cuGraphicsUnmapResources); + CUDA_LIBRARY_FIND(cuGetExportTable); + + CUDA_LIBRARY_FIND(cuGraphicsGLRegisterBuffer); + CUDA_LIBRARY_FIND(cuGraphicsGLRegisterImage); + CUDA_LIBRARY_FIND(cuGLGetDevices); + CUDA_LIBRARY_FIND(cuGLCtxCreate_v2); + CUDA_LIBRARY_FIND(cuGLInit); + CUDA_LIBRARY_FIND(cuGLRegisterBufferObject); + CUDA_LIBRARY_FIND(cuGLMapBufferObject_v2); + CUDA_LIBRARY_FIND(cuGLUnmapBufferObject); + CUDA_LIBRARY_FIND(cuGLUnregisterBufferObject); + CUDA_LIBRARY_FIND(cuGLSetBufferObjectMapFlags); + CUDA_LIBRARY_FIND(cuGLMapBufferObjectAsync_v2); + CUDA_LIBRARY_FIND(cuGLUnmapBufferObjectAsync); + + + result = CUEW_SUCCESS; + return result; +} + +const char *cuewErrorString(CUresult result) { + switch(result) { + case CUDA_SUCCESS: return "No errors"; + case CUDA_ERROR_INVALID_VALUE: return "Invalid value"; + case CUDA_ERROR_OUT_OF_MEMORY: return "Out of memory"; + case CUDA_ERROR_NOT_INITIALIZED: return "Driver not initialized"; + case CUDA_ERROR_DEINITIALIZED: return "Driver deinitialized"; + case CUDA_ERROR_PROFILER_DISABLED: return "PROFILER_DISABLED"; + case CUDA_ERROR_PROFILER_NOT_INITIALIZED: return "PROFILER_NOT_INITIALIZED"; + case CUDA_ERROR_PROFILER_ALREADY_STARTED: return "PROFILER_ALREADY_STARTED"; + case CUDA_ERROR_PROFILER_ALREADY_STOPPED: return "PROFILER_ALREADY_STOPPED"; + case CUDA_ERROR_NO_DEVICE: return "No CUDA-capable device available"; + case CUDA_ERROR_INVALID_DEVICE: return "Invalid device"; + case CUDA_ERROR_INVALID_IMAGE: return "Invalid kernel image"; + case 
CUDA_ERROR_INVALID_CONTEXT: return "Invalid context"; + case CUDA_ERROR_CONTEXT_ALREADY_CURRENT: return "Context already current"; + case CUDA_ERROR_MAP_FAILED: return "Map failed"; + case CUDA_ERROR_UNMAP_FAILED: return "Unmap failed"; + case CUDA_ERROR_ARRAY_IS_MAPPED: return "Array is mapped"; + case CUDA_ERROR_ALREADY_MAPPED: return "Already mapped"; + case CUDA_ERROR_NO_BINARY_FOR_GPU: return "No binary for GPU"; + case CUDA_ERROR_ALREADY_ACQUIRED: return "Already acquired"; + case CUDA_ERROR_NOT_MAPPED: return "Not mapped"; + case CUDA_ERROR_NOT_MAPPED_AS_ARRAY: return "Mapped resource not available for access as an array"; + case CUDA_ERROR_NOT_MAPPED_AS_POINTER: return "Mapped resource not available for access as a pointer"; + case CUDA_ERROR_ECC_UNCORRECTABLE: return "Uncorrectable ECC error detected"; + case CUDA_ERROR_UNSUPPORTED_LIMIT: return "CUlimit not supported by device"; + case CUDA_ERROR_CONTEXT_ALREADY_IN_USE: return "CONTEXT_ALREADY_IN_USE"; + case CUDA_ERROR_PEER_ACCESS_UNSUPPORTED: return "PEER_ACCESS_UNSUPPORTED"; + case CUDA_ERROR_INVALID_PTX: return "INVALID_PTX"; + case CUDA_ERROR_INVALID_SOURCE: return "Invalid source"; + case CUDA_ERROR_FILE_NOT_FOUND: return "File not found"; + case CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND: return "Link to a shared object failed to resolve"; + case CUDA_ERROR_SHARED_OBJECT_INIT_FAILED: return "Shared object initialization failed"; + case CUDA_ERROR_OPERATING_SYSTEM: return "OPERATING_SYSTEM"; + case CUDA_ERROR_INVALID_HANDLE: return "Invalid handle"; + case CUDA_ERROR_NOT_FOUND: return "Not found"; + case CUDA_ERROR_NOT_READY: return "CUDA not ready"; + case CUDA_ERROR_ILLEGAL_ADDRESS: return "ILLEGAL_ADDRESS"; + case CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES: return "Launch exceeded resources"; + case CUDA_ERROR_LAUNCH_TIMEOUT: return "Launch exceeded timeout"; + case CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING: return "Launch with incompatible texturing"; + case CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED: return 
"PEER_ACCESS_ALREADY_ENABLED"; + case CUDA_ERROR_PEER_ACCESS_NOT_ENABLED: return "PEER_ACCESS_NOT_ENABLED"; + case CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE: return "PRIMARY_CONTEXT_ACTIVE"; + case CUDA_ERROR_CONTEXT_IS_DESTROYED: return "CONTEXT_IS_DESTROYED"; + case CUDA_ERROR_ASSERT: return "ASSERT"; + case CUDA_ERROR_TOO_MANY_PEERS: return "TOO_MANY_PEERS"; + case CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED: return "HOST_MEMORY_ALREADY_REGISTERED"; + case CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED: return "HOST_MEMORY_NOT_REGISTERED"; + case CUDA_ERROR_HARDWARE_STACK_ERROR: return "HARDWARE_STACK_ERROR"; + case CUDA_ERROR_ILLEGAL_INSTRUCTION: return "ILLEGAL_INSTRUCTION"; + case CUDA_ERROR_MISALIGNED_ADDRESS: return "MISALIGNED_ADDRESS"; + case CUDA_ERROR_INVALID_ADDRESS_SPACE: return "INVALID_ADDRESS_SPACE"; + case CUDA_ERROR_INVALID_PC: return "INVALID_PC"; + case CUDA_ERROR_LAUNCH_FAILED: return "Launch failed"; + case CUDA_ERROR_NOT_PERMITTED: return "NOT_PERMITTED"; + case CUDA_ERROR_NOT_SUPPORTED: return "NOT_SUPPORTED"; + case CUDA_ERROR_UNKNOWN: return "Unknown error"; + default: return "Unknown CUDA error value"; + } +} + +static void path_join(const char *path1, + const char *path2, + int maxlen, + char *result) { +#if defined(WIN32) || defined(_WIN32) + const char separator = '\\'; +#else + const char separator = '/'; +#endif + int n = snprintf(result, maxlen, "%s%c%s", path1, separator, path2); + if (n != -1 && n < maxlen) { + result[n] = '\0'; + } + else { + result[maxlen - 1] = '\0'; + } +} + +static int path_exists(const char *path) { + struct stat st; + if (stat(path, &st)) { + return 0; + } + return 1; +} + +const char *cuewCompilerPath(void) { +#ifdef _WIN32 + const char *defaultpaths[] = {"C:/CUDA/bin", NULL}; + const char *executable = "nvcc.exe"; +#else + const char *defaultpaths[] = { + "/Developer/NVIDIA/CUDA-5.0/bin", + "/usr/local/cuda-5.0/bin", + "/usr/local/cuda/bin", + "/Developer/NVIDIA/CUDA-6.0/bin", + "/usr/local/cuda-6.0/bin", + 
"/Developer/NVIDIA/CUDA-5.5/bin", + "/usr/local/cuda-5.5/bin", + NULL}; + const char *executable = "nvcc"; +#endif + int i; + + const char *binpath = getenv("CUDA_BIN_PATH"); + + static char nvcc[65536]; + + if (binpath) { + path_join(binpath, executable, sizeof(nvcc), nvcc); + if (path_exists(nvcc)) + return nvcc; + } + + for (i = 0; defaultpaths[i]; ++i) { + path_join(defaultpaths[i], executable, sizeof(nvcc), nvcc); + if (path_exists(nvcc)) + return nvcc; + } + +#ifndef _WIN32 + { + FILE *handle = popen("which nvcc", "r"); + if (handle) { + char buffer[4096] = {0}; + int len = fread(buffer, 1, sizeof(buffer) - 1, handle); + buffer[len] = '\0'; + pclose(handle); + + if (buffer[0]) + return "nvcc"; + } + } +#endif + + return NULL; +} + +int cuewCompilerVersion(void) { + const char *path = cuewCompilerPath(); + const char *marker = "Cuda compilation tools, release "; + FILE *pipe; + int major, minor; + char *versionstr; + char buf[128]; + char output[65536] = "\0"; + char command[65536] = "\0"; + + if (path == NULL) + return 0; + + /* get --version output */ + strncpy(command, path, sizeof(command)); + strncat(command, " --version", sizeof(command) - strlen(path)); + pipe = popen(command, "r"); + if (!pipe) { + fprintf(stderr, "CUDA: failed to run compiler to retrieve version"); + return 0; + } + + while (!feof(pipe)) { + if (fgets(buf, sizeof(buf), pipe) != NULL) { + strncat(output, buf, sizeof(output) - strlen(output)); + } + } + + pclose(pipe); + + /* parse version number */ + versionstr = strstr(output, marker); + if (versionstr == NULL) { + fprintf(stderr, "CUDA: failed to find version number in:\n\n%s\n", output); + return 0; + } + versionstr += strlen(marker); + + if (sscanf(versionstr, "%d.%d", &major, &minor) < 2) { + fprintf(stderr, "CUDA: failed to parse version number from:\n\n%s\n", output); + return 0; + } + + return 10 * major + minor; +} + |