Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'extern/cuew')
-rw-r--r--extern/cuew/CMakeLists.txt40
-rw-r--r--extern/cuew/LICENSE174
-rw-r--r--extern/cuew/README12
-rw-r--r--extern/cuew/SConscript35
-rw-r--r--extern/cuew/auto/cuda_errors.py35
-rw-r--r--extern/cuew/auto/cuda_extra.py125
-rw-r--r--extern/cuew/auto/cuew_gen.py591
-rwxr-xr-xextern/cuew/auto/cuew_gen.sh10
-rw-r--r--extern/cuew/auto/stdlib.h3
-rw-r--r--extern/cuew/include/cuew.h1138
-rw-r--r--extern/cuew/src/cuew.c710
11 files changed, 2873 insertions, 0 deletions
diff --git a/extern/cuew/CMakeLists.txt b/extern/cuew/CMakeLists.txt
new file mode 100644
index 00000000000..284fbbc6aca
--- /dev/null
+++ b/extern/cuew/CMakeLists.txt
@@ -0,0 +1,40 @@
+# ***** BEGIN GPL LICENSE BLOCK *****
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software Foundation,
+# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+#
+# The Original Code is Copyright (C) 2006, Blender Foundation
+# All rights reserved.
+#
+# The Original Code is: all of this file.
+#
+# Contributor(s): Jacques Beaurain.
+#
+# ***** END GPL LICENSE BLOCK *****
+
+set(INC
+ .
+ include
+)
+
+set(INC_SYS
+
+)
+
+set(SRC
+ include/cuew.h
+ src/cuew.c
+)
+
+blender_add_lib(extern_cuew "${SRC}" "${INC}" "${INC_SYS}")
diff --git a/extern/cuew/LICENSE b/extern/cuew/LICENSE
new file mode 100644
index 00000000000..c7533090bbe
--- /dev/null
+++ b/extern/cuew/LICENSE
@@ -0,0 +1,174 @@
+
+ Modified Apache 2.0 License
+
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor
+ and its affiliates, except as required to comply with Section 4(c) of
+ the License and to reproduce the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
diff --git a/extern/cuew/README b/extern/cuew/README
new file mode 100644
index 00000000000..3c43b7278d9
--- /dev/null
+++ b/extern/cuew/README
@@ -0,0 +1,12 @@
+The CUDA Extension Wrangler Library (CUEW) is a cross-platform open-source
+C/C++ extension loading library. CUEW provides efficient run-time mechanisms
+for determining which CUDA functions and extensions extensions are supported
+on the target platform.
+
+CUDA core and extension functionality is exposed in a single header file.
+GUEW has been tested on a variety of operating systems, including Windows,
+Linux, Mac OS X.
+
+LICENSE
+
+CUEW library is released under the Apache 2.0 license.
diff --git a/extern/cuew/SConscript b/extern/cuew/SConscript
new file mode 100644
index 00000000000..9c12c71133c
--- /dev/null
+++ b/extern/cuew/SConscript
@@ -0,0 +1,35 @@
+#!/usr/bin/env python
+#
+# ***** BEGIN GPL LICENSE BLOCK *****
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software Foundation,
+# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+#
+# The Original Code is Copyright (C) 2006, Blender Foundation
+# All rights reserved.
+#
+# The Original Code is: all of this file.
+#
+# Contributor(s): Nathan Letwory.
+#
+# ***** END GPL LICENSE BLOCK *****
+
+Import ('env')
+
+sources = env.Glob('src/cuew.c')
+
+incs = 'include'
+defs = []
+
+env.BlenderLib ('extern_cuew', sources, Split(incs), defines=defs, libtype=['system'], priority = [0])
diff --git a/extern/cuew/auto/cuda_errors.py b/extern/cuew/auto/cuda_errors.py
new file mode 100644
index 00000000000..464b7765234
--- /dev/null
+++ b/extern/cuew/auto/cuda_errors.py
@@ -0,0 +1,35 @@
+CUDA_ERRORS={
+'CUDA_SUCCESS': "No errors",
+'CUDA_ERROR_INVALID_VALUE': "Invalid value",
+'CUDA_ERROR_OUT_OF_MEMORY': "Out of memory",
+'CUDA_ERROR_NOT_INITIALIZED': "Driver not initialized",
+'CUDA_ERROR_DEINITIALIZED': "Driver deinitialized",
+'CUDA_ERROR_NO_DEVICE': "No CUDA-capable device available",
+'CUDA_ERROR_INVALID_DEVICE': "Invalid device",
+'CUDA_ERROR_INVALID_IMAGE': "Invalid kernel image",
+'CUDA_ERROR_INVALID_CONTEXT': "Invalid context",
+'CUDA_ERROR_CONTEXT_ALREADY_CURRENT': "Context already current",
+'CUDA_ERROR_MAP_FAILED': "Map failed",
+'CUDA_ERROR_UNMAP_FAILED': "Unmap failed",
+'CUDA_ERROR_ARRAY_IS_MAPPED': "Array is mapped",
+'CUDA_ERROR_ALREADY_MAPPED': "Already mapped",
+'CUDA_ERROR_NO_BINARY_FOR_GPU': "No binary for GPU",
+'CUDA_ERROR_ALREADY_ACQUIRED': "Already acquired",
+'CUDA_ERROR_NOT_MAPPED': "Not mapped",
+'CUDA_ERROR_NOT_MAPPED_AS_ARRAY': "Mapped resource not available for access as an array",
+'CUDA_ERROR_NOT_MAPPED_AS_POINTER': "Mapped resource not available for access as a pointer",
+'CUDA_ERROR_ECC_UNCORRECTABLE': "Uncorrectable ECC error detected",
+'CUDA_ERROR_UNSUPPORTED_LIMIT': "CUlimit not supported by device",
+'CUDA_ERROR_INVALID_SOURCE': "Invalid source",
+'CUDA_ERROR_FILE_NOT_FOUND': "File not found",
+'CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND': "Link to a shared object failed to resolve",
+'CUDA_ERROR_SHARED_OBJECT_INIT_FAILED': "Shared object initialization failed",
+'CUDA_ERROR_INVALID_HANDLE': "Invalid handle",
+'CUDA_ERROR_NOT_FOUND': "Not found",
+'CUDA_ERROR_NOT_READY': "CUDA not ready",
+'CUDA_ERROR_LAUNCH_FAILED': "Launch failed",
+'CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES': "Launch exceeded resources",
+'CUDA_ERROR_LAUNCH_TIMEOUT': "Launch exceeded timeout",
+'CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING': "Launch with incompatible texturing",
+'CUDA_ERROR_UNKNOWN': "Unknown error",
+}
diff --git a/extern/cuew/auto/cuda_extra.py b/extern/cuew/auto/cuda_extra.py
new file mode 100644
index 00000000000..fd4f466df83
--- /dev/null
+++ b/extern/cuew/auto/cuda_extra.py
@@ -0,0 +1,125 @@
+extra_code = """
+static void path_join(const char *path1,
+ const char *path2,
+ int maxlen,
+ char *result) {
+#if defined(WIN32) || defined(_WIN32)
+ const char separator = '\\\\';
+#else
+ const char separator = '/';
+#endif
+ int n = snprintf(result, maxlen, "%s%c%s", path1, separator, path2);
+ if (n != -1 && n < maxlen) {
+ result[n] = '\\0';
+ }
+ else {
+ result[maxlen - 1] = '\\0';
+ }
+}
+
+static int path_exists(const char *path) {
+ struct stat st;
+ if (stat(path, &st)) {
+ return 0;
+ }
+ return 1;
+}
+
+const char *cuewCompilerPath(void) {
+#ifdef _WIN32
+ const char *defaultpaths[] = {"C:/CUDA/bin", NULL};
+ const char *executable = "nvcc.exe";
+#else
+ const char *defaultpaths[] = {
+ "/Developer/NVIDIA/CUDA-5.0/bin",
+ "/usr/local/cuda-5.0/bin",
+ "/usr/local/cuda/bin",
+ "/Developer/NVIDIA/CUDA-6.0/bin",
+ "/usr/local/cuda-6.0/bin",
+ "/Developer/NVIDIA/CUDA-5.5/bin",
+ "/usr/local/cuda-5.5/bin",
+ NULL};
+ const char *executable = "nvcc";
+#endif
+ int i;
+
+ const char *binpath = getenv("CUDA_BIN_PATH");
+
+ static char nvcc[65536];
+
+ if (binpath) {
+ path_join(binpath, executable, sizeof(nvcc), nvcc);
+ if (path_exists(nvcc))
+ return nvcc;
+ }
+
+ for (i = 0; defaultpaths[i]; ++i) {
+ path_join(defaultpaths[i], executable, sizeof(nvcc), nvcc);
+ if (path_exists(nvcc))
+ return nvcc;
+ }
+
+#ifndef _WIN32
+ {
+ FILE *handle = popen("which nvcc", "r");
+ if (handle) {
+ char buffer[4096] = {0};
+ int len = fread(buffer, 1, sizeof(buffer) - 1, handle);
+ buffer[len] = '\\0';
+ pclose(handle);
+
+ if (buffer[0])
+ return "nvcc";
+ }
+ }
+#endif
+
+ return NULL;
+}
+
+int cuewCompilerVersion(void) {
+ const char *path = cuewCompilerPath();
+ const char *marker = "Cuda compilation tools, release ";
+ FILE *pipe;
+ int major, minor;
+ char *versionstr;
+ char buf[128];
+ char output[65536] = "\\0";
+ char command[65536] = "\\0";
+
+ if (path == NULL)
+ return 0;
+
+ /* get --version output */
+ strncpy(command, path, sizeof(command));
+ strncat(command, " --version", sizeof(command) - strlen(path));
+ pipe = popen(command, "r");
+ if (!pipe) {
+ fprintf(stderr, "CUDA: failed to run compiler to retrieve version");
+ return 0;
+ }
+
+ while (!feof(pipe)) {
+ if (fgets(buf, sizeof(buf), pipe) != NULL) {
+ strncat(output, buf, sizeof(output) - strlen(output));
+ }
+ }
+
+ pclose(pipe);
+
+ /* parse version number */
+ versionstr = strstr(output, marker);
+ if (versionstr == NULL) {
+ fprintf(stderr, "CUDA: failed to find version number in:\\n\\n%s\\n", output);
+ return 0;
+ }
+ versionstr += strlen(marker);
+
+ if (sscanf(versionstr, "%d.%d", &major, &minor) < 2) {
+ fprintf(stderr, "CUDA: failed to parse version number from:\\n\\n%s\\n", output);
+ return 0;
+ }
+
+ return 10 * major + minor;
+}
+"""
diff --git a/extern/cuew/auto/cuew_gen.py b/extern/cuew/auto/cuew_gen.py
new file mode 100644
index 00000000000..a94525c52b1
--- /dev/null
+++ b/extern/cuew/auto/cuew_gen.py
@@ -0,0 +1,591 @@
+#!/usr/bin/env python3
+#
+# Copyright 2014 Blender Foundation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License
+
+# This script generates either header or implementation file from
+# a CUDA header files.
+#
+# Usage: cuew hdr|impl [/path/to/cuda/includes]
+# - hdr means header file will be generated and printed to stdout.
+# - impl means implementation file will be generated and printed to stdout.
+# - /path/to/cuda/includes is a path to a folder with cuda.h and cudaGL.h
+# for which wrangler will be generated.
+
+import os
+import sys
+from cuda_errors import CUDA_ERRORS
+from pycparser import c_parser, c_ast, parse_file
+from subprocess import Popen, PIPE
+
+INCLUDE_DIR = "/usr/include"
+LIB = "CUEW"
+REAL_LIB = "CUDA"
+VERSION_MAJOR = "1"
+VERSION_MINOR = "2"
+COPYRIGHT = """/*
+ * Copyright 2011-2014 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License
+ */"""
+FILES = ["cuda.h", "cudaGL.h"]
+
+TYPEDEFS = []
+FUNC_TYPEDEFS = []
+SYMBOLS = []
+DEFINES = []
+DEFINES_V2 = []
+ERRORS = []
+
+
+class FuncDefVisitor(c_ast.NodeVisitor):
+ indent = 0
+ prev_complex = False
+ dummy_typedefs = ['size_t', 'CUdeviceptr']
+
+ def _get_quals_string(self, node):
+ if node.quals:
+ return ' '.join(node.quals) + ' '
+ return ''
+
+ def _get_ident_type(self, node):
+ if isinstance(node, c_ast.PtrDecl):
+ return self._get_ident_type(node.type.type) + '*'
+ if isinstance(node, c_ast.ArrayDecl):
+ return self._get_ident_type(node.type)
+ elif isinstance(node, c_ast.Struct):
+ if node.name:
+ return 'struct ' + node.name
+ else:
+ self.indent += 1
+ struct = self._stringify_struct(node)
+ self.indent -= 1
+ return "struct {\n" + \
+ struct + (" " * self.indent) + "}"
+ elif isinstance(node, c_ast.Union):
+ self.indent += 1
+ union = self._stringify_struct(node)
+ self.indent -= 1
+ return "union {\n" + union + (" " * self.indent) + "}"
+ elif isinstance(node, c_ast.Enum):
+ return 'enum ' + node.name
+ elif isinstance(node, c_ast.TypeDecl):
+ return self._get_ident_type(node.type)
+ else:
+ return node.names[0]
+
+ def _stringify_param(self, param):
+ param_type = param.type
+ result = self._get_quals_string(param)
+ result += self._get_ident_type(param_type)
+ if param.name:
+ result += ' ' + param.name
+ if isinstance(param_type, c_ast.ArrayDecl):
+ # TODO(sergey): Workaround to deal with the
+ # preprocessed file where array size got
+ # substituded.
+ dim = param_type.dim.value
+ if param.name == "reserved" and dim == "64":
+ dim = "CU_IPC_HANDLE_SIZE"
+ result += '[' + dim + ']'
+ return result
+
+ def _stringify_params(self, params):
+ result = []
+ for param in params:
+ result.append(self._stringify_param(param))
+ return ', '.join(result)
+
+ def _stringify_struct(self, node):
+ result = ""
+ children = node.children()
+ for child in children:
+ member = self._stringify_param(child[1])
+ result += (" " * self.indent) + member + ";\n"
+ return result
+
+ def _stringify_enum(self, node):
+ result = ""
+ children = node.children()
+ for child in children:
+ if isinstance(child[1], c_ast.EnumeratorList):
+ enumerators = child[1].enumerators
+ for enumerator in enumerators:
+ result += (" " * self.indent) + enumerator.name
+ if enumerator.value:
+ result += " = " + enumerator.value.value
+ result += ",\n"
+ if enumerator.name.startswith("CUDA_ERROR_"):
+ ERRORS.append(enumerator.name)
+ return result
+
+ def visit_Decl(self, node):
+ if node.type.__class__.__name__ == 'FuncDecl':
+ if isinstance(node.type, c_ast.FuncDecl):
+ func_decl = node.type
+ func_decl_type = func_decl.type
+
+ typedef = 'typedef '
+ symbol_name = None
+
+ if isinstance(func_decl_type, c_ast.TypeDecl):
+ symbol_name = func_decl_type.declname
+ typedef += self._get_quals_string(func_decl_type)
+ typedef += self._get_ident_type(func_decl_type.type)
+ typedef += ' CUDAAPI'
+ typedef += ' t' + symbol_name
+ elif isinstance(func_decl_type, c_ast.PtrDecl):
+ ptr_type = func_decl_type.type
+ symbol_name = ptr_type.declname
+ typedef += self._get_quals_string(ptr_type)
+ typedef += self._get_ident_type(func_decl_type)
+ typedef += ' CUDAAPI'
+ typedef += ' t' + symbol_name
+
+ typedef += '(' + \
+ self._stringify_params(func_decl.args.params) + \
+ ');'
+
+ SYMBOLS.append(symbol_name)
+ FUNC_TYPEDEFS.append(typedef)
+
+ def visit_Typedef(self, node):
+ if node.name in self.dummy_typedefs:
+ return
+
+ complex = False
+ type = self._get_ident_type(node.type)
+ quals = self._get_quals_string(node)
+
+ if isinstance(node.type.type, c_ast.Struct):
+ self.indent += 1
+ struct = self._stringify_struct(node.type.type)
+ self.indent -= 1
+ typedef = quals + type + " {\n" + struct + "} " + node.name
+ complex = True
+ elif isinstance(node.type.type, c_ast.Enum):
+ self.indent += 1
+ enum = self._stringify_enum(node.type.type)
+ self.indent -= 1
+ typedef = quals + type + " {\n" + enum + "} " + node.name
+ complex = True
+ else:
+ typedef = quals + type + " " + node.name
+ if complex or self.prev_complex:
+ typedef = "\ntypedef " + typedef + ";"
+ else:
+ typedef = "typedef " + typedef + ";"
+
+ TYPEDEFS.append(typedef)
+
+ self.prev_complex = complex
+
+
+def get_latest_cpp():
+ path_prefix = "/usr/bin"
+ for cpp_version in ["9", "8", "7", "6", "5", "4"]:
+ test_cpp = os.path.join(path_prefix, "cpp-4." + cpp_version)
+ if os.path.exists(test_cpp):
+ return test_cpp
+ return None
+
+
+def preprocess_file(filename, cpp_path):
+ args = [cpp_path, "-I./"]
+ if filename.endswith("GL.h"):
+ args.append("-DCUDAAPI= ")
+ args.append(filename)
+
+ try:
+ pipe = Popen(args,
+ stdout=PIPE,
+ universal_newlines=True)
+ text = pipe.communicate()[0]
+ except OSError as e:
+ raise RuntimeError("Unable to invoke 'cpp'. " +
+ 'Make sure its path was passed correctly\n' +
+ ('Original error: %s' % e))
+
+ return text
+
+
+def parse_files():
+ parser = c_parser.CParser()
+ cpp_path = get_latest_cpp()
+
+ for filename in FILES:
+ filepath = os.path.join(INCLUDE_DIR, filename)
+ dummy_typedefs = {}
+ text = preprocess_file(filepath, cpp_path)
+
+ if filepath.endswith("GL.h"):
+ dummy_typedefs = {
+ "CUresult": "int",
+ "CUgraphicsResource": "void *",
+ "CUdevice": "void *",
+ "CUcontext": "void *",
+ "CUdeviceptr": "void *",
+ "CUstream": "void *"
+ }
+
+ text = "typedef int GLint;\n" + text
+ text = "typedef unsigned int GLuint;\n" + text
+ text = "typedef unsigned int GLenum;\n" + text
+ text = "typedef long size_t;\n" + text
+
+ for typedef in sorted(dummy_typedefs):
+ text = "typedef " + dummy_typedefs[typedef] + " " + \
+ typedef + ";\n" + text
+
+ ast = parser.parse(text, filepath)
+
+ with open(filepath) as f:
+ lines = f.readlines()
+ for line in lines:
+ if line.startswith("#define"):
+ line = line[8:-1]
+ token = line.split()
+ if token[0] not in ("__cuda_cuda_h__",
+ "CUDA_CB",
+ "CUDAAPI"):
+ DEFINES.append(token)
+
+ for line in lines:
+ # TODO(sergey): Use better matching rule for _v2 symbols.
+ if line[0].isspace() and line.lstrip().startswith("#define"):
+ line = line[12:-1]
+ token = line.split()
+ if len(token) == 2 and token[1].endswith("_v2"):
+ DEFINES_V2.append(token)
+
+ v = FuncDefVisitor()
+ for typedef in dummy_typedefs:
+ v.dummy_typedefs.append(typedef)
+ v.visit(ast)
+
+ FUNC_TYPEDEFS.append('')
+ SYMBOLS.append('')
+
+
+def print_copyright():
+ print(COPYRIGHT)
+ print("")
+
+
+def open_header_guard():
+ print("#ifndef __%s_H__" % (LIB))
+ print("#define __%s_H__" % (LIB))
+ print("")
+ print("#ifdef __cplusplus")
+ print("extern \"C\" {")
+ print("#endif")
+ print("")
+
+
+def close_header_guard():
+ print("")
+ print("#ifdef __cplusplus")
+ print("}")
+ print("#endif")
+ print("")
+ print("#endif /* __%s_H__ */" % (LIB))
+
+
+def print_header():
+ print_copyright()
+ open_header_guard()
+
+ # Fot size_t.
+ print("#include <stdlib.h>")
+ print("")
+
+ print("/* Defines. */")
+ print("#define %s_VERSION_MAJOR %s" % (LIB, VERSION_MAJOR))
+ print("#define %s_VERSION_MINOR %s" % (LIB, VERSION_MINOR))
+ print("")
+ for define in DEFINES:
+ print('#define %s' % (' '.join(define)))
+ print("")
+
+ print("""/* Functions which changed 3.1 -> 3.2 for 64 bit stuff,
+ * the cuda library has both the old ones for compatibility and new
+ * ones with _v2 postfix,
+ */""")
+ for define in DEFINES_V2:
+ print('#define %s' % (' '.join(define)))
+ print("")
+
+ print("/* Types. */")
+
+ # We handle this specially because of the file is
+ # getting preprocessed.
+ print("""#if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64)
+typedef unsigned long long CUdeviceptr;
+#else
+typedef unsigned int CUdeviceptr;
+#endif
+""")
+
+ for typedef in TYPEDEFS:
+ print('%s' % (typedef))
+
+ # TDO(sergey): This is only specific to CUDA wrapper.
+ print("""
+#ifdef _WIN32
+# define CUDAAPI __stdcall
+# define CUDA_CB __stdcall
+#else
+# define CUDAAPI
+# define CUDA_CB
+#endif
+""")
+
+ print("/* Function types. */")
+ for func_typedef in FUNC_TYPEDEFS:
+ print('%s' % (func_typedef))
+ print("")
+
+ print("/* Function declarations. */")
+ for symbol in SYMBOLS:
+ if symbol:
+ print('extern t%s *%s;' % (symbol, symbol))
+ else:
+ print("")
+
+ print("")
+ print("enum {")
+ print(" CUEW_SUCCESS = 0,")
+ print(" CUEW_ERROR_OPEN_FAILED = -1,")
+ print(" CUEW_ERROR_ATEXIT_FAILED = -2,")
+ print("};")
+ print("")
+ print("int %sInit(void);" % (LIB.lower()))
+ # TODO(sergey): Get rid of hardcoded CUresult.
+ print("const char *%sErrorString(CUresult result);" % (LIB.lower()))
+ print("const char *cuewCompilerPath(void);")
+ print("int cuewCompilerVersion(void);")
+
+ close_header_guard()
+
+
+def print_dl_wrapper():
+ print("""#ifdef _WIN32
+# define WIN32_LEAN_AND_MEAN
+# define VC_EXTRALEAN
+# include <windows.h>
+
+/* Utility macros. */
+
+typedef HMODULE DynamicLibrary;
+
+# define dynamic_library_open(path) LoadLibrary(path)
+# define dynamic_library_close(lib) FreeLibrary(lib)
+# define dynamic_library_find(lib, symbol) GetProcAddress(lib, symbol)
+#else
+# include <dlfcn.h>
+
+typedef void* DynamicLibrary;
+
+# define dynamic_library_open(path) dlopen(path, RTLD_NOW)
+# define dynamic_library_close(lib) dlclose(lib)
+# define dynamic_library_find(lib, symbol) dlsym(lib, symbol)
+#endif
+""")
+
+
+def print_dl_helper_macro():
+ print("""#define %s_LIBRARY_FIND_CHECKED(name) \\
+ name = (t##name *)dynamic_library_find(lib, #name); \\
+ assert(name);
+
+#define %s_LIBRARY_FIND(name) \\
+ name = (t##name *)dynamic_library_find(lib, #name);
+
+static DynamicLibrary lib;""" % (REAL_LIB, REAL_LIB))
+ print("")
+
+
+def print_dl_close():
+ print("""static void %sExit(void) {
+ if(lib != NULL) {
+ /* Ignore errors. */
+ dynamic_library_close(lib);
+ lib = NULL;
+ }
+}""" % (LIB.lower()))
+ print("")
+
+
+def print_lib_path():
+ # TODO(sergey): get rid of hardcoded libraries.
+ print("""#ifdef _WIN32
+ /* Expected in c:/windows/system or similar, no path needed. */
+ const char *path = "nvcuda.dll";
+#elif defined(__APPLE__)
+ /* Default installation path. */
+ const char *path = "/usr/local/cuda/lib/libcuda.dylib";
+#else
+ const char *path = "libcuda.so";
+#endif""")
+
+
+def print_init_guard():
+ print(""" static int initialized = 0;
+ static int result = 0;
+ int error, driver_version;
+
+ if (initialized) {
+ return result;
+ }
+
+ initialized = 1;
+
+ error = atexit(cuewExit);
+ if (error) {
+ result = CUEW_ERROR_ATEXIT_FAILED;
+ return result;
+ }
+
+ /* Load library. */
+ lib = dynamic_library_open(path);
+
+ if (lib == NULL) {
+ result = CUEW_ERROR_OPEN_FAILED;
+ return result;
+ }""")
+ print("")
+
+
+def print_driver_version_guard():
+ # TODO(sergey): Currently it's hardcoded for CUDA only.
+ print(""" /* Detect driver version. */
+ driver_version = 1000;
+
+ %s_LIBRARY_FIND_CHECKED(cuDriverGetVersion);
+ if (cuDriverGetVersion) {
+ cuDriverGetVersion(&driver_version);
+ }
+
+ /* We require version 4.0. */
+ if (driver_version < 4000) {
+ result = CUEW_ERROR_OPEN_FAILED;
+ return result;
+ }""" % (REAL_LIB))
+
+
+def print_dl_init():
+ print("int %sInit(void) {" % (LIB.lower()))
+
+ print(" /* Library paths. */")
+ print_lib_path()
+ print_init_guard()
+ print_driver_version_guard()
+
+ print(" /* Fetch all function pointers. */")
+ for symbol in SYMBOLS:
+ if symbol:
+ print(" %s_LIBRARY_FIND(%s);" % (REAL_LIB, symbol))
+ else:
+ print("")
+
+ print("")
+ print(" result = CUEW_SUCCESS;")
+ print(" return result;")
+
+ print("}")
+
+
+def print_implementation():
+ print_copyright()
+
+ # TODO(sergey): Get rid of hardcoded header.
+ print("""#ifdef _MSC_VER
+# define snprintf _snprintf
+# define popen _popen
+# define pclose _pclose
+# define _CRT_SECURE_NO_WARNINGS
+#endif
+""")
+ print("#include <cuew.h>")
+ print("#include <assert.h>")
+ print("#include <stdio.h>")
+ print("#include <string.h>")
+ print("#include <sys/stat.h>")
+ print("")
+
+ print_dl_wrapper()
+ print_dl_helper_macro()
+
+ print("/* Function definitions. */")
+ for symbol in SYMBOLS:
+ if symbol:
+ print('t%s *%s;' % (symbol, symbol))
+ else:
+ print("")
+ print("")
+
+ print_dl_close()
+
+ print("/* Implementation function. */")
+ print_dl_init()
+
+ print("")
+ # TODO(sergey): Get rid of hardcoded CUresult.
+ print("const char *%sErrorString(CUresult result) {" % (LIB.lower()))
+ print(" switch(result) {")
+ print(" case CUDA_SUCCESS: return \"No errors\";")
+
+ for error in ERRORS:
+ if error in CUDA_ERRORS:
+ str = CUDA_ERRORS[error]
+ else:
+ str = error[11:]
+ print(" case %s: return \"%s\";" % (error, str))
+
+ print(" default: return \"Unknown CUDA error value\";")
+ print(" }")
+ print("}")
+
+ from cuda_extra import extra_code
+ print(extra_code)
+
+if __name__ == "__main__":
+
+ if len(sys.argv) != 2 and len(sys.argv) != 3:
+ print("Usage: %s hdr|impl [/path/to/cuda/toolkit/include]" %
+ (sys.argv[0]))
+ exit(1)
+
+ if len(sys.argv) == 3:
+ INCLUDE_DIR = sys.argv[2]
+
+ parse_files()
+
+ if sys.argv[1] == "hdr":
+ print_header()
+ elif sys.argv[1] == "impl":
+ print_implementation()
+ else:
+ print("Unknown command %s" % (sys.argv[1]))
+ exit(1)
diff --git a/extern/cuew/auto/cuew_gen.sh b/extern/cuew/auto/cuew_gen.sh
new file mode 100755
index 00000000000..b44987b801d
--- /dev/null
+++ b/extern/cuew/auto/cuew_gen.sh
@@ -0,0 +1,10 @@
+#!/bin/sh
+
+# This script invokes cuew_gen.py and updates the
+# header and source files in the repository.
+
+SCRIPT=`realpath -s $0`
+DIR=`dirname $SCRIPT`
+
+python ${DIR}/cuew_gen.py hdr $@ > $DIR/../include/cuew.h
+python ${DIR}/cuew_gen.py impl $@ > $DIR/../src/cuew.c
diff --git a/extern/cuew/auto/stdlib.h b/extern/cuew/auto/stdlib.h
new file mode 100644
index 00000000000..75976c8574f
--- /dev/null
+++ b/extern/cuew/auto/stdlib.h
@@ -0,0 +1,3 @@
+/* This file is needed to workaround issue with parsing system headers. */
+
+typedef long size_t;
diff --git a/extern/cuew/include/cuew.h b/extern/cuew/include/cuew.h
new file mode 100644
index 00000000000..fd03311ad41
--- /dev/null
+++ b/extern/cuew/include/cuew.h
@@ -0,0 +1,1138 @@
+/*
+ * Copyright 2011-2014 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License
+ */
+
+#ifndef __CUEW_H__
+#define __CUEW_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdlib.h>
+
+/* Defines. */
+#define CUEW_VERSION_MAJOR 1
+#define CUEW_VERSION_MINOR 2
+
+#define CUDA_VERSION 6000
+#define CU_IPC_HANDLE_SIZE 64
+#define CU_MEMHOSTALLOC_PORTABLE 0x01
+#define CU_MEMHOSTALLOC_DEVICEMAP 0x02
+#define CU_MEMHOSTALLOC_WRITECOMBINED 0x04
+#define CU_MEMHOSTREGISTER_PORTABLE 0x01
+#define CU_MEMHOSTREGISTER_DEVICEMAP 0x02
+#define CUDA_ARRAY3D_LAYERED 0x01
+#define CUDA_ARRAY3D_2DARRAY 0x01
+#define CUDA_ARRAY3D_SURFACE_LDST 0x02
+#define CUDA_ARRAY3D_CUBEMAP 0x04
+#define CUDA_ARRAY3D_TEXTURE_GATHER 0x08
+#define CUDA_ARRAY3D_DEPTH_TEXTURE 0x10
+#define CU_TRSA_OVERRIDE_FORMAT 0x01
+#define CU_TRSF_READ_AS_INTEGER 0x01
+#define CU_TRSF_NORMALIZED_COORDINATES 0x02
+#define CU_TRSF_SRGB 0x10
+#define CU_LAUNCH_PARAM_END ((void*)0x00)
+#define CU_LAUNCH_PARAM_BUFFER_POINTER ((void*)0x01)
+#define CU_LAUNCH_PARAM_BUFFER_SIZE ((void*)0x02)
+#define CU_PARAM_TR_DEFAULT -1
+#define CUDAGL_H
+
+/* Functions which changed 3.1 -> 3.2 for 64 bit stuff,
+ * the cuda library has both the old ones for compatibility and new
+ * ones with _v2 postfix,
+ */
+#define cuDeviceTotalMem cuDeviceTotalMem_v2
+#define cuCtxCreate cuCtxCreate_v2
+#define cuModuleGetGlobal cuModuleGetGlobal_v2
+#define cuMemGetInfo cuMemGetInfo_v2
+#define cuMemAlloc cuMemAlloc_v2
+#define cuMemAllocPitch cuMemAllocPitch_v2
+#define cuMemFree cuMemFree_v2
+#define cuMemGetAddressRange cuMemGetAddressRange_v2
+#define cuMemAllocHost cuMemAllocHost_v2
+#define cuMemHostGetDevicePointer cuMemHostGetDevicePointer_v2
+#define cuMemcpyHtoD cuMemcpyHtoD_v2
+#define cuMemcpyDtoH cuMemcpyDtoH_v2
+#define cuMemcpyDtoD cuMemcpyDtoD_v2
+#define cuMemcpyDtoA cuMemcpyDtoA_v2
+#define cuMemcpyAtoD cuMemcpyAtoD_v2
+#define cuMemcpyHtoA cuMemcpyHtoA_v2
+#define cuMemcpyAtoH cuMemcpyAtoH_v2
+#define cuMemcpyAtoA cuMemcpyAtoA_v2
+#define cuMemcpyHtoAAsync cuMemcpyHtoAAsync_v2
+#define cuMemcpyAtoHAsync cuMemcpyAtoHAsync_v2
+#define cuMemcpy2D cuMemcpy2D_v2
+#define cuMemcpy2DUnaligned cuMemcpy2DUnaligned_v2
+#define cuMemcpy3D cuMemcpy3D_v2
+#define cuMemcpyHtoDAsync cuMemcpyHtoDAsync_v2
+#define cuMemcpyDtoHAsync cuMemcpyDtoHAsync_v2
+#define cuMemcpyDtoDAsync cuMemcpyDtoDAsync_v2
+#define cuMemcpy2DAsync cuMemcpy2DAsync_v2
+#define cuMemcpy3DAsync cuMemcpy3DAsync_v2
+#define cuMemsetD8 cuMemsetD8_v2
+#define cuMemsetD16 cuMemsetD16_v2
+#define cuMemsetD32 cuMemsetD32_v2
+#define cuMemsetD2D8 cuMemsetD2D8_v2
+#define cuMemsetD2D16 cuMemsetD2D16_v2
+#define cuMemsetD2D32 cuMemsetD2D32_v2
+#define cuArrayCreate cuArrayCreate_v2
+#define cuArrayGetDescriptor cuArrayGetDescriptor_v2
+#define cuArray3DCreate cuArray3DCreate_v2
+#define cuArray3DGetDescriptor cuArray3DGetDescriptor_v2
+#define cuTexRefSetAddress cuTexRefSetAddress_v2
+#define cuTexRefGetAddress cuTexRefGetAddress_v2
+#define cuGraphicsResourceGetMappedPointer cuGraphicsResourceGetMappedPointer_v2
+#define cuCtxDestroy cuCtxDestroy_v2
+#define cuCtxPopCurrent cuCtxPopCurrent_v2
+#define cuCtxPushCurrent cuCtxPushCurrent_v2
+#define cuStreamDestroy cuStreamDestroy_v2
+#define cuEventDestroy cuEventDestroy_v2
+#define cuTexRefSetAddress2D cuTexRefSetAddress2D_v2
+#define cuGLCtxCreate cuGLCtxCreate_v2
+#define cuGLMapBufferObject cuGLMapBufferObject_v2
+#define cuGLMapBufferObjectAsync cuGLMapBufferObjectAsync_v2
+
+/* Types. */
+#if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64)
+typedef unsigned long long CUdeviceptr;
+#else
+typedef unsigned int CUdeviceptr;
+#endif
+
+typedef int CUdevice;
+typedef struct CUctx_st* CUcontext;
+typedef struct CUmod_st* CUmodule;
+typedef struct CUfunc_st* CUfunction;
+typedef struct CUarray_st* CUarray;
+typedef struct CUmipmappedArray_st* CUmipmappedArray;
+typedef struct CUtexref_st* CUtexref;
+typedef struct CUsurfref_st* CUsurfref;
+typedef struct CUevent_st* CUevent;
+typedef struct CUstream_st* CUstream;
+typedef struct CUgraphicsResource_st* CUgraphicsResource;
+typedef unsigned CUtexObject;
+typedef unsigned CUsurfObject;
+
+typedef struct CUuuid_st {
+ char bytes[16];
+} CUuuid;
+
+typedef struct CUipcEventHandle_st {
+ char reserved[CU_IPC_HANDLE_SIZE];
+} CUipcEventHandle;
+
+typedef struct CUipcMemHandle_st {
+ char reserved[CU_IPC_HANDLE_SIZE];
+} CUipcMemHandle;
+
+typedef enum CUipcMem_flags_enum {
+ CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS = 0x1,
+} CUipcMem_flags;
+
+typedef enum CUmemAttach_flags_enum {
+ CU_MEM_ATTACH_GLOBAL = 0x1,
+ CU_MEM_ATTACH_HOST = 0x2,
+ CU_MEM_ATTACH_SINGLE = 0x4,
+} CUmemAttach_flags;
+
+typedef enum CUctx_flags_enum {
+ CU_CTX_SCHED_AUTO = 0x00,
+ CU_CTX_SCHED_SPIN = 0x01,
+ CU_CTX_SCHED_YIELD = 0x02,
+ CU_CTX_SCHED_BLOCKING_SYNC = 0x04,
+ CU_CTX_BLOCKING_SYNC = 0x04,
+ CU_CTX_SCHED_MASK = 0x07,
+ CU_CTX_MAP_HOST = 0x08,
+ CU_CTX_LMEM_RESIZE_TO_MAX = 0x10,
+ CU_CTX_FLAGS_MASK = 0x1f,
+} CUctx_flags;
+
+typedef enum CUstream_flags_enum {
+ CU_STREAM_DEFAULT = 0x0,
+ CU_STREAM_NON_BLOCKING = 0x1,
+} CUstream_flags;
+
+typedef enum CUevent_flags_enum {
+ CU_EVENT_DEFAULT = 0x0,
+ CU_EVENT_BLOCKING_SYNC = 0x1,
+ CU_EVENT_DISABLE_TIMING = 0x2,
+ CU_EVENT_INTERPROCESS = 0x4,
+} CUevent_flags;
+
+typedef enum CUarray_format_enum {
+ CU_AD_FORMAT_UNSIGNED_INT8 = 0x01,
+ CU_AD_FORMAT_UNSIGNED_INT16 = 0x02,
+ CU_AD_FORMAT_UNSIGNED_INT32 = 0x03,
+ CU_AD_FORMAT_SIGNED_INT8 = 0x08,
+ CU_AD_FORMAT_SIGNED_INT16 = 0x09,
+ CU_AD_FORMAT_SIGNED_INT32 = 0x0a,
+ CU_AD_FORMAT_HALF = 0x10,
+ CU_AD_FORMAT_FLOAT = 0x20,
+} CUarray_format;
+
+typedef enum CUaddress_mode_enum {
+ CU_TR_ADDRESS_MODE_WRAP = 0,
+ CU_TR_ADDRESS_MODE_CLAMP = 1,
+ CU_TR_ADDRESS_MODE_MIRROR = 2,
+ CU_TR_ADDRESS_MODE_BORDER = 3,
+} CUaddress_mode;
+
+typedef enum CUfilter_mode_enum {
+ CU_TR_FILTER_MODE_POINT = 0,
+ CU_TR_FILTER_MODE_LINEAR = 1,
+} CUfilter_mode;
+
+typedef enum CUdevice_attribute_enum {
+ CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 1,
+ CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X = 2,
+ CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y = 3,
+ CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z = 4,
+ CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X = 5,
+ CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y = 6,
+ CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z = 7,
+ CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK = 8,
+ CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK = 8,
+ CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY = 9,
+ CU_DEVICE_ATTRIBUTE_WARP_SIZE = 10,
+ CU_DEVICE_ATTRIBUTE_MAX_PITCH = 11,
+ CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK = 12,
+ CU_DEVICE_ATTRIBUTE_REGISTERS_PER_BLOCK = 12,
+ CU_DEVICE_ATTRIBUTE_CLOCK_RATE = 13,
+ CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT = 14,
+ CU_DEVICE_ATTRIBUTE_GPU_OVERLAP = 15,
+ CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT = 16,
+ CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT = 17,
+ CU_DEVICE_ATTRIBUTE_INTEGRATED = 18,
+ CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY = 19,
+ CU_DEVICE_ATTRIBUTE_COMPUTE_MODE = 20,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH = 21,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH = 22,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT = 23,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH = 24,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT = 25,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH = 26,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH = 27,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT = 28,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS = 29,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_WIDTH = 27,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_HEIGHT = 28,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES = 29,
+ CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT = 30,
+ CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS = 31,
+ CU_DEVICE_ATTRIBUTE_ECC_ENABLED = 32,
+ CU_DEVICE_ATTRIBUTE_PCI_BUS_ID = 33,
+ CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID = 34,
+ CU_DEVICE_ATTRIBUTE_TCC_DRIVER = 35,
+ CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE = 36,
+ CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH = 37,
+ CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE = 38,
+ CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR = 39,
+ CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT = 40,
+ CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING = 41,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH = 42,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS = 43,
+ CU_DEVICE_ATTRIBUTE_CAN_TEX2D_GATHER = 44,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_WIDTH = 45,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_HEIGHT = 46,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE = 47,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE = 48,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE = 49,
+ CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID = 50,
+ CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT = 51,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH = 52,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH = 53,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS = 54,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH = 55,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH = 56,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT = 57,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH = 58,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT = 59,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH = 60,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH = 61,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS = 62,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH = 63,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT = 64,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS = 65,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH = 66,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH = 67,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS = 68,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH = 69,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH = 70,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT = 71,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH = 72,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH = 73,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT = 74,
+ CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR = 75,
+ CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR = 76,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH = 77,
+ CU_DEVICE_ATTRIBUTE_STREAM_PRIORITIES_SUPPORTED = 78,
+ CU_DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED = 79,
+ CU_DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED = 80,
+ CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR = 81,
+ CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR = 82,
+ CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY = 83,
+ CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD = 84,
+ CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID = 85,
+ CU_DEVICE_ATTRIBUTE_MAX,
+} CUdevice_attribute;
+
+typedef struct CUdevprop_st {
+ int maxThreadsPerBlock;
+ int maxThreadsDim[3];
+ int maxGridSize[3];
+ int sharedMemPerBlock;
+ int totalConstantMemory;
+ int SIMDWidth;
+ int memPitch;
+ int regsPerBlock;
+ int clockRate;
+ int textureAlign;
+} CUdevprop;
+
+typedef enum CUpointer_attribute_enum {
+ CU_POINTER_ATTRIBUTE_CONTEXT = 1,
+ CU_POINTER_ATTRIBUTE_MEMORY_TYPE = 2,
+ CU_POINTER_ATTRIBUTE_DEVICE_POINTER = 3,
+ CU_POINTER_ATTRIBUTE_HOST_POINTER = 4,
+ CU_POINTER_ATTRIBUTE_P2P_TOKENS = 5,
+ CU_POINTER_ATTRIBUTE_SYNC_MEMOPS = 6,
+ CU_POINTER_ATTRIBUTE_BUFFER_ID = 7,
+ CU_POINTER_ATTRIBUTE_IS_MANAGED = 8,
+} CUpointer_attribute;
+
+typedef enum CUfunction_attribute_enum {
+ CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 0,
+ CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES = 1,
+ CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES = 2,
+ CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES = 3,
+ CU_FUNC_ATTRIBUTE_NUM_REGS = 4,
+ CU_FUNC_ATTRIBUTE_PTX_VERSION = 5,
+ CU_FUNC_ATTRIBUTE_BINARY_VERSION = 6,
+ CU_FUNC_ATTRIBUTE_CACHE_MODE_CA = 7,
+ CU_FUNC_ATTRIBUTE_MAX,
+} CUfunction_attribute;
+
+typedef enum CUfunc_cache_enum {
+ CU_FUNC_CACHE_PREFER_NONE = 0x00,
+ CU_FUNC_CACHE_PREFER_SHARED = 0x01,
+ CU_FUNC_CACHE_PREFER_L1 = 0x02,
+ CU_FUNC_CACHE_PREFER_EQUAL = 0x03,
+} CUfunc_cache;
+
+typedef enum CUsharedconfig_enum {
+ CU_SHARED_MEM_CONFIG_DEFAULT_BANK_SIZE = 0x00,
+ CU_SHARED_MEM_CONFIG_FOUR_BYTE_BANK_SIZE = 0x01,
+ CU_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE = 0x02,
+} CUsharedconfig;
+
+typedef enum CUmemorytype_enum {
+ CU_MEMORYTYPE_HOST = 0x01,
+ CU_MEMORYTYPE_DEVICE = 0x02,
+ CU_MEMORYTYPE_ARRAY = 0x03,
+ CU_MEMORYTYPE_UNIFIED = 0x04,
+} CUmemorytype;
+
+typedef enum CUcomputemode_enum {
+ CU_COMPUTEMODE_DEFAULT = 0,
+ CU_COMPUTEMODE_EXCLUSIVE = 1,
+ CU_COMPUTEMODE_PROHIBITED = 2,
+ CU_COMPUTEMODE_EXCLUSIVE_PROCESS = 3,
+} CUcomputemode;
+
+typedef enum CUjit_option_enum {
+ CU_JIT_MAX_REGISTERS = 0,
+ CU_JIT_THREADS_PER_BLOCK,
+ CU_JIT_WALL_TIME,
+ CU_JIT_INFO_LOG_BUFFER,
+ CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES,
+ CU_JIT_ERROR_LOG_BUFFER,
+ CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES,
+ CU_JIT_OPTIMIZATION_LEVEL,
+ CU_JIT_TARGET_FROM_CUCONTEXT,
+ CU_JIT_TARGET,
+ CU_JIT_FALLBACK_STRATEGY,
+ CU_JIT_GENERATE_DEBUG_INFO,
+ CU_JIT_LOG_VERBOSE,
+ CU_JIT_GENERATE_LINE_INFO,
+ CU_JIT_CACHE_MODE,
+ CU_JIT_NUM_OPTIONS,
+} CUjit_option;
+
+typedef enum CUjit_target_enum {
+ CU_TARGET_COMPUTE_10 = 10,
+ CU_TARGET_COMPUTE_11 = 11,
+ CU_TARGET_COMPUTE_12 = 12,
+ CU_TARGET_COMPUTE_13 = 13,
+ CU_TARGET_COMPUTE_20 = 20,
+ CU_TARGET_COMPUTE_21 = 21,
+ CU_TARGET_COMPUTE_30 = 30,
+ CU_TARGET_COMPUTE_32 = 32,
+ CU_TARGET_COMPUTE_35 = 35,
+ CU_TARGET_COMPUTE_50 = 50,
+} CUjit_target;
+
+typedef enum CUjit_fallback_enum {
+ CU_PREFER_PTX = 0,
+ CU_PREFER_BINARY,
+} CUjit_fallback;
+
+typedef enum CUjit_cacheMode_enum {
+ CU_JIT_CACHE_OPTION_NONE = 0,
+ CU_JIT_CACHE_OPTION_CG,
+ CU_JIT_CACHE_OPTION_CA,
+} CUjit_cacheMode;
+
+typedef enum CUjitInputType_enum {
+ CU_JIT_INPUT_CUBIN = 0,
+ CU_JIT_INPUT_PTX,
+ CU_JIT_INPUT_FATBINARY,
+ CU_JIT_INPUT_OBJECT,
+ CU_JIT_INPUT_LIBRARY,
+ CU_JIT_NUM_INPUT_TYPES,
+} CUjitInputType;
+
+typedef struct CUlinkState_st* CUlinkState;
+
+typedef enum CUgraphicsRegisterFlags_enum {
+ CU_GRAPHICS_REGISTER_FLAGS_NONE = 0x00,
+ CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY = 0x01,
+ CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD = 0x02,
+ CU_GRAPHICS_REGISTER_FLAGS_SURFACE_LDST = 0x04,
+ CU_GRAPHICS_REGISTER_FLAGS_TEXTURE_GATHER = 0x08,
+} CUgraphicsRegisterFlags;
+
+typedef enum CUgraphicsMapResourceFlags_enum {
+ CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE = 0x00,
+ CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY = 0x01,
+ CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD = 0x02,
+} CUgraphicsMapResourceFlags;
+
+typedef enum CUarray_cubemap_face_enum {
+ CU_CUBEMAP_FACE_POSITIVE_X = 0x00,
+ CU_CUBEMAP_FACE_NEGATIVE_X = 0x01,
+ CU_CUBEMAP_FACE_POSITIVE_Y = 0x02,
+ CU_CUBEMAP_FACE_NEGATIVE_Y = 0x03,
+ CU_CUBEMAP_FACE_POSITIVE_Z = 0x04,
+ CU_CUBEMAP_FACE_NEGATIVE_Z = 0x05,
+} CUarray_cubemap_face;
+
+typedef enum CUlimit_enum {
+ CU_LIMIT_STACK_SIZE = 0x00,
+ CU_LIMIT_PRINTF_FIFO_SIZE = 0x01,
+ CU_LIMIT_MALLOC_HEAP_SIZE = 0x02,
+ CU_LIMIT_DEV_RUNTIME_SYNC_DEPTH = 0x03,
+ CU_LIMIT_DEV_RUNTIME_PENDING_LAUNCH_COUNT = 0x04,
+ CU_LIMIT_MAX,
+} CUlimit;
+
+typedef enum CUresourcetype_enum {
+ CU_RESOURCE_TYPE_ARRAY = 0x00,
+ CU_RESOURCE_TYPE_MIPMAPPED_ARRAY = 0x01,
+ CU_RESOURCE_TYPE_LINEAR = 0x02,
+ CU_RESOURCE_TYPE_PITCH2D = 0x03,
+} CUresourcetype;
+
+typedef enum cudaError_enum {
+ CUDA_SUCCESS = 0,
+ CUDA_ERROR_INVALID_VALUE = 1,
+ CUDA_ERROR_OUT_OF_MEMORY = 2,
+ CUDA_ERROR_NOT_INITIALIZED = 3,
+ CUDA_ERROR_DEINITIALIZED = 4,
+ CUDA_ERROR_PROFILER_DISABLED = 5,
+ CUDA_ERROR_PROFILER_NOT_INITIALIZED = 6,
+ CUDA_ERROR_PROFILER_ALREADY_STARTED = 7,
+ CUDA_ERROR_PROFILER_ALREADY_STOPPED = 8,
+ CUDA_ERROR_NO_DEVICE = 100,
+ CUDA_ERROR_INVALID_DEVICE = 101,
+ CUDA_ERROR_INVALID_IMAGE = 200,
+ CUDA_ERROR_INVALID_CONTEXT = 201,
+ CUDA_ERROR_CONTEXT_ALREADY_CURRENT = 202,
+ CUDA_ERROR_MAP_FAILED = 205,
+ CUDA_ERROR_UNMAP_FAILED = 206,
+ CUDA_ERROR_ARRAY_IS_MAPPED = 207,
+ CUDA_ERROR_ALREADY_MAPPED = 208,
+ CUDA_ERROR_NO_BINARY_FOR_GPU = 209,
+ CUDA_ERROR_ALREADY_ACQUIRED = 210,
+ CUDA_ERROR_NOT_MAPPED = 211,
+ CUDA_ERROR_NOT_MAPPED_AS_ARRAY = 212,
+ CUDA_ERROR_NOT_MAPPED_AS_POINTER = 213,
+ CUDA_ERROR_ECC_UNCORRECTABLE = 214,
+ CUDA_ERROR_UNSUPPORTED_LIMIT = 215,
+ CUDA_ERROR_CONTEXT_ALREADY_IN_USE = 216,
+ CUDA_ERROR_PEER_ACCESS_UNSUPPORTED = 217,
+ CUDA_ERROR_INVALID_PTX = 218,
+ CUDA_ERROR_INVALID_SOURCE = 300,
+ CUDA_ERROR_FILE_NOT_FOUND = 301,
+ CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND = 302,
+ CUDA_ERROR_SHARED_OBJECT_INIT_FAILED = 303,
+ CUDA_ERROR_OPERATING_SYSTEM = 304,
+ CUDA_ERROR_INVALID_HANDLE = 400,
+ CUDA_ERROR_NOT_FOUND = 500,
+ CUDA_ERROR_NOT_READY = 600,
+ CUDA_ERROR_ILLEGAL_ADDRESS = 700,
+ CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES = 701,
+ CUDA_ERROR_LAUNCH_TIMEOUT = 702,
+ CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING = 703,
+ CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED = 704,
+ CUDA_ERROR_PEER_ACCESS_NOT_ENABLED = 705,
+ CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE = 708,
+ CUDA_ERROR_CONTEXT_IS_DESTROYED = 709,
+ CUDA_ERROR_ASSERT = 710,
+ CUDA_ERROR_TOO_MANY_PEERS = 711,
+ CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED = 712,
+ CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED = 713,
+ CUDA_ERROR_HARDWARE_STACK_ERROR = 714,
+ CUDA_ERROR_ILLEGAL_INSTRUCTION = 715,
+ CUDA_ERROR_MISALIGNED_ADDRESS = 716,
+ CUDA_ERROR_INVALID_ADDRESS_SPACE = 717,
+ CUDA_ERROR_INVALID_PC = 718,
+ CUDA_ERROR_LAUNCH_FAILED = 719,
+ CUDA_ERROR_NOT_PERMITTED = 800,
+ CUDA_ERROR_NOT_SUPPORTED = 801,
+ CUDA_ERROR_UNKNOWN = 999,
+} CUresult;
+
+typedef void* CUstreamCallback;
+
+typedef struct CUDA_MEMCPY2D_st {
+ size_t srcXInBytes;
+ size_t srcY;
+ CUmemorytype srcMemoryType;
+ const void* srcHost;
+ CUdeviceptr srcDevice;
+ CUarray srcArray;
+ size_t srcPitch;
+ size_t dstXInBytes;
+ size_t dstY;
+ CUmemorytype dstMemoryType;
+ void* dstHost;
+ CUdeviceptr dstDevice;
+ CUarray dstArray;
+ size_t dstPitch;
+ size_t WidthInBytes;
+ size_t Height;
+} CUDA_MEMCPY2D;
+
+typedef struct CUDA_MEMCPY3D_st {
+ size_t srcXInBytes;
+ size_t srcY;
+ size_t srcZ;
+ size_t srcLOD;
+ CUmemorytype srcMemoryType;
+ const void* srcHost;
+ CUdeviceptr srcDevice;
+ CUarray srcArray;
+ void* reserved0;
+ size_t srcPitch;
+ size_t srcHeight;
+ size_t dstXInBytes;
+ size_t dstY;
+ size_t dstZ;
+ size_t dstLOD;
+ CUmemorytype dstMemoryType;
+ void* dstHost;
+ CUdeviceptr dstDevice;
+ CUarray dstArray;
+ void* reserved1;
+ size_t dstPitch;
+ size_t dstHeight;
+ size_t WidthInBytes;
+ size_t Height;
+ size_t Depth;
+} CUDA_MEMCPY3D;
+
+typedef struct CUDA_MEMCPY3D_PEER_st {
+ size_t srcXInBytes;
+ size_t srcY;
+ size_t srcZ;
+ size_t srcLOD;
+ CUmemorytype srcMemoryType;
+ const void* srcHost;
+ CUdeviceptr srcDevice;
+ CUarray srcArray;
+ CUcontext srcContext;
+ size_t srcPitch;
+ size_t srcHeight;
+ size_t dstXInBytes;
+ size_t dstY;
+ size_t dstZ;
+ size_t dstLOD;
+ CUmemorytype dstMemoryType;
+ void* dstHost;
+ CUdeviceptr dstDevice;
+ CUarray dstArray;
+ CUcontext dstContext;
+ size_t dstPitch;
+ size_t dstHeight;
+ size_t WidthInBytes;
+ size_t Height;
+ size_t Depth;
+} CUDA_MEMCPY3D_PEER;
+
+typedef struct CUDA_ARRAY_DESCRIPTOR_st {
+ size_t Width;
+ size_t Height;
+ CUarray_format Format;
+ unsigned NumChannels;
+} CUDA_ARRAY_DESCRIPTOR;
+
+typedef struct CUDA_ARRAY3D_DESCRIPTOR_st {
+ size_t Width;
+ size_t Height;
+ size_t Depth;
+ CUarray_format Format;
+ unsigned NumChannels;
+ unsigned Flags;
+} CUDA_ARRAY3D_DESCRIPTOR;
+
+typedef struct CUDA_RESOURCE_DESC_st {
+ CUresourcetype resType;
+ union {
+ struct {
+ CUarray hArray;
+ } array;
+ struct {
+ CUmipmappedArray hMipmappedArray;
+ } mipmap;
+ struct {
+ CUdeviceptr devPtr;
+ CUarray_format format;
+ unsigned numChannels;
+ size_t sizeInBytes;
+ } linear;
+ struct {
+ CUdeviceptr devPtr;
+ CUarray_format format;
+ unsigned numChannels;
+ size_t width;
+ size_t height;
+ size_t pitchInBytes;
+ } pitch2D;
+ struct {
+ int reserved[32];
+ } reserved;
+ } res;
+ unsigned flags;
+} CUDA_RESOURCE_DESC;
+
+typedef struct CUDA_TEXTURE_DESC_st {
+ CUaddress_mode addressMode[3];
+ CUfilter_mode filterMode;
+ unsigned flags;
+ unsigned maxAnisotropy;
+ CUfilter_mode mipmapFilterMode;
+ float mipmapLevelBias;
+ float minMipmapLevelClamp;
+ float maxMipmapLevelClamp;
+ int reserved[16];
+} CUDA_TEXTURE_DESC;
+
+typedef enum CUresourceViewFormat_enum {
+ CU_RES_VIEW_FORMAT_NONE = 0x00,
+ CU_RES_VIEW_FORMAT_UINT_1X8 = 0x01,
+ CU_RES_VIEW_FORMAT_UINT_2X8 = 0x02,
+ CU_RES_VIEW_FORMAT_UINT_4X8 = 0x03,
+ CU_RES_VIEW_FORMAT_SINT_1X8 = 0x04,
+ CU_RES_VIEW_FORMAT_SINT_2X8 = 0x05,
+ CU_RES_VIEW_FORMAT_SINT_4X8 = 0x06,
+ CU_RES_VIEW_FORMAT_UINT_1X16 = 0x07,
+ CU_RES_VIEW_FORMAT_UINT_2X16 = 0x08,
+ CU_RES_VIEW_FORMAT_UINT_4X16 = 0x09,
+ CU_RES_VIEW_FORMAT_SINT_1X16 = 0x0a,
+ CU_RES_VIEW_FORMAT_SINT_2X16 = 0x0b,
+ CU_RES_VIEW_FORMAT_SINT_4X16 = 0x0c,
+ CU_RES_VIEW_FORMAT_UINT_1X32 = 0x0d,
+ CU_RES_VIEW_FORMAT_UINT_2X32 = 0x0e,
+ CU_RES_VIEW_FORMAT_UINT_4X32 = 0x0f,
+ CU_RES_VIEW_FORMAT_SINT_1X32 = 0x10,
+ CU_RES_VIEW_FORMAT_SINT_2X32 = 0x11,
+ CU_RES_VIEW_FORMAT_SINT_4X32 = 0x12,
+ CU_RES_VIEW_FORMAT_FLOAT_1X16 = 0x13,
+ CU_RES_VIEW_FORMAT_FLOAT_2X16 = 0x14,
+ CU_RES_VIEW_FORMAT_FLOAT_4X16 = 0x15,
+ CU_RES_VIEW_FORMAT_FLOAT_1X32 = 0x16,
+ CU_RES_VIEW_FORMAT_FLOAT_2X32 = 0x17,
+ CU_RES_VIEW_FORMAT_FLOAT_4X32 = 0x18,
+ CU_RES_VIEW_FORMAT_UNSIGNED_BC1 = 0x19,
+ CU_RES_VIEW_FORMAT_UNSIGNED_BC2 = 0x1a,
+ CU_RES_VIEW_FORMAT_UNSIGNED_BC3 = 0x1b,
+ CU_RES_VIEW_FORMAT_UNSIGNED_BC4 = 0x1c,
+ CU_RES_VIEW_FORMAT_SIGNED_BC4 = 0x1d,
+ CU_RES_VIEW_FORMAT_UNSIGNED_BC5 = 0x1e,
+ CU_RES_VIEW_FORMAT_SIGNED_BC5 = 0x1f,
+ CU_RES_VIEW_FORMAT_UNSIGNED_BC6H = 0x20,
+ CU_RES_VIEW_FORMAT_SIGNED_BC6H = 0x21,
+ CU_RES_VIEW_FORMAT_UNSIGNED_BC7 = 0x22,
+} CUresourceViewFormat;
+
+typedef struct CUDA_RESOURCE_VIEW_DESC_st {
+ CUresourceViewFormat format;
+ size_t width;
+ size_t height;
+ size_t depth;
+ unsigned firstMipmapLevel;
+ unsigned lastMipmapLevel;
+ unsigned firstLayer;
+ unsigned lastLayer;
+ unsigned reserved[16];
+} CUDA_RESOURCE_VIEW_DESC;
+
+typedef struct CUDA_POINTER_ATTRIBUTE_P2P_TOKENS_st {
+ unsigned p2pToken;
+ unsigned vaSpaceToken;
+} CUDA_POINTER_ATTRIBUTE_P2P_TOKENS;
+typedef unsigned GLenum;
+typedef unsigned GLuint;
+typedef int GLint;
+
+typedef enum CUGLDeviceList_enum {
+ CU_GL_DEVICE_LIST_ALL = 0x01,
+ CU_GL_DEVICE_LIST_CURRENT_FRAME = 0x02,
+ CU_GL_DEVICE_LIST_NEXT_FRAME = 0x03,
+} CUGLDeviceList;
+
+typedef enum CUGLmap_flags_enum {
+ CU_GL_MAP_RESOURCE_FLAGS_NONE = 0x00,
+ CU_GL_MAP_RESOURCE_FLAGS_READ_ONLY = 0x01,
+ CU_GL_MAP_RESOURCE_FLAGS_WRITE_DISCARD = 0x02,
+} CUGLmap_flags;
+
+#ifdef _WIN32
+# define CUDAAPI __stdcall
+# define CUDA_CB __stdcall
+#else
+# define CUDAAPI
+# define CUDA_CB
+#endif
+
+/* Function types. */
+typedef CUresult CUDAAPI tcuGetErrorString(CUresult error, const char* pStr);
+typedef CUresult CUDAAPI tcuGetErrorName(CUresult error, const char* pStr);
+typedef CUresult CUDAAPI tcuInit(unsigned Flags);
+typedef CUresult CUDAAPI tcuDriverGetVersion(int* driverVersion);
+typedef CUresult CUDAAPI tcuDeviceGet(CUdevice* device, int ordinal);
+typedef CUresult CUDAAPI tcuDeviceGetCount(int* count);
+typedef CUresult CUDAAPI tcuDeviceGetName(char* name, int len, CUdevice dev);
+typedef CUresult CUDAAPI tcuDeviceTotalMem_v2(size_t* bytes, CUdevice dev);
+typedef CUresult CUDAAPI tcuDeviceGetAttribute(int* pi, CUdevice_attribute attrib, CUdevice dev);
+typedef CUresult CUDAAPI tcuDeviceGetProperties(CUdevprop* prop, CUdevice dev);
+typedef CUresult CUDAAPI tcuDeviceComputeCapability(int* major, int* minor, CUdevice dev);
+typedef CUresult CUDAAPI tcuCtxCreate_v2(CUcontext* pctx, unsigned flags, CUdevice dev);
+typedef CUresult CUDAAPI tcuCtxDestroy_v2(CUcontext ctx);
+typedef CUresult CUDAAPI tcuCtxPushCurrent_v2(CUcontext ctx);
+typedef CUresult CUDAAPI tcuCtxPopCurrent_v2(CUcontext* pctx);
+typedef CUresult CUDAAPI tcuCtxSetCurrent(CUcontext ctx);
+typedef CUresult CUDAAPI tcuCtxGetCurrent(CUcontext* pctx);
+typedef CUresult CUDAAPI tcuCtxGetDevice(CUdevice* device);
+typedef CUresult CUDAAPI tcuCtxSynchronize(void);
+typedef CUresult CUDAAPI tcuCtxSetLimit(CUlimit limit, size_t value);
+typedef CUresult CUDAAPI tcuCtxGetLimit(size_t* pvalue, CUlimit limit);
+typedef CUresult CUDAAPI tcuCtxGetCacheConfig(CUfunc_cache* pconfig);
+typedef CUresult CUDAAPI tcuCtxSetCacheConfig(CUfunc_cache config);
+typedef CUresult CUDAAPI tcuCtxGetSharedMemConfig(CUsharedconfig* pConfig);
+typedef CUresult CUDAAPI tcuCtxSetSharedMemConfig(CUsharedconfig config);
+typedef CUresult CUDAAPI tcuCtxGetApiVersion(CUcontext ctx, unsigned* version);
+typedef CUresult CUDAAPI tcuCtxGetStreamPriorityRange(int* leastPriority, int* greatestPriority);
+typedef CUresult CUDAAPI tcuCtxAttach(CUcontext* pctx, unsigned flags);
+typedef CUresult CUDAAPI tcuCtxDetach(CUcontext ctx);
+typedef CUresult CUDAAPI tcuModuleLoad(CUmodule* module, const char* fname);
+typedef CUresult CUDAAPI tcuModuleLoadData(CUmodule* module, const void* image);
+typedef CUresult CUDAAPI tcuModuleLoadDataEx(CUmodule* module, const void* image, unsigned numOptions, CUjit_option* options, void* optionValues);
+typedef CUresult CUDAAPI tcuModuleLoadFatBinary(CUmodule* module, const void* fatCubin);
+typedef CUresult CUDAAPI tcuModuleUnload(CUmodule hmod);
+typedef CUresult CUDAAPI tcuModuleGetFunction(CUfunction* hfunc, CUmodule hmod, const char* name);
+typedef CUresult CUDAAPI tcuModuleGetGlobal_v2(CUdeviceptr* dptr, size_t* bytes, CUmodule hmod, const char* name);
+typedef CUresult CUDAAPI tcuModuleGetTexRef(CUtexref* pTexRef, CUmodule hmod, const char* name);
+typedef CUresult CUDAAPI tcuModuleGetSurfRef(CUsurfref* pSurfRef, CUmodule hmod, const char* name);
+typedef CUresult CUDAAPI tcuLinkCreate(unsigned numOptions, CUjit_option* options, void* optionValues, CUlinkState* stateOut);
+typedef CUresult CUDAAPI tcuLinkAddData(CUlinkState state, CUjitInputType type, void* data, size_t size, const char* name, unsigned numOptions, CUjit_option* options, void* optionValues);
+typedef CUresult CUDAAPI tcuLinkAddFile(CUlinkState state, CUjitInputType type, const char* path, unsigned numOptions, CUjit_option* options, void* optionValues);
+typedef CUresult CUDAAPI tcuLinkComplete(CUlinkState state, void* cubinOut, size_t* sizeOut);
+typedef CUresult CUDAAPI tcuLinkDestroy(CUlinkState state);
+typedef CUresult CUDAAPI tcuMemGetInfo_v2(size_t* free, size_t* total);
+typedef CUresult CUDAAPI tcuMemAlloc_v2(CUdeviceptr* dptr, size_t bytesize);
+typedef CUresult CUDAAPI tcuMemAllocPitch_v2(CUdeviceptr* dptr, size_t* pPitch, size_t WidthInBytes, size_t Height, unsigned ElementSizeBytes);
+typedef CUresult CUDAAPI tcuMemFree_v2(CUdeviceptr dptr);
+typedef CUresult CUDAAPI tcuMemGetAddressRange_v2(CUdeviceptr* pbase, size_t* psize, CUdeviceptr dptr);
+typedef CUresult CUDAAPI tcuMemAllocHost_v2(void* pp, size_t bytesize);
+typedef CUresult CUDAAPI tcuMemFreeHost(void* p);
+typedef CUresult CUDAAPI tcuMemHostAlloc(void* pp, size_t bytesize, unsigned Flags);
+typedef CUresult CUDAAPI tcuMemHostGetDevicePointer_v2(CUdeviceptr* pdptr, void* p, unsigned Flags);
+typedef CUresult CUDAAPI tcuMemHostGetFlags(unsigned* pFlags, void* p);
+typedef CUresult CUDAAPI tcuMemAllocManaged(CUdeviceptr* dptr, size_t bytesize, unsigned flags);
+typedef CUresult CUDAAPI tcuDeviceGetByPCIBusId(CUdevice* dev, const char* pciBusId);
+typedef CUresult CUDAAPI tcuDeviceGetPCIBusId(char* pciBusId, int len, CUdevice dev);
+typedef CUresult CUDAAPI tcuIpcGetEventHandle(CUipcEventHandle* pHandle, CUevent event);
+typedef CUresult CUDAAPI tcuIpcOpenEventHandle(CUevent* phEvent, CUipcEventHandle handle);
+typedef CUresult CUDAAPI tcuIpcGetMemHandle(CUipcMemHandle* pHandle, CUdeviceptr dptr);
+typedef CUresult CUDAAPI tcuIpcOpenMemHandle(CUdeviceptr* pdptr, CUipcMemHandle handle, unsigned Flags);
+typedef CUresult CUDAAPI tcuIpcCloseMemHandle(CUdeviceptr dptr);
+typedef CUresult CUDAAPI tcuMemHostRegister(void* p, size_t bytesize, unsigned Flags);
+typedef CUresult CUDAAPI tcuMemHostUnregister(void* p);
+typedef CUresult CUDAAPI tcuMemcpy(CUdeviceptr dst, CUdeviceptr src, size_t ByteCount);
+typedef CUresult CUDAAPI tcuMemcpyPeer(CUdeviceptr dstDevice, CUcontext dstContext, CUdeviceptr srcDevice, CUcontext srcContext, size_t ByteCount);
+typedef CUresult CUDAAPI tcuMemcpyHtoD_v2(CUdeviceptr dstDevice, const void* srcHost, size_t ByteCount);
+typedef CUresult CUDAAPI tcuMemcpyDtoH_v2(void* dstHost, CUdeviceptr srcDevice, size_t ByteCount);
+typedef CUresult CUDAAPI tcuMemcpyDtoD_v2(CUdeviceptr dstDevice, CUdeviceptr srcDevice, size_t ByteCount);
+typedef CUresult CUDAAPI tcuMemcpyDtoA_v2(CUarray dstArray, size_t dstOffset, CUdeviceptr srcDevice, size_t ByteCount);
+typedef CUresult CUDAAPI tcuMemcpyAtoD_v2(CUdeviceptr dstDevice, CUarray srcArray, size_t srcOffset, size_t ByteCount);
+typedef CUresult CUDAAPI tcuMemcpyHtoA_v2(CUarray dstArray, size_t dstOffset, const void* srcHost, size_t ByteCount);
+typedef CUresult CUDAAPI tcuMemcpyAtoH_v2(void* dstHost, CUarray srcArray, size_t srcOffset, size_t ByteCount);
+typedef CUresult CUDAAPI tcuMemcpyAtoA_v2(CUarray dstArray, size_t dstOffset, CUarray srcArray, size_t srcOffset, size_t ByteCount);
+typedef CUresult CUDAAPI tcuMemcpy2D_v2(const CUDA_MEMCPY2D* pCopy);
+typedef CUresult CUDAAPI tcuMemcpy2DUnaligned_v2(const CUDA_MEMCPY2D* pCopy);
+typedef CUresult CUDAAPI tcuMemcpy3D_v2(const CUDA_MEMCPY3D* pCopy);
+typedef CUresult CUDAAPI tcuMemcpy3DPeer(const CUDA_MEMCPY3D_PEER* pCopy);
+typedef CUresult CUDAAPI tcuMemcpyAsync(CUdeviceptr dst, CUdeviceptr src, size_t ByteCount, CUstream hStream);
+typedef CUresult CUDAAPI tcuMemcpyPeerAsync(CUdeviceptr dstDevice, CUcontext dstContext, CUdeviceptr srcDevice, CUcontext srcContext, size_t ByteCount, CUstream hStream);
+typedef CUresult CUDAAPI tcuMemcpyHtoDAsync_v2(CUdeviceptr dstDevice, const void* srcHost, size_t ByteCount, CUstream hStream);
+typedef CUresult CUDAAPI tcuMemcpyDtoHAsync_v2(void* dstHost, CUdeviceptr srcDevice, size_t ByteCount, CUstream hStream);
+typedef CUresult CUDAAPI tcuMemcpyDtoDAsync_v2(CUdeviceptr dstDevice, CUdeviceptr srcDevice, size_t ByteCount, CUstream hStream);
+typedef CUresult CUDAAPI tcuMemcpyHtoAAsync_v2(CUarray dstArray, size_t dstOffset, const void* srcHost, size_t ByteCount, CUstream hStream);
+typedef CUresult CUDAAPI tcuMemcpyAtoHAsync_v2(void* dstHost, CUarray srcArray, size_t srcOffset, size_t ByteCount, CUstream hStream);
+typedef CUresult CUDAAPI tcuMemcpy2DAsync_v2(const CUDA_MEMCPY2D* pCopy, CUstream hStream);
+typedef CUresult CUDAAPI tcuMemcpy3DAsync_v2(const CUDA_MEMCPY3D* pCopy, CUstream hStream);
+typedef CUresult CUDAAPI tcuMemcpy3DPeerAsync(const CUDA_MEMCPY3D_PEER* pCopy, CUstream hStream);
+typedef CUresult CUDAAPI tcuMemsetD8_v2(CUdeviceptr dstDevice, unsigned uc, size_t N);
+typedef CUresult CUDAAPI tcuMemsetD16_v2(CUdeviceptr dstDevice, unsigned us, size_t N);
+typedef CUresult CUDAAPI tcuMemsetD32_v2(CUdeviceptr dstDevice, unsigned ui, size_t N);
+typedef CUresult CUDAAPI tcuMemsetD2D8_v2(CUdeviceptr dstDevice, size_t dstPitch, unsigned uc, size_t Width, size_t Height);
+typedef CUresult CUDAAPI tcuMemsetD2D16_v2(CUdeviceptr dstDevice, size_t dstPitch, unsigned us, size_t Width, size_t Height);
+typedef CUresult CUDAAPI tcuMemsetD2D32_v2(CUdeviceptr dstDevice, size_t dstPitch, unsigned ui, size_t Width, size_t Height);
+typedef CUresult CUDAAPI tcuMemsetD8Async(CUdeviceptr dstDevice, unsigned uc, size_t N, CUstream hStream);
+typedef CUresult CUDAAPI tcuMemsetD16Async(CUdeviceptr dstDevice, unsigned us, size_t N, CUstream hStream);
+typedef CUresult CUDAAPI tcuMemsetD32Async(CUdeviceptr dstDevice, unsigned ui, size_t N, CUstream hStream);
+typedef CUresult CUDAAPI tcuMemsetD2D8Async(CUdeviceptr dstDevice, size_t dstPitch, unsigned uc, size_t Width, size_t Height, CUstream hStream);
+typedef CUresult CUDAAPI tcuMemsetD2D16Async(CUdeviceptr dstDevice, size_t dstPitch, unsigned us, size_t Width, size_t Height, CUstream hStream);
+typedef CUresult CUDAAPI tcuMemsetD2D32Async(CUdeviceptr dstDevice, size_t dstPitch, unsigned ui, size_t Width, size_t Height, CUstream hStream);
+typedef CUresult CUDAAPI tcuArrayCreate_v2(CUarray* pHandle, const CUDA_ARRAY_DESCRIPTOR* pAllocateArray);
+typedef CUresult CUDAAPI tcuArrayGetDescriptor_v2(CUDA_ARRAY_DESCRIPTOR* pArrayDescriptor, CUarray hArray);
+typedef CUresult CUDAAPI tcuArrayDestroy(CUarray hArray);
+typedef CUresult CUDAAPI tcuArray3DCreate_v2(CUarray* pHandle, const CUDA_ARRAY3D_DESCRIPTOR* pAllocateArray);
+typedef CUresult CUDAAPI tcuArray3DGetDescriptor_v2(CUDA_ARRAY3D_DESCRIPTOR* pArrayDescriptor, CUarray hArray);
+typedef CUresult CUDAAPI tcuMipmappedArrayCreate(CUmipmappedArray* pHandle, const CUDA_ARRAY3D_DESCRIPTOR* pMipmappedArrayDesc, unsigned numMipmapLevels);
+typedef CUresult CUDAAPI tcuMipmappedArrayGetLevel(CUarray* pLevelArray, CUmipmappedArray hMipmappedArray, unsigned level);
+typedef CUresult CUDAAPI tcuMipmappedArrayDestroy(CUmipmappedArray hMipmappedArray);
+typedef CUresult CUDAAPI tcuPointerGetAttribute(void* data, CUpointer_attribute attribute, CUdeviceptr ptr);
+typedef CUresult CUDAAPI tcuPointerSetAttribute(const void* value, CUpointer_attribute attribute, CUdeviceptr ptr);
+typedef CUresult CUDAAPI tcuStreamCreate(CUstream* phStream, unsigned Flags);
+typedef CUresult CUDAAPI tcuStreamCreateWithPriority(CUstream* phStream, unsigned flags, int priority);
+typedef CUresult CUDAAPI tcuStreamGetPriority(CUstream hStream, int* priority);
+typedef CUresult CUDAAPI tcuStreamGetFlags(CUstream hStream, unsigned* flags);
+typedef CUresult CUDAAPI tcuStreamWaitEvent(CUstream hStream, CUevent hEvent, unsigned Flags);
+typedef CUresult CUDAAPI tcuStreamAddCallback(CUstream hStream, CUstreamCallback callback, void* userData, unsigned flags);
+typedef CUresult CUDAAPI tcuStreamAttachMemAsync(CUstream hStream, CUdeviceptr dptr, size_t length, unsigned flags);
+typedef CUresult CUDAAPI tcuStreamQuery(CUstream hStream);
+typedef CUresult CUDAAPI tcuStreamSynchronize(CUstream hStream);
+typedef CUresult CUDAAPI tcuStreamDestroy_v2(CUstream hStream);
+typedef CUresult CUDAAPI tcuEventCreate(CUevent* phEvent, unsigned Flags);
+typedef CUresult CUDAAPI tcuEventRecord(CUevent hEvent, CUstream hStream);
+typedef CUresult CUDAAPI tcuEventQuery(CUevent hEvent);
+typedef CUresult CUDAAPI tcuEventSynchronize(CUevent hEvent);
+typedef CUresult CUDAAPI tcuEventDestroy_v2(CUevent hEvent);
+typedef CUresult CUDAAPI tcuEventElapsedTime(float* pMilliseconds, CUevent hStart, CUevent hEnd);
+typedef CUresult CUDAAPI tcuFuncGetAttribute(int* pi, CUfunction_attribute attrib, CUfunction hfunc);
+typedef CUresult CUDAAPI tcuFuncSetCacheConfig(CUfunction hfunc, CUfunc_cache config);
+typedef CUresult CUDAAPI tcuFuncSetSharedMemConfig(CUfunction hfunc, CUsharedconfig config);
+typedef CUresult CUDAAPI tcuLaunchKernel(CUfunction f, unsigned gridDimX, unsigned gridDimY, unsigned gridDimZ, unsigned blockDimX, unsigned blockDimY, unsigned blockDimZ, unsigned sharedMemBytes, CUstream hStream, void* kernelParams, void* extra);
+typedef CUresult CUDAAPI tcuFuncSetBlockShape(CUfunction hfunc, int x, int y, int z);
+typedef CUresult CUDAAPI tcuFuncSetSharedSize(CUfunction hfunc, unsigned bytes);
+typedef CUresult CUDAAPI tcuParamSetSize(CUfunction hfunc, unsigned numbytes);
+typedef CUresult CUDAAPI tcuParamSeti(CUfunction hfunc, int offset, unsigned value);
+typedef CUresult CUDAAPI tcuParamSetf(CUfunction hfunc, int offset, float value);
+typedef CUresult CUDAAPI tcuParamSetv(CUfunction hfunc, int offset, void* ptr, unsigned numbytes);
+typedef CUresult CUDAAPI tcuLaunch(CUfunction f);
+typedef CUresult CUDAAPI tcuLaunchGrid(CUfunction f, int grid_width, int grid_height);
+typedef CUresult CUDAAPI tcuLaunchGridAsync(CUfunction f, int grid_width, int grid_height, CUstream hStream);
+typedef CUresult CUDAAPI tcuParamSetTexRef(CUfunction hfunc, int texunit, CUtexref hTexRef);
+typedef CUresult CUDAAPI tcuTexRefSetArray(CUtexref hTexRef, CUarray hArray, unsigned Flags);
+typedef CUresult CUDAAPI tcuTexRefSetMipmappedArray(CUtexref hTexRef, CUmipmappedArray hMipmappedArray, unsigned Flags);
+typedef CUresult CUDAAPI tcuTexRefSetAddress_v2(size_t* ByteOffset, CUtexref hTexRef, CUdeviceptr dptr, size_t bytes);
+typedef CUresult CUDAAPI tcuTexRefSetAddress2D_v3(CUtexref hTexRef, const CUDA_ARRAY_DESCRIPTOR* desc, CUdeviceptr dptr, size_t Pitch);
+typedef CUresult CUDAAPI tcuTexRefSetFormat(CUtexref hTexRef, CUarray_format fmt, int NumPackedComponents);
+typedef CUresult CUDAAPI tcuTexRefSetAddressMode(CUtexref hTexRef, int dim, CUaddress_mode am);
+typedef CUresult CUDAAPI tcuTexRefSetFilterMode(CUtexref hTexRef, CUfilter_mode fm);
+typedef CUresult CUDAAPI tcuTexRefSetMipmapFilterMode(CUtexref hTexRef, CUfilter_mode fm);
+typedef CUresult CUDAAPI tcuTexRefSetMipmapLevelBias(CUtexref hTexRef, float bias);
+typedef CUresult CUDAAPI tcuTexRefSetMipmapLevelClamp(CUtexref hTexRef, float minMipmapLevelClamp, float maxMipmapLevelClamp);
+typedef CUresult CUDAAPI tcuTexRefSetMaxAnisotropy(CUtexref hTexRef, unsigned maxAniso);
+typedef CUresult CUDAAPI tcuTexRefSetFlags(CUtexref hTexRef, unsigned Flags);
+typedef CUresult CUDAAPI tcuTexRefGetAddress_v2(CUdeviceptr* pdptr, CUtexref hTexRef);
+typedef CUresult CUDAAPI tcuTexRefGetArray(CUarray* phArray, CUtexref hTexRef);
+typedef CUresult CUDAAPI tcuTexRefGetMipmappedArray(CUmipmappedArray* phMipmappedArray, CUtexref hTexRef);
+typedef CUresult CUDAAPI tcuTexRefGetAddressMode(CUaddress_mode* pam, CUtexref hTexRef, int dim);
+typedef CUresult CUDAAPI tcuTexRefGetFilterMode(CUfilter_mode* pfm, CUtexref hTexRef);
+typedef CUresult CUDAAPI tcuTexRefGetFormat(CUarray_format* pFormat, int* pNumChannels, CUtexref hTexRef);
+typedef CUresult CUDAAPI tcuTexRefGetMipmapFilterMode(CUfilter_mode* pfm, CUtexref hTexRef);
+typedef CUresult CUDAAPI tcuTexRefGetMipmapLevelBias(float* pbias, CUtexref hTexRef);
+typedef CUresult CUDAAPI tcuTexRefGetMipmapLevelClamp(float* pminMipmapLevelClamp, float* pmaxMipmapLevelClamp, CUtexref hTexRef);
+typedef CUresult CUDAAPI tcuTexRefGetMaxAnisotropy(int* pmaxAniso, CUtexref hTexRef);
+typedef CUresult CUDAAPI tcuTexRefGetFlags(unsigned* pFlags, CUtexref hTexRef);
+typedef CUresult CUDAAPI tcuTexRefCreate(CUtexref* pTexRef);
+typedef CUresult CUDAAPI tcuTexRefDestroy(CUtexref hTexRef);
+typedef CUresult CUDAAPI tcuSurfRefSetArray(CUsurfref hSurfRef, CUarray hArray, unsigned Flags);
+typedef CUresult CUDAAPI tcuSurfRefGetArray(CUarray* phArray, CUsurfref hSurfRef);
+typedef CUresult CUDAAPI tcuTexObjectCreate(CUtexObject* pTexObject, const CUDA_RESOURCE_DESC* pResDesc, const CUDA_TEXTURE_DESC* pTexDesc, const CUDA_RESOURCE_VIEW_DESC* pResViewDesc);
+typedef CUresult CUDAAPI tcuTexObjectDestroy(CUtexObject texObject);
+typedef CUresult CUDAAPI tcuTexObjectGetResourceDesc(CUDA_RESOURCE_DESC* pResDesc, CUtexObject texObject);
+typedef CUresult CUDAAPI tcuTexObjectGetTextureDesc(CUDA_TEXTURE_DESC* pTexDesc, CUtexObject texObject);
+typedef CUresult CUDAAPI tcuTexObjectGetResourceViewDesc(CUDA_RESOURCE_VIEW_DESC* pResViewDesc, CUtexObject texObject);
+typedef CUresult CUDAAPI tcuSurfObjectCreate(CUsurfObject* pSurfObject, const CUDA_RESOURCE_DESC* pResDesc);
+typedef CUresult CUDAAPI tcuSurfObjectDestroy(CUsurfObject surfObject);
+typedef CUresult CUDAAPI tcuSurfObjectGetResourceDesc(CUDA_RESOURCE_DESC* pResDesc, CUsurfObject surfObject);
+typedef CUresult CUDAAPI tcuDeviceCanAccessPeer(int* canAccessPeer, CUdevice dev, CUdevice peerDev);
+typedef CUresult CUDAAPI tcuCtxEnablePeerAccess(CUcontext peerContext, unsigned Flags);
+typedef CUresult CUDAAPI tcuCtxDisablePeerAccess(CUcontext peerContext);
+typedef CUresult CUDAAPI tcuGraphicsUnregisterResource(CUgraphicsResource resource);
+typedef CUresult CUDAAPI tcuGraphicsSubResourceGetMappedArray(CUarray* pArray, CUgraphicsResource resource, unsigned arrayIndex, unsigned mipLevel);
+typedef CUresult CUDAAPI tcuGraphicsResourceGetMappedMipmappedArray(CUmipmappedArray* pMipmappedArray, CUgraphicsResource resource);
+typedef CUresult CUDAAPI tcuGraphicsResourceGetMappedPointer_v2(CUdeviceptr* pDevPtr, size_t* pSize, CUgraphicsResource resource);
+typedef CUresult CUDAAPI tcuGraphicsResourceSetMapFlags(CUgraphicsResource resource, unsigned flags);
+typedef CUresult CUDAAPI tcuGraphicsMapResources(unsigned count, CUgraphicsResource* resources, CUstream hStream);
+typedef CUresult CUDAAPI tcuGraphicsUnmapResources(unsigned count, CUgraphicsResource* resources, CUstream hStream);
+typedef CUresult CUDAAPI tcuGetExportTable(const void* ppExportTable, const CUuuid* pExportTableId);
+
+typedef CUresult CUDAAPI tcuGraphicsGLRegisterBuffer(CUgraphicsResource* pCudaResource, GLuint buffer, unsigned Flags);
+typedef CUresult CUDAAPI tcuGraphicsGLRegisterImage(CUgraphicsResource* pCudaResource, GLuint image, GLenum target, unsigned Flags);
+typedef CUresult CUDAAPI tcuGLGetDevices(unsigned* pCudaDeviceCount, CUdevice* pCudaDevices, unsigned cudaDeviceCount, CUGLDeviceList deviceList);
+typedef CUresult CUDAAPI tcuGLCtxCreate_v2(CUcontext* pCtx, unsigned Flags, CUdevice device);
+typedef CUresult CUDAAPI tcuGLInit(void);
+typedef CUresult CUDAAPI tcuGLRegisterBufferObject(GLuint buffer);
+typedef CUresult CUDAAPI tcuGLMapBufferObject_v2(CUdeviceptr* dptr, size_t* size, GLuint buffer);
+typedef CUresult CUDAAPI tcuGLUnmapBufferObject(GLuint buffer);
+typedef CUresult CUDAAPI tcuGLUnregisterBufferObject(GLuint buffer);
+typedef CUresult CUDAAPI tcuGLSetBufferObjectMapFlags(GLuint buffer, unsigned Flags);
+typedef CUresult CUDAAPI tcuGLMapBufferObjectAsync_v2(CUdeviceptr* dptr, size_t* size, GLuint buffer, CUstream hStream);
+typedef CUresult CUDAAPI tcuGLUnmapBufferObjectAsync(GLuint buffer, CUstream hStream);
+
+
+/* Function declarations. */
+extern tcuGetErrorString *cuGetErrorString;
+extern tcuGetErrorName *cuGetErrorName;
+extern tcuInit *cuInit;
+extern tcuDriverGetVersion *cuDriverGetVersion;
+extern tcuDeviceGet *cuDeviceGet;
+extern tcuDeviceGetCount *cuDeviceGetCount;
+extern tcuDeviceGetName *cuDeviceGetName;
+extern tcuDeviceTotalMem_v2 *cuDeviceTotalMem_v2;
+extern tcuDeviceGetAttribute *cuDeviceGetAttribute;
+extern tcuDeviceGetProperties *cuDeviceGetProperties;
+extern tcuDeviceComputeCapability *cuDeviceComputeCapability;
+extern tcuCtxCreate_v2 *cuCtxCreate_v2;
+extern tcuCtxDestroy_v2 *cuCtxDestroy_v2;
+extern tcuCtxPushCurrent_v2 *cuCtxPushCurrent_v2;
+extern tcuCtxPopCurrent_v2 *cuCtxPopCurrent_v2;
+extern tcuCtxSetCurrent *cuCtxSetCurrent;
+extern tcuCtxGetCurrent *cuCtxGetCurrent;
+extern tcuCtxGetDevice *cuCtxGetDevice;
+extern tcuCtxSynchronize *cuCtxSynchronize;
+extern tcuCtxSetLimit *cuCtxSetLimit;
+extern tcuCtxGetLimit *cuCtxGetLimit;
+extern tcuCtxGetCacheConfig *cuCtxGetCacheConfig;
+extern tcuCtxSetCacheConfig *cuCtxSetCacheConfig;
+extern tcuCtxGetSharedMemConfig *cuCtxGetSharedMemConfig;
+extern tcuCtxSetSharedMemConfig *cuCtxSetSharedMemConfig;
+extern tcuCtxGetApiVersion *cuCtxGetApiVersion;
+extern tcuCtxGetStreamPriorityRange *cuCtxGetStreamPriorityRange;
+extern tcuCtxAttach *cuCtxAttach;
+extern tcuCtxDetach *cuCtxDetach;
+extern tcuModuleLoad *cuModuleLoad;
+extern tcuModuleLoadData *cuModuleLoadData;
+extern tcuModuleLoadDataEx *cuModuleLoadDataEx;
+extern tcuModuleLoadFatBinary *cuModuleLoadFatBinary;
+extern tcuModuleUnload *cuModuleUnload;
+extern tcuModuleGetFunction *cuModuleGetFunction;
+extern tcuModuleGetGlobal_v2 *cuModuleGetGlobal_v2;
+extern tcuModuleGetTexRef *cuModuleGetTexRef;
+extern tcuModuleGetSurfRef *cuModuleGetSurfRef;
+extern tcuLinkCreate *cuLinkCreate;
+extern tcuLinkAddData *cuLinkAddData;
+extern tcuLinkAddFile *cuLinkAddFile;
+extern tcuLinkComplete *cuLinkComplete;
+extern tcuLinkDestroy *cuLinkDestroy;
+extern tcuMemGetInfo_v2 *cuMemGetInfo_v2;
+extern tcuMemAlloc_v2 *cuMemAlloc_v2;
+extern tcuMemAllocPitch_v2 *cuMemAllocPitch_v2;
+extern tcuMemFree_v2 *cuMemFree_v2;
+extern tcuMemGetAddressRange_v2 *cuMemGetAddressRange_v2;
+extern tcuMemAllocHost_v2 *cuMemAllocHost_v2;
+extern tcuMemFreeHost *cuMemFreeHost;
+extern tcuMemHostAlloc *cuMemHostAlloc;
+extern tcuMemHostGetDevicePointer_v2 *cuMemHostGetDevicePointer_v2;
+extern tcuMemHostGetFlags *cuMemHostGetFlags;
+extern tcuMemAllocManaged *cuMemAllocManaged;
+extern tcuDeviceGetByPCIBusId *cuDeviceGetByPCIBusId;
+extern tcuDeviceGetPCIBusId *cuDeviceGetPCIBusId;
+extern tcuIpcGetEventHandle *cuIpcGetEventHandle;
+extern tcuIpcOpenEventHandle *cuIpcOpenEventHandle;
+extern tcuIpcGetMemHandle *cuIpcGetMemHandle;
+extern tcuIpcOpenMemHandle *cuIpcOpenMemHandle;
+extern tcuIpcCloseMemHandle *cuIpcCloseMemHandle;
+extern tcuMemHostRegister *cuMemHostRegister;
+extern tcuMemHostUnregister *cuMemHostUnregister;
+extern tcuMemcpy *cuMemcpy;
+extern tcuMemcpyPeer *cuMemcpyPeer;
+extern tcuMemcpyHtoD_v2 *cuMemcpyHtoD_v2;
+extern tcuMemcpyDtoH_v2 *cuMemcpyDtoH_v2;
+extern tcuMemcpyDtoD_v2 *cuMemcpyDtoD_v2;
+extern tcuMemcpyDtoA_v2 *cuMemcpyDtoA_v2;
+extern tcuMemcpyAtoD_v2 *cuMemcpyAtoD_v2;
+extern tcuMemcpyHtoA_v2 *cuMemcpyHtoA_v2;
+extern tcuMemcpyAtoH_v2 *cuMemcpyAtoH_v2;
+extern tcuMemcpyAtoA_v2 *cuMemcpyAtoA_v2;
+extern tcuMemcpy2D_v2 *cuMemcpy2D_v2;
+extern tcuMemcpy2DUnaligned_v2 *cuMemcpy2DUnaligned_v2;
+extern tcuMemcpy3D_v2 *cuMemcpy3D_v2;
+extern tcuMemcpy3DPeer *cuMemcpy3DPeer;
+extern tcuMemcpyAsync *cuMemcpyAsync;
+extern tcuMemcpyPeerAsync *cuMemcpyPeerAsync;
+extern tcuMemcpyHtoDAsync_v2 *cuMemcpyHtoDAsync_v2;
+extern tcuMemcpyDtoHAsync_v2 *cuMemcpyDtoHAsync_v2;
+extern tcuMemcpyDtoDAsync_v2 *cuMemcpyDtoDAsync_v2;
+extern tcuMemcpyHtoAAsync_v2 *cuMemcpyHtoAAsync_v2;
+extern tcuMemcpyAtoHAsync_v2 *cuMemcpyAtoHAsync_v2;
+extern tcuMemcpy2DAsync_v2 *cuMemcpy2DAsync_v2;
+extern tcuMemcpy3DAsync_v2 *cuMemcpy3DAsync_v2;
+extern tcuMemcpy3DPeerAsync *cuMemcpy3DPeerAsync;
+extern tcuMemsetD8_v2 *cuMemsetD8_v2;
+extern tcuMemsetD16_v2 *cuMemsetD16_v2;
+extern tcuMemsetD32_v2 *cuMemsetD32_v2;
+extern tcuMemsetD2D8_v2 *cuMemsetD2D8_v2;
+extern tcuMemsetD2D16_v2 *cuMemsetD2D16_v2;
+extern tcuMemsetD2D32_v2 *cuMemsetD2D32_v2;
+extern tcuMemsetD8Async *cuMemsetD8Async;
+extern tcuMemsetD16Async *cuMemsetD16Async;
+extern tcuMemsetD32Async *cuMemsetD32Async;
+extern tcuMemsetD2D8Async *cuMemsetD2D8Async;
+extern tcuMemsetD2D16Async *cuMemsetD2D16Async;
+extern tcuMemsetD2D32Async *cuMemsetD2D32Async;
+extern tcuArrayCreate_v2 *cuArrayCreate_v2;
+extern tcuArrayGetDescriptor_v2 *cuArrayGetDescriptor_v2;
+extern tcuArrayDestroy *cuArrayDestroy;
+extern tcuArray3DCreate_v2 *cuArray3DCreate_v2;
+extern tcuArray3DGetDescriptor_v2 *cuArray3DGetDescriptor_v2;
+extern tcuMipmappedArrayCreate *cuMipmappedArrayCreate;
+extern tcuMipmappedArrayGetLevel *cuMipmappedArrayGetLevel;
+extern tcuMipmappedArrayDestroy *cuMipmappedArrayDestroy;
+extern tcuPointerGetAttribute *cuPointerGetAttribute;
+extern tcuPointerSetAttribute *cuPointerSetAttribute;
+extern tcuStreamCreate *cuStreamCreate;
+extern tcuStreamCreateWithPriority *cuStreamCreateWithPriority;
+extern tcuStreamGetPriority *cuStreamGetPriority;
+extern tcuStreamGetFlags *cuStreamGetFlags;
+extern tcuStreamWaitEvent *cuStreamWaitEvent;
+extern tcuStreamAddCallback *cuStreamAddCallback;
+extern tcuStreamAttachMemAsync *cuStreamAttachMemAsync;
+extern tcuStreamQuery *cuStreamQuery;
+extern tcuStreamSynchronize *cuStreamSynchronize;
+extern tcuStreamDestroy_v2 *cuStreamDestroy_v2;
+extern tcuEventCreate *cuEventCreate;
+extern tcuEventRecord *cuEventRecord;
+extern tcuEventQuery *cuEventQuery;
+extern tcuEventSynchronize *cuEventSynchronize;
+extern tcuEventDestroy_v2 *cuEventDestroy_v2;
+extern tcuEventElapsedTime *cuEventElapsedTime;
+extern tcuFuncGetAttribute *cuFuncGetAttribute;
+extern tcuFuncSetCacheConfig *cuFuncSetCacheConfig;
+extern tcuFuncSetSharedMemConfig *cuFuncSetSharedMemConfig;
+extern tcuLaunchKernel *cuLaunchKernel;
+extern tcuFuncSetBlockShape *cuFuncSetBlockShape;
+extern tcuFuncSetSharedSize *cuFuncSetSharedSize;
+extern tcuParamSetSize *cuParamSetSize;
+extern tcuParamSeti *cuParamSeti;
+extern tcuParamSetf *cuParamSetf;
+extern tcuParamSetv *cuParamSetv;
+extern tcuLaunch *cuLaunch;
+extern tcuLaunchGrid *cuLaunchGrid;
+extern tcuLaunchGridAsync *cuLaunchGridAsync;
+extern tcuParamSetTexRef *cuParamSetTexRef;
+extern tcuTexRefSetArray *cuTexRefSetArray;
+extern tcuTexRefSetMipmappedArray *cuTexRefSetMipmappedArray;
+extern tcuTexRefSetAddress_v2 *cuTexRefSetAddress_v2;
+extern tcuTexRefSetAddress2D_v3 *cuTexRefSetAddress2D_v3;
+extern tcuTexRefSetFormat *cuTexRefSetFormat;
+extern tcuTexRefSetAddressMode *cuTexRefSetAddressMode;
+extern tcuTexRefSetFilterMode *cuTexRefSetFilterMode;
+extern tcuTexRefSetMipmapFilterMode *cuTexRefSetMipmapFilterMode;
+extern tcuTexRefSetMipmapLevelBias *cuTexRefSetMipmapLevelBias;
+extern tcuTexRefSetMipmapLevelClamp *cuTexRefSetMipmapLevelClamp;
+extern tcuTexRefSetMaxAnisotropy *cuTexRefSetMaxAnisotropy;
+extern tcuTexRefSetFlags *cuTexRefSetFlags;
+extern tcuTexRefGetAddress_v2 *cuTexRefGetAddress_v2;
+extern tcuTexRefGetArray *cuTexRefGetArray;
+extern tcuTexRefGetMipmappedArray *cuTexRefGetMipmappedArray;
+extern tcuTexRefGetAddressMode *cuTexRefGetAddressMode;
+extern tcuTexRefGetFilterMode *cuTexRefGetFilterMode;
+extern tcuTexRefGetFormat *cuTexRefGetFormat;
+extern tcuTexRefGetMipmapFilterMode *cuTexRefGetMipmapFilterMode;
+extern tcuTexRefGetMipmapLevelBias *cuTexRefGetMipmapLevelBias;
+extern tcuTexRefGetMipmapLevelClamp *cuTexRefGetMipmapLevelClamp;
+extern tcuTexRefGetMaxAnisotropy *cuTexRefGetMaxAnisotropy;
+extern tcuTexRefGetFlags *cuTexRefGetFlags;
+extern tcuTexRefCreate *cuTexRefCreate;
+extern tcuTexRefDestroy *cuTexRefDestroy;
+extern tcuSurfRefSetArray *cuSurfRefSetArray;
+extern tcuSurfRefGetArray *cuSurfRefGetArray;
+extern tcuTexObjectCreate *cuTexObjectCreate;
+extern tcuTexObjectDestroy *cuTexObjectDestroy;
+extern tcuTexObjectGetResourceDesc *cuTexObjectGetResourceDesc;
+extern tcuTexObjectGetTextureDesc *cuTexObjectGetTextureDesc;
+extern tcuTexObjectGetResourceViewDesc *cuTexObjectGetResourceViewDesc;
+extern tcuSurfObjectCreate *cuSurfObjectCreate;
+extern tcuSurfObjectDestroy *cuSurfObjectDestroy;
+extern tcuSurfObjectGetResourceDesc *cuSurfObjectGetResourceDesc;
+extern tcuDeviceCanAccessPeer *cuDeviceCanAccessPeer;
+extern tcuCtxEnablePeerAccess *cuCtxEnablePeerAccess;
+extern tcuCtxDisablePeerAccess *cuCtxDisablePeerAccess;
+extern tcuGraphicsUnregisterResource *cuGraphicsUnregisterResource;
+extern tcuGraphicsSubResourceGetMappedArray *cuGraphicsSubResourceGetMappedArray;
+extern tcuGraphicsResourceGetMappedMipmappedArray *cuGraphicsResourceGetMappedMipmappedArray;
+extern tcuGraphicsResourceGetMappedPointer_v2 *cuGraphicsResourceGetMappedPointer_v2;
+extern tcuGraphicsResourceSetMapFlags *cuGraphicsResourceSetMapFlags;
+extern tcuGraphicsMapResources *cuGraphicsMapResources;
+extern tcuGraphicsUnmapResources *cuGraphicsUnmapResources;
+extern tcuGetExportTable *cuGetExportTable;
+
+extern tcuGraphicsGLRegisterBuffer *cuGraphicsGLRegisterBuffer;
+extern tcuGraphicsGLRegisterImage *cuGraphicsGLRegisterImage;
+extern tcuGLGetDevices *cuGLGetDevices;
+extern tcuGLCtxCreate_v2 *cuGLCtxCreate_v2;
+extern tcuGLInit *cuGLInit;
+extern tcuGLRegisterBufferObject *cuGLRegisterBufferObject;
+extern tcuGLMapBufferObject_v2 *cuGLMapBufferObject_v2;
+extern tcuGLUnmapBufferObject *cuGLUnmapBufferObject;
+extern tcuGLUnregisterBufferObject *cuGLUnregisterBufferObject;
+extern tcuGLSetBufferObjectMapFlags *cuGLSetBufferObjectMapFlags;
+extern tcuGLMapBufferObjectAsync_v2 *cuGLMapBufferObjectAsync_v2;
+extern tcuGLUnmapBufferObjectAsync *cuGLUnmapBufferObjectAsync;
+
+
+enum {
+ CUEW_SUCCESS = 0,
+ CUEW_ERROR_OPEN_FAILED = -1,
+ CUEW_ERROR_ATEXIT_FAILED = -2,
+};
+
+int cuewInit(void);
+const char *cuewErrorString(CUresult result);
+const char *cuewCompilerPath(void);
+int cuewCompilerVersion(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __CUEW_H__ */
diff --git a/extern/cuew/src/cuew.c b/extern/cuew/src/cuew.c
new file mode 100644
index 00000000000..da892efc0f4
--- /dev/null
+++ b/extern/cuew/src/cuew.c
@@ -0,0 +1,710 @@
+/*
+ * Copyright 2011-2014 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License
+ */
+
+#ifdef _MSC_VER
+# define snprintf _snprintf
+# define popen _popen
+# define pclose _pclose
+# define _CRT_SECURE_NO_WARNINGS
+#endif
+
+#include <cuew.h>
+#include <assert.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/stat.h>
+
+#ifdef _WIN32
+# define WIN32_LEAN_AND_MEAN
+# define VC_EXTRALEAN
+# include <windows.h>
+
+/* Utility macros. */
+
+typedef HMODULE DynamicLibrary;
+
+# define dynamic_library_open(path) LoadLibrary(path)
+# define dynamic_library_close(lib) FreeLibrary(lib)
+# define dynamic_library_find(lib, symbol) GetProcAddress(lib, symbol)
+#else
+# include <dlfcn.h>
+
+typedef void* DynamicLibrary;
+
+# define dynamic_library_open(path) dlopen(path, RTLD_NOW)
+# define dynamic_library_close(lib) dlclose(lib)
+# define dynamic_library_find(lib, symbol) dlsym(lib, symbol)
+#endif
+
+#define CUDA_LIBRARY_FIND_CHECKED(name) \
+ name = (t##name *)dynamic_library_find(lib, #name); \
+ assert(name);
+
+#define CUDA_LIBRARY_FIND(name) \
+ name = (t##name *)dynamic_library_find(lib, #name);
+
+static DynamicLibrary lib;
+
+/* Function definitions. */
+tcuGetErrorString *cuGetErrorString;
+tcuGetErrorName *cuGetErrorName;
+tcuInit *cuInit;
+tcuDriverGetVersion *cuDriverGetVersion;
+tcuDeviceGet *cuDeviceGet;
+tcuDeviceGetCount *cuDeviceGetCount;
+tcuDeviceGetName *cuDeviceGetName;
+tcuDeviceTotalMem_v2 *cuDeviceTotalMem_v2;
+tcuDeviceGetAttribute *cuDeviceGetAttribute;
+tcuDeviceGetProperties *cuDeviceGetProperties;
+tcuDeviceComputeCapability *cuDeviceComputeCapability;
+tcuCtxCreate_v2 *cuCtxCreate_v2;
+tcuCtxDestroy_v2 *cuCtxDestroy_v2;
+tcuCtxPushCurrent_v2 *cuCtxPushCurrent_v2;
+tcuCtxPopCurrent_v2 *cuCtxPopCurrent_v2;
+tcuCtxSetCurrent *cuCtxSetCurrent;
+tcuCtxGetCurrent *cuCtxGetCurrent;
+tcuCtxGetDevice *cuCtxGetDevice;
+tcuCtxSynchronize *cuCtxSynchronize;
+tcuCtxSetLimit *cuCtxSetLimit;
+tcuCtxGetLimit *cuCtxGetLimit;
+tcuCtxGetCacheConfig *cuCtxGetCacheConfig;
+tcuCtxSetCacheConfig *cuCtxSetCacheConfig;
+tcuCtxGetSharedMemConfig *cuCtxGetSharedMemConfig;
+tcuCtxSetSharedMemConfig *cuCtxSetSharedMemConfig;
+tcuCtxGetApiVersion *cuCtxGetApiVersion;
+tcuCtxGetStreamPriorityRange *cuCtxGetStreamPriorityRange;
+tcuCtxAttach *cuCtxAttach;
+tcuCtxDetach *cuCtxDetach;
+tcuModuleLoad *cuModuleLoad;
+tcuModuleLoadData *cuModuleLoadData;
+tcuModuleLoadDataEx *cuModuleLoadDataEx;
+tcuModuleLoadFatBinary *cuModuleLoadFatBinary;
+tcuModuleUnload *cuModuleUnload;
+tcuModuleGetFunction *cuModuleGetFunction;
+tcuModuleGetGlobal_v2 *cuModuleGetGlobal_v2;
+tcuModuleGetTexRef *cuModuleGetTexRef;
+tcuModuleGetSurfRef *cuModuleGetSurfRef;
+tcuLinkCreate *cuLinkCreate;
+tcuLinkAddData *cuLinkAddData;
+tcuLinkAddFile *cuLinkAddFile;
+tcuLinkComplete *cuLinkComplete;
+tcuLinkDestroy *cuLinkDestroy;
+tcuMemGetInfo_v2 *cuMemGetInfo_v2;
+tcuMemAlloc_v2 *cuMemAlloc_v2;
+tcuMemAllocPitch_v2 *cuMemAllocPitch_v2;
+tcuMemFree_v2 *cuMemFree_v2;
+tcuMemGetAddressRange_v2 *cuMemGetAddressRange_v2;
+tcuMemAllocHost_v2 *cuMemAllocHost_v2;
+tcuMemFreeHost *cuMemFreeHost;
+tcuMemHostAlloc *cuMemHostAlloc;
+tcuMemHostGetDevicePointer_v2 *cuMemHostGetDevicePointer_v2;
+tcuMemHostGetFlags *cuMemHostGetFlags;
+tcuMemAllocManaged *cuMemAllocManaged;
+tcuDeviceGetByPCIBusId *cuDeviceGetByPCIBusId;
+tcuDeviceGetPCIBusId *cuDeviceGetPCIBusId;
+tcuIpcGetEventHandle *cuIpcGetEventHandle;
+tcuIpcOpenEventHandle *cuIpcOpenEventHandle;
+tcuIpcGetMemHandle *cuIpcGetMemHandle;
+tcuIpcOpenMemHandle *cuIpcOpenMemHandle;
+tcuIpcCloseMemHandle *cuIpcCloseMemHandle;
+tcuMemHostRegister *cuMemHostRegister;
+tcuMemHostUnregister *cuMemHostUnregister;
+tcuMemcpy *cuMemcpy;
+tcuMemcpyPeer *cuMemcpyPeer;
+tcuMemcpyHtoD_v2 *cuMemcpyHtoD_v2;
+tcuMemcpyDtoH_v2 *cuMemcpyDtoH_v2;
+tcuMemcpyDtoD_v2 *cuMemcpyDtoD_v2;
+tcuMemcpyDtoA_v2 *cuMemcpyDtoA_v2;
+tcuMemcpyAtoD_v2 *cuMemcpyAtoD_v2;
+tcuMemcpyHtoA_v2 *cuMemcpyHtoA_v2;
+tcuMemcpyAtoH_v2 *cuMemcpyAtoH_v2;
+tcuMemcpyAtoA_v2 *cuMemcpyAtoA_v2;
+tcuMemcpy2D_v2 *cuMemcpy2D_v2;
+tcuMemcpy2DUnaligned_v2 *cuMemcpy2DUnaligned_v2;
+tcuMemcpy3D_v2 *cuMemcpy3D_v2;
+tcuMemcpy3DPeer *cuMemcpy3DPeer;
+tcuMemcpyAsync *cuMemcpyAsync;
+tcuMemcpyPeerAsync *cuMemcpyPeerAsync;
+tcuMemcpyHtoDAsync_v2 *cuMemcpyHtoDAsync_v2;
+tcuMemcpyDtoHAsync_v2 *cuMemcpyDtoHAsync_v2;
+tcuMemcpyDtoDAsync_v2 *cuMemcpyDtoDAsync_v2;
+tcuMemcpyHtoAAsync_v2 *cuMemcpyHtoAAsync_v2;
+tcuMemcpyAtoHAsync_v2 *cuMemcpyAtoHAsync_v2;
+tcuMemcpy2DAsync_v2 *cuMemcpy2DAsync_v2;
+tcuMemcpy3DAsync_v2 *cuMemcpy3DAsync_v2;
+tcuMemcpy3DPeerAsync *cuMemcpy3DPeerAsync;
+tcuMemsetD8_v2 *cuMemsetD8_v2;
+tcuMemsetD16_v2 *cuMemsetD16_v2;
+tcuMemsetD32_v2 *cuMemsetD32_v2;
+tcuMemsetD2D8_v2 *cuMemsetD2D8_v2;
+tcuMemsetD2D16_v2 *cuMemsetD2D16_v2;
+tcuMemsetD2D32_v2 *cuMemsetD2D32_v2;
+tcuMemsetD8Async *cuMemsetD8Async;
+tcuMemsetD16Async *cuMemsetD16Async;
+tcuMemsetD32Async *cuMemsetD32Async;
+tcuMemsetD2D8Async *cuMemsetD2D8Async;
+tcuMemsetD2D16Async *cuMemsetD2D16Async;
+tcuMemsetD2D32Async *cuMemsetD2D32Async;
+tcuArrayCreate_v2 *cuArrayCreate_v2;
+tcuArrayGetDescriptor_v2 *cuArrayGetDescriptor_v2;
+tcuArrayDestroy *cuArrayDestroy;
+tcuArray3DCreate_v2 *cuArray3DCreate_v2;
+tcuArray3DGetDescriptor_v2 *cuArray3DGetDescriptor_v2;
+tcuMipmappedArrayCreate *cuMipmappedArrayCreate;
+tcuMipmappedArrayGetLevel *cuMipmappedArrayGetLevel;
+tcuMipmappedArrayDestroy *cuMipmappedArrayDestroy;
+tcuPointerGetAttribute *cuPointerGetAttribute;
+tcuPointerSetAttribute *cuPointerSetAttribute;
+tcuStreamCreate *cuStreamCreate;
+tcuStreamCreateWithPriority *cuStreamCreateWithPriority;
+tcuStreamGetPriority *cuStreamGetPriority;
+tcuStreamGetFlags *cuStreamGetFlags;
+tcuStreamWaitEvent *cuStreamWaitEvent;
+tcuStreamAddCallback *cuStreamAddCallback;
+tcuStreamAttachMemAsync *cuStreamAttachMemAsync;
+tcuStreamQuery *cuStreamQuery;
+tcuStreamSynchronize *cuStreamSynchronize;
+tcuStreamDestroy_v2 *cuStreamDestroy_v2;
+tcuEventCreate *cuEventCreate;
+tcuEventRecord *cuEventRecord;
+tcuEventQuery *cuEventQuery;
+tcuEventSynchronize *cuEventSynchronize;
+tcuEventDestroy_v2 *cuEventDestroy_v2;
+tcuEventElapsedTime *cuEventElapsedTime;
+tcuFuncGetAttribute *cuFuncGetAttribute;
+tcuFuncSetCacheConfig *cuFuncSetCacheConfig;
+tcuFuncSetSharedMemConfig *cuFuncSetSharedMemConfig;
+tcuLaunchKernel *cuLaunchKernel;
+tcuFuncSetBlockShape *cuFuncSetBlockShape;
+tcuFuncSetSharedSize *cuFuncSetSharedSize;
+tcuParamSetSize *cuParamSetSize;
+tcuParamSeti *cuParamSeti;
+tcuParamSetf *cuParamSetf;
+tcuParamSetv *cuParamSetv;
+tcuLaunch *cuLaunch;
+tcuLaunchGrid *cuLaunchGrid;
+tcuLaunchGridAsync *cuLaunchGridAsync;
+tcuParamSetTexRef *cuParamSetTexRef;
+tcuTexRefSetArray *cuTexRefSetArray;
+tcuTexRefSetMipmappedArray *cuTexRefSetMipmappedArray;
+tcuTexRefSetAddress_v2 *cuTexRefSetAddress_v2;
+tcuTexRefSetAddress2D_v3 *cuTexRefSetAddress2D_v3;
+tcuTexRefSetFormat *cuTexRefSetFormat;
+tcuTexRefSetAddressMode *cuTexRefSetAddressMode;
+tcuTexRefSetFilterMode *cuTexRefSetFilterMode;
+tcuTexRefSetMipmapFilterMode *cuTexRefSetMipmapFilterMode;
+tcuTexRefSetMipmapLevelBias *cuTexRefSetMipmapLevelBias;
+tcuTexRefSetMipmapLevelClamp *cuTexRefSetMipmapLevelClamp;
+tcuTexRefSetMaxAnisotropy *cuTexRefSetMaxAnisotropy;
+tcuTexRefSetFlags *cuTexRefSetFlags;
+tcuTexRefGetAddress_v2 *cuTexRefGetAddress_v2;
+tcuTexRefGetArray *cuTexRefGetArray;
+tcuTexRefGetMipmappedArray *cuTexRefGetMipmappedArray;
+tcuTexRefGetAddressMode *cuTexRefGetAddressMode;
+tcuTexRefGetFilterMode *cuTexRefGetFilterMode;
+tcuTexRefGetFormat *cuTexRefGetFormat;
+tcuTexRefGetMipmapFilterMode *cuTexRefGetMipmapFilterMode;
+tcuTexRefGetMipmapLevelBias *cuTexRefGetMipmapLevelBias;
+tcuTexRefGetMipmapLevelClamp *cuTexRefGetMipmapLevelClamp;
+tcuTexRefGetMaxAnisotropy *cuTexRefGetMaxAnisotropy;
+tcuTexRefGetFlags *cuTexRefGetFlags;
+tcuTexRefCreate *cuTexRefCreate;
+tcuTexRefDestroy *cuTexRefDestroy;
+tcuSurfRefSetArray *cuSurfRefSetArray;
+tcuSurfRefGetArray *cuSurfRefGetArray;
+tcuTexObjectCreate *cuTexObjectCreate;
+tcuTexObjectDestroy *cuTexObjectDestroy;
+tcuTexObjectGetResourceDesc *cuTexObjectGetResourceDesc;
+tcuTexObjectGetTextureDesc *cuTexObjectGetTextureDesc;
+tcuTexObjectGetResourceViewDesc *cuTexObjectGetResourceViewDesc;
+tcuSurfObjectCreate *cuSurfObjectCreate;
+tcuSurfObjectDestroy *cuSurfObjectDestroy;
+tcuSurfObjectGetResourceDesc *cuSurfObjectGetResourceDesc;
+tcuDeviceCanAccessPeer *cuDeviceCanAccessPeer;
+tcuCtxEnablePeerAccess *cuCtxEnablePeerAccess;
+tcuCtxDisablePeerAccess *cuCtxDisablePeerAccess;
+tcuGraphicsUnregisterResource *cuGraphicsUnregisterResource;
+tcuGraphicsSubResourceGetMappedArray *cuGraphicsSubResourceGetMappedArray;
+tcuGraphicsResourceGetMappedMipmappedArray *cuGraphicsResourceGetMappedMipmappedArray;
+tcuGraphicsResourceGetMappedPointer_v2 *cuGraphicsResourceGetMappedPointer_v2;
+tcuGraphicsResourceSetMapFlags *cuGraphicsResourceSetMapFlags;
+tcuGraphicsMapResources *cuGraphicsMapResources;
+tcuGraphicsUnmapResources *cuGraphicsUnmapResources;
+tcuGetExportTable *cuGetExportTable;
+
+tcuGraphicsGLRegisterBuffer *cuGraphicsGLRegisterBuffer;
+tcuGraphicsGLRegisterImage *cuGraphicsGLRegisterImage;
+tcuGLGetDevices *cuGLGetDevices;
+tcuGLCtxCreate_v2 *cuGLCtxCreate_v2;
+tcuGLInit *cuGLInit;
+tcuGLRegisterBufferObject *cuGLRegisterBufferObject;
+tcuGLMapBufferObject_v2 *cuGLMapBufferObject_v2;
+tcuGLUnmapBufferObject *cuGLUnmapBufferObject;
+tcuGLUnregisterBufferObject *cuGLUnregisterBufferObject;
+tcuGLSetBufferObjectMapFlags *cuGLSetBufferObjectMapFlags;
+tcuGLMapBufferObjectAsync_v2 *cuGLMapBufferObjectAsync_v2;
+tcuGLUnmapBufferObjectAsync *cuGLUnmapBufferObjectAsync;
+
+
+static void cuewExit(void) {
+ if(lib != NULL) {
+ /* Ignore errors. */
+ dynamic_library_close(lib);
+ lib = NULL;
+ }
+}
+
+/* Implementation function. */
+int cuewInit(void) {
+ /* Library paths. */
+#ifdef _WIN32
+ /* Expected in c:/windows/system or similar, no path needed. */
+ const char *path = "nvcuda.dll";
+#elif defined(__APPLE__)
+ /* Default installation path. */
+ const char *path = "/usr/local/cuda/lib/libcuda.dylib";
+#else
+ const char *path = "libcuda.so";
+#endif
+ static int initialized = 0;
+ static int result = 0;
+ int error, driver_version;
+
+ if (initialized) {
+ return result;
+ }
+
+ initialized = 1;
+
+ error = atexit(cuewExit);
+ if (error) {
+ result = CUEW_ERROR_ATEXIT_FAILED;
+ return result;
+ }
+
+ /* Load library. */
+ lib = dynamic_library_open(path);
+
+ if (lib == NULL) {
+ result = CUEW_ERROR_OPEN_FAILED;
+ return result;
+ }
+
+ /* Detect driver version. */
+ driver_version = 1000;
+
+ CUDA_LIBRARY_FIND_CHECKED(cuDriverGetVersion);
+ if (cuDriverGetVersion) {
+ cuDriverGetVersion(&driver_version);
+ }
+
+ /* We require version 4.0. */
+ if (driver_version < 4000) {
+ result = CUEW_ERROR_OPEN_FAILED;
+ return result;
+ }
+ /* Fetch all function pointers. */
+ CUDA_LIBRARY_FIND(cuGetErrorString);
+ CUDA_LIBRARY_FIND(cuGetErrorName);
+ CUDA_LIBRARY_FIND(cuInit);
+ CUDA_LIBRARY_FIND(cuDriverGetVersion);
+ CUDA_LIBRARY_FIND(cuDeviceGet);
+ CUDA_LIBRARY_FIND(cuDeviceGetCount);
+ CUDA_LIBRARY_FIND(cuDeviceGetName);
+ CUDA_LIBRARY_FIND(cuDeviceTotalMem_v2);
+ CUDA_LIBRARY_FIND(cuDeviceGetAttribute);
+ CUDA_LIBRARY_FIND(cuDeviceGetProperties);
+ CUDA_LIBRARY_FIND(cuDeviceComputeCapability);
+ CUDA_LIBRARY_FIND(cuCtxCreate_v2);
+ CUDA_LIBRARY_FIND(cuCtxDestroy_v2);
+ CUDA_LIBRARY_FIND(cuCtxPushCurrent_v2);
+ CUDA_LIBRARY_FIND(cuCtxPopCurrent_v2);
+ CUDA_LIBRARY_FIND(cuCtxSetCurrent);
+ CUDA_LIBRARY_FIND(cuCtxGetCurrent);
+ CUDA_LIBRARY_FIND(cuCtxGetDevice);
+ CUDA_LIBRARY_FIND(cuCtxSynchronize);
+ CUDA_LIBRARY_FIND(cuCtxSetLimit);
+ CUDA_LIBRARY_FIND(cuCtxGetLimit);
+ CUDA_LIBRARY_FIND(cuCtxGetCacheConfig);
+ CUDA_LIBRARY_FIND(cuCtxSetCacheConfig);
+ CUDA_LIBRARY_FIND(cuCtxGetSharedMemConfig);
+ CUDA_LIBRARY_FIND(cuCtxSetSharedMemConfig);
+ CUDA_LIBRARY_FIND(cuCtxGetApiVersion);
+ CUDA_LIBRARY_FIND(cuCtxGetStreamPriorityRange);
+ CUDA_LIBRARY_FIND(cuCtxAttach);
+ CUDA_LIBRARY_FIND(cuCtxDetach);
+ CUDA_LIBRARY_FIND(cuModuleLoad);
+ CUDA_LIBRARY_FIND(cuModuleLoadData);
+ CUDA_LIBRARY_FIND(cuModuleLoadDataEx);
+ CUDA_LIBRARY_FIND(cuModuleLoadFatBinary);
+ CUDA_LIBRARY_FIND(cuModuleUnload);
+ CUDA_LIBRARY_FIND(cuModuleGetFunction);
+ CUDA_LIBRARY_FIND(cuModuleGetGlobal_v2);
+ CUDA_LIBRARY_FIND(cuModuleGetTexRef);
+ CUDA_LIBRARY_FIND(cuModuleGetSurfRef);
+ CUDA_LIBRARY_FIND(cuLinkCreate);
+ CUDA_LIBRARY_FIND(cuLinkAddData);
+ CUDA_LIBRARY_FIND(cuLinkAddFile);
+ CUDA_LIBRARY_FIND(cuLinkComplete);
+ CUDA_LIBRARY_FIND(cuLinkDestroy);
+ CUDA_LIBRARY_FIND(cuMemGetInfo_v2);
+ CUDA_LIBRARY_FIND(cuMemAlloc_v2);
+ CUDA_LIBRARY_FIND(cuMemAllocPitch_v2);
+ CUDA_LIBRARY_FIND(cuMemFree_v2);
+ CUDA_LIBRARY_FIND(cuMemGetAddressRange_v2);
+ CUDA_LIBRARY_FIND(cuMemAllocHost_v2);
+ CUDA_LIBRARY_FIND(cuMemFreeHost);
+ CUDA_LIBRARY_FIND(cuMemHostAlloc);
+ CUDA_LIBRARY_FIND(cuMemHostGetDevicePointer_v2);
+ CUDA_LIBRARY_FIND(cuMemHostGetFlags);
+ CUDA_LIBRARY_FIND(cuMemAllocManaged);
+ CUDA_LIBRARY_FIND(cuDeviceGetByPCIBusId);
+ CUDA_LIBRARY_FIND(cuDeviceGetPCIBusId);
+ CUDA_LIBRARY_FIND(cuIpcGetEventHandle);
+ CUDA_LIBRARY_FIND(cuIpcOpenEventHandle);
+ CUDA_LIBRARY_FIND(cuIpcGetMemHandle);
+ CUDA_LIBRARY_FIND(cuIpcOpenMemHandle);
+ CUDA_LIBRARY_FIND(cuIpcCloseMemHandle);
+ CUDA_LIBRARY_FIND(cuMemHostRegister);
+ CUDA_LIBRARY_FIND(cuMemHostUnregister);
+ CUDA_LIBRARY_FIND(cuMemcpy);
+ CUDA_LIBRARY_FIND(cuMemcpyPeer);
+ CUDA_LIBRARY_FIND(cuMemcpyHtoD_v2);
+ CUDA_LIBRARY_FIND(cuMemcpyDtoH_v2);
+ CUDA_LIBRARY_FIND(cuMemcpyDtoD_v2);
+ CUDA_LIBRARY_FIND(cuMemcpyDtoA_v2);
+ CUDA_LIBRARY_FIND(cuMemcpyAtoD_v2);
+ CUDA_LIBRARY_FIND(cuMemcpyHtoA_v2);
+ CUDA_LIBRARY_FIND(cuMemcpyAtoH_v2);
+ CUDA_LIBRARY_FIND(cuMemcpyAtoA_v2);
+ CUDA_LIBRARY_FIND(cuMemcpy2D_v2);
+ CUDA_LIBRARY_FIND(cuMemcpy2DUnaligned_v2);
+ CUDA_LIBRARY_FIND(cuMemcpy3D_v2);
+ CUDA_LIBRARY_FIND(cuMemcpy3DPeer);
+ CUDA_LIBRARY_FIND(cuMemcpyAsync);
+ CUDA_LIBRARY_FIND(cuMemcpyPeerAsync);
+ CUDA_LIBRARY_FIND(cuMemcpyHtoDAsync_v2);
+ CUDA_LIBRARY_FIND(cuMemcpyDtoHAsync_v2);
+ CUDA_LIBRARY_FIND(cuMemcpyDtoDAsync_v2);
+ CUDA_LIBRARY_FIND(cuMemcpyHtoAAsync_v2);
+ CUDA_LIBRARY_FIND(cuMemcpyAtoHAsync_v2);
+ CUDA_LIBRARY_FIND(cuMemcpy2DAsync_v2);
+ CUDA_LIBRARY_FIND(cuMemcpy3DAsync_v2);
+ CUDA_LIBRARY_FIND(cuMemcpy3DPeerAsync);
+ CUDA_LIBRARY_FIND(cuMemsetD8_v2);
+ CUDA_LIBRARY_FIND(cuMemsetD16_v2);
+ CUDA_LIBRARY_FIND(cuMemsetD32_v2);
+ CUDA_LIBRARY_FIND(cuMemsetD2D8_v2);
+ CUDA_LIBRARY_FIND(cuMemsetD2D16_v2);
+ CUDA_LIBRARY_FIND(cuMemsetD2D32_v2);
+ CUDA_LIBRARY_FIND(cuMemsetD8Async);
+ CUDA_LIBRARY_FIND(cuMemsetD16Async);
+ CUDA_LIBRARY_FIND(cuMemsetD32Async);
+ CUDA_LIBRARY_FIND(cuMemsetD2D8Async);
+ CUDA_LIBRARY_FIND(cuMemsetD2D16Async);
+ CUDA_LIBRARY_FIND(cuMemsetD2D32Async);
+ CUDA_LIBRARY_FIND(cuArrayCreate_v2);
+ CUDA_LIBRARY_FIND(cuArrayGetDescriptor_v2);
+ CUDA_LIBRARY_FIND(cuArrayDestroy);
+ CUDA_LIBRARY_FIND(cuArray3DCreate_v2);
+ CUDA_LIBRARY_FIND(cuArray3DGetDescriptor_v2);
+ CUDA_LIBRARY_FIND(cuMipmappedArrayCreate);
+ CUDA_LIBRARY_FIND(cuMipmappedArrayGetLevel);
+ CUDA_LIBRARY_FIND(cuMipmappedArrayDestroy);
+ CUDA_LIBRARY_FIND(cuPointerGetAttribute);
+ CUDA_LIBRARY_FIND(cuPointerSetAttribute);
+ CUDA_LIBRARY_FIND(cuStreamCreate);
+ CUDA_LIBRARY_FIND(cuStreamCreateWithPriority);
+ CUDA_LIBRARY_FIND(cuStreamGetPriority);
+ CUDA_LIBRARY_FIND(cuStreamGetFlags);
+ CUDA_LIBRARY_FIND(cuStreamWaitEvent);
+ CUDA_LIBRARY_FIND(cuStreamAddCallback);
+ CUDA_LIBRARY_FIND(cuStreamAttachMemAsync);
+ CUDA_LIBRARY_FIND(cuStreamQuery);
+ CUDA_LIBRARY_FIND(cuStreamSynchronize);
+ CUDA_LIBRARY_FIND(cuStreamDestroy_v2);
+ CUDA_LIBRARY_FIND(cuEventCreate);
+ CUDA_LIBRARY_FIND(cuEventRecord);
+ CUDA_LIBRARY_FIND(cuEventQuery);
+ CUDA_LIBRARY_FIND(cuEventSynchronize);
+ CUDA_LIBRARY_FIND(cuEventDestroy_v2);
+ CUDA_LIBRARY_FIND(cuEventElapsedTime);
+ CUDA_LIBRARY_FIND(cuFuncGetAttribute);
+ CUDA_LIBRARY_FIND(cuFuncSetCacheConfig);
+ CUDA_LIBRARY_FIND(cuFuncSetSharedMemConfig);
+ CUDA_LIBRARY_FIND(cuLaunchKernel);
+ CUDA_LIBRARY_FIND(cuFuncSetBlockShape);
+ CUDA_LIBRARY_FIND(cuFuncSetSharedSize);
+ CUDA_LIBRARY_FIND(cuParamSetSize);
+ CUDA_LIBRARY_FIND(cuParamSeti);
+ CUDA_LIBRARY_FIND(cuParamSetf);
+ CUDA_LIBRARY_FIND(cuParamSetv);
+ CUDA_LIBRARY_FIND(cuLaunch);
+ CUDA_LIBRARY_FIND(cuLaunchGrid);
+ CUDA_LIBRARY_FIND(cuLaunchGridAsync);
+ CUDA_LIBRARY_FIND(cuParamSetTexRef);
+ CUDA_LIBRARY_FIND(cuTexRefSetArray);
+ CUDA_LIBRARY_FIND(cuTexRefSetMipmappedArray);
+ CUDA_LIBRARY_FIND(cuTexRefSetAddress_v2);
+ CUDA_LIBRARY_FIND(cuTexRefSetAddress2D_v3);
+ CUDA_LIBRARY_FIND(cuTexRefSetFormat);
+ CUDA_LIBRARY_FIND(cuTexRefSetAddressMode);
+ CUDA_LIBRARY_FIND(cuTexRefSetFilterMode);
+ CUDA_LIBRARY_FIND(cuTexRefSetMipmapFilterMode);
+ CUDA_LIBRARY_FIND(cuTexRefSetMipmapLevelBias);
+ CUDA_LIBRARY_FIND(cuTexRefSetMipmapLevelClamp);
+ CUDA_LIBRARY_FIND(cuTexRefSetMaxAnisotropy);
+ CUDA_LIBRARY_FIND(cuTexRefSetFlags);
+ CUDA_LIBRARY_FIND(cuTexRefGetAddress_v2);
+ CUDA_LIBRARY_FIND(cuTexRefGetArray);
+ CUDA_LIBRARY_FIND(cuTexRefGetMipmappedArray);
+ CUDA_LIBRARY_FIND(cuTexRefGetAddressMode);
+ CUDA_LIBRARY_FIND(cuTexRefGetFilterMode);
+ CUDA_LIBRARY_FIND(cuTexRefGetFormat);
+ CUDA_LIBRARY_FIND(cuTexRefGetMipmapFilterMode);
+ CUDA_LIBRARY_FIND(cuTexRefGetMipmapLevelBias);
+ CUDA_LIBRARY_FIND(cuTexRefGetMipmapLevelClamp);
+ CUDA_LIBRARY_FIND(cuTexRefGetMaxAnisotropy);
+ CUDA_LIBRARY_FIND(cuTexRefGetFlags);
+ CUDA_LIBRARY_FIND(cuTexRefCreate);
+ CUDA_LIBRARY_FIND(cuTexRefDestroy);
+ CUDA_LIBRARY_FIND(cuSurfRefSetArray);
+ CUDA_LIBRARY_FIND(cuSurfRefGetArray);
+ CUDA_LIBRARY_FIND(cuTexObjectCreate);
+ CUDA_LIBRARY_FIND(cuTexObjectDestroy);
+ CUDA_LIBRARY_FIND(cuTexObjectGetResourceDesc);
+ CUDA_LIBRARY_FIND(cuTexObjectGetTextureDesc);
+ CUDA_LIBRARY_FIND(cuTexObjectGetResourceViewDesc);
+ CUDA_LIBRARY_FIND(cuSurfObjectCreate);
+ CUDA_LIBRARY_FIND(cuSurfObjectDestroy);
+ CUDA_LIBRARY_FIND(cuSurfObjectGetResourceDesc);
+ CUDA_LIBRARY_FIND(cuDeviceCanAccessPeer);
+ CUDA_LIBRARY_FIND(cuCtxEnablePeerAccess);
+ CUDA_LIBRARY_FIND(cuCtxDisablePeerAccess);
+ CUDA_LIBRARY_FIND(cuGraphicsUnregisterResource);
+ CUDA_LIBRARY_FIND(cuGraphicsSubResourceGetMappedArray);
+ CUDA_LIBRARY_FIND(cuGraphicsResourceGetMappedMipmappedArray);
+ CUDA_LIBRARY_FIND(cuGraphicsResourceGetMappedPointer_v2);
+ CUDA_LIBRARY_FIND(cuGraphicsResourceSetMapFlags);
+ CUDA_LIBRARY_FIND(cuGraphicsMapResources);
+ CUDA_LIBRARY_FIND(cuGraphicsUnmapResources);
+ CUDA_LIBRARY_FIND(cuGetExportTable);
+
+ CUDA_LIBRARY_FIND(cuGraphicsGLRegisterBuffer);
+ CUDA_LIBRARY_FIND(cuGraphicsGLRegisterImage);
+ CUDA_LIBRARY_FIND(cuGLGetDevices);
+ CUDA_LIBRARY_FIND(cuGLCtxCreate_v2);
+ CUDA_LIBRARY_FIND(cuGLInit);
+ CUDA_LIBRARY_FIND(cuGLRegisterBufferObject);
+ CUDA_LIBRARY_FIND(cuGLMapBufferObject_v2);
+ CUDA_LIBRARY_FIND(cuGLUnmapBufferObject);
+ CUDA_LIBRARY_FIND(cuGLUnregisterBufferObject);
+ CUDA_LIBRARY_FIND(cuGLSetBufferObjectMapFlags);
+ CUDA_LIBRARY_FIND(cuGLMapBufferObjectAsync_v2);
+ CUDA_LIBRARY_FIND(cuGLUnmapBufferObjectAsync);
+
+
+ result = CUEW_SUCCESS;
+ return result;
+}
+
+const char *cuewErrorString(CUresult result) {
+ switch(result) {
+ case CUDA_SUCCESS: return "No errors";
+ case CUDA_ERROR_INVALID_VALUE: return "Invalid value";
+ case CUDA_ERROR_OUT_OF_MEMORY: return "Out of memory";
+ case CUDA_ERROR_NOT_INITIALIZED: return "Driver not initialized";
+ case CUDA_ERROR_DEINITIALIZED: return "Driver deinitialized";
+ case CUDA_ERROR_PROFILER_DISABLED: return "PROFILER_DISABLED";
+ case CUDA_ERROR_PROFILER_NOT_INITIALIZED: return "PROFILER_NOT_INITIALIZED";
+ case CUDA_ERROR_PROFILER_ALREADY_STARTED: return "PROFILER_ALREADY_STARTED";
+ case CUDA_ERROR_PROFILER_ALREADY_STOPPED: return "PROFILER_ALREADY_STOPPED";
+ case CUDA_ERROR_NO_DEVICE: return "No CUDA-capable device available";
+ case CUDA_ERROR_INVALID_DEVICE: return "Invalid device";
+ case CUDA_ERROR_INVALID_IMAGE: return "Invalid kernel image";
+ case CUDA_ERROR_INVALID_CONTEXT: return "Invalid context";
+ case CUDA_ERROR_CONTEXT_ALREADY_CURRENT: return "Context already current";
+ case CUDA_ERROR_MAP_FAILED: return "Map failed";
+ case CUDA_ERROR_UNMAP_FAILED: return "Unmap failed";
+ case CUDA_ERROR_ARRAY_IS_MAPPED: return "Array is mapped";
+ case CUDA_ERROR_ALREADY_MAPPED: return "Already mapped";
+ case CUDA_ERROR_NO_BINARY_FOR_GPU: return "No binary for GPU";
+ case CUDA_ERROR_ALREADY_ACQUIRED: return "Already acquired";
+ case CUDA_ERROR_NOT_MAPPED: return "Not mapped";
+ case CUDA_ERROR_NOT_MAPPED_AS_ARRAY: return "Mapped resource not available for access as an array";
+ case CUDA_ERROR_NOT_MAPPED_AS_POINTER: return "Mapped resource not available for access as a pointer";
+ case CUDA_ERROR_ECC_UNCORRECTABLE: return "Uncorrectable ECC error detected";
+ case CUDA_ERROR_UNSUPPORTED_LIMIT: return "CUlimit not supported by device";
+ case CUDA_ERROR_CONTEXT_ALREADY_IN_USE: return "CONTEXT_ALREADY_IN_USE";
+ case CUDA_ERROR_PEER_ACCESS_UNSUPPORTED: return "PEER_ACCESS_UNSUPPORTED";
+ case CUDA_ERROR_INVALID_PTX: return "INVALID_PTX";
+ case CUDA_ERROR_INVALID_SOURCE: return "Invalid source";
+ case CUDA_ERROR_FILE_NOT_FOUND: return "File not found";
+ case CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND: return "Link to a shared object failed to resolve";
+ case CUDA_ERROR_SHARED_OBJECT_INIT_FAILED: return "Shared object initialization failed";
+ case CUDA_ERROR_OPERATING_SYSTEM: return "OPERATING_SYSTEM";
+ case CUDA_ERROR_INVALID_HANDLE: return "Invalid handle";
+ case CUDA_ERROR_NOT_FOUND: return "Not found";
+ case CUDA_ERROR_NOT_READY: return "CUDA not ready";
+ case CUDA_ERROR_ILLEGAL_ADDRESS: return "ILLEGAL_ADDRESS";
+ case CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES: return "Launch exceeded resources";
+ case CUDA_ERROR_LAUNCH_TIMEOUT: return "Launch exceeded timeout";
+ case CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING: return "Launch with incompatible texturing";
+ case CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED: return "PEER_ACCESS_ALREADY_ENABLED";
+ case CUDA_ERROR_PEER_ACCESS_NOT_ENABLED: return "PEER_ACCESS_NOT_ENABLED";
+ case CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE: return "PRIMARY_CONTEXT_ACTIVE";
+ case CUDA_ERROR_CONTEXT_IS_DESTROYED: return "CONTEXT_IS_DESTROYED";
+ case CUDA_ERROR_ASSERT: return "ASSERT";
+ case CUDA_ERROR_TOO_MANY_PEERS: return "TOO_MANY_PEERS";
+ case CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED: return "HOST_MEMORY_ALREADY_REGISTERED";
+ case CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED: return "HOST_MEMORY_NOT_REGISTERED";
+ case CUDA_ERROR_HARDWARE_STACK_ERROR: return "HARDWARE_STACK_ERROR";
+ case CUDA_ERROR_ILLEGAL_INSTRUCTION: return "ILLEGAL_INSTRUCTION";
+ case CUDA_ERROR_MISALIGNED_ADDRESS: return "MISALIGNED_ADDRESS";
+ case CUDA_ERROR_INVALID_ADDRESS_SPACE: return "INVALID_ADDRESS_SPACE";
+ case CUDA_ERROR_INVALID_PC: return "INVALID_PC";
+ case CUDA_ERROR_LAUNCH_FAILED: return "Launch failed";
+ case CUDA_ERROR_NOT_PERMITTED: return "NOT_PERMITTED";
+ case CUDA_ERROR_NOT_SUPPORTED: return "NOT_SUPPORTED";
+ case CUDA_ERROR_UNKNOWN: return "Unknown error";
+ default: return "Unknown CUDA error value";
+ }
+}
+
+static void path_join(const char *path1,
+ const char *path2,
+ int maxlen,
+ char *result) {
+#if defined(WIN32) || defined(_WIN32)
+ const char separator = '\\';
+#else
+ const char separator = '/';
+#endif
+ int n = snprintf(result, maxlen, "%s%c%s", path1, separator, path2);
+ if (n != -1 && n < maxlen) {
+ result[n] = '\0';
+ }
+ else {
+ result[maxlen - 1] = '\0';
+ }
+}
+
+static int path_exists(const char *path) {
+ struct stat st;
+ if (stat(path, &st)) {
+ return 0;
+ }
+ return 1;
+}
+
+const char *cuewCompilerPath(void) {
+#ifdef _WIN32
+ const char *defaultpaths[] = {"C:/CUDA/bin", NULL};
+ const char *executable = "nvcc.exe";
+#else
+ const char *defaultpaths[] = {
+ "/Developer/NVIDIA/CUDA-5.0/bin",
+ "/usr/local/cuda-5.0/bin",
+ "/usr/local/cuda/bin",
+ "/Developer/NVIDIA/CUDA-6.0/bin",
+ "/usr/local/cuda-6.0/bin",
+ "/Developer/NVIDIA/CUDA-5.5/bin",
+ "/usr/local/cuda-5.5/bin",
+ NULL};
+ const char *executable = "nvcc";
+#endif
+ int i;
+
+ const char *binpath = getenv("CUDA_BIN_PATH");
+
+ static char nvcc[65536];
+
+ if (binpath) {
+ path_join(binpath, executable, sizeof(nvcc), nvcc);
+ if (path_exists(nvcc))
+ return nvcc;
+ }
+
+ for (i = 0; defaultpaths[i]; ++i) {
+ path_join(defaultpaths[i], executable, sizeof(nvcc), nvcc);
+ if (path_exists(nvcc))
+ return nvcc;
+ }
+
+#ifndef _WIN32
+ {
+ FILE *handle = popen("which nvcc", "r");
+ if (handle) {
+ char buffer[4096] = {0};
+ int len = fread(buffer, 1, sizeof(buffer) - 1, handle);
+ buffer[len] = '\0';
+ pclose(handle);
+
+ if (buffer[0])
+ return "nvcc";
+ }
+ }
+#endif
+
+ return NULL;
+}
+
+int cuewCompilerVersion(void) {
+ const char *path = cuewCompilerPath();
+ const char *marker = "Cuda compilation tools, release ";
+ FILE *pipe;
+ int major, minor;
+ char *versionstr;
+ char buf[128];
+ char output[65536] = "\0";
+ char command[65536] = "\0";
+
+ if (path == NULL)
+ return 0;
+
+ /* get --version output */
+ strncpy(command, path, sizeof(command));
+ strncat(command, " --version", sizeof(command) - strlen(path));
+ pipe = popen(command, "r");
+ if (!pipe) {
+ fprintf(stderr, "CUDA: failed to run compiler to retrieve version");
+ return 0;
+ }
+
+ while (!feof(pipe)) {
+ if (fgets(buf, sizeof(buf), pipe) != NULL) {
+ strncat(output, buf, sizeof(output) - strlen(output) - 1 );
+ }
+ }
+
+ pclose(pipe);
+
+ /* parse version number */
+ versionstr = strstr(output, marker);
+ if (versionstr == NULL) {
+ fprintf(stderr, "CUDA: failed to find version number in:\n\n%s\n", output);
+ return 0;
+ }
+ versionstr += strlen(marker);
+
+ if (sscanf(versionstr, "%d.%d", &major, &minor) < 2) {
+ fprintf(stderr, "CUDA: failed to parse version number from:\n\n%s\n", output);
+ return 0;
+ }
+
+ return 10 * major + minor;
+}
+