Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'intern/opensubdiv/internal/opensubdiv_device_context_cuda.cc')
-rw-r--r--intern/opensubdiv/internal/opensubdiv_device_context_cuda.cc226
1 files changed, 226 insertions, 0 deletions
diff --git a/intern/opensubdiv/internal/opensubdiv_device_context_cuda.cc b/intern/opensubdiv/internal/opensubdiv_device_context_cuda.cc
new file mode 100644
index 00000000000..875f503b9ab
--- /dev/null
+++ b/intern/opensubdiv/internal/opensubdiv_device_context_cuda.cc
@@ -0,0 +1,226 @@
+// Adopted from OpenSubdiv with the following license:
+//
+// Copyright 2015 Pixar
+//
+// Licensed under the Apache License, Version 2.0 (the "Apache License")
+// with the following modification; you may not use this file except in
+// compliance with the Apache License and the following modification to it:
+// Section 6. Trademarks. is deleted and replaced with:
+//
+// 6. Trademarks. This License does not grant permission to use the trade
+// names, trademarks, service marks, or product names of the Licensor
+// and its affiliates, except as required to comply with Section 4(c) of
+// the License and to reproduce the content of the NOTICE file.
+//
+// You may obtain a copy of the Apache License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the Apache License with the above modification is
+// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the Apache License for the specific
+// language governing permissions and limitations under the Apache License.
+
+#ifdef OPENSUBDIV_HAS_CUDA
+
+#ifdef _MSC_VER
+# include <iso646.h>
+#endif
+
+#include "opensubdiv_device_context_cuda.h"
+
+#if defined(_WIN32)
+# include <windows.h>
+#elif defined(__APPLE__)
+# include <OpenGL/OpenGL.h>
+#else
+# include <GL/glx.h>
+# include <X11/Xlib.h>
+#endif
+
+#include <cuda.h>
+#include <cuda_gl_interop.h>
+#include <cuda_runtime_api.h>
+#include <algorithm>
+#include <cstdio>
+
+#define message(fmt, ...)
+// #define message(fmt, ...) fprintf(stderr, fmt, __VA_ARGS__)
+#define error(fmt, ...) fprintf(stderr, fmt, __VA_ARGS__)
+
+namespace {
+
+// Returns the CUDA device ordinal associated with the current GL context.
+//
+// The device is obtained from cudaGLGetDevices() for the current frame. When
+// that query fails, or does not report exactly one device, 0 is returned.
+// On X11 a diagnostic is printed when the device ordinal differs from the
+// default screen of the current GLX display.
+int getCudaDeviceForCurrentGLContext() {
+  unsigned int interop_device_count = 0;
+  int interop_devices[1] = {0};
+  const cudaError_t status = cudaGLGetDevices(&interop_device_count,
+                                              interop_devices,
+                                              1,
+                                              cudaGLDeviceListCurrentFrame);
+  // Any failure, not only cudaErrorNoDevice, leaves the output unusable.
+  if (status != cudaSuccess || interop_device_count != 1) {
+    message("CUDA no interop devices found.\n");
+    return 0;
+  }
+  const int device = interop_devices[0];
+#if !defined(_WIN32) && !defined(__APPLE__)  // X11
+  // glXGetCurrentDisplay() returns NULL when no GLX context is current;
+  // DefaultScreen() dereferences its argument, so only compare when a
+  // display is actually available.
+  Display *display = glXGetCurrentDisplay();
+  if (display) {
+    const int screen = DefaultScreen(display);
+    if (device != screen) {
+      error("The CUDA interop device (%d) does not match "
+            "the screen used by the current GL context (%d), "
+            "which may cause slow performance on systems "
+            "with multiple GPU devices.\n",
+            device, screen);
+    }
+  }
+  message("CUDA init using device for current GL context: %d\n", device);
+#endif
+  return device;
+}
+
+// Beginning of GPU Architecture definitions.
+// Maps a CUDA compute capability (SM version) to the number of CUDA cores
+// per streaming multiprocessor.
+//
+// major, minor: the compute capability, e.g. 6 and 1 for SM 6.1.
+//
+// Returns the core count for SM versions listed in the table. For a version
+// that is not listed, a diagnostic is printed and the core count of the last
+// table entry is returned, so the result is always positive and an unknown
+// (typically newer) GPU is not disqualified by callers that multiply by it.
+int convertSMVer2Cores_local(int major, int minor) {
+  struct SMToCores {
+    int sm;     // 0xMm (hexadecimal notation),
+                // M = SM major version,
+                // and m = SM minor version.
+    int cores;  // Cores per multiprocessor.
+  };
+
+  static const SMToCores cores_per_sm[] = {
+      {0x10, 8},    // Tesla Generation (SM 1.0) G80 class.
+      {0x11, 8},    // Tesla Generation (SM 1.1) G8x class.
+      {0x12, 8},    // Tesla Generation (SM 1.2) G9x class.
+      {0x13, 8},    // Tesla Generation (SM 1.3) GT200 class.
+      {0x20, 32},   // Fermi Generation (SM 2.0) GF100 class.
+      {0x21, 48},   // Fermi Generation (SM 2.1) GF10x class.
+      {0x30, 192},  // Kepler Generation (SM 3.0) GK10x class.
+      {0x32, 192},  // Kepler Generation (SM 3.2).
+      {0x35, 192},  // Kepler Generation (SM 3.5).
+      {0x37, 192},  // Kepler Generation (SM 3.7).
+      {0x50, 128},  // Maxwell Generation (SM 5.0).
+      {0x52, 128},  // Maxwell Generation (SM 5.2).
+      {0x53, 128},  // Maxwell Generation (SM 5.3).
+      {0x60, 64},   // Pascal Generation (SM 6.0).
+      {0x61, 128},  // Pascal Generation (SM 6.1).
+      {0x62, 128},  // Pascal Generation (SM 6.2).
+      {0x70, 64},   // Volta Generation (SM 7.0).
+      {0x72, 64},   // Volta Generation (SM 7.2).
+      {0x75, 64},   // Turing Generation (SM 7.5).
+      {0x80, 64},   // Ampere Generation (SM 8.0).
+      {0x86, 128},  // Ampere Generation (SM 8.6).
+      {0x87, 128},  // Ampere Generation (SM 8.7).
+      {0x89, 128},  // Ada Generation (SM 8.9).
+      {0x90, 128},  // Hopper Generation (SM 9.0).
+  };
+  const int num_entries =
+      static_cast<int>(sizeof(cores_per_sm) / sizeof(cores_per_sm[0]));
+  const int sm_version = (major << 4) + minor;
+  for (int i = 0; i < num_entries; ++i) {
+    if (cores_per_sm[i].sm == sm_version) {
+      return cores_per_sm[i].cores;
+    }
+  }
+  // Unknown version: fall back to the last table entry instead of returning
+  // a negative count.
+  const int fallback_cores = cores_per_sm[num_entries - 1].cores;
+  fprintf(stderr,
+          "MapSMtoCores undefined SMversion %d.%d, assuming %d cores per SM.\n",
+          major, minor, fallback_cores);
+  return fallback_cores;
+}
+
+// This function returns the best GPU (with maximum GFLOPS).
+// Returns the ordinal of the CUDA device with the highest estimated compute
+// throughput, or -1 when no usable device is found.
+//
+// The estimate is
+//   multiprocessor count * cores per multiprocessor * clock rate.
+// When some device reports an SM major version above 2, only devices whose
+// major version equals the highest one found are considered. Devices whose
+// properties cannot be queried are skipped.
+int cutGetMaxGflopsDeviceId() {
+  int device_count = 0;
+  if (cuDeviceGetCount(&device_count) != CUDA_SUCCESS) {
+    return -1;
+  }
+
+  // First pass: find the best major SM architecture among all devices.
+  int best_SM_arch = 0;
+  for (int device = 0; device < device_count; ++device) {
+    int compat_major = 0, compat_minor = 0;
+    if (cuDeviceComputeCapability(&compat_major, &compat_minor, device) !=
+        CUDA_SUCCESS) {
+      continue;
+    }
+    if (compat_major > 0 && compat_major < 9999) {
+      best_SM_arch = std::max(best_SM_arch, compat_major);
+    }
+  }
+
+  // Second pass: pick the device with the best estimated throughput.
+  // The product is accumulated in 64 bits; with clock rates in the millions
+  // and thousands of cores it does not fit into a 32-bit int.
+  long long max_compute_perf = 0;
+  int max_perf_device = -1;
+  for (int device = 0; device < device_count; ++device) {
+    int compat_major = 0, compat_minor = 0;
+    if (cuDeviceComputeCapability(&compat_major, &compat_minor, device) !=
+        CUDA_SUCCESS) {
+      continue;
+    }
+    int sm_per_multiproc = 1;
+    if (!(compat_major == 9999 && compat_minor == 9999)) {
+      sm_per_multiproc = convertSMVer2Cores_local(compat_major, compat_minor);
+      // An unknown SM version must not turn the estimate negative, which
+      // would exclude the device altogether.
+      if (sm_per_multiproc <= 0) {
+        sm_per_multiproc = 1;
+      }
+    }
+    int multi_processor_count = 0;
+    if (cuDeviceGetAttribute(&multi_processor_count,
+                             CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
+                             device) != CUDA_SUCCESS) {
+      continue;
+    }
+    int clock_rate = 0;
+    if (cuDeviceGetAttribute(&clock_rate,
+                             CU_DEVICE_ATTRIBUTE_CLOCK_RATE,
+                             device) != CUDA_SUCCESS) {
+      continue;
+    }
+    const long long compute_perf = static_cast<long long>(multi_processor_count) *
+                                   sm_per_multiproc * clock_rate;
+    // If a GPU with SM major > 2 exists, only devices of the best major
+    // architecture are eligible.
+    const bool eligible = (best_SM_arch <= 2) || (compat_major == best_SM_arch);
+    if (eligible && compute_perf > max_compute_perf) {
+      max_compute_perf = compute_perf;
+      max_perf_device = device;
+    }
+  }
+  return max_perf_device;
+}
+
+} // namespace
+
+// Reports whether CUDA can be used on this machine.
+//
+// The check runs once; its outcome is cached in function-local statics and
+// returned by every later call. CUDA counts as usable when (if built with
+// CUEW) the library loads, cuInit() succeeds, and cutGetMaxGflopsDeviceId()
+// finds a device.
+bool CudaDeviceContext::HAS_CUDA_VERSION_4_0() {
+#ifdef OPENSUBDIV_HAS_CUDA
+  static bool cuda_initialized = false;
+  static bool cuda_load_success = true;
+  if (cuda_initialized) {
+    return cuda_load_success;
+  }
+  cuda_initialized = true;
+
+#  ifdef OPENSUBDIV_HAS_CUEW
+  cuda_load_success = cuewInit(CUEW_INIT_CUDA) == CUEW_SUCCESS;
+  if (!cuda_load_success) {
+    fprintf(stderr, "Loading CUDA failed.\n");
+    // The driver entry points were not loaded, so cuInit() must not be
+    // called.
+    return cuda_load_success;
+  }
+#  endif
+  // Need to initialize CUDA here so getting device
+  // with the maximum FLOPS works fine.
+  //
+  // The device lookup deals with cases like NVidia Optimus, when there
+  // might be a CUDA library installed but the NVidia card is not active.
+  cuda_load_success = (cuInit(0) == CUDA_SUCCESS) &&
+                      (cutGetMaxGflopsDeviceId() >= 0);
+  return cuda_load_success;
+#else
+  return false;
+#endif
+}
+
+// Creates a context that is not yet initialized; Initialize() sets the flag.
+CudaDeviceContext::CudaDeviceContext() : initialized_(false) {}
+
+// Calls cudaDeviceReset() when the context object is destroyed.
+CudaDeviceContext::~CudaDeviceContext()
+{
+  // The status returned by cudaDeviceReset() is not inspected.
+  cudaDeviceReset();
+}
+
+// Binds the CUDA runtime to the device used by the current GL context.
+//
+// Returns false when no CUDA device is available or the device could not be
+// selected for GL interop. Returns true, and marks the context as
+// initialized, otherwise.
+bool CudaDeviceContext::Initialize() {
+  // See if any cuda device is available. On failure the count stays at 0.
+  int device_count = 0;
+  const cudaError_t count_status = cudaGetDeviceCount(&device_count);
+  message("CUDA device count: %d\n", device_count);
+  if (count_status != cudaSuccess || device_count <= 0) {
+    return false;
+  }
+  const int device = getCudaDeviceForCurrentGLContext();
+  const cudaError_t set_status = cudaGLSetGLDevice(device);
+  // cudaErrorSetOnActiveProcess means a device was already selected for this
+  // process (for example by an earlier Initialize() call); that is not
+  // treated as a failure here.
+  if (set_status != cudaSuccess && set_status != cudaErrorSetOnActiveProcess) {
+    error("CUDA failed to set GL device %d: %s\n",
+          device, cudaGetErrorString(set_status));
+    return false;
+  }
+  initialized_ = true;
+  return true;
+}
+
+// Returns the value of the initialized_ flag, which Initialize() sets to
+// true on success.
+bool CudaDeviceContext::IsInitialized() const
+{
+  return initialized_;
+}
+
+#endif // OPENSUBDIV_HAS_CUDA