Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'intern/opensubdiv/internal/opensubdiv_device_context_cuda.cc')
-rw-r--r--  intern/opensubdiv/internal/opensubdiv_device_context_cuda.cc  135
1 file changed, 71 insertions(+), 64 deletions(-)
diff --git a/intern/opensubdiv/internal/opensubdiv_device_context_cuda.cc b/intern/opensubdiv/internal/opensubdiv_device_context_cuda.cc
index f5bc12efa84..5ce8af0434e 100644
--- a/intern/opensubdiv/internal/opensubdiv_device_context_cuda.cc
+++ b/intern/opensubdiv/internal/opensubdiv_device_context_cuda.cc
@@ -24,68 +24,70 @@
#ifdef OPENSUBDIV_HAS_CUDA
-#ifdef _MSC_VER
-# include <iso646.h>
-#endif
+# ifdef _MSC_VER
+# include <iso646.h>
+# endif
-#include "opensubdiv_device_context_cuda.h"
+# include "opensubdiv_device_context_cuda.h"
-#if defined(_WIN32)
-# include <windows.h>
-#elif defined(__APPLE__)
-# include <OpenGL/OpenGL.h>
-#else
-# include <GL/glx.h>
-# include <X11/Xlib.h>
-#endif
+# if defined(_WIN32)
+# include <windows.h>
+# elif defined(__APPLE__)
+# include <OpenGL/OpenGL.h>
+# else
+# include <GL/glx.h>
+# include <X11/Xlib.h>
+# endif
-#include <cuda.h>
-#include <cuda_gl_interop.h>
-#include <cuda_runtime_api.h>
-#include <cstdio>
+# include <cuda.h>
+# include <cuda_gl_interop.h>
+# include <cuda_runtime_api.h>
+# include <cstdio>
-#include "internal/opensubdiv_util.h"
+# include "internal/opensubdiv_util.h"
-#define message(fmt, ...)
+# define message(fmt, ...)
// #define message(fmt, ...) fprintf(stderr, fmt, __VA_ARGS__)
-#define error(fmt, ...) fprintf(stderr, fmt, __VA_ARGS__)
+# define error(fmt, ...) fprintf(stderr, fmt, __VA_ARGS__)
namespace {
-int getCudaDeviceForCurrentGLContext() {
+int getCudaDeviceForCurrentGLContext()
+{
// Find and use the CUDA device for the current GL context
unsigned int interop_device_count = 0;
int interopDevices[1];
- cudaError_t status = cudaGLGetDevices(&interop_device_count,
- interopDevices,
- 1,
- cudaGLDeviceListCurrentFrame);
+ cudaError_t status = cudaGLGetDevices(
+ &interop_device_count, interopDevices, 1, cudaGLDeviceListCurrentFrame);
if (status == cudaErrorNoDevice || interop_device_count != 1) {
message("CUDA no interop devices found.\n");
return 0;
}
int device = interopDevices[0];
-#if defined(_WIN32)
+# if defined(_WIN32)
return device;
-#elif defined(__APPLE__)
+# elif defined(__APPLE__)
return device;
-#else // X11
- Display* display = glXGetCurrentDisplay();
+# else // X11
+ Display *display = glXGetCurrentDisplay();
int screen = DefaultScreen(display);
if (device != screen) {
- error("The CUDA interop device (%d) does not match "
- "the screen used by the current GL context (%d), "
- "which may cause slow performance on systems "
- "with multiple GPU devices.",
- device, screen);
+ error(
+ "The CUDA interop device (%d) does not match "
+ "the screen used by the current GL context (%d), "
+ "which may cause slow performance on systems "
+ "with multiple GPU devices.",
+ device,
+ screen);
}
message("CUDA init using device for current GL context: %d\n", device);
return device;
-#endif
+# endif
}
// Beginning of GPU Architecture definitions.
-int convertSMVer2Cores_local(int major, int minor) {
+int convertSMVer2Cores_local(int major, int minor)
+{
// Defines for GPU Architecture types (using the SM version to determine
// the # of cores per SM
typedef struct {
@@ -95,15 +97,14 @@ int convertSMVer2Cores_local(int major, int minor) {
int Cores;
} sSMtoCores;
- sSMtoCores nGpuArchCoresPerSM[] = {
- {0x10, 8}, // Tesla Generation (SM 1.0) G80 class.
- {0x11, 8}, // Tesla Generation (SM 1.1) G8x class.
- {0x12, 8}, // Tesla Generation (SM 1.2) G9x class.
- {0x13, 8}, // Tesla Generation (SM 1.3) GT200 class.
- {0x20, 32}, // Fermi Generation (SM 2.0) GF100 class.
- {0x21, 48}, // Fermi Generation (SM 2.1) GF10x class.
- {0x30, 192}, // Fermi Generation (SM 3.0) GK10x class.
- {-1, -1}};
+ sSMtoCores nGpuArchCoresPerSM[] = {{0x10, 8}, // Tesla Generation (SM 1.0) G80 class.
+ {0x11, 8}, // Tesla Generation (SM 1.1) G8x class.
+ {0x12, 8}, // Tesla Generation (SM 1.2) G9x class.
+ {0x13, 8}, // Tesla Generation (SM 1.3) GT200 class.
+ {0x20, 32}, // Fermi Generation (SM 2.0) GF100 class.
+ {0x21, 48}, // Fermi Generation (SM 2.1) GF10x class.
+ {0x30, 192}, // Fermi Generation (SM 3.0) GK10x class.
+ {-1, -1}};
int index = 0;
while (nGpuArchCoresPerSM[index].SM != -1) {
if (nGpuArchCoresPerSM[index].SM == ((major << 4) + minor)) {
@@ -116,7 +117,8 @@ int convertSMVer2Cores_local(int major, int minor) {
}
// This function returns the best GPU (with maximum GFLOPS).
-int cutGetMaxGflopsDeviceId() {
+int cutGetMaxGflopsDeviceId()
+{
int current_device = 0, sm_per_multiproc = 0;
int max_compute_perf = 0, max_perf_device = -1;
int device_count = 0, best_SM_arch = 0;
@@ -136,16 +138,15 @@ int cutGetMaxGflopsDeviceId() {
cuDeviceComputeCapability(&compat_major, &compat_minor, current_device);
if (compat_major == 9999 && compat_minor == 9999) {
sm_per_multiproc = 1;
- } else {
+ }
+ else {
sm_per_multiproc = convertSMVer2Cores_local(compat_major, compat_minor);
}
int multi_processor_count;
- cuDeviceGetAttribute(&multi_processor_count,
- CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
- current_device);
+ cuDeviceGetAttribute(
+ &multi_processor_count, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, current_device);
int clock_rate;
- cuDeviceGetAttribute(&clock_rate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE,
- current_device);
+ cuDeviceGetAttribute(&clock_rate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, current_device);
int compute_perf = multi_processor_count * sm_per_multiproc * clock_rate;
if (compute_perf > max_compute_perf) {
/* If we find GPU with SM major > 2, search only these */
@@ -155,7 +156,8 @@ int cutGetMaxGflopsDeviceId() {
max_compute_perf = compute_perf;
max_perf_device = current_device;
}
- } else {
+ }
+ else {
max_compute_perf = compute_perf;
max_perf_device = current_device;
}
@@ -167,19 +169,20 @@ int cutGetMaxGflopsDeviceId() {
} // namespace
-bool CudaDeviceContext::HAS_CUDA_VERSION_4_0() {
-#ifdef OPENSUBDIV_HAS_CUDA
+bool CudaDeviceContext::HAS_CUDA_VERSION_4_0()
+{
+# ifdef OPENSUBDIV_HAS_CUDA
static bool cuda_initialized = false;
static bool cuda_load_success = true;
if (!cuda_initialized) {
cuda_initialized = true;
-#ifdef OPENSUBDIV_HAS_CUEW
+# ifdef OPENSUBDIV_HAS_CUEW
cuda_load_success = cuewInit(CUEW_INIT_CUDA) == CUEW_SUCCESS;
if (!cuda_load_success) {
fprintf(stderr, "Loading CUDA failed.\n");
}
-#endif
+# endif
// Need to initialize CUDA here so getting device
// with the maximum FPLOS works fine.
if (cuInit(0) == CUDA_SUCCESS) {
@@ -189,25 +192,28 @@ bool CudaDeviceContext::HAS_CUDA_VERSION_4_0() {
if (cutGetMaxGflopsDeviceId() < 0) {
cuda_load_success = false;
}
- } else {
+ }
+ else {
cuda_load_success = false;
}
}
return cuda_load_success;
-#else
+# else
return false;
-#endif
+# endif
}
-CudaDeviceContext::CudaDeviceContext()
- : initialized_(false) {
+CudaDeviceContext::CudaDeviceContext() : initialized_(false)
+{
}
-CudaDeviceContext::~CudaDeviceContext() {
+CudaDeviceContext::~CudaDeviceContext()
+{
cudaDeviceReset();
}
-bool CudaDeviceContext::Initialize() {
+bool CudaDeviceContext::Initialize()
+{
// See if any cuda device is available.
int device_count = 0;
cudaGetDeviceCount(&device_count);
@@ -220,7 +226,8 @@ bool CudaDeviceContext::Initialize() {
return true;
}
-bool CudaDeviceContext::IsInitialized() const {
+bool CudaDeviceContext::IsInitialized() const
+{
return initialized_;
}