Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'intern/opensubdiv/internal/opensubdiv_device_context_cuda.cc')
-rw-r--r--  intern/opensubdiv/internal/opensubdiv_device_context_cuda.cc  135
1 file changed, 71 insertions(+), 64 deletions(-)
diff --git a/intern/opensubdiv/internal/opensubdiv_device_context_cuda.cc b/intern/opensubdiv/internal/opensubdiv_device_context_cuda.cc
index f5bc12efa84..5ce8af0434e 100644
--- a/intern/opensubdiv/internal/opensubdiv_device_context_cuda.cc
+++ b/intern/opensubdiv/internal/opensubdiv_device_context_cuda.cc
@@ -24,68 +24,70 @@
#ifdef OPENSUBDIV_HAS_CUDA
-#ifdef _MSC_VER
-# include <iso646.h>
-#endif
+# ifdef _MSC_VER
+# include <iso646.h>
+# endif
-#include "opensubdiv_device_context_cuda.h"
+# include "opensubdiv_device_context_cuda.h"
-#if defined(_WIN32)
-# include <windows.h>
-#elif defined(__APPLE__)
-# include <OpenGL/OpenGL.h>
-#else
-# include <GL/glx.h>
-# include <X11/Xlib.h>
-#endif
+# if defined(_WIN32)
+# include <windows.h>
+# elif defined(__APPLE__)
+# include <OpenGL/OpenGL.h>
+# else
+# include <GL/glx.h>
+# include <X11/Xlib.h>
+# endif
-#include <cuda.h>
-#include <cuda_gl_interop.h>
-#include <cuda_runtime_api.h>
-#include <cstdio>
+# include <cuda.h>
+# include <cuda_gl_interop.h>
+# include <cuda_runtime_api.h>
+# include <cstdio>
-#include "internal/opensubdiv_util.h"
+# include "internal/opensubdiv_util.h"
-#define message(fmt, ...)
+# define message(fmt, ...)
// #define message(fmt, ...) fprintf(stderr, fmt, __VA_ARGS__)
-#define error(fmt, ...) fprintf(stderr, fmt, __VA_ARGS__)
+# define error(fmt, ...) fprintf(stderr, fmt, __VA_ARGS__)
namespace {
-int getCudaDeviceForCurrentGLContext() {
+int getCudaDeviceForCurrentGLContext()
+{
// Find and use the CUDA device for the current GL context
unsigned int interop_device_count = 0;
int interopDevices[1];
- cudaError_t status = cudaGLGetDevices(&interop_device_count,
- interopDevices,
- 1,
- cudaGLDeviceListCurrentFrame);
+ cudaError_t status = cudaGLGetDevices(
+ &interop_device_count, interopDevices, 1, cudaGLDeviceListCurrentFrame);
if (status == cudaErrorNoDevice || interop_device_count != 1) {
message("CUDA no interop devices found.\n");
return 0;
}
int device = interopDevices[0];
-#if defined(_WIN32)
+# if defined(_WIN32)
return device;
-#elif defined(__APPLE__)
+# elif defined(__APPLE__)
return device;
-#else // X11
- Display* display = glXGetCurrentDisplay();
+# else // X11
+ Display *display = glXGetCurrentDisplay();
int screen = DefaultScreen(display);
if (device != screen) {
- error("The CUDA interop device (%d) does not match "
- "the screen used by the current GL context (%d), "
- "which may cause slow performance on systems "
- "with multiple GPU devices.",
- device, screen);
+ error(
+ "The CUDA interop device (%d) does not match "
+ "the screen used by the current GL context (%d), "
+ "which may cause slow performance on systems "
+ "with multiple GPU devices.",
+ device,
+ screen);
}
message("CUDA init using device for current GL context: %d\n", device);
return device;
-#endif
+# endif
}
// Beginning of GPU Architecture definitions.
-int convertSMVer2Cores_local(int major, int minor) {
+int convertSMVer2Cores_local(int major, int minor)
+{
// Defines for GPU Architecture types (using the SM version to determine
// the # of cores per SM
typedef struct {
@@ -95,15 +97,14 @@ int convertSMVer2Cores_local(int major, int minor) {
int Cores;
} sSMtoCores;
- sSMtoCores nGpuArchCoresPerSM[] = {
- {0x10, 8}, // Tesla Generation (SM 1.0) G80 class.
- {0x11, 8}, // Tesla Generation (SM 1.1) G8x class.
- {0x12, 8}, // Tesla Generation (SM 1.2) G9x class.
- {0x13, 8}, // Tesla Generation (SM 1.3) GT200 class.
- {0x20, 32}, // Fermi Generation (SM 2.0) GF100 class.
- {0x21, 48}, // Fermi Generation (SM 2.1) GF10x class.
- {0x30, 192}, // Fermi Generation (SM 3.0) GK10x class.
- {-1, -1}};
+ sSMtoCores nGpuArchCoresPerSM[] = {{0x10, 8}, // Tesla Generation (SM 1.0) G80 class.
+ {0x11, 8}, // Tesla Generation (SM 1.1) G8x class.
+ {0x12, 8}, // Tesla Generation (SM 1.2) G9x class.
+ {0x13, 8}, // Tesla Generation (SM 1.3) GT200 class.
+ {0x20, 32}, // Fermi Generation (SM 2.0) GF100 class.
+ {0x21, 48}, // Fermi Generation (SM 2.1) GF10x class.
+ {0x30, 192}, // Fermi Generation (SM 3.0) GK10x class.
+ {-1, -1}};
int index = 0;
while (nGpuArchCoresPerSM[index].SM != -1) {
if (nGpuArchCoresPerSM[index].SM == ((major << 4) + minor)) {
@@ -116,7 +117,8 @@ int convertSMVer2Cores_local(int major, int minor) {
}
// This function returns the best GPU (with maximum GFLOPS).
-int cutGetMaxGflopsDeviceId() {
+int cutGetMaxGflopsDeviceId()
+{
int current_device = 0, sm_per_multiproc = 0;
int max_compute_perf = 0, max_perf_device = -1;
int device_count = 0, best_SM_arch = 0;
@@ -136,16 +138,15 @@ int cutGetMaxGflopsDeviceId() {
cuDeviceComputeCapability(&compat_major, &compat_minor, current_device);
if (compat_major == 9999 && compat_minor == 9999) {
sm_per_multiproc = 1;
- } else {
+ }
+ else {
sm_per_multiproc = convertSMVer2Cores_local(compat_major, compat_minor);
}
int multi_processor_count;
- cuDeviceGetAttribute(&multi_processor_count,
- CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
- current_device);
+ cuDeviceGetAttribute(
+ &multi_processor_count, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, current_device);
int clock_rate;
- cuDeviceGetAttribute(&clock_rate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE,
- current_device);
+ cuDeviceGetAttribute(&clock_rate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, current_device);
int compute_perf = multi_processor_count * sm_per_multiproc * clock_rate;
if (compute_perf > max_compute_perf) {
/* If we find GPU with SM major > 2, search only these */
@@ -155,7 +156,8 @@ int cutGetMaxGflopsDeviceId() {
max_compute_perf = compute_perf;
max_perf_device = current_device;
}
- } else {
+ }
+ else {
max_compute_perf = compute_perf;
max_perf_device = current_device;
}
@@ -167,19 +169,20 @@ int cutGetMaxGflopsDeviceId() {
} // namespace
-bool CudaDeviceContext::HAS_CUDA_VERSION_4_0() {
-#ifdef OPENSUBDIV_HAS_CUDA
+bool CudaDeviceContext::HAS_CUDA_VERSION_4_0()
+{
+# ifdef OPENSUBDIV_HAS_CUDA
static bool cuda_initialized = false;
static bool cuda_load_success = true;
if (!cuda_initialized) {
cuda_initialized = true;
-#ifdef OPENSUBDIV_HAS_CUEW
+# ifdef OPENSUBDIV_HAS_CUEW
cuda_load_success = cuewInit(CUEW_INIT_CUDA) == CUEW_SUCCESS;
if (!cuda_load_success) {
fprintf(stderr, "Loading CUDA failed.\n");
}
-#endif
+# endif
// Need to initialize CUDA here so getting device
// with the maximum FPLOS works fine.
if (cuInit(0) == CUDA_SUCCESS) {
@@ -189,25 +192,28 @@ bool CudaDeviceContext::HAS_CUDA_VERSION_4_0() {
if (cutGetMaxGflopsDeviceId() < 0) {
cuda_load_success = false;
}
- } else {
+ }
+ else {
cuda_load_success = false;
}
}
return cuda_load_success;
-#else
+# else
return false;
-#endif
+# endif
}
-CudaDeviceContext::CudaDeviceContext()
- : initialized_(false) {
+CudaDeviceContext::CudaDeviceContext() : initialized_(false)
+{
}
-CudaDeviceContext::~CudaDeviceContext() {
+CudaDeviceContext::~CudaDeviceContext()
+{
cudaDeviceReset();
}
-bool CudaDeviceContext::Initialize() {
+bool CudaDeviceContext::Initialize()
+{
// See if any cuda device is available.
int device_count = 0;
cudaGetDeviceCount(&device_count);
@@ -220,7 +226,8 @@ bool CudaDeviceContext::Initialize() {
return true;
}
-bool CudaDeviceContext::IsInitialized() const {
+bool CudaDeviceContext::IsInitialized() const
+{
return initialized_;
}