diff options
author | Campbell Barton <ideasman42@gmail.com> | 2019-04-17 07:17:24 +0300 |
---|---|---|
committer | Campbell Barton <ideasman42@gmail.com> | 2019-04-17 07:21:24 +0300 |
commit | e12c08e8d170b7ca40f204a5b0423c23a9fbc2c1 (patch) | |
tree | 8cf3453d12edb177a218ef8009357518ec6cab6a /intern/opensubdiv/internal/opensubdiv_device_context_cuda.cc | |
parent | b3dabc200a4b0399ec6b81f2ff2730d07b44fcaa (diff) |
ClangFormat: apply to source, most of intern
Apply clang format as proposed in T53211.
For details on usage and instructions for migrating branches
without conflicts, see:
https://wiki.blender.org/wiki/Tools/ClangFormat
Diffstat (limited to 'intern/opensubdiv/internal/opensubdiv_device_context_cuda.cc')
-rw-r--r-- | intern/opensubdiv/internal/opensubdiv_device_context_cuda.cc | 135 |
1 files changed, 71 insertions, 64 deletions
diff --git a/intern/opensubdiv/internal/opensubdiv_device_context_cuda.cc b/intern/opensubdiv/internal/opensubdiv_device_context_cuda.cc index f5bc12efa84..5ce8af0434e 100644 --- a/intern/opensubdiv/internal/opensubdiv_device_context_cuda.cc +++ b/intern/opensubdiv/internal/opensubdiv_device_context_cuda.cc @@ -24,68 +24,70 @@ #ifdef OPENSUBDIV_HAS_CUDA -#ifdef _MSC_VER -# include <iso646.h> -#endif +# ifdef _MSC_VER +# include <iso646.h> +# endif -#include "opensubdiv_device_context_cuda.h" +# include "opensubdiv_device_context_cuda.h" -#if defined(_WIN32) -# include <windows.h> -#elif defined(__APPLE__) -# include <OpenGL/OpenGL.h> -#else -# include <GL/glx.h> -# include <X11/Xlib.h> -#endif +# if defined(_WIN32) +# include <windows.h> +# elif defined(__APPLE__) +# include <OpenGL/OpenGL.h> +# else +# include <GL/glx.h> +# include <X11/Xlib.h> +# endif -#include <cuda.h> -#include <cuda_gl_interop.h> -#include <cuda_runtime_api.h> -#include <cstdio> +# include <cuda.h> +# include <cuda_gl_interop.h> +# include <cuda_runtime_api.h> +# include <cstdio> -#include "internal/opensubdiv_util.h" +# include "internal/opensubdiv_util.h" -#define message(fmt, ...) +# define message(fmt, ...) // #define message(fmt, ...) fprintf(stderr, fmt, __VA_ARGS__) -#define error(fmt, ...) fprintf(stderr, fmt, __VA_ARGS__) +# define error(fmt, ...) fprintf(stderr, fmt, __VA_ARGS__) namespace { -int getCudaDeviceForCurrentGLContext() { +int getCudaDeviceForCurrentGLContext() +{ // Find and use the CUDA device for the current GL context unsigned int interop_device_count = 0; int interopDevices[1]; - cudaError_t status = cudaGLGetDevices(&interop_device_count, - interopDevices, - 1, - cudaGLDeviceListCurrentFrame); + cudaError_t status = cudaGLGetDevices( + &interop_device_count, interopDevices, 1, cudaGLDeviceListCurrentFrame); if (status == cudaErrorNoDevice || interop_device_count != 1) { message("CUDA no interop devices found.\n"); return 0; } int device = interopDevices[0]; -#if defined(_WIN32) +# if defined(_WIN32) return device; -#elif defined(__APPLE__) +# elif defined(__APPLE__) return device; -#else // X11 - Display* display = glXGetCurrentDisplay(); +# else // X11 + Display *display = glXGetCurrentDisplay(); int screen = DefaultScreen(display); if (device != screen) { - error("The CUDA interop device (%d) does not match " - "the screen used by the current GL context (%d), " - "which may cause slow performance on systems " - "with multiple GPU devices.", - device, screen); + error( + "The CUDA interop device (%d) does not match " + "the screen used by the current GL context (%d), " + "which may cause slow performance on systems " + "with multiple GPU devices.", + device, + screen); } message("CUDA init using device for current GL context: %d\n", device); return device; -#endif +# endif } // Beginning of GPU Architecture definitions. -int convertSMVer2Cores_local(int major, int minor) { +int convertSMVer2Cores_local(int major, int minor) +{ // Defines for GPU Architecture types (using the SM version to determine // the # of cores per SM typedef struct { @@ -95,15 +97,14 @@ int convertSMVer2Cores_local(int major, int minor) { int Cores; } sSMtoCores; - sSMtoCores nGpuArchCoresPerSM[] = { - {0x10, 8}, // Tesla Generation (SM 1.0) G80 class. - {0x11, 8}, // Tesla Generation (SM 1.1) G8x class. - {0x12, 8}, // Tesla Generation (SM 1.2) G9x class. - {0x13, 8}, // Tesla Generation (SM 1.3) GT200 class. - {0x20, 32}, // Fermi Generation (SM 2.0) GF100 class. - {0x21, 48}, // Fermi Generation (SM 2.1) GF10x class. - {0x30, 192}, // Fermi Generation (SM 3.0) GK10x class. - {-1, -1}}; + sSMtoCores nGpuArchCoresPerSM[] = {{0x10, 8}, // Tesla Generation (SM 1.0) G80 class. + {0x11, 8}, // Tesla Generation (SM 1.1) G8x class. + {0x12, 8}, // Tesla Generation (SM 1.2) G9x class. + {0x13, 8}, // Tesla Generation (SM 1.3) GT200 class. + {0x20, 32}, // Fermi Generation (SM 2.0) GF100 class. + {0x21, 48}, // Fermi Generation (SM 2.1) GF10x class. + {0x30, 192}, // Fermi Generation (SM 3.0) GK10x class. + {-1, -1}}; int index = 0; while (nGpuArchCoresPerSM[index].SM != -1) { if (nGpuArchCoresPerSM[index].SM == ((major << 4) + minor)) { @@ -116,7 +117,8 @@ int convertSMVer2Cores_local(int major, int minor) { } // This function returns the best GPU (with maximum GFLOPS). -int cutGetMaxGflopsDeviceId() { +int cutGetMaxGflopsDeviceId() +{ int current_device = 0, sm_per_multiproc = 0; int max_compute_perf = 0, max_perf_device = -1; int device_count = 0, best_SM_arch = 0; @@ -136,16 +138,15 @@ int cutGetMaxGflopsDeviceId() { cuDeviceComputeCapability(&compat_major, &compat_minor, current_device); if (compat_major == 9999 && compat_minor == 9999) { sm_per_multiproc = 1; - } else { + } + else { sm_per_multiproc = convertSMVer2Cores_local(compat_major, compat_minor); } int multi_processor_count; - cuDeviceGetAttribute(&multi_processor_count, - CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, - current_device); + cuDeviceGetAttribute( + &multi_processor_count, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, current_device); int clock_rate; - cuDeviceGetAttribute(&clock_rate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, - current_device); + cuDeviceGetAttribute(&clock_rate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, current_device); int compute_perf = multi_processor_count * sm_per_multiproc * clock_rate; if (compute_perf > max_compute_perf) { /* If we find GPU with SM major > 2, search only these */ @@ -155,7 +156,8 @@ int cutGetMaxGflopsDeviceId() { max_compute_perf = compute_perf; max_perf_device = current_device; } - } else { + } + else { max_compute_perf = compute_perf; max_perf_device = current_device; } @@ -167,19 +169,20 @@ int cutGetMaxGflopsDeviceId() { } // namespace -bool CudaDeviceContext::HAS_CUDA_VERSION_4_0() { -#ifdef OPENSUBDIV_HAS_CUDA +bool CudaDeviceContext::HAS_CUDA_VERSION_4_0() +{ +# ifdef OPENSUBDIV_HAS_CUDA static bool cuda_initialized = false; static bool cuda_load_success = true; if (!cuda_initialized) { cuda_initialized = true; -#ifdef OPENSUBDIV_HAS_CUEW +# ifdef OPENSUBDIV_HAS_CUEW cuda_load_success = cuewInit(CUEW_INIT_CUDA) == CUEW_SUCCESS; if (!cuda_load_success) { fprintf(stderr, "Loading CUDA failed.\n"); } -#endif +# endif // Need to initialize CUDA here so getting device // with the maximum FPLOS works fine. if (cuInit(0) == CUDA_SUCCESS) { @@ -189,25 +192,28 @@ bool CudaDeviceContext::HAS_CUDA_VERSION_4_0() { if (cutGetMaxGflopsDeviceId() < 0) { cuda_load_success = false; } - } else { + } + else { cuda_load_success = false; } } return cuda_load_success; -#else +# else return false; -#endif +# endif } -CudaDeviceContext::CudaDeviceContext() - : initialized_(false) { +CudaDeviceContext::CudaDeviceContext() : initialized_(false) +{ } -CudaDeviceContext::~CudaDeviceContext() { +CudaDeviceContext::~CudaDeviceContext() +{ cudaDeviceReset(); } -bool CudaDeviceContext::Initialize() { +bool CudaDeviceContext::Initialize() +{ // See if any cuda device is available. int device_count = 0; cudaGetDeviceCount(&device_count); @@ -220,7 +226,8 @@ bool CudaDeviceContext::Initialize() { return true; } -bool CudaDeviceContext::IsInitialized() const { +bool CudaDeviceContext::IsInitialized() const +{ return initialized_; } |