// Adopted from OpenSubdiv with the following license:
//
// Copyright 2015 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
//    names, trademarks, service marks, or product names of the Licensor
//    and its affiliates, except as required to comply with Section 4(c) of
//    the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.

#ifdef OPENSUBDIV_HAS_CUDA

#ifdef _MSC_VER
#  include <iso646.h>
#endif

#include "opensubdiv_device_context_cuda.h"

#if defined(_WIN32)
#  include <windows.h>
#elif defined(__APPLE__)
#  include <OpenGL/OpenGL.h>
#else
#  include <GL/glx.h>
#  include <X11/Xlib.h>
#endif

// Headers needed by the calls below: printf/fprintf, std::max, and the CUDA
// driver, runtime and GL-interop APIs.
#include <cstdio>
#include <algorithm>

#include <cuda.h>
#include <cuda_runtime_api.h>
#include <cuda_gl_interop.h>

#define message(fmt, ...)
// #define message(fmt, ...) fprintf(stderr, fmt, __VA_ARGS__)
#define error(fmt, ...) fprintf(stderr, fmt, __VA_ARGS__)

namespace {

int getCudaDeviceForCurrentGLContext()
{
  // Find and use the CUDA device for the current GL context.
  unsigned int interop_device_count = 0;
  int interopDevices[1];
  cudaError_t status = cudaGLGetDevices(
      &interop_device_count, interopDevices, 1, cudaGLDeviceListCurrentFrame);
  if (status == cudaErrorNoDevice || interop_device_count != 1) {
    message("CUDA no interop devices found.\n");
    return 0;
  }
  int device = interopDevices[0];
#if defined(_WIN32)
  return device;
#elif defined(__APPLE__)
  return device;
#else  // X11
  Display *display = glXGetCurrentDisplay();
  int screen = DefaultScreen(display);
  if (device != screen) {
    error("The CUDA interop device (%d) does not match "
          "the screen used by the current GL context (%d), "
          "which may cause slow performance on systems "
          "with multiple GPU devices.",
          device, screen);
  }
  message("CUDA init using device for current GL context: %d\n", device);
  return device;
#endif
}

// Beginning of GPU Architecture definitions.
int convertSMVer2Cores_local(int major, int minor)
{
  // Defines for GPU Architecture types (using the SM version to determine
  // the # of cores per SM).
  typedef struct {
    int SM;  // 0xMm (hexadecimal notation),
             // M = SM Major version,
             // and m = SM minor version.
    int Cores;
  } sSMtoCores;

  sSMtoCores nGpuArchCoresPerSM[] = {
      {0x10, 8},    // Tesla Generation (SM 1.0) G80 class.
      {0x11, 8},    // Tesla Generation (SM 1.1) G8x class.
      {0x12, 8},    // Tesla Generation (SM 1.2) G9x class.
      {0x13, 8},    // Tesla Generation (SM 1.3) GT200 class.
      {0x20, 32},   // Fermi Generation (SM 2.0) GF100 class.
      {0x21, 48},   // Fermi Generation (SM 2.1) GF10x class.
      {0x30, 192},  // Kepler Generation (SM 3.0) GK10x class.
      {-1, -1}};

  int index = 0;
  while (nGpuArchCoresPerSM[index].SM != -1) {
    if (nGpuArchCoresPerSM[index].SM == ((major << 4) + minor)) {
      return nGpuArchCoresPerSM[index].Cores;
    }
    index++;
  }
  printf("MapSMtoCores undefined SMversion %d.%d!\n", major, minor);
  return -1;
}

// This function returns the best GPU (with maximum GFLOPS).
int cutGetMaxGflopsDeviceId()
{
  int current_device = 0, sm_per_multiproc = 0;
  int max_compute_perf = 0, max_perf_device = -1;
  int device_count = 0, best_SM_arch = 0;
  int compat_major, compat_minor;

  cuDeviceGetCount(&device_count);

  // Find the best major SM Architecture GPU device.
  while (current_device < device_count) {
    cuDeviceComputeCapability(&compat_major, &compat_minor, current_device);
    if (compat_major > 0 && compat_major < 9999) {
      best_SM_arch = std::max(best_SM_arch, compat_major);
    }
    current_device++;
  }

  // Find the best CUDA capable GPU device.
  current_device = 0;
  while (current_device < device_count) {
    cuDeviceComputeCapability(&compat_major, &compat_minor, current_device);
    if (compat_major == 9999 && compat_minor == 9999) {
      sm_per_multiproc = 1;
    }
    else {
      sm_per_multiproc = convertSMVer2Cores_local(compat_major, compat_minor);
    }

    int multi_processor_count;
    cuDeviceGetAttribute(&multi_processor_count,
                         CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
                         current_device);
    int clock_rate;
    cuDeviceGetAttribute(&clock_rate,
                         CU_DEVICE_ATTRIBUTE_CLOCK_RATE,
                         current_device);

    int compute_perf = multi_processor_count * sm_per_multiproc * clock_rate;
    if (compute_perf > max_compute_perf) {
      /* If we find a GPU with SM major > 2, search only these. */
      if (best_SM_arch > 2) {
        /* If our device's major version == best_SM_arch, choose it, or else pass. */
        if (compat_major == best_SM_arch) {
          max_compute_perf = compute_perf;
          max_perf_device = current_device;
        }
      }
      else {
        max_compute_perf = compute_perf;
        max_perf_device = current_device;
      }
    }
    ++current_device;
  }
  return max_perf_device;
}

}  // namespace

bool CudaDeviceContext::HAS_CUDA_VERSION_4_0()
{
#ifdef OPENSUBDIV_HAS_CUDA
  static bool cuda_initialized = false;
  static bool cuda_load_success = true;
  if (!cuda_initialized) {
    cuda_initialized = true;

#ifdef OPENSUBDIV_HAS_CUEW
    cuda_load_success = cuewInit(CUEW_INIT_CUDA) == CUEW_SUCCESS;
    if (!cuda_load_success) {
      fprintf(stderr, "Loading CUDA failed.\n");
    }
#endif
    // Need to initialize CUDA here so getting the device
    // with the maximum GFLOPS works fine.
    if (cuInit(0) == CUDA_SUCCESS) {
      // This is to deal with cases like NVIDIA Optimus,
      // where a CUDA library might be installed but the
      // NVIDIA card is not active.
      if (cutGetMaxGflopsDeviceId() < 0) {
        cuda_load_success = false;
      }
    }
    else {
      cuda_load_success = false;
    }
  }
  return cuda_load_success;
#else
  return false;
#endif
}

CudaDeviceContext::CudaDeviceContext() : initialized_(false)
{
}

CudaDeviceContext::~CudaDeviceContext()
{
  cudaDeviceReset();
}

bool CudaDeviceContext::Initialize()
{
  // See if any CUDA device is available.
  int device_count = 0;
  cudaGetDeviceCount(&device_count);
  message("CUDA device count: %d\n", device_count);
  if (device_count <= 0) {
    return false;
  }

  cudaGLSetGLDevice(getCudaDeviceForCurrentGLContext());
  initialized_ = true;
  return true;
}

bool CudaDeviceContext::IsInitialized() const
{
  return initialized_;
}

#endif  // OPENSUBDIV_HAS_CUDA
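
/* Usage sketch: how a caller might guard CUDA-based evaluation behind this
 * context. It assumes an OpenGL context is already current on the calling
 * thread, since Initialize() binds the CUDA device to that GL context via
 * cudaGLSetGLDevice(); the fallback branch is hypothetical and depends on
 * which other evaluator back-ends the caller has available.
 *
 *   CudaDeviceContext cuda_context;
 *   if (CudaDeviceContext::HAS_CUDA_VERSION_4_0() && cuda_context.Initialize()) {
 *     // CUDA + GL interop is usable; CUDA-backed evaluators may be created.
 *   }
 *   else {
 *     // Fall back to a CPU (or other) evaluation path.
 *   }
 */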