1 files changed, 340 insertions, 0 deletions
diff --git a/intern/cycles/device/cuda/device.cpp b/intern/cycles/device/cuda/device.cpp
new file mode 100644
index 00000000000..84becd6d081
--- /dev/null
+++ b/intern/cycles/device/cuda/device.cpp
@@ -0,0 +1,340 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "device/cuda/device.h"
+
+#include "util/util_logging.h"
+
+#ifdef WITH_CUDA
+#  include "device/cuda/device_impl.h"
+#  include "device/device.h"
+
+#  include "util/util_string.h"
+#  include "util/util_windows.h"
+#endif /* WITH_CUDA */
+
+CCL_NAMESPACE_BEGIN
+
+bool device_cuda_init()
+{
+#if !defined(WITH_CUDA)
+  return false;
+#elif defined(WITH_CUDA_DYNLOAD)
+  static bool initialized = false;
+  static bool result = false;
+
+  if (initialized)
+    return result;
+
+  initialized = true;
+  int cuew_result = cuewInit(CUEW_INIT_CUDA);
+  if (cuew_result == CUEW_SUCCESS) {
+    VLOG(1) << "CUEW initialization succeeded";
+    if (CUDADevice::have_precompiled_kernels()) {
+      VLOG(1) << "Found precompiled kernels";
+      result = true;
+    }
+    else if (cuewCompilerPath() != NULL) {
+      VLOG(1) << "Found CUDA compiler " << cuewCompilerPath();
+      result = true;
+    }
+    else {
+      VLOG(1) << "Neither precompiled kernels nor CUDA compiler was found,"
+              << " unable to use CUDA";
+    }
+  }
+  else {
+    VLOG(1) << "CUEW initialization failed: "
+            << ((cuew_result == CUEW_ERROR_ATEXIT_FAILED) ? "Error setting up atexit() handler" :
+                                                            "Error opening the library");
+  }
+
+  return result;
+#else  /* WITH_CUDA_DYNLOAD */
+  return true;
+#endif /* WITH_CUDA_DYNLOAD */
+}
+
+Device *device_cuda_create(const DeviceInfo &info, Stats &stats, Profiler &profiler)
+{
+#ifdef WITH_CUDA
+  return new CUDADevice(info, stats, profiler);
+#else
+  (void)info;
+  (void)stats;
+  (void)profiler;
+
+  LOG(FATAL) << "Request to create CUDA device without compiled-in support. Should never happen.";
+
+  return nullptr;
+#endif
+}
+
+#ifdef WITH_CUDA
+static CUresult device_cuda_safe_init()
+{
+#  ifdef _WIN32
+  __try {
+    return cuInit(0);
+  }
+  __except (EXCEPTION_EXECUTE_HANDLER) {
+    /* Ignore crashes inside the CUDA driver and hope we can
+     * survive even with corrupted CUDA installs. */
+    fprintf(stderr, "Cycles CUDA: driver crashed, continuing without CUDA.\n");
+  }
+
+  return CUDA_ERROR_NO_DEVICE;
+#  else
+  return cuInit(0);
+#  endif
+}
+#endif /* WITH_CUDA */
+
+void device_cuda_info(vector<DeviceInfo> &devices)
+{
+#ifdef WITH_CUDA
+  CUresult result = device_cuda_safe_init();
+  if (result != CUDA_SUCCESS) {
+    if (result != CUDA_ERROR_NO_DEVICE)
+      fprintf(stderr, "CUDA cuInit: %s\n", cuewErrorString(result));
+    return;
+  }
+
+  int count = 0;
+  result = cuDeviceGetCount(&count);
+  if (result != CUDA_SUCCESS) {
+    fprintf(stderr, "CUDA cuDeviceGetCount: %s\n", cuewErrorString(result));
+    return;
+  }
+
+  vector<DeviceInfo> display_devices;
+
+  for (int num = 0; num < count; num++) {
+    char name[256];
+
+    result = cuDeviceGetName(name, 256, num);
+    if (result != CUDA_SUCCESS) {
+      fprintf(stderr, "CUDA cuDeviceGetName: %s\n", cuewErrorString(result));
+      continue;
+    }
+
+    int major;
+    cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, num);
+    if (major < 3) {
+      VLOG(1) << "Ignoring device \"" << name << "\", this graphics card is no longer supported.";
+      continue;
+    }
+
+    DeviceInfo info;
+
+    info.type = DEVICE_CUDA;
+    info.description = string(name);
+    info.num = num;
+
+    info.has_half_images = (major >= 3);
+    info.has_nanovdb = true;
+    info.denoisers = 0;
+
+    info.has_gpu_queue = true;
+
+    /* Check if the device has P2P access to any other device in the system. */
+    for (int peer_num = 0; peer_num < count && !info.has_peer_memory; peer_num++) {
+      if (num != peer_num) {
+        int can_access = 0;
+        cuDeviceCanAccessPeer(&can_access, num, peer_num);
+        info.has_peer_memory = (can_access != 0);
+      }
+    }
+
+    int pci_location[3] = {0, 0, 0};
+    cuDeviceGetAttribute(&pci_location[0], CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID, num);
+    cuDeviceGetAttribute(&pci_location[1], CU_DEVICE_ATTRIBUTE_PCI_BUS_ID, num);
+    cuDeviceGetAttribute(&pci_location[2], CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID, num);
+    info.id = string_printf("CUDA_%s_%04x:%02x:%02x",
+                            name,
+                            (unsigned int)pci_location[0],
+                            (unsigned int)pci_location[1],
+                            (unsigned int)pci_location[2]);
+
+    /* If device has a kernel timeout and no compute preemption, we assume
+     * it is connected to a display and will freeze the display while doing
+     * computations. */
+    int timeout_attr = 0, preempt_attr = 0;
+    cuDeviceGetAttribute(&timeout_attr, CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT, num);
+    cuDeviceGetAttribute(&preempt_attr, CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED, num);
+
+    /* The CUDA driver reports compute preemption as not being available on
+     * Windows 10 even when it is, due to an issue in application profiles.
+     * Detect case where we expect it to be available and override. */
+    if (preempt_attr == 0 && (major >= 6) && system_windows_version_at_least(10, 17134)) {
+      VLOG(1) << "Assuming device has compute preemption on Windows 10.";
+      preempt_attr = 1;
+    }
+
+    if (timeout_attr && !preempt_attr) {
+      VLOG(1) << "Device is recognized as display.";
+      info.description += " (Display)";
+      info.display_device = true;
+      display_devices.push_back(info);
+    }
+    else {
+      VLOG(1) << "Device has compute preemption or is not used for display.";
+      devices.push_back(info);
+    }
+    VLOG(1) << "Added device \"" << name << "\" with id \"" << info.id << "\".";
+  }
+
+  if (!display_devices.empty())
+    devices.insert(devices.end(), display_devices.begin(), display_devices.end());
+#else  /* WITH_CUDA */
+  (void)devices;
+#endif /* WITH_CUDA */
+}
+
+string device_cuda_capabilities()
+{
+#ifdef WITH_CUDA
+  CUresult result = device_cuda_safe_init();
+  if (result != CUDA_SUCCESS) {
+    if (result != CUDA_ERROR_NO_DEVICE) {
+      return string("Error initializing CUDA: ") + cuewErrorString(result);
+    }
+    return "No CUDA device found\n";
+  }
+
+  int count;
+  result = cuDeviceGetCount(&count);
+  if (result != CUDA_SUCCESS) {
+    return string("Error getting devices: ") + cuewErrorString(result);
+  }
+
+  string capabilities = "";
+  for (int num = 0; num < count; num++) {
+    char name[256];
+    if (cuDeviceGetName(name, 256, num) != CUDA_SUCCESS) {
+      continue;
+    }
+    capabilities += string("\t") + name + "\n";
+    int value;
+#  define GET_ATTR(attr) \
+    { \
+      if (cuDeviceGetAttribute(&value, CU_DEVICE_ATTRIBUTE_##attr, num) == CUDA_SUCCESS) { \
+        capabilities += string_printf("\t\tCU_DEVICE_ATTRIBUTE_" #attr "\t\t\t%d\n", value); \
+      } \
+    } \
+    (void)0
+    /* TODO(sergey): Strip all attributes which are not useful for us
+     * or does not depend on the driver.
+     */
+    GET_ATTR(MAX_THREADS_PER_BLOCK);
+    GET_ATTR(MAX_BLOCK_DIM_X);
+    GET_ATTR(MAX_BLOCK_DIM_Y);
+    GET_ATTR(MAX_BLOCK_DIM_Z);
+    GET_ATTR(MAX_GRID_DIM_X);
+    GET_ATTR(MAX_GRID_DIM_Y);
+    GET_ATTR(MAX_GRID_DIM_Z);
+    GET_ATTR(MAX_SHARED_MEMORY_PER_BLOCK);
+    GET_ATTR(SHARED_MEMORY_PER_BLOCK);
+    GET_ATTR(TOTAL_CONSTANT_MEMORY);
+    GET_ATTR(WARP_SIZE);
+    GET_ATTR(MAX_PITCH);
+    GET_ATTR(MAX_REGISTERS_PER_BLOCK);
+    GET_ATTR(REGISTERS_PER_BLOCK);
+    GET_ATTR(CLOCK_RATE);
+    GET_ATTR(TEXTURE_ALIGNMENT);
+    GET_ATTR(GPU_OVERLAP);
+    GET_ATTR(MULTIPROCESSOR_COUNT);
+    GET_ATTR(KERNEL_EXEC_TIMEOUT);
+    GET_ATTR(INTEGRATED);
+    GET_ATTR(CAN_MAP_HOST_MEMORY);
+    GET_ATTR(COMPUTE_MODE);
+    GET_ATTR(MAXIMUM_TEXTURE1D_WIDTH);
+    GET_ATTR(MAXIMUM_TEXTURE2D_WIDTH);
+    GET_ATTR(MAXIMUM_TEXTURE2D_HEIGHT);
+    GET_ATTR(MAXIMUM_TEXTURE3D_WIDTH);
+    GET_ATTR(MAXIMUM_TEXTURE3D_HEIGHT);
+    GET_ATTR(MAXIMUM_TEXTURE3D_DEPTH);
+    GET_ATTR(MAXIMUM_TEXTURE2D_LAYERED_WIDTH);
+    GET_ATTR(MAXIMUM_TEXTURE2D_LAYERED_HEIGHT);
+    GET_ATTR(MAXIMUM_TEXTURE2D_LAYERED_LAYERS);
+    GET_ATTR(MAXIMUM_TEXTURE2D_ARRAY_WIDTH);
+    GET_ATTR(MAXIMUM_TEXTURE2D_ARRAY_HEIGHT);
+    GET_ATTR(MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES);
+    GET_ATTR(SURFACE_ALIGNMENT);
+    GET_ATTR(CONCURRENT_KERNELS);
+    GET_ATTR(ECC_ENABLED);
+    GET_ATTR(TCC_DRIVER);
+    GET_ATTR(MEMORY_CLOCK_RATE);
+    GET_ATTR(GLOBAL_MEMORY_BUS_WIDTH);
+    GET_ATTR(L2_CACHE_SIZE);
+    GET_ATTR(MAX_THREADS_PER_MULTIPROCESSOR);
+    GET_ATTR(ASYNC_ENGINE_COUNT);
+    GET_ATTR(UNIFIED_ADDRESSING);
+    GET_ATTR(MAXIMUM_TEXTURE1D_LAYERED_WIDTH);
+    GET_ATTR(MAXIMUM_TEXTURE1D_LAYERED_LAYERS);
+    GET_ATTR(CAN_TEX2D_GATHER);
+    GET_ATTR(MAXIMUM_TEXTURE2D_GATHER_WIDTH);
+    GET_ATTR(MAXIMUM_TEXTURE2D_GATHER_HEIGHT);
+    GET_ATTR(MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE);
+    GET_ATTR(MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE);
+    GET_ATTR(MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE);
+    GET_ATTR(TEXTURE_PITCH_ALIGNMENT);
+    GET_ATTR(MAXIMUM_TEXTURECUBEMAP_WIDTH);
+    GET_ATTR(MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH);
+    GET_ATTR(MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS);
+    GET_ATTR(MAXIMUM_SURFACE1D_WIDTH);
+    GET_ATTR(MAXIMUM_SURFACE2D_WIDTH);
+    GET_ATTR(MAXIMUM_SURFACE2D_HEIGHT);
+    GET_ATTR(MAXIMUM_SURFACE3D_WIDTH);
+    GET_ATTR(MAXIMUM_SURFACE3D_HEIGHT);
+    GET_ATTR(MAXIMUM_SURFACE3D_DEPTH);
+    GET_ATTR(MAXIMUM_SURFACE1D_LAYERED_WIDTH);
+    GET_ATTR(MAXIMUM_SURFACE1D_LAYERED_LAYERS);
+    GET_ATTR(MAXIMUM_SURFACE2D_LAYERED_WIDTH);
+    GET_ATTR(MAXIMUM_SURFACE2D_LAYERED_HEIGHT);
+    GET_ATTR(MAXIMUM_SURFACE2D_LAYERED_LAYERS);
+    GET_ATTR(MAXIMUM_SURFACECUBEMAP_WIDTH);
+    GET_ATTR(MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH);
+    GET_ATTR(MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS);
+    GET_ATTR(MAXIMUM_TEXTURE1D_LINEAR_WIDTH);
+    GET_ATTR(MAXIMUM_TEXTURE2D_LINEAR_WIDTH);
+    GET_ATTR(MAXIMUM_TEXTURE2D_LINEAR_HEIGHT);
+    GET_ATTR(MAXIMUM_TEXTURE2D_LINEAR_PITCH);
+    GET_ATTR(MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH);
+    GET_ATTR(MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT);
+    GET_ATTR(COMPUTE_CAPABILITY_MAJOR);
+    GET_ATTR(COMPUTE_CAPABILITY_MINOR);
+    GET_ATTR(MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH);
+    GET_ATTR(STREAM_PRIORITIES_SUPPORTED);
+    GET_ATTR(GLOBAL_L1_CACHE_SUPPORTED);
+    GET_ATTR(LOCAL_L1_CACHE_SUPPORTED);
+    GET_ATTR(MAX_SHARED_MEMORY_PER_MULTIPROCESSOR);
+    GET_ATTR(MAX_REGISTERS_PER_MULTIPROCESSOR);
+    GET_ATTR(MANAGED_MEMORY);
+    GET_ATTR(MULTI_GPU_BOARD);
+    GET_ATTR(MULTI_GPU_BOARD_GROUP_ID);
+#  undef GET_ATTR
+    capabilities += "\n";
+  }
+
+  return capabilities;
+
+#else  /* WITH_CUDA */
+  return "";
+#endif /* WITH_CUDA */
+}
+
+CCL_NAMESPACE_END