3 files changed, 58 insertions, 44 deletions
diff --git a/intern/cycles/device/cuda/device.cpp b/intern/cycles/device/cuda/device.cpp
index 400490336d6..5a213c45b71 100644
--- a/intern/cycles/device/cuda/device.cpp
+++ b/intern/cycles/device/cuda/device.cpp
@@ -29,24 +29,25 @@ bool device_cuda_init()
   initialized = true;
   int cuew_result = cuewInit(CUEW_INIT_CUDA);
   if (cuew_result == CUEW_SUCCESS) {
-    VLOG(1) << "CUEW initialization succeeded";
+    VLOG_INFO << "CUEW initialization succeeded";
     if (CUDADevice::have_precompiled_kernels()) {
-      VLOG(1) << "Found precompiled kernels";
+      VLOG_INFO << "Found precompiled kernels";
       result = true;
     }
     else if (cuewCompilerPath() != NULL) {
-      VLOG(1) << "Found CUDA compiler " << cuewCompilerPath();
+      VLOG_INFO << "Found CUDA compiler " << cuewCompilerPath();
       result = true;
     }
     else {
-      VLOG(1) << "Neither precompiled kernels nor CUDA compiler was found,"
-              << " unable to use CUDA";
+      VLOG_INFO << "Neither precompiled kernels nor CUDA compiler was found,"
+                << " unable to use CUDA";
     }
   }
   else {
-    VLOG(1) << "CUEW initialization failed: "
-            << ((cuew_result == CUEW_ERROR_ATEXIT_FAILED) ? "Error setting up atexit() handler" :
-                                                            "Error opening the library");
+    VLOG_WARNING << "CUEW initialization failed: "
+                 << ((cuew_result == CUEW_ERROR_ATEXIT_FAILED) ?
+                         "Error setting up atexit() handler" :
+                         "Error opening the library");
   }
 
   return result;
@@ -121,7 +122,8 @@ void device_cuda_info(vector<DeviceInfo> &devices)
     int major;
     cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, num);
     if (major < 3) {
-      VLOG(1) << "Ignoring device \"" << name << "\", this graphics card is no longer supported.";
+      VLOG_INFO << "Ignoring device \"" << name
+                << "\", this graphics card is no longer supported.";
       continue;
     }
 
@@ -166,21 +168,21 @@ void device_cuda_info(vector<DeviceInfo> &devices)
      * Windows 10 even when it is, due to an issue in application profiles.
      * Detect case where we expect it to be available and override. */
     if (preempt_attr == 0 && (major >= 6) && system_windows_version_at_least(10, 17134)) {
-      VLOG(1) << "Assuming device has compute preemption on Windows 10.";
+      VLOG_INFO << "Assuming device has compute preemption on Windows 10.";
       preempt_attr = 1;
     }
 
     if (timeout_attr && !preempt_attr) {
-      VLOG(1) << "Device is recognized as display.";
+      VLOG_INFO << "Device is recognized as display.";
       info.description += " (Display)";
       info.display_device = true;
       display_devices.push_back(info);
     }
     else {
-      VLOG(1) << "Device has compute preemption or is not used for display.";
+      VLOG_INFO << "Device has compute preemption or is not used for display.";
       devices.push_back(info);
     }
-    VLOG(1) << "Added device \"" << name << "\" with id \"" << info.id << "\".";
+    VLOG_INFO << "Added device \"" << name << "\" with id \"" << info.id << "\".";
   }
 
   if (!display_devices.empty())
diff --git a/intern/cycles/device/cuda/device_impl.cpp b/intern/cycles/device/cuda/device_impl.cpp
index cb7e909a2d5..01c021551f3 100644
--- a/intern/cycles/device/cuda/device_impl.cpp
+++ b/intern/cycles/device/cuda/device_impl.cpp
@@ -23,6 +23,8 @@
 #  include "util/types.h"
 #  include "util/windows.h"
 
+#  include "kernel/device/cuda/globals.h"
+
 CCL_NAMESPACE_BEGIN
 
 class CUDADevice;
@@ -51,7 +53,7 @@ void CUDADevice::set_error(const string &error)
 }
 
 CUDADevice::CUDADevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
-    : Device(info, stats, profiler), texture_info(this, "__texture_info", MEM_GLOBAL)
+    : Device(info, stats, profiler), texture_info(this, "texture_info", MEM_GLOBAL)
 {
   first_error = true;
 
@@ -244,9 +246,9 @@ string CUDADevice::compile_kernel(const uint kernel_features,
   if (!use_adaptive_compilation()) {
     if (!force_ptx) {
       const string cubin = path_get(string_printf("lib/%s_sm_%d%d.cubin", name, major, minor));
-      VLOG(1) << "Testing for pre-compiled kernel " << cubin << ".";
+      VLOG_INFO << "Testing for pre-compiled kernel " << cubin << ".";
       if (path_exists(cubin)) {
-        VLOG(1) << "Using precompiled kernel.";
+        VLOG_INFO << "Using precompiled kernel.";
         return cubin;
       }
     }
@@ -256,9 +258,9 @@ string CUDADevice::compile_kernel(const uint kernel_features,
     while (ptx_major >= 3) {
       const string ptx = path_get(
           string_printf("lib/%s_compute_%d%d.ptx", name, ptx_major, ptx_minor));
-      VLOG(1) << "Testing for pre-compiled kernel " << ptx << ".";
+      VLOG_INFO << "Testing for pre-compiled kernel " << ptx << ".";
       if (path_exists(ptx)) {
-        VLOG(1) << "Using precompiled kernel.";
+        VLOG_INFO << "Using precompiled kernel.";
         return ptx;
       }
 
@@ -287,9 +289,9 @@ string CUDADevice::compile_kernel(const uint kernel_features,
   const string cubin_file = string_printf(
       "cycles_%s_%s_%d%d_%s.%s", name, kernel_arch, major, minor, kernel_md5.c_str(), kernel_ext);
   const string cubin = path_cache_get(path_join("kernels", cubin_file));
-  VLOG(1) << "Testing for locally compiled kernel " << cubin << ".";
+  VLOG_INFO << "Testing for locally compiled kernel " << cubin << ".";
   if (path_exists(cubin)) {
-    VLOG(1) << "Using locally compiled kernel.";
+    VLOG_INFO << "Using locally compiled kernel.";
     return cubin;
   }
 
@@ -323,7 +325,7 @@ string CUDADevice::compile_kernel(const uint kernel_features,
   }
 
   const int nvcc_cuda_version = cuewCompilerVersion();
-  VLOG(1) << "Found nvcc " << nvcc << ", CUDA version " << nvcc_cuda_version << ".";
+  VLOG_INFO << "Found nvcc " << nvcc << ", CUDA version " << nvcc_cuda_version << ".";
   if (nvcc_cuda_version < 101) {
     printf(
         "Unsupported CUDA version %d.%d detected, "
@@ -399,7 +401,8 @@ bool CUDADevice::load_kernels(const uint kernel_features)
    */
   if (cuModule) {
     if (use_adaptive_compilation()) {
-      VLOG(1) << "Skipping CUDA kernel reload for adaptive compilation, not currently supported.";
+      VLOG_INFO
+          << "Skipping CUDA kernel reload for adaptive compilation, not currently supported.";
     }
     return true;
   }
@@ -481,8 +484,8 @@ void CUDADevice::reserve_local_memory(const uint kernel_features)
     cuMemGetInfo(&free_after, &total);
   }
 
-  VLOG(1) << "Local memory reserved " << string_human_readable_number(free_before - free_after)
-          << " bytes. (" << string_human_readable_size(free_before - free_after) << ")";
+  VLOG_INFO << "Local memory reserved " << string_human_readable_number(free_before - free_after)
+            << " bytes. (" << string_human_readable_size(free_before - free_after) << ")";
 
 #  if 0
   /* For testing mapped host memory, fill up device memory. */
@@ -513,7 +516,7 @@ void CUDADevice::init_host_memory()
     }
   }
   else {
-    VLOG(1) << "Mapped host memory disabled, failed to get system RAM";
+    VLOG_WARNING << "Mapped host memory disabled, failed to get system RAM";
     map_host_limit = 0;
   }
 
@@ -524,8 +527,8 @@ void CUDADevice::init_host_memory()
   device_working_headroom = 32 * 1024 * 1024LL;   // 32MB
   device_texture_headroom = 128 * 1024 * 1024LL;  // 128MB
 
-  VLOG(1) << "Mapped host memory limit set to " << string_human_readable_number(map_host_limit)
-          << " bytes. (" << string_human_readable_size(map_host_limit) << ")";
+  VLOG_INFO << "Mapped host memory limit set to " << string_human_readable_number(map_host_limit)
+            << " bytes. (" << string_human_readable_size(map_host_limit) << ")";
 }
 
 void CUDADevice::load_texture_info()
@@ -593,7 +596,7 @@ void CUDADevice::move_textures_to_host(size_t size, bool for_texture)
      * multiple CUDA devices could be moving the memory. The
      * first one will do it, and the rest will adopt the pointer. */
     if (max_mem) {
-      VLOG(1) << "Move memory from device to host: " << max_mem->name;
+      VLOG_WORK << "Move memory from device to host: " << max_mem->name;
 
       static thread_mutex move_mutex;
       thread_scoped_lock lock(move_mutex);
@@ -701,9 +704,9 @@ CUDADevice::CUDAMem *CUDADevice::generic_alloc(device_memory &mem, size_t pitch_
   }
 
   if (mem.name) {
-    VLOG(1) << "Buffer allocate: " << mem.name << ", "
-            << string_human_readable_number(mem.memory_size()) << " bytes. ("
-            << string_human_readable_size(mem.memory_size()) << ")" << status;
+    VLOG_WORK << "Buffer allocate: " << mem.name << ", "
+              << string_human_readable_number(mem.memory_size()) << " bytes. ("
+              << string_human_readable_size(mem.memory_size()) << ")" << status;
   }
 
   mem.device_pointer = (device_ptr)device_pointer;
@@ -899,9 +902,19 @@ void CUDADevice::const_copy_to(const char *name, void *host, size_t size)
   CUdeviceptr mem;
   size_t bytes;
 
-  cuda_assert(cuModuleGetGlobal(&mem, &bytes, cuModule, name));
-  // assert(bytes == size);
-  cuda_assert(cuMemcpyHtoD(mem, host, size));
+  cuda_assert(cuModuleGetGlobal(&mem, &bytes, cuModule, "kernel_params"));
+  assert(bytes == sizeof(KernelParamsCUDA));
+
+  /* Update data storage pointers in launch parameters. */
+#  define KERNEL_DATA_ARRAY(data_type, data_name) \
+    if (strcmp(name, #data_name) == 0) { \
+      cuda_assert(cuMemcpyHtoD(mem + offsetof(KernelParamsCUDA, data_name), host, size)); \
+      return; \
+    }
+  KERNEL_DATA_ARRAY(KernelData, data)
+  KERNEL_DATA_ARRAY(IntegratorStateGPU, integrator_state)
+#  include "kernel/data_arrays.h"
+#  undef KERNEL_DATA_ARRAY
 }
 
 void CUDADevice::global_alloc(device_memory &mem)
@@ -925,7 +938,6 @@ void CUDADevice::tex_alloc(device_texture &mem)
 {
   CUDAContextScope scope(this);
 
-  string bind_name = mem.name;
   size_t dsize = datatype_size(mem.data_type);
   size_t size = mem.memory_size();
 
@@ -1008,9 +1020,9 @@ void CUDADevice::tex_alloc(device_texture &mem)
     desc.NumChannels = mem.data_elements;
     desc.Flags = 0;
 
-    VLOG(1) << "Array 3D allocate: " << mem.name << ", "
-            << string_human_readable_number(mem.memory_size()) << " bytes. ("
-            << string_human_readable_size(mem.memory_size()) << ")";
+    VLOG_WORK << "Array 3D allocate: " << mem.name << ", "
+              << string_human_readable_number(mem.memory_size()) << " bytes. ("
+              << string_human_readable_size(mem.memory_size()) << ")";
 
     cuda_assert(cuArray3DCreate(&array_3d, &desc));
 
@@ -1190,11 +1202,11 @@ bool CUDADevice::should_use_graphics_interop()
   }
 
   vector<CUdevice> gl_devices(num_all_devices);
-  uint num_gl_devices;
+  uint num_gl_devices = 0;
   cuGLGetDevices(&num_gl_devices, gl_devices.data(), num_all_devices, CU_GL_DEVICE_LIST_ALL);
 
-  for (CUdevice gl_device : gl_devices) {
-    if (gl_device == cuDevice) {
+  for (uint i = 0; i < num_gl_devices; ++i) {
+    if (gl_devices[i] == cuDevice) {
       return true;
     }
   }
diff --git a/intern/cycles/device/cuda/queue.cpp b/intern/cycles/device/cuda/queue.cpp
index 38c71866ad0..5912e68a92b 100644
--- a/intern/cycles/device/cuda/queue.cpp
+++ b/intern/cycles/device/cuda/queue.cpp
@@ -39,12 +39,12 @@ int CUDADeviceQueue::num_concurrent_states(const size_t state_size) const
       num_states = max((int)(num_states * factor), 1024);
     }
     else {
-      VLOG(3) << "CYCLES_CONCURRENT_STATES_FACTOR evaluated to 0";
+      VLOG_DEVICE_STATS << "CYCLES_CONCURRENT_STATES_FACTOR evaluated to 0";
     }
   }
 
-  VLOG(3) << "GPU queue concurrent states: " << num_states << ", using up to "
-          << string_human_readable_size(num_states * state_size);
+  VLOG_DEVICE_STATS << "GPU queue concurrent states: " << num_states << ", using up to "
+                    << string_human_readable_size(num_states * state_size);
 
   return num_states;
 }