diff options
Diffstat (limited to 'intern/cycles/device/cuda')
-rw-r--r-- | intern/cycles/device/cuda/device.cpp | 28 | ||||
-rw-r--r-- | intern/cycles/device/cuda/device_impl.cpp | 68 | ||||
-rw-r--r-- | intern/cycles/device/cuda/queue.cpp | 6 |
3 files changed, 58 insertions, 44 deletions
diff --git a/intern/cycles/device/cuda/device.cpp b/intern/cycles/device/cuda/device.cpp index 400490336d6..5a213c45b71 100644 --- a/intern/cycles/device/cuda/device.cpp +++ b/intern/cycles/device/cuda/device.cpp @@ -29,24 +29,25 @@ bool device_cuda_init() initialized = true; int cuew_result = cuewInit(CUEW_INIT_CUDA); if (cuew_result == CUEW_SUCCESS) { - VLOG(1) << "CUEW initialization succeeded"; + VLOG_INFO << "CUEW initialization succeeded"; if (CUDADevice::have_precompiled_kernels()) { - VLOG(1) << "Found precompiled kernels"; + VLOG_INFO << "Found precompiled kernels"; result = true; } else if (cuewCompilerPath() != NULL) { - VLOG(1) << "Found CUDA compiler " << cuewCompilerPath(); + VLOG_INFO << "Found CUDA compiler " << cuewCompilerPath(); result = true; } else { - VLOG(1) << "Neither precompiled kernels nor CUDA compiler was found," - << " unable to use CUDA"; + VLOG_INFO << "Neither precompiled kernels nor CUDA compiler was found," + << " unable to use CUDA"; } } else { - VLOG(1) << "CUEW initialization failed: " - << ((cuew_result == CUEW_ERROR_ATEXIT_FAILED) ? "Error setting up atexit() handler" : - "Error opening the library"); + VLOG_WARNING << "CUEW initialization failed: " + << ((cuew_result == CUEW_ERROR_ATEXIT_FAILED) ? + "Error setting up atexit() handler" : + "Error opening the library"); } return result; @@ -121,7 +122,8 @@ void device_cuda_info(vector<DeviceInfo> &devices) int major; cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, num); if (major < 3) { - VLOG(1) << "Ignoring device \"" << name << "\", this graphics card is no longer supported."; + VLOG_INFO << "Ignoring device \"" << name + << "\", this graphics card is no longer supported."; continue; } @@ -166,21 +168,21 @@ void device_cuda_info(vector<DeviceInfo> &devices) * Windows 10 even when it is, due to an issue in application profiles. * Detect case where we expect it to be available and override. */ if (preempt_attr == 0 && (major >= 6) && system_windows_version_at_least(10, 17134)) { - VLOG(1) << "Assuming device has compute preemption on Windows 10."; + VLOG_INFO << "Assuming device has compute preemption on Windows 10."; preempt_attr = 1; } if (timeout_attr && !preempt_attr) { - VLOG(1) << "Device is recognized as display."; + VLOG_INFO << "Device is recognized as display."; info.description += " (Display)"; info.display_device = true; display_devices.push_back(info); } else { - VLOG(1) << "Device has compute preemption or is not used for display."; + VLOG_INFO << "Device has compute preemption or is not used for display."; devices.push_back(info); } - VLOG(1) << "Added device \"" << name << "\" with id \"" << info.id << "\"."; + VLOG_INFO << "Added device \"" << name << "\" with id \"" << info.id << "\"."; } if (!display_devices.empty()) diff --git a/intern/cycles/device/cuda/device_impl.cpp b/intern/cycles/device/cuda/device_impl.cpp index cb7e909a2d5..01c021551f3 100644 --- a/intern/cycles/device/cuda/device_impl.cpp +++ b/intern/cycles/device/cuda/device_impl.cpp @@ -23,6 +23,8 @@ # include "util/types.h" # include "util/windows.h" +# include "kernel/device/cuda/globals.h" + CCL_NAMESPACE_BEGIN class CUDADevice; @@ -51,7 +53,7 @@ void CUDADevice::set_error(const string &error) } CUDADevice::CUDADevice(const DeviceInfo &info, Stats &stats, Profiler &profiler) - : Device(info, stats, profiler), texture_info(this, "__texture_info", MEM_GLOBAL) + : Device(info, stats, profiler), texture_info(this, "texture_info", MEM_GLOBAL) { first_error = true; @@ -244,9 +246,9 @@ string CUDADevice::compile_kernel(const uint kernel_features, if (!use_adaptive_compilation()) { if (!force_ptx) { const string cubin = path_get(string_printf("lib/%s_sm_%d%d.cubin", name, major, minor)); - VLOG(1) << "Testing for pre-compiled kernel " << cubin << "."; + VLOG_INFO << "Testing for pre-compiled kernel " << cubin << "."; if (path_exists(cubin)) { - VLOG(1) << "Using precompiled kernel."; + VLOG_INFO << "Using precompiled kernel."; return cubin; } } @@ -256,9 +258,9 @@ string CUDADevice::compile_kernel(const uint kernel_features, while (ptx_major >= 3) { const string ptx = path_get( string_printf("lib/%s_compute_%d%d.ptx", name, ptx_major, ptx_minor)); - VLOG(1) << "Testing for pre-compiled kernel " << ptx << "."; + VLOG_INFO << "Testing for pre-compiled kernel " << ptx << "."; if (path_exists(ptx)) { - VLOG(1) << "Using precompiled kernel."; + VLOG_INFO << "Using precompiled kernel."; return ptx; } @@ -287,9 +289,9 @@ string CUDADevice::compile_kernel(const uint kernel_features, const string cubin_file = string_printf( "cycles_%s_%s_%d%d_%s.%s", name, kernel_arch, major, minor, kernel_md5.c_str(), kernel_ext); const string cubin = path_cache_get(path_join("kernels", cubin_file)); - VLOG(1) << "Testing for locally compiled kernel " << cubin << "."; + VLOG_INFO << "Testing for locally compiled kernel " << cubin << "."; if (path_exists(cubin)) { - VLOG(1) << "Using locally compiled kernel."; + VLOG_INFO << "Using locally compiled kernel."; return cubin; } @@ -323,7 +325,7 @@ string CUDADevice::compile_kernel(const uint kernel_features, } const int nvcc_cuda_version = cuewCompilerVersion(); - VLOG(1) << "Found nvcc " << nvcc << ", CUDA version " << nvcc_cuda_version << "."; + VLOG_INFO << "Found nvcc " << nvcc << ", CUDA version " << nvcc_cuda_version << "."; if (nvcc_cuda_version < 101) { printf( "Unsupported CUDA version %d.%d detected, " @@ -399,7 +401,8 @@ bool CUDADevice::load_kernels(const uint kernel_features) */ if (cuModule) { if (use_adaptive_compilation()) { - VLOG(1) << "Skipping CUDA kernel reload for adaptive compilation, not currently supported."; + VLOG_INFO + << "Skipping CUDA kernel reload for adaptive compilation, not currently supported."; } return true; } @@ -481,8 +484,8 @@ void CUDADevice::reserve_local_memory(const uint kernel_features) cuMemGetInfo(&free_after, &total); } - VLOG(1) << "Local memory reserved " << string_human_readable_number(free_before - free_after) - << " bytes. (" << string_human_readable_size(free_before - free_after) << ")"; + VLOG_INFO << "Local memory reserved " << string_human_readable_number(free_before - free_after) + << " bytes. (" << string_human_readable_size(free_before - free_after) << ")"; # if 0 /* For testing mapped host memory, fill up device memory. */ @@ -513,7 +516,7 @@ void CUDADevice::init_host_memory() } } else { - VLOG(1) << "Mapped host memory disabled, failed to get system RAM"; + VLOG_WARNING << "Mapped host memory disabled, failed to get system RAM"; map_host_limit = 0; } @@ -524,8 +527,8 @@ void CUDADevice::init_host_memory() device_working_headroom = 32 * 1024 * 1024LL; // 32MB device_texture_headroom = 128 * 1024 * 1024LL; // 128MB - VLOG(1) << "Mapped host memory limit set to " << string_human_readable_number(map_host_limit) - << " bytes. (" << string_human_readable_size(map_host_limit) << ")"; + VLOG_INFO << "Mapped host memory limit set to " << string_human_readable_number(map_host_limit) + << " bytes. (" << string_human_readable_size(map_host_limit) << ")"; } void CUDADevice::load_texture_info() @@ -593,7 +596,7 @@ void CUDADevice::move_textures_to_host(size_t size, bool for_texture) * multiple CUDA devices could be moving the memory. The * first one will do it, and the rest will adopt the pointer. */ if (max_mem) { - VLOG(1) << "Move memory from device to host: " << max_mem->name; + VLOG_WORK << "Move memory from device to host: " << max_mem->name; static thread_mutex move_mutex; thread_scoped_lock lock(move_mutex); @@ -701,9 +704,9 @@ CUDADevice::CUDAMem *CUDADevice::generic_alloc(device_memory &mem, size_t pitch_ } if (mem.name) { - VLOG(1) << "Buffer allocate: " << mem.name << ", " - << string_human_readable_number(mem.memory_size()) << " bytes. (" - << string_human_readable_size(mem.memory_size()) << ")" << status; + VLOG_WORK << "Buffer allocate: " << mem.name << ", " + << string_human_readable_number(mem.memory_size()) << " bytes. (" + << string_human_readable_size(mem.memory_size()) << ")" << status; } mem.device_pointer = (device_ptr)device_pointer; @@ -899,9 +902,19 @@ void CUDADevice::const_copy_to(const char *name, void *host, size_t size) CUdeviceptr mem; size_t bytes; - cuda_assert(cuModuleGetGlobal(&mem, &bytes, cuModule, name)); - // assert(bytes == size); - cuda_assert(cuMemcpyHtoD(mem, host, size)); + cuda_assert(cuModuleGetGlobal(&mem, &bytes, cuModule, "kernel_params")); + assert(bytes == sizeof(KernelParamsCUDA)); + + /* Update data storage pointers in launch parameters. */ +# define KERNEL_DATA_ARRAY(data_type, data_name) \ + if (strcmp(name, #data_name) == 0) { \ + cuda_assert(cuMemcpyHtoD(mem + offsetof(KernelParamsCUDA, data_name), host, size)); \ + return; \ + } + KERNEL_DATA_ARRAY(KernelData, data) + KERNEL_DATA_ARRAY(IntegratorStateGPU, integrator_state) +# include "kernel/data_arrays.h" +# undef KERNEL_DATA_ARRAY } void CUDADevice::global_alloc(device_memory &mem) @@ -925,7 +938,6 @@ void CUDADevice::tex_alloc(device_texture &mem) { CUDAContextScope scope(this); - string bind_name = mem.name; size_t dsize = datatype_size(mem.data_type); size_t size = mem.memory_size(); @@ -1008,9 +1020,9 @@ void CUDADevice::tex_alloc(device_texture &mem) desc.NumChannels = mem.data_elements; desc.Flags = 0; - VLOG(1) << "Array 3D allocate: " << mem.name << ", " - << string_human_readable_number(mem.memory_size()) << " bytes. (" - << string_human_readable_size(mem.memory_size()) << ")"; + VLOG_WORK << "Array 3D allocate: " << mem.name << ", " + << string_human_readable_number(mem.memory_size()) << " bytes. (" + << string_human_readable_size(mem.memory_size()) << ")"; cuda_assert(cuArray3DCreate(&array_3d, &desc)); @@ -1190,11 +1202,11 @@ bool CUDADevice::should_use_graphics_interop() } vector<CUdevice> gl_devices(num_all_devices); - uint num_gl_devices; + uint num_gl_devices = 0; cuGLGetDevices(&num_gl_devices, gl_devices.data(), num_all_devices, CU_GL_DEVICE_LIST_ALL); - for (CUdevice gl_device : gl_devices) { - if (gl_device == cuDevice) { + for (uint i = 0; i < num_gl_devices; ++i) { + if (gl_devices[i] == cuDevice) { return true; } } diff --git a/intern/cycles/device/cuda/queue.cpp b/intern/cycles/device/cuda/queue.cpp index 38c71866ad0..5912e68a92b 100644 --- a/intern/cycles/device/cuda/queue.cpp +++ b/intern/cycles/device/cuda/queue.cpp @@ -39,12 +39,12 @@ int CUDADeviceQueue::num_concurrent_states(const size_t state_size) const num_states = max((int)(num_states * factor), 1024); } else { - VLOG(3) << "CYCLES_CONCURRENT_STATES_FACTOR evaluated to 0"; + VLOG_DEVICE_STATS << "CYCLES_CONCURRENT_STATES_FACTOR evaluated to 0"; } } - VLOG(3) << "GPU queue concurrent states: " << num_states << ", using up to " - << string_human_readable_size(num_states * state_size); + VLOG_DEVICE_STATS << "GPU queue concurrent states: " << num_states << ", using up to " + << string_human_readable_size(num_states * state_size); return num_states; } |