diff options
Diffstat (limited to 'intern/cycles/device/cuda')
-rw-r--r--  intern/cycles/device/cuda/device_impl.cpp | 20
-rw-r--r--  intern/cycles/device/cuda/queue.cpp       |  6
-rw-r--r--  intern/cycles/device/cuda/queue.h         |  4
3 files changed, 21 insertions, 9 deletions
diff --git a/intern/cycles/device/cuda/device_impl.cpp b/intern/cycles/device/cuda/device_impl.cpp index f7b3c5ad77f..8d022040414 100644 --- a/intern/cycles/device/cuda/device_impl.cpp +++ b/intern/cycles/device/cuda/device_impl.cpp @@ -477,10 +477,10 @@ void CUDADevice::reserve_local_memory(const uint kernel_features) * still to make it faster. */ CUDADeviceQueue queue(this); - void *d_path_index = nullptr; - void *d_render_buffer = nullptr; + device_ptr d_path_index = 0; + device_ptr d_render_buffer = 0; int d_work_size = 0; - void *args[] = {&d_path_index, &d_render_buffer, &d_work_size}; + DeviceKernelArguments args(&d_path_index, &d_render_buffer, &d_work_size); queue.init_execution(); queue.enqueue(test_kernel, 1, args); @@ -678,7 +678,7 @@ CUDADevice::CUDAMem *CUDADevice::generic_alloc(device_memory &mem, size_t pitch_ void *shared_pointer = 0; - if (mem_alloc_result != CUDA_SUCCESS && can_map_host) { + if (mem_alloc_result != CUDA_SUCCESS && can_map_host && mem.type != MEM_DEVICE_ONLY) { if (mem.shared_pointer) { /* Another device already allocated host memory. 
*/ mem_alloc_result = CUDA_SUCCESS; @@ -701,8 +701,14 @@ CUDADevice::CUDAMem *CUDADevice::generic_alloc(device_memory &mem, size_t pitch_ } if (mem_alloc_result != CUDA_SUCCESS) { - status = " failed, out of device and host memory"; - set_error("System is out of GPU and shared host memory"); + if (mem.type == MEM_DEVICE_ONLY) { + status = " failed, out of device memory"; + set_error("System is out of GPU memory"); + } + else { + status = " failed, out of device and host memory"; + set_error("System is out of GPU and shared host memory"); + } } if (mem.name) { @@ -775,6 +781,7 @@ void CUDADevice::generic_free(device_memory &mem) if (mem.device_pointer) { CUDAContextScope scope(this); thread_scoped_lock lock(cuda_mem_map_mutex); + DCHECK(cuda_mem_map.find(&mem) != cuda_mem_map.end()); const CUDAMem &cmem = cuda_mem_map[&mem]; /* If cmem.use_mapped_host is true, reference counting is used @@ -1141,6 +1148,7 @@ void CUDADevice::tex_free(device_texture &mem) if (mem.device_pointer) { CUDAContextScope scope(this); thread_scoped_lock lock(cuda_mem_map_mutex); + DCHECK(cuda_mem_map.find(&mem) != cuda_mem_map.end()); const CUDAMem &cmem = cuda_mem_map[&mem]; if (cmem.texobject) { diff --git a/intern/cycles/device/cuda/queue.cpp b/intern/cycles/device/cuda/queue.cpp index 09352a84181..ca57882adbf 100644 --- a/intern/cycles/device/cuda/queue.cpp +++ b/intern/cycles/device/cuda/queue.cpp @@ -89,7 +89,9 @@ bool CUDADeviceQueue::kernel_available(DeviceKernel kernel) const return cuda_device_->kernels.available(kernel); } -bool CUDADeviceQueue::enqueue(DeviceKernel kernel, const int work_size, void *args[]) +bool CUDADeviceQueue::enqueue(DeviceKernel kernel, + const int work_size, + DeviceKernelArguments const &args) { if (cuda_device_->have_error()) { return false; @@ -133,7 +135,7 @@ bool CUDADeviceQueue::enqueue(DeviceKernel kernel, const int work_size, void *ar 1, shared_mem_bytes, cuda_stream_, - args, + const_cast<void **>(args.values), 0), "enqueue"); diff --git 
a/intern/cycles/device/cuda/queue.h b/intern/cycles/device/cuda/queue.h index 28613cda071..0836af12098 100644 --- a/intern/cycles/device/cuda/queue.h +++ b/intern/cycles/device/cuda/queue.h @@ -42,7 +42,9 @@ class CUDADeviceQueue : public DeviceQueue { virtual bool kernel_available(DeviceKernel kernel) const override; - virtual bool enqueue(DeviceKernel kernel, const int work_size, void *args[]) override; + virtual bool enqueue(DeviceKernel kernel, + const int work_size, + DeviceKernelArguments const &args) override; virtual bool synchronize() override;