author     Patrick Mours <pmours@nvidia.com>  2020-02-25 19:10:39 +0300
committer  Patrick Mours <pmours@nvidia.com>  2020-02-25 19:13:59 +0300
commit     a93043153a35200b62513b4749ab1018b354918a (patch)
tree       801eb217ebe54862fd4aa0fc8711de9dcb450c60 /intern
parent     e75059e3b6a22b70bef4b4f86829f79f1abd188f (diff)
Cleanup: Remove superfluous "cuda_device_ptr" function
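For context, the removed helper was a one-line cast wrapper, so each call site can spell out the cast directly; device_ptr is an integer handle wide enough to hold a CUdeviceptr. A minimal before/after sketch drawn from the diff below:

    /* Before: thin wrapper declared in device_cuda.h, removed by this commit. */
    CUdeviceptr cuda_device_ptr(device_ptr mem)
    {
      return (CUdeviceptr)mem;
    }
    cuda_assert(cuMemsetD8(cuda_device_ptr(mem.device_pointer), 0, mem.memory_size()));

    /* After: the same cast written out at the call site. */
    cuda_assert(cuMemsetD8((CUdeviceptr)mem.device_pointer, 0, mem.memory_size()));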
Diffstat (limited to 'intern')
-rw-r--r--  intern/cycles/device/cuda/device_cuda.h         |  5
-rw-r--r--  intern/cycles/device/cuda/device_cuda_impl.cpp  | 53
2 files changed, 26 insertions(+), 32 deletions(-)
diff --git a/intern/cycles/device/cuda/device_cuda.h b/intern/cycles/device/cuda/device_cuda.h
index 5820b525fd6..a825bd6b128 100644
--- a/intern/cycles/device/cuda/device_cuda.h
+++ b/intern/cycles/device/cuda/device_cuda.h
@@ -82,11 +82,6 @@ class CUDADevice : public Device {
   device_vector<TextureInfo> texture_info;
   bool need_texture_info;
 
-  CUdeviceptr cuda_device_ptr(device_ptr mem)
-  {
-    return (CUdeviceptr)mem;
-  }
-
   static bool have_precompiled_kernels();
 
   virtual bool show_samples() const;
diff --git a/intern/cycles/device/cuda/device_cuda_impl.cpp b/intern/cycles/device/cuda/device_cuda_impl.cpp
index 5c25d7b0208..f6a4f93a690 100644
--- a/intern/cycles/device/cuda/device_cuda_impl.cpp
+++ b/intern/cycles/device/cuda/device_cuda_impl.cpp
@@ -829,18 +829,17 @@ CUDADevice::CUDAMem *CUDADevice::generic_alloc(device_memory &mem, size_t pitch_
 
 void CUDADevice::generic_copy_to(device_memory &mem)
 {
-  if (mem.host_pointer && mem.device_pointer) {
-    CUDAContextScope scope(this);
-
-    /* If use_mapped_host of mem is false, the current device only
-     * uses device memory allocated by cuMemAlloc regardless of
-     * mem.host_pointer and mem.shared_pointer, and should copy
-     * data from mem.host_pointer. */
+  if (!mem.host_pointer || !mem.device_pointer) {
+    return;
+  }
 
-    if (cuda_mem_map[&mem].use_mapped_host == false || mem.host_pointer != mem.shared_pointer) {
-      cuda_assert(
-          cuMemcpyHtoD(cuda_device_ptr(mem.device_pointer), mem.host_pointer, mem.memory_size()));
-    }
+  /* If use_mapped_host of mem is false, the current device only uses device memory allocated by
+   * cuMemAlloc regardless of mem.host_pointer and mem.shared_pointer, and should copy data from
+   * mem.host_pointer. */
+  if (!cuda_mem_map[&mem].use_mapped_host || mem.host_pointer != mem.shared_pointer) {
+    const CUDAContextScope scope(this);
+    cuda_assert(
+        cuMemcpyHtoD((CUdeviceptr)mem.device_pointer, mem.host_pointer, mem.memory_size()));
   }
 }
 
@@ -947,7 +946,7 @@ void CUDADevice::mem_zero(device_memory &mem)
    * regardless of mem.host_pointer and mem.shared_pointer. */
   if (!cuda_mem_map[&mem].use_mapped_host || mem.host_pointer != mem.shared_pointer) {
     const CUDAContextScope scope(this);
-    cuda_assert(cuMemsetD8(cuda_device_ptr(mem.device_pointer), 0, mem.memory_size()));
+    cuda_assert(cuMemsetD8((CUdeviceptr)mem.device_pointer, 0, mem.memory_size()));
   }
   else if (mem.host_pointer) {
     memset(mem.host_pointer, 0, mem.memory_size());
@@ -1273,7 +1272,7 @@ bool CUDADevice::denoising_non_local_means(device_ptr image_ptr,
   if (have_error())
     return false;
 
-  CUdeviceptr difference = cuda_device_ptr(task->buffer.temporary_mem.device_pointer);
+  CUdeviceptr difference = (CUdeviceptr)task->buffer.temporary_mem.device_pointer;
   CUdeviceptr blurDifference = difference + sizeof(float) * pass_stride * num_shifts;
   CUdeviceptr weightAccum = difference + 2 * sizeof(float) * pass_stride * num_shifts;
   CUdeviceptr scale_ptr = 0;
@@ -1405,7 +1404,7 @@ bool CUDADevice::denoising_accumulate(device_ptr color_ptr,
   if (have_error())
     return false;
 
-  CUdeviceptr difference = cuda_device_ptr(task->buffer.temporary_mem.device_pointer);
+  CUdeviceptr difference = (CUdeviceptr)task->buffer.temporary_mem.device_pointer;
   CUdeviceptr blurDifference = difference + sizeof(float) * pass_stride * num_shifts;
 
   CUfunction cuNLMCalcDifference, cuNLMBlur, cuNLMCalcWeight, cuNLMConstructGramian;
@@ -1703,7 +1702,7 @@ void CUDADevice::path_trace(DeviceTask &task,
   wtile->h = rtile.h;
   wtile->offset = rtile.offset;
   wtile->stride = rtile.stride;
-  wtile->buffer = (float *)cuda_device_ptr(rtile.buffer);
+  wtile->buffer = (float *)(CUdeviceptr)rtile.buffer;
 
   /* Prepare work size. More step samples render faster, but for now we
    * remain conservative for GPUs connected to a display to avoid driver
@@ -1727,7 +1726,7 @@ void CUDADevice::path_trace(DeviceTask &task,
     wtile->num_samples = min(step_samples, end_sample - sample);
     work_tiles.copy_to_device();
 
-    CUdeviceptr d_work_tiles = cuda_device_ptr(work_tiles.device_pointer);
+    CUdeviceptr d_work_tiles = (CUdeviceptr)work_tiles.device_pointer;
     uint total_work_size = wtile->w * wtile->h * wtile->num_samples;
     uint num_blocks = divide_up(total_work_size, num_threads_per_block);
@@ -1762,7 +1761,7 @@ void CUDADevice::film_convert(DeviceTask &task,
   CUfunction cuFilmConvert;
   CUdeviceptr d_rgba = map_pixels((rgba_byte) ? rgba_byte : rgba_half);
-  CUdeviceptr d_buffer = cuda_device_ptr(buffer);
+  CUdeviceptr d_buffer = (CUdeviceptr)buffer;
 
   /* get kernel function */
   if (rgba_half) {
@@ -1823,8 +1822,8 @@ void CUDADevice::shader(DeviceTask &task)
   CUDAContextScope scope(this);
 
   CUfunction cuShader;
-  CUdeviceptr d_input = cuda_device_ptr(task.shader_input);
-  CUdeviceptr d_output = cuda_device_ptr(task.shader_output);
+  CUdeviceptr d_input = (CUdeviceptr)task.shader_input;
+  CUdeviceptr d_output = (CUdeviceptr)task.shader_output;
 
   /* get kernel function */
   if (task.shader_eval_type >= SHADER_EVAL_BAKE) {
@@ -1907,7 +1906,7 @@ CUdeviceptr CUDADevice::map_pixels(device_ptr mem)
     return buffer;
   }
 
-  return cuda_device_ptr(mem);
+  return (CUdeviceptr)mem;
 }
 
 void CUDADevice::unmap_pixels(device_ptr mem)
@@ -2347,7 +2346,7 @@ uint64_t CUDASplitKernel::state_buffer_size(device_memory & /*kg*/,
   size_buffer.zero_to_device();
 
   uint threads = num_threads;
-  CUdeviceptr d_size = device->cuda_device_ptr(size_buffer.device_pointer);
+  CUdeviceptr d_size = (CUdeviceptr)size_buffer.device_pointer;
 
   struct args_t {
     uint *num_threads;
@@ -2382,13 +2381,13 @@ bool CUDASplitKernel::enqueue_split_kernel_data_init(const KernelDimensions &dim
 {
   CUDAContextScope scope(device);
 
-  CUdeviceptr d_split_data = device->cuda_device_ptr(split_data.device_pointer);
-  CUdeviceptr d_ray_state = device->cuda_device_ptr(ray_state.device_pointer);
-  CUdeviceptr d_queue_index = device->cuda_device_ptr(queue_index.device_pointer);
-  CUdeviceptr d_use_queues_flag = device->cuda_device_ptr(use_queues_flag.device_pointer);
-  CUdeviceptr d_work_pool_wgs = device->cuda_device_ptr(work_pool_wgs.device_pointer);
+  CUdeviceptr d_split_data = (CUdeviceptr)split_data.device_pointer;
+  CUdeviceptr d_ray_state = (CUdeviceptr)ray_state.device_pointer;
+  CUdeviceptr d_queue_index = (CUdeviceptr)queue_index.device_pointer;
+  CUdeviceptr d_use_queues_flag = (CUdeviceptr)use_queues_flag.device_pointer;
+  CUdeviceptr d_work_pool_wgs = (CUdeviceptr)work_pool_wgs.device_pointer;
 
-  CUdeviceptr d_buffer = device->cuda_device_ptr(rtile.buffer);
+  CUdeviceptr d_buffer = (CUdeviceptr)rtile.buffer;
 
   int end_sample = rtile.start_sample + rtile.num_samples;
   int queue_size = dim.global_size[0] * dim.global_size[1];
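Beyond the mechanical substitution, generic_copy_to is restructured as a guard clause (early return when there is nothing to copy), and the CUDAContextScope is now const and created only on the branch that actually issues a CUDA call. As a side note on why the plain cast is sound, a hedged sketch, assuming device_ptr is a 64-bit unsigned integer (as in Cycles' device_memory.h at the time) and using stand-in typedefs rather than the real cuda.h:

    #include <cstdint>

    /* Stand-ins for illustration only: CUdeviceptr in the CUDA driver API is
     * an unsigned integer handle (unsigned long long on 64-bit platforms),
     * and Cycles stores device allocations as a plain integer as well, so the
     * conversion is a value cast, not a pointer dereference. */
    typedef uint64_t device_ptr;            /* assumption: mirrors Cycles */
    typedef unsigned long long CUdeviceptr; /* mirrors cuda.h on 64-bit */

    static_assert(sizeof(device_ptr) >= sizeof(CUdeviceptr),
                  "device_ptr must round-trip a CUdeviceptr without truncation");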