diff options
Diffstat (limited to 'intern/cycles/device')
-rw-r--r-- | intern/cycles/device/cuda/device_impl.cpp | 6 | ||||
-rw-r--r-- | intern/cycles/device/cuda/queue.cpp | 6 | ||||
-rw-r--r-- | intern/cycles/device/cuda/queue.h | 4 | ||||
-rw-r--r-- | intern/cycles/device/hip/device_impl.cpp | 6 | ||||
-rw-r--r-- | intern/cycles/device/hip/queue.cpp | 6 | ||||
-rw-r--r-- | intern/cycles/device/hip/queue.h | 4 | ||||
-rw-r--r-- | intern/cycles/device/optix/device_impl.cpp | 88 | ||||
-rw-r--r-- | intern/cycles/device/optix/queue.cpp | 8 | ||||
-rw-r--r-- | intern/cycles/device/optix/queue.h | 4 | ||||
-rw-r--r-- | intern/cycles/device/queue.h | 70 |
10 files changed, 141 insertions, 61 deletions
diff --git a/intern/cycles/device/cuda/device_impl.cpp b/intern/cycles/device/cuda/device_impl.cpp index e05fef3897c..ee55e6dc632 100644 --- a/intern/cycles/device/cuda/device_impl.cpp +++ b/intern/cycles/device/cuda/device_impl.cpp @@ -477,10 +477,10 @@ void CUDADevice::reserve_local_memory(const uint kernel_features) * still to make it faster. */ CUDADeviceQueue queue(this); - void *d_path_index = nullptr; - void *d_render_buffer = nullptr; + device_ptr d_path_index = 0; + device_ptr d_render_buffer = 0; int d_work_size = 0; - void *args[] = {&d_path_index, &d_render_buffer, &d_work_size}; + DeviceKernelArguments args(&d_path_index, &d_render_buffer, &d_work_size); queue.init_execution(); queue.enqueue(test_kernel, 1, args); diff --git a/intern/cycles/device/cuda/queue.cpp b/intern/cycles/device/cuda/queue.cpp index 09352a84181..880d7ca4cf2 100644 --- a/intern/cycles/device/cuda/queue.cpp +++ b/intern/cycles/device/cuda/queue.cpp @@ -89,7 +89,9 @@ bool CUDADeviceQueue::kernel_available(DeviceKernel kernel) const return cuda_device_->kernels.available(kernel); } -bool CUDADeviceQueue::enqueue(DeviceKernel kernel, const int work_size, void *args[]) +bool CUDADeviceQueue::enqueue(DeviceKernel kernel, + const int work_size, + DeviceKernelArguments const &args) { if (cuda_device_->have_error()) { return false; @@ -133,7 +135,7 @@ bool CUDADeviceQueue::enqueue(DeviceKernel kernel, const int work_size, void *ar 1, shared_mem_bytes, cuda_stream_, - args, + const_cast<void**>(args.values), 0), "enqueue"); diff --git a/intern/cycles/device/cuda/queue.h b/intern/cycles/device/cuda/queue.h index 28613cda071..0836af12098 100644 --- a/intern/cycles/device/cuda/queue.h +++ b/intern/cycles/device/cuda/queue.h @@ -42,7 +42,9 @@ class CUDADeviceQueue : public DeviceQueue { virtual bool kernel_available(DeviceKernel kernel) const override; - virtual bool enqueue(DeviceKernel kernel, const int work_size, void *args[]) override; + virtual bool enqueue(DeviceKernel kernel, + const int work_size, + DeviceKernelArguments const &args) override; virtual bool synchronize() override; diff --git a/intern/cycles/device/hip/device_impl.cpp b/intern/cycles/device/hip/device_impl.cpp index 53c4f3f0b3f..4f1cbabc89b 100644 --- a/intern/cycles/device/hip/device_impl.cpp +++ b/intern/cycles/device/hip/device_impl.cpp @@ -440,10 +440,10 @@ void HIPDevice::reserve_local_memory(const uint kernel_features) * still to make it faster. */ HIPDeviceQueue queue(this); - void *d_path_index = nullptr; - void *d_render_buffer = nullptr; + device_ptr d_path_index = 0; + device_ptr d_render_buffer = 0; int d_work_size = 0; - void *args[] = {&d_path_index, &d_render_buffer, &d_work_size}; + DeviceKernelArguments args(&d_path_index, &d_render_buffer, &d_work_size); queue.init_execution(); queue.enqueue(test_kernel, 1, args); diff --git a/intern/cycles/device/hip/queue.cpp b/intern/cycles/device/hip/queue.cpp index 0f053ccbeb5..42841324ed6 100644 --- a/intern/cycles/device/hip/queue.cpp +++ b/intern/cycles/device/hip/queue.cpp @@ -89,7 +89,9 @@ bool HIPDeviceQueue::kernel_available(DeviceKernel kernel) const return hip_device_->kernels.available(kernel); } -bool HIPDeviceQueue::enqueue(DeviceKernel kernel, const int work_size, void *args[]) +bool HIPDeviceQueue::enqueue(DeviceKernel kernel, + const int work_size, + DeviceKernelArguments const &args) { if (hip_device_->have_error()) { return false; @@ -132,7 +134,7 @@ bool HIPDeviceQueue::enqueue(DeviceKernel kernel, const int work_size, void *arg 1, shared_mem_bytes, hip_stream_, - args, + const_cast<void**>(args.values), 0), "enqueue"); diff --git a/intern/cycles/device/hip/queue.h b/intern/cycles/device/hip/queue.h index 95d1afaff0f..8040d367798 100644 --- a/intern/cycles/device/hip/queue.h +++ b/intern/cycles/device/hip/queue.h @@ -42,7 +42,9 @@ class HIPDeviceQueue : public DeviceQueue { virtual bool kernel_available(DeviceKernel kernel) const override; - virtual bool enqueue(DeviceKernel kernel, const int work_size, void *args[]) override; + virtual bool enqueue(DeviceKernel kernel, + const int work_size, + DeviceKernelArguments const &args) override; virtual bool synchronize() override; diff --git a/intern/cycles/device/optix/device_impl.cpp b/intern/cycles/device/optix/device_impl.cpp index b82b1281eb8..1d893d9c65b 100644 --- a/intern/cycles/device/optix/device_impl.cpp +++ b/intern/cycles/device/optix/device_impl.cpp @@ -667,22 +667,22 @@ bool OptiXDevice::denoise_filter_guiding_preprocess(DenoiseContext &context) const int work_size = buffer_params.width * buffer_params.height; - void *args[] = {const_cast<device_ptr *>(&context.guiding_params.device_pointer), - const_cast<int *>(&context.guiding_params.pass_stride), - const_cast<int *>(&context.guiding_params.pass_albedo), - const_cast<int *>(&context.guiding_params.pass_normal), - &context.render_buffers->buffer.device_pointer, - const_cast<int *>(&buffer_params.offset), - const_cast<int *>(&buffer_params.stride), - const_cast<int *>(&buffer_params.pass_stride), - const_cast<int *>(&context.pass_sample_count), - const_cast<int *>(&context.pass_denoising_albedo), - const_cast<int *>(&context.pass_denoising_normal), - const_cast<int *>(&buffer_params.full_x), - const_cast<int *>(&buffer_params.full_y), - const_cast<int *>(&buffer_params.width), - const_cast<int *>(&buffer_params.height), - const_cast<int *>(&context.num_samples)}; + DeviceKernelArguments args(&context.guiding_params.device_pointer, + &context.guiding_params.pass_stride, + &context.guiding_params.pass_albedo, + &context.guiding_params.pass_normal, + &context.render_buffers->buffer.device_pointer, + &buffer_params.offset, + &buffer_params.stride, + &buffer_params.pass_stride, + &context.pass_sample_count, + &context.pass_denoising_albedo, + &context.pass_denoising_normal, + &buffer_params.full_x, + &buffer_params.full_y, + &buffer_params.width, + &buffer_params.height, + &context.num_samples); return denoiser_.queue.enqueue(DEVICE_KERNEL_FILTER_GUIDING_PREPROCESS, work_size, args); } @@ -693,11 +693,11 @@ bool OptiXDevice::denoise_filter_guiding_set_fake_albedo(DenoiseContext &context const int work_size = buffer_params.width * buffer_params.height; - void *args[] = {const_cast<device_ptr *>(&context.guiding_params.device_pointer), - const_cast<int *>(&context.guiding_params.pass_stride), - const_cast<int *>(&context.guiding_params.pass_albedo), - const_cast<int *>(&buffer_params.width), - const_cast<int *>(&buffer_params.height)}; + DeviceKernelArguments args(&context.guiding_params.device_pointer, + &context.guiding_params.pass_stride, + &context.guiding_params.pass_albedo, + &buffer_params.width, + &buffer_params.height); return denoiser_.queue.enqueue(DEVICE_KERNEL_FILTER_GUIDING_SET_FAKE_ALBEDO, work_size, args); } @@ -793,15 +793,15 @@ bool OptiXDevice::denoise_filter_color_preprocess(DenoiseContext &context, const const int work_size = buffer_params.width * buffer_params.height; - void *args[] = {&context.render_buffers->buffer.device_pointer, - const_cast<int *>(&buffer_params.full_x), - const_cast<int *>(&buffer_params.full_y), - const_cast<int *>(&buffer_params.width), - const_cast<int *>(&buffer_params.height), - const_cast<int *>(&buffer_params.offset), - const_cast<int *>(&buffer_params.stride), - const_cast<int *>(&buffer_params.pass_stride), - const_cast<int *>(&pass.denoised_offset)}; + DeviceKernelArguments args(&context.render_buffers->buffer.device_pointer, + &buffer_params.full_x, + &buffer_params.full_y, + &buffer_params.width, + &buffer_params.height, + &buffer_params.offset, + &buffer_params.stride, + &buffer_params.pass_stride, + &pass.denoised_offset); return denoiser_.queue.enqueue(DEVICE_KERNEL_FILTER_COLOR_PREPROCESS, work_size, args); } @@ -813,20 +813,20 @@ bool OptiXDevice::denoise_filter_color_postprocess(DenoiseContext &context, const int work_size = buffer_params.width * buffer_params.height; - void *args[] = {&context.render_buffers->buffer.device_pointer, - const_cast<int *>(&buffer_params.full_x), - const_cast<int *>(&buffer_params.full_y), - const_cast<int *>(&buffer_params.width), - const_cast<int *>(&buffer_params.height), - const_cast<int *>(&buffer_params.offset), - const_cast<int *>(&buffer_params.stride), - const_cast<int *>(&buffer_params.pass_stride), - const_cast<int *>(&context.num_samples), - const_cast<int *>(&pass.noisy_offset), - const_cast<int *>(&pass.denoised_offset), - const_cast<int *>(&context.pass_sample_count), - const_cast<int *>(&pass.num_components), - const_cast<bool *>(&pass.use_compositing)}; + DeviceKernelArguments args(&context.render_buffers->buffer.device_pointer, + &buffer_params.full_x, + &buffer_params.full_y, + &buffer_params.width, + &buffer_params.height, + &buffer_params.offset, + &buffer_params.stride, + &buffer_params.pass_stride, + &context.num_samples, + &pass.noisy_offset, + &pass.denoised_offset, + &context.pass_sample_count, + &pass.num_components, + &pass.use_compositing); return denoiser_.queue.enqueue(DEVICE_KERNEL_FILTER_COLOR_POSTPROCESS, work_size, args); } diff --git a/intern/cycles/device/optix/queue.cpp b/intern/cycles/device/optix/queue.cpp index e3946d94f5d..1a437878b5f 100644 --- a/intern/cycles/device/optix/queue.cpp +++ b/intern/cycles/device/optix/queue.cpp @@ -47,7 +47,9 @@ static bool is_optix_specific_kernel(DeviceKernel kernel) kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK); } -bool OptiXDeviceQueue::enqueue(DeviceKernel kernel, const int work_size, void *args[]) +bool OptiXDeviceQueue::enqueue(DeviceKernel kernel, + const int work_size, + DeviceKernelArguments const &args) { if (!is_optix_specific_kernel(kernel)) { return CUDADeviceQueue::enqueue(kernel, work_size, args); @@ -69,7 +71,7 @@ bool OptiXDeviceQueue::enqueue(DeviceKernel kernel, const int work_size, void *a cuda_device_assert( cuda_device_, cuMemcpyHtoDAsync(launch_params_ptr + offsetof(KernelParamsOptiX, path_index_array), - args[0], // &d_path_index + args.values[0], // &d_path_index sizeof(device_ptr), cuda_stream_)); @@ -78,7 +80,7 @@ bool OptiXDeviceQueue::enqueue(DeviceKernel kernel, const int work_size, void *a cuda_device_assert( cuda_device_, cuMemcpyHtoDAsync(launch_params_ptr + offsetof(KernelParamsOptiX, render_buffer), - args[1], // &d_render_buffer + args.values[1], // &d_render_buffer sizeof(device_ptr), cuda_stream_)); } diff --git a/intern/cycles/device/optix/queue.h b/intern/cycles/device/optix/queue.h index 0de422ccc71..5f0e09dff2c 100644 --- a/intern/cycles/device/optix/queue.h +++ b/intern/cycles/device/optix/queue.h @@ -31,7 +31,9 @@ class OptiXDeviceQueue : public CUDADeviceQueue { virtual void init_execution() override; - virtual bool enqueue(DeviceKernel kernel, const int work_size, void *args[]) override; + virtual bool enqueue(DeviceKernel kernel, + const int work_size, + DeviceKernelArguments const &args) override; }; CCL_NAMESPACE_END diff --git a/intern/cycles/device/queue.h b/intern/cycles/device/queue.h index 188162f4b74..4e9f41f7875 100644 --- a/intern/cycles/device/queue.h +++ b/intern/cycles/device/queue.h @@ -31,6 +31,72 @@ class device_memory; struct KernelWorkTile; +/* Container for device kernel arguments with type correctness ensured by API. */ +struct DeviceKernelArguments { + + enum Type { + POINTER, + INT32, + FLOAT32, + BOOLEAN, + KERNEL_FILM_CONVERT, + }; + + static const int MAX_ARGS = 16; + Type types[MAX_ARGS]; + void *values[MAX_ARGS]; + size_t sizes[MAX_ARGS]; + size_t count = 0; + + DeviceKernelArguments() + { + } + + template<class T> DeviceKernelArguments(const T *arg) + { + add(arg); + } + + template<class T, class... Args> DeviceKernelArguments(const T *first, Args... args) + { + add(first); + add(args...); + } + + void add(const KernelFilmConvert *value) + { + add(KERNEL_FILM_CONVERT, value, sizeof(KernelFilmConvert)); + } + void add(const device_ptr *value) + { + add(POINTER, value, sizeof(device_ptr)); + } + void add(const int32_t *value) + { + add(INT32, value, sizeof(int32_t)); + } + void add(const float *value) + { + add(FLOAT32, value, sizeof(float)); + } + void add(const bool *value) + { + add(BOOLEAN, value, 4); + } + void add(const Type type, const void *value, size_t size) + { + types[count] = type; + values[count] = (void *)value; + sizes[count] = size; + count++; + } + template<typename T, typename... Args> void add(const T *first, Args... args) + { + add(first); + add(args...); + } +}; + /* Abstraction of a command queue for a device. * Provides API to schedule kernel execution in a specific queue with minimal possible overhead * from driver side. @@ -66,7 +132,9 @@ class DeviceQueue { * - int: pass pointer to the int * - device memory: pass pointer to device_memory.device_pointer * Return false if there was an error executing this or a previous kernel. */ - virtual bool enqueue(DeviceKernel kernel, const int work_size, void *args[]) = 0; + virtual bool enqueue(DeviceKernel kernel, + const int work_size, + DeviceKernelArguments const &args) = 0; /* Wait unit all enqueued kernels have finished execution. * Return false if there was an error executing any of the enqueued kernels. */ |