git.blender.org/blender.git
Diffstat (limited to 'intern/cycles/integrator/path_trace_work_gpu.cpp')
-rw-r--r--  intern/cycles/integrator/path_trace_work_gpu.cpp  148
1 file changed, 78 insertions(+), 70 deletions(-)
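
The diff below migrates PathTraceWorkGPU's kernel launches from raw "void *args[]" arrays to the DeviceKernelArguments helper, passing device_ptr handles directly instead of casting them to void *, and it also tells the work tile scheduler whether a hardware ray-tracing BVH layout (OptiX) is active via set_accelerated_rt(). As a rough illustration of the pattern being adopted, the sketch below shows one way such a variadic argument wrapper can be written; the struct name, member layout, and argument limit are assumptions made for illustration, not the actual Cycles implementation.

/* Minimal sketch (illustrative only, not the Cycles DeviceKernelArguments type):
 * collect typed kernel arguments by address so the call site needs no
 * (void *) or const_cast boilerplate. */
#include <cstddef>

struct SketchKernelArguments {
  static const int MAX_ARGS = 18; /* arbitrary illustrative limit */

  void *pointers[MAX_ARGS];
  std::size_t sizes[MAX_ARGS];
  int count = 0;

  SketchKernelArguments() = default;

  /* Variadic constructor: every argument is passed by address, so both the
   * pointer and the size of the underlying type can be recorded. */
  template<class T, class... Rest> SketchKernelArguments(const T *first, Rest... rest)
  {
    add(first, rest...);
  }

  template<class T, class... Rest> void add(const T *value, Rest... rest)
  {
    pointers[count] = const_cast<T *>(value);
    sizes[count] = sizeof(T);
    ++count;
    add(rest...);
  }
  void add() {}
};

/* Call-site shape mirroring the hunks below: a device_ptr-like handle and an
 * int are both taken by address, with no casts at the call site. */
static void enqueue_example(int work_size, unsigned long long d_path_index)
{
  SketchKernelArguments args(&d_path_index, &work_size);
  (void)args; /* a real queue would hand args.pointers/args.sizes to the device API */
}
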
diff --git a/intern/cycles/integrator/path_trace_work_gpu.cpp b/intern/cycles/integrator/path_trace_work_gpu.cpp
index 956aa6a8c90..e5062c6c47e 100644
--- a/intern/cycles/integrator/path_trace_work_gpu.cpp
+++ b/intern/cycles/integrator/path_trace_work_gpu.cpp
@@ -258,7 +258,8 @@ void PathTraceWorkGPU::render_samples(RenderStatistics &statistics,
* become busy after adding new tiles). This is especially important for the shadow catcher which
* schedules work in halves of available number of paths. */
work_tile_scheduler_.set_max_num_path_states(max_num_paths_ / 8);
-
+ work_tile_scheduler_.set_accelerated_rt((device_->get_bvh_layout_mask() & BVH_LAYOUT_OPTIX) !=
+ 0);
work_tile_scheduler_.reset(effective_buffer_params_,
start_sample,
samples_num,
@@ -333,7 +334,8 @@ DeviceKernel PathTraceWorkGPU::get_most_queued_kernel() const
void PathTraceWorkGPU::enqueue_reset()
{
- void *args[] = {&max_num_paths_};
+ DeviceKernelArguments args(&max_num_paths_);
+
queue_->enqueue(DEVICE_KERNEL_INTEGRATOR_RESET, max_num_paths_, args);
queue_->zero_to_device(integrator_queue_counter_);
queue_->zero_to_device(integrator_shader_sort_counter_);
@@ -404,7 +406,7 @@ bool PathTraceWorkGPU::enqueue_path_iteration()
void PathTraceWorkGPU::enqueue_path_iteration(DeviceKernel kernel, const int num_paths_limit)
{
- void *d_path_index = (void *)NULL;
+ device_ptr d_path_index = 0;
/* Create array of path indices for which this kernel is queued to be executed. */
int work_size = kernel_max_active_main_path_index(kernel);
@@ -415,14 +417,14 @@ void PathTraceWorkGPU::enqueue_path_iteration(DeviceKernel kernel, const int num
if (kernel_uses_sorting(kernel)) {
/* Compute array of active paths, sorted by shader. */
work_size = num_queued;
- d_path_index = (void *)queued_paths_.device_pointer;
+ d_path_index = queued_paths_.device_pointer;
compute_sorted_queued_paths(
DEVICE_KERNEL_INTEGRATOR_SORTED_PATHS_ARRAY, kernel, num_paths_limit);
}
else if (num_queued < work_size) {
work_size = num_queued;
- d_path_index = (void *)queued_paths_.device_pointer;
+ d_path_index = queued_paths_.device_pointer;
if (kernel_is_shadow_path(kernel)) {
/* Compute array of active shadow paths for specific kernel. */
@@ -441,8 +443,7 @@ void PathTraceWorkGPU::enqueue_path_iteration(DeviceKernel kernel, const int num
switch (kernel) {
case DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST: {
/* Closest ray intersection kernels with integrator state and render buffer. */
- void *d_render_buffer = (void *)buffers_->buffer.device_pointer;
- void *args[] = {&d_path_index, &d_render_buffer, const_cast<int *>(&work_size)};
+ DeviceKernelArguments args(&d_path_index, &buffers_->buffer.device_pointer, &work_size);
queue_->enqueue(kernel, work_size, args);
break;
@@ -452,7 +453,7 @@ void PathTraceWorkGPU::enqueue_path_iteration(DeviceKernel kernel, const int num
case DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE:
case DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK: {
/* Ray intersection kernels with integrator state. */
- void *args[] = {&d_path_index, const_cast<int *>(&work_size)};
+ DeviceKernelArguments args(&d_path_index, &work_size);
queue_->enqueue(kernel, work_size, args);
break;
@@ -464,8 +465,7 @@ void PathTraceWorkGPU::enqueue_path_iteration(DeviceKernel kernel, const int num
case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE:
case DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME: {
/* Shading kernels with integrator state and render buffer. */
- void *d_render_buffer = (void *)buffers_->buffer.device_pointer;
- void *args[] = {&d_path_index, &d_render_buffer, const_cast<int *>(&work_size)};
+ DeviceKernelArguments args(&d_path_index, &buffers_->buffer.device_pointer, &work_size);
queue_->enqueue(kernel, work_size, args);
break;
@@ -483,15 +483,17 @@ void PathTraceWorkGPU::compute_sorted_queued_paths(DeviceKernel kernel,
const int num_paths_limit)
{
int d_queued_kernel = queued_kernel;
- void *d_counter = integrator_state_gpu_.sort_key_counter[d_queued_kernel];
- void *d_prefix_sum = (void *)integrator_shader_sort_prefix_sum_.device_pointer;
- assert(d_counter != nullptr && d_prefix_sum != nullptr);
+ device_ptr d_counter = (device_ptr)integrator_state_gpu_.sort_key_counter[d_queued_kernel];
+ device_ptr d_prefix_sum = integrator_shader_sort_prefix_sum_.device_pointer;
+ assert(d_counter != 0 && d_prefix_sum != 0);
/* Compute prefix sum of number of active paths with each shader. */
{
const int work_size = 1;
int max_shaders = device_scene_->data.max_shaders;
- void *args[] = {&d_counter, &d_prefix_sum, &max_shaders};
+
+ DeviceKernelArguments args(&d_counter, &d_prefix_sum, &max_shaders);
+
queue_->enqueue(DEVICE_KERNEL_PREFIX_SUM, work_size, args);
}
@@ -506,15 +508,16 @@ void PathTraceWorkGPU::compute_sorted_queued_paths(DeviceKernel kernel,
* end of the array since compaction would need to do less work. */
const int work_size = kernel_max_active_main_path_index(queued_kernel);
- void *d_queued_paths = (void *)queued_paths_.device_pointer;
- void *d_num_queued_paths = (void *)num_queued_paths_.device_pointer;
- void *args[] = {const_cast<int *>(&work_size),
- const_cast<int *>(&num_paths_limit),
- &d_queued_paths,
- &d_num_queued_paths,
- &d_counter,
- &d_prefix_sum,
- &d_queued_kernel};
+ device_ptr d_queued_paths = queued_paths_.device_pointer;
+ device_ptr d_num_queued_paths = num_queued_paths_.device_pointer;
+
+ DeviceKernelArguments args(&work_size,
+ &num_paths_limit,
+ &d_queued_paths,
+ &d_num_queued_paths,
+ &d_counter,
+ &d_prefix_sum,
+ &d_queued_kernel);
queue_->enqueue(kernel, work_size, args);
}
@@ -526,10 +529,10 @@ void PathTraceWorkGPU::compute_queued_paths(DeviceKernel kernel, DeviceKernel qu
/* Launch kernel to fill the active paths arrays. */
const int work_size = kernel_max_active_main_path_index(queued_kernel);
- void *d_queued_paths = (void *)queued_paths_.device_pointer;
- void *d_num_queued_paths = (void *)num_queued_paths_.device_pointer;
- void *args[] = {
- const_cast<int *>(&work_size), &d_queued_paths, &d_num_queued_paths, &d_queued_kernel};
+ device_ptr d_queued_paths = queued_paths_.device_pointer;
+ device_ptr d_num_queued_paths = num_queued_paths_.device_pointer;
+
+ DeviceKernelArguments args(&work_size, &d_queued_paths, &d_num_queued_paths, &d_queued_kernel);
queue_->zero_to_device(num_queued_paths_);
queue_->enqueue(kernel, work_size, args);
@@ -605,15 +608,17 @@ void PathTraceWorkGPU::compact_paths(const int num_active_paths,
{
/* Compact fragmented path states into the start of the array, moving any paths
* with index higher than the number of active paths into the gaps. */
- void *d_compact_paths = (void *)queued_paths_.device_pointer;
- void *d_num_queued_paths = (void *)num_queued_paths_.device_pointer;
+ device_ptr d_compact_paths = queued_paths_.device_pointer;
+ device_ptr d_num_queued_paths = num_queued_paths_.device_pointer;
/* Create array with terminated paths that we can write to. */
{
/* TODO: can the work size be reduced here? */
int offset = num_active_paths;
int work_size = num_active_paths;
- void *args[] = {&work_size, &d_compact_paths, &d_num_queued_paths, &offset};
+
+ DeviceKernelArguments args(&work_size, &d_compact_paths, &d_num_queued_paths, &offset);
+
queue_->zero_to_device(num_queued_paths_);
queue_->enqueue(terminated_paths_kernel, work_size, args);
}
@@ -622,8 +627,10 @@ void PathTraceWorkGPU::compact_paths(const int num_active_paths,
* than the number of active paths. */
{
int work_size = max_active_path_index;
- void *args[] = {
- &work_size, &d_compact_paths, &d_num_queued_paths, const_cast<int *>(&num_active_paths)};
+
+ DeviceKernelArguments args(
+ &work_size, &d_compact_paths, &d_num_queued_paths, &num_active_paths);
+
queue_->zero_to_device(num_queued_paths_);
queue_->enqueue(compact_paths_kernel, work_size, args);
}
@@ -638,8 +645,10 @@ void PathTraceWorkGPU::compact_paths(const int num_active_paths,
int work_size = num_compact_paths;
int active_states_offset = 0;
int terminated_states_offset = num_active_paths;
- void *args[] = {
- &d_compact_paths, &active_states_offset, &terminated_states_offset, &work_size};
+
+ DeviceKernelArguments args(
+ &d_compact_paths, &active_states_offset, &terminated_states_offset, &work_size);
+
queue_->enqueue(compact_kernel, work_size, args);
}
}
@@ -768,14 +777,12 @@ void PathTraceWorkGPU::enqueue_work_tiles(DeviceKernel kernel,
queue_->copy_to_device(work_tiles_);
- void *d_work_tiles = (void *)work_tiles_.device_pointer;
- void *d_render_buffer = (void *)buffers_->buffer.device_pointer;
+ device_ptr d_work_tiles = work_tiles_.device_pointer;
+ device_ptr d_render_buffer = buffers_->buffer.device_pointer;
/* Launch kernel. */
- void *args[] = {&d_work_tiles,
- const_cast<int *>(&num_work_tiles),
- &d_render_buffer,
- const_cast<int *>(&max_tile_work_size)};
+ DeviceKernelArguments args(
+ &d_work_tiles, &num_work_tiles, &d_render_buffer, &max_tile_work_size);
queue_->enqueue(kernel, max_tile_work_size * num_work_tiles, args);
@@ -965,16 +972,16 @@ int PathTraceWorkGPU::adaptive_sampling_convergence_check_count_active(float thr
const int work_size = effective_buffer_params_.width * effective_buffer_params_.height;
- void *args[] = {&buffers_->buffer.device_pointer,
- const_cast<int *>(&effective_buffer_params_.full_x),
- const_cast<int *>(&effective_buffer_params_.full_y),
- const_cast<int *>(&effective_buffer_params_.width),
- const_cast<int *>(&effective_buffer_params_.height),
- &threshold,
- &reset,
- &effective_buffer_params_.offset,
- &effective_buffer_params_.stride,
- &num_active_pixels.device_pointer};
+ DeviceKernelArguments args(&buffers_->buffer.device_pointer,
+ &effective_buffer_params_.full_x,
+ &effective_buffer_params_.full_y,
+ &effective_buffer_params_.width,
+ &effective_buffer_params_.height,
+ &threshold,
+ &reset,
+ &effective_buffer_params_.offset,
+ &effective_buffer_params_.stride,
+ &num_active_pixels.device_pointer);
queue_->enqueue(DEVICE_KERNEL_ADAPTIVE_SAMPLING_CONVERGENCE_CHECK, work_size, args);
@@ -988,13 +995,13 @@ void PathTraceWorkGPU::enqueue_adaptive_sampling_filter_x()
{
const int work_size = effective_buffer_params_.height;
- void *args[] = {&buffers_->buffer.device_pointer,
- &effective_buffer_params_.full_x,
- &effective_buffer_params_.full_y,
- &effective_buffer_params_.width,
- &effective_buffer_params_.height,
- &effective_buffer_params_.offset,
- &effective_buffer_params_.stride};
+ DeviceKernelArguments args(&buffers_->buffer.device_pointer,
+ &effective_buffer_params_.full_x,
+ &effective_buffer_params_.full_y,
+ &effective_buffer_params_.width,
+ &effective_buffer_params_.height,
+ &effective_buffer_params_.offset,
+ &effective_buffer_params_.stride);
queue_->enqueue(DEVICE_KERNEL_ADAPTIVE_SAMPLING_CONVERGENCE_FILTER_X, work_size, args);
}
@@ -1003,13 +1010,13 @@ void PathTraceWorkGPU::enqueue_adaptive_sampling_filter_y()
{
const int work_size = effective_buffer_params_.width;
- void *args[] = {&buffers_->buffer.device_pointer,
- &effective_buffer_params_.full_x,
- &effective_buffer_params_.full_y,
- &effective_buffer_params_.width,
- &effective_buffer_params_.height,
- &effective_buffer_params_.offset,
- &effective_buffer_params_.stride};
+ DeviceKernelArguments args(&buffers_->buffer.device_pointer,
+ &effective_buffer_params_.full_x,
+ &effective_buffer_params_.full_y,
+ &effective_buffer_params_.width,
+ &effective_buffer_params_.height,
+ &effective_buffer_params_.offset,
+ &effective_buffer_params_.stride);
queue_->enqueue(DEVICE_KERNEL_ADAPTIVE_SAMPLING_CONVERGENCE_FILTER_Y, work_size, args);
}
@@ -1018,10 +1025,10 @@ void PathTraceWorkGPU::cryptomatte_postproces()
{
const int work_size = effective_buffer_params_.width * effective_buffer_params_.height;
- void *args[] = {&buffers_->buffer.device_pointer,
- const_cast<int *>(&work_size),
- &effective_buffer_params_.offset,
- &effective_buffer_params_.stride};
+ DeviceKernelArguments args(&buffers_->buffer.device_pointer,
+ &work_size,
+ &effective_buffer_params_.offset,
+ &effective_buffer_params_.stride);
queue_->enqueue(DEVICE_KERNEL_CRYPTOMATTE_POSTPROCESS, work_size, args);
}
@@ -1070,8 +1077,9 @@ int PathTraceWorkGPU::shadow_catcher_count_possible_splits()
queue_->zero_to_device(num_queued_paths_);
const int work_size = max_active_main_path_index_;
- void *d_num_queued_paths = (void *)num_queued_paths_.device_pointer;
- void *args[] = {const_cast<int *>(&work_size), &d_num_queued_paths};
+ device_ptr d_num_queued_paths = num_queued_paths_.device_pointer;
+
+ DeviceKernelArguments args(&work_size, &d_num_queued_paths);
queue_->enqueue(DEVICE_KERNEL_INTEGRATOR_SHADOW_CATCHER_COUNT_POSSIBLE_SPLITS, work_size, args);
queue_->copy_from_device(num_queued_paths_);