diff options
Diffstat (limited to 'intern')
30 files changed, 550 insertions, 277 deletions
diff --git a/intern/cycles/integrator/path_trace_work_gpu.cpp b/intern/cycles/integrator/path_trace_work_gpu.cpp index bc380f269ad..18aa5dda70d 100644 --- a/intern/cycles/integrator/path_trace_work_gpu.cpp +++ b/intern/cycles/integrator/path_trace_work_gpu.cpp @@ -52,7 +52,11 @@ static size_t estimate_single_state_size() * For until then use common value. Currently this size is only used for logging, but is weak to * rely on this. */ #define KERNEL_STRUCT_VOLUME_STACK_SIZE 4 + #include "kernel/integrator/integrator_state_template.h" + +#include "kernel/integrator/integrator_shadow_state_template.h" + #undef KERNEL_STRUCT_BEGIN #undef KERNEL_STRUCT_MEMBER #undef KERNEL_STRUCT_ARRAY_MEMBER @@ -74,6 +78,8 @@ PathTraceWorkGPU::PathTraceWorkGPU(Device *device, integrator_shader_sort_counter_(device, "integrator_shader_sort_counter", MEM_READ_WRITE), integrator_shader_raytrace_sort_counter_( device, "integrator_shader_raytrace_sort_counter", MEM_READ_WRITE), + integrator_next_shadow_path_index_( + device, "integrator_next_shadow_path_index", MEM_READ_WRITE), integrator_next_shadow_catcher_path_index_( device, "integrator_next_shadow_catcher_path_index", MEM_READ_WRITE), queued_paths_(device, "queued_paths", MEM_READ_WRITE), @@ -138,7 +144,11 @@ void PathTraceWorkGPU::alloc_integrator_soa() } \ } #define KERNEL_STRUCT_VOLUME_STACK_SIZE (integrator_state_soa_volume_stack_size_) + #include "kernel/integrator/integrator_state_template.h" + +#include "kernel/integrator/integrator_shadow_state_template.h" + #undef KERNEL_STRUCT_BEGIN #undef KERNEL_STRUCT_MEMBER #undef KERNEL_STRUCT_ARRAY_MEMBER @@ -199,16 +209,22 @@ void PathTraceWorkGPU::alloc_integrator_sorting() void PathTraceWorkGPU::alloc_integrator_path_split() { - if (integrator_next_shadow_catcher_path_index_.size() != 0) { - return; + if (integrator_next_shadow_path_index_.size() == 0) { + integrator_next_shadow_path_index_.alloc(1); + integrator_next_shadow_path_index_.zero_to_device(); + + integrator_state_gpu_.next_shadow_path_index = + (int *)integrator_next_shadow_path_index_.device_pointer; } - integrator_next_shadow_catcher_path_index_.alloc(1); - /* TODO(sergey): Use queue? */ - integrator_next_shadow_catcher_path_index_.zero_to_device(); + if (integrator_next_shadow_catcher_path_index_.size() == 0) { + integrator_next_shadow_catcher_path_index_.alloc(1); + integrator_next_shadow_path_index_.data()[0] = 0; + integrator_next_shadow_catcher_path_index_.zero_to_device(); - integrator_state_gpu_.next_shadow_catcher_path_index = - (int *)integrator_next_shadow_catcher_path_index_.device_pointer; + integrator_state_gpu_.next_shadow_catcher_path_index = + (int *)integrator_next_shadow_catcher_path_index_.device_pointer; + } } void PathTraceWorkGPU::alloc_work_memory() @@ -341,27 +357,45 @@ bool PathTraceWorkGPU::enqueue_path_iteration() return false; } - /* Finish shadows before potentially adding more shadow rays. We can only - * store one shadow ray in the integrator state. + /* If the number of shadow kernels dropped to zero, set the next shadow path + * index to zero as well. * - * When there is a shadow catcher in the scene finish shadow rays before invoking intersect - * closest kernel since so that the shadow paths are writing to the pre-split state. */ - if (kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE || - kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE || - kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME || - (has_shadow_catcher() && kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST)) { - if (queue_counter->num_queued[DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW]) { - enqueue_path_iteration(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW); - return true; + * TODO: use shadow path compaction to lower it more often instead of letting + * it fill up entirely? */ + const int num_queued_shadow = + queue_counter->num_queued[DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW] + + queue_counter->num_queued[DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW]; + if (num_queued_shadow == 0) { + if (integrator_next_shadow_path_index_.data()[0] != 0) { + integrator_next_shadow_path_index_.data()[0] = 0; + queue_->copy_to_device(integrator_next_shadow_path_index_); } - else if (queue_counter->num_queued[DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW]) { - enqueue_path_iteration(DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW); - return true; + } + + /* For kernels that add shadow paths, check if there is enough space available. + * If not, schedule shadow kernels first to clear out the shadow paths. */ + if (kernel_creates_shadow_paths(kernel)) { + if (max_num_paths_ - integrator_next_shadow_path_index_.data()[0] < + queue_counter->num_queued[kernel]) { + if (queue_counter->num_queued[DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW]) { + enqueue_path_iteration(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW); + return true; + } + else if (queue_counter->num_queued[DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW]) { + enqueue_path_iteration(DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW); + return true; + } } } /* Schedule kernel with maximum number of queued items. */ enqueue_path_iteration(kernel); + + /* Update next shadow path index for kernels that can add shadow paths. */ + if (kernel_creates_shadow_paths(kernel)) { + queue_->copy_from_device(integrator_next_shadow_path_index_); + } + return true; } @@ -370,13 +404,12 @@ void PathTraceWorkGPU::enqueue_path_iteration(DeviceKernel kernel) void *d_path_index = (void *)NULL; /* Create array of path indices for which this kernel is queued to be executed. */ - int work_size = max_active_path_index_; + int work_size = kernel_max_active_path_index(kernel); IntegratorQueueCounter *queue_counter = integrator_queue_counter_.data(); int num_queued = queue_counter->num_queued[kernel]; - if (kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE || - kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE) { + if (kernel_uses_sorting(kernel)) { /* Compute array of active paths, sorted by shader. */ work_size = num_queued; d_path_index = (void *)queued_paths_.device_pointer; @@ -387,8 +420,7 @@ void PathTraceWorkGPU::enqueue_path_iteration(DeviceKernel kernel) work_size = num_queued; d_path_index = (void *)queued_paths_.device_pointer; - if (kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW || - kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW) { + if (kernel_is_shadow_path(kernel)) { /* Compute array of active shadow paths for specific kernel. */ compute_queued_paths(DEVICE_KERNEL_INTEGRATOR_QUEUED_SHADOW_PATHS_ARRAY, kernel); } @@ -452,7 +484,7 @@ void PathTraceWorkGPU::compute_sorted_queued_paths(DeviceKernel kernel, DeviceKe { /* TODO: this could be smaller for terminated paths based on amount of work we want * to schedule. */ - const int work_size = max_active_path_index_; + const int work_size = kernel_max_active_path_index(queued_kernel); void *d_queued_paths = (void *)queued_paths_.device_pointer; void *d_num_queued_paths = (void *)num_queued_paths_.device_pointer; @@ -481,7 +513,7 @@ void PathTraceWorkGPU::compute_queued_paths(DeviceKernel kernel, DeviceKernel qu int d_queued_kernel = queued_kernel; /* Launch kernel to fill the active paths arrays. */ - const int work_size = max_active_path_index_; + const int work_size = kernel_max_active_path_index(queued_kernel); void *d_queued_paths = (void *)queued_paths_.device_pointer; void *d_num_queued_paths = (void *)num_queued_paths_.device_pointer; void *args[] = { @@ -981,4 +1013,29 @@ int PathTraceWorkGPU::shadow_catcher_count_possible_splits() return num_queued_paths_.data()[0]; } +bool PathTraceWorkGPU::kernel_uses_sorting(DeviceKernel kernel) +{ + return (kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE || + kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE); +} + +bool PathTraceWorkGPU::kernel_creates_shadow_paths(DeviceKernel kernel) +{ + return (kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE || + kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE || + kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME); +} + +bool PathTraceWorkGPU::kernel_is_shadow_path(DeviceKernel kernel) +{ + return (kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW || + kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW); +} + +int PathTraceWorkGPU::kernel_max_active_path_index(DeviceKernel kernel) +{ + return (kernel_is_shadow_path(kernel)) ? integrator_next_shadow_path_index_.data()[0] : + max_active_path_index_; +} + CCL_NAMESPACE_END diff --git a/intern/cycles/integrator/path_trace_work_gpu.h b/intern/cycles/integrator/path_trace_work_gpu.h index e66851cc8d8..dd2c1c197ae 100644 --- a/intern/cycles/integrator/path_trace_work_gpu.h +++ b/intern/cycles/integrator/path_trace_work_gpu.h @@ -113,6 +113,12 @@ class PathTraceWorkGPU : public PathTraceWork { /* Count how many currently scheduled paths can still split. */ int shadow_catcher_count_possible_splits(); + /* Kernel properties. */ + bool kernel_uses_sorting(DeviceKernel kernel); + bool kernel_creates_shadow_paths(DeviceKernel kernel); + bool kernel_is_shadow_path(DeviceKernel kernel); + int kernel_max_active_path_index(DeviceKernel kernel); + /* Integrator queue. */ unique_ptr<DeviceQueue> queue_; @@ -131,6 +137,7 @@ class PathTraceWorkGPU : public PathTraceWork { device_vector<int> integrator_shader_sort_counter_; device_vector<int> integrator_shader_raytrace_sort_counter_; /* Path split. */ + device_vector<int> integrator_next_shadow_path_index_; device_vector<int> integrator_next_shadow_catcher_path_index_; /* Temporary buffer to get an array of queued path for a particular kernel. */ diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index e0d48361650..7357c5804ed 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -236,6 +236,7 @@ set(SRC_INTEGRATOR_HEADERS integrator/integrator_shade_shadow.h integrator/integrator_shade_surface.h integrator/integrator_shade_volume.h + integrator/integrator_shadow_state_template.h integrator/integrator_state.h integrator/integrator_state_flow.h integrator/integrator_state_template.h diff --git a/intern/cycles/kernel/device/cpu/kernel_arch_impl.h b/intern/cycles/kernel/device/cpu/kernel_arch_impl.h index bf8667ac045..2b0eea4fb61 100644 --- a/intern/cycles/kernel/device/cpu/kernel_arch_impl.h +++ b/intern/cycles/kernel/device/cpu/kernel_arch_impl.h @@ -69,6 +69,18 @@ CCL_NAMESPACE_BEGIN # define KERNEL_INVOKE(name, ...) integrator_##name(__VA_ARGS__) #endif +/* TODO: Either use something like get_work_pixel(), or simplify tile which is passed here, so + * that it does not contain unused fields. */ +#define DEFINE_INTEGRATOR_INIT_KERNEL(name) \ + bool KERNEL_FUNCTION_FULL_NAME(integrator_##name)(const KernelGlobalsCPU *kg, \ + IntegratorStateCPU *state, \ + KernelWorkTile *tile, \ + ccl_global float *render_buffer) \ + { \ + return KERNEL_INVOKE( \ + name, kg, state, tile, render_buffer, tile->x, tile->y, tile->start_sample); \ + } + #define DEFINE_INTEGRATOR_KERNEL(name) \ void KERNEL_FUNCTION_FULL_NAME(integrator_##name)(const KernelGlobalsCPU *kg, \ IntegratorStateCPU *state) \ @@ -83,30 +95,32 @@ CCL_NAMESPACE_BEGIN KERNEL_INVOKE(name, kg, state, render_buffer); \ } -/* TODO: Either use something like get_work_pixel(), or simplify tile which is passed here, so - * that it does not contain unused fields. */ -#define DEFINE_INTEGRATOR_INIT_KERNEL(name) \ - bool KERNEL_FUNCTION_FULL_NAME(integrator_##name)(const KernelGlobalsCPU *kg, \ - IntegratorStateCPU *state, \ - KernelWorkTile *tile, \ - ccl_global float *render_buffer) \ +#define DEFINE_INTEGRATOR_SHADOW_KERNEL(name) \ + void KERNEL_FUNCTION_FULL_NAME(integrator_##name)(const KernelGlobalsCPU *kg, \ + IntegratorStateCPU *state) \ { \ - return KERNEL_INVOKE( \ - name, kg, state, tile, render_buffer, tile->x, tile->y, tile->start_sample); \ + KERNEL_INVOKE(name, kg, &state->shadow); \ + } + +#define DEFINE_INTEGRATOR_SHADOW_SHADE_KERNEL(name) \ + void KERNEL_FUNCTION_FULL_NAME(integrator_##name)( \ + const KernelGlobalsCPU *kg, IntegratorStateCPU *state, ccl_global float *render_buffer) \ + { \ + KERNEL_INVOKE(name, kg, &state->shadow, render_buffer); \ } DEFINE_INTEGRATOR_INIT_KERNEL(init_from_camera) DEFINE_INTEGRATOR_INIT_KERNEL(init_from_bake) DEFINE_INTEGRATOR_KERNEL(intersect_closest) -DEFINE_INTEGRATOR_KERNEL(intersect_shadow) DEFINE_INTEGRATOR_KERNEL(intersect_subsurface) DEFINE_INTEGRATOR_KERNEL(intersect_volume_stack) DEFINE_INTEGRATOR_SHADE_KERNEL(shade_background) DEFINE_INTEGRATOR_SHADE_KERNEL(shade_light) -DEFINE_INTEGRATOR_SHADE_KERNEL(shade_shadow) DEFINE_INTEGRATOR_SHADE_KERNEL(shade_surface) DEFINE_INTEGRATOR_SHADE_KERNEL(shade_volume) DEFINE_INTEGRATOR_SHADE_KERNEL(megakernel) +DEFINE_INTEGRATOR_SHADOW_KERNEL(intersect_shadow) +DEFINE_INTEGRATOR_SHADOW_SHADE_KERNEL(shade_shadow) /* -------------------------------------------------------------------- * Shader evaluation. diff --git a/intern/cycles/kernel/device/gpu/kernel.h b/intern/cycles/kernel/device/gpu/kernel.h index b5ecab2a4db..6b4d79ed5b7 100644 --- a/intern/cycles/kernel/device/gpu/kernel.h +++ b/intern/cycles/kernel/device/gpu/kernel.h @@ -265,8 +265,7 @@ extern "C" __global__ void __launch_bounds__(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_B { gpu_parallel_active_index_array<GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE>( num_states, indices, num_indices, [](const int state) { - return (INTEGRATOR_STATE(state, path, queued_kernel) != 0) || - (INTEGRATOR_STATE(state, shadow_path, queued_kernel) != 0); + return (INTEGRATOR_STATE(state, path, queued_kernel) != 0); }); } @@ -278,8 +277,7 @@ extern "C" __global__ void __launch_bounds__(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_B { gpu_parallel_active_index_array<GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE>( num_states, indices + indices_offset, num_indices, [](const int state) { - return (INTEGRATOR_STATE(state, path, queued_kernel) == 0) && - (INTEGRATOR_STATE(state, shadow_path, queued_kernel) == 0); + return (INTEGRATOR_STATE(state, path, queued_kernel) == 0); }); } @@ -303,9 +301,7 @@ extern "C" __global__ void __launch_bounds__(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_B { gpu_parallel_active_index_array<GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE>( num_states, indices, num_indices, [num_active_paths](const int state) { - return (state >= num_active_paths) && - ((INTEGRATOR_STATE(state, path, queued_kernel) != 0) || - (INTEGRATOR_STATE(state, shadow_path, queued_kernel) != 0)); + return (state >= num_active_paths) && (INTEGRATOR_STATE(state, path, queued_kernel) != 0); }); } diff --git a/intern/cycles/kernel/integrator/integrator_intersect_closest.h b/intern/cycles/kernel/integrator/integrator_intersect_closest.h index 317ea76553a..ef8dcb50115 100644 --- a/intern/cycles/kernel/integrator/integrator_intersect_closest.h +++ b/intern/cycles/kernel/integrator/integrator_intersect_closest.h @@ -136,13 +136,6 @@ ccl_device_forceinline void integrator_intersect_shader_next_kernel( else { INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader); } - - /* If the split happened after bounce through a transparent object it's possible to have shadow - * patch. Make sure it is properly re-scheduled on the split path. */ - const int shadow_kernel = INTEGRATOR_STATE(state, shadow_path, queued_kernel); - if (shadow_kernel != 0) { - INTEGRATOR_SHADOW_PATH_INIT(shadow_kernel); - } } #endif } diff --git a/intern/cycles/kernel/integrator/integrator_intersect_shadow.h b/intern/cycles/kernel/integrator/integrator_intersect_shadow.h index 06f58f88bc8..9dc0eb02c9b 100644 --- a/intern/cycles/kernel/integrator/integrator_intersect_shadow.h +++ b/intern/cycles/kernel/integrator/integrator_intersect_shadow.h @@ -20,7 +20,7 @@ CCL_NAMESPACE_BEGIN /* Visibility for the shadow ray. */ ccl_device_forceinline uint integrate_intersect_shadow_visibility(KernelGlobals kg, - ConstIntegratorState state) + ConstIntegratorShadowState state) { uint visibility = PATH_RAY_SHADOW; @@ -33,7 +33,7 @@ ccl_device_forceinline uint integrate_intersect_shadow_visibility(KernelGlobals } ccl_device bool integrate_intersect_shadow_opaque(KernelGlobals kg, - IntegratorState state, + IntegratorShadowState state, ccl_private const Ray *ray, const uint visibility) { @@ -55,7 +55,7 @@ ccl_device bool integrate_intersect_shadow_opaque(KernelGlobals kg, } ccl_device_forceinline int integrate_shadow_max_transparent_hits(KernelGlobals kg, - ConstIntegratorState state) + ConstIntegratorShadowState state) { const int transparent_max_bounce = kernel_data.integrator.transparent_max_bounce; const int transparent_bounce = INTEGRATOR_STATE(state, shadow_path, transparent_bounce); @@ -65,7 +65,7 @@ ccl_device_forceinline int integrate_shadow_max_transparent_hits(KernelGlobals k #ifdef __TRANSPARENT_SHADOWS__ ccl_device bool integrate_intersect_shadow_transparent(KernelGlobals kg, - IntegratorState state, + IntegratorShadowState state, ccl_private const Ray *ray, const uint visibility) { @@ -106,7 +106,7 @@ ccl_device bool integrate_intersect_shadow_transparent(KernelGlobals kg, } #endif -ccl_device void integrator_intersect_shadow(KernelGlobals kg, IntegratorState state) +ccl_device void integrator_intersect_shadow(KernelGlobals kg, IntegratorShadowState state) { PROFILING_INIT(kg, PROFILING_INTERSECT_SHADOW); diff --git a/intern/cycles/kernel/integrator/integrator_megakernel.h b/intern/cycles/kernel/integrator/integrator_megakernel.h index a3b2b1f9e90..6e3220aa3b7 100644 --- a/intern/cycles/kernel/integrator/integrator_megakernel.h +++ b/intern/cycles/kernel/integrator/integrator_megakernel.h @@ -39,14 +39,17 @@ ccl_device void integrator_megakernel(KernelGlobals kg, * TODO: investigate if we can use device side enqueue for GPUs to avoid * having to compile this big kernel. */ while (true) { - if (INTEGRATOR_STATE(state, shadow_path, queued_kernel)) { + const uint32_t shadow_queued_kernel = INTEGRATOR_STATE( + &state->shadow, shadow_path, queued_kernel); + + if (shadow_queued_kernel) { /* First handle any shadow paths before we potentially create more shadow paths. */ - switch (INTEGRATOR_STATE(state, shadow_path, queued_kernel)) { + switch (shadow_queued_kernel) { case DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW: - integrator_intersect_shadow(kg, state); + integrator_intersect_shadow(kg, &state->shadow); break; case DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW: - integrator_shade_shadow(kg, state, render_buffer); + integrator_shade_shadow(kg, &state->shadow, render_buffer); break; default: kernel_assert(0); diff --git a/intern/cycles/kernel/integrator/integrator_shade_shadow.h b/intern/cycles/kernel/integrator/integrator_shade_shadow.h index cdbe85f6b8c..94900754b76 100644 --- a/intern/cycles/kernel/integrator/integrator_shade_shadow.h +++ b/intern/cycles/kernel/integrator/integrator_shade_shadow.h @@ -30,7 +30,7 @@ ccl_device_inline bool shadow_intersections_has_remaining(const int num_hits) #ifdef __TRANSPARENT_SHADOWS__ ccl_device_inline float3 integrate_transparent_surface_shadow(KernelGlobals kg, - IntegratorState state, + IntegratorShadowState state, const int hit) { PROFILING_INIT(kg, PROFILING_SHADE_SHADOW_SURFACE); @@ -69,7 +69,7 @@ ccl_device_inline float3 integrate_transparent_surface_shadow(KernelGlobals kg, # ifdef __VOLUME__ ccl_device_inline void integrate_transparent_volume_shadow(KernelGlobals kg, - IntegratorState state, + IntegratorShadowState state, const int hit, const int num_recorded_hits, ccl_private float3 *ccl_restrict @@ -97,14 +97,14 @@ ccl_device_inline void integrate_transparent_volume_shadow(KernelGlobals kg, shader_setup_from_volume(kg, shadow_sd, &ray); const float step_size = volume_stack_step_size( - kg, state, [=](const int i) { return integrator_state_read_shadow_volume_stack(state, i); }); + kg, [=](const int i) { return integrator_state_read_shadow_volume_stack(state, i); }); volume_shadow_heterogeneous(kg, state, &ray, shadow_sd, throughput, step_size); } # endif ccl_device_inline bool integrate_transparent_shadow(KernelGlobals kg, - IntegratorState state, + IntegratorShadowState state, const int num_hits) { /* Accumulate shadow for transparent surfaces. */ @@ -158,7 +158,7 @@ ccl_device_inline bool integrate_transparent_shadow(KernelGlobals kg, #endif /* __TRANSPARENT_SHADOWS__ */ ccl_device void integrator_shade_shadow(KernelGlobals kg, - IntegratorState state, + IntegratorShadowState state, ccl_global float *ccl_restrict render_buffer) { PROFILING_INIT(kg, PROFILING_SHADE_SHADOW_SETUP); diff --git a/intern/cycles/kernel/integrator/integrator_shade_surface.h b/intern/cycles/kernel/integrator/integrator_shade_surface.h index 08580645984..0108ba1373c 100644 --- a/intern/cycles/kernel/integrator/integrator_shade_surface.h +++ b/intern/cycles/kernel/integrator/integrator_shade_surface.h @@ -167,17 +167,20 @@ ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg, light_sample_to_surface_shadow_ray(kg, sd, &ls, &ray); const bool is_light = light_sample_is_light(&ls); + /* Branch off shadow kernel. */ + INTEGRATOR_SHADOW_PATH_INIT(shadow_state, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW); + /* Copy volume stack and enter/exit volume. */ - integrator_state_copy_volume_stack_to_shadow(kg, state); + integrator_state_copy_volume_stack_to_shadow(kg, shadow_state, state); if (is_transmission) { # ifdef __VOLUME__ - shadow_volume_stack_enter_exit(kg, state, sd); + shadow_volume_stack_enter_exit(kg, shadow_state, sd); # endif } /* Write shadow ray and associated state to global memory. */ - integrator_state_write_shadow_ray(kg, state, &ray); + integrator_state_write_shadow_ray(kg, shadow_state, &ray); /* Copy state from main path to shadow path. */ const uint16_t bounce = INTEGRATOR_STATE(state, path, bounce); @@ -191,20 +194,32 @@ ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg, const float3 diffuse_glossy_ratio = (bounce == 0) ? bsdf_eval_diffuse_glossy_ratio(&bsdf_eval) : INTEGRATOR_STATE(state, path, diffuse_glossy_ratio); - INTEGRATOR_STATE_WRITE(state, shadow_path, diffuse_glossy_ratio) = diffuse_glossy_ratio; + INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, diffuse_glossy_ratio) = diffuse_glossy_ratio; } - INTEGRATOR_STATE_WRITE(state, shadow_path, flag) = shadow_flag; - INTEGRATOR_STATE_WRITE(state, shadow_path, bounce) = bounce; - INTEGRATOR_STATE_WRITE(state, shadow_path, transparent_bounce) = transparent_bounce; - INTEGRATOR_STATE_WRITE(state, shadow_path, throughput) = throughput; + INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, render_pixel_index) = INTEGRATOR_STATE( + state, path, render_pixel_index); + INTEGRATOR_STATE_WRITE( + shadow_state, shadow_path, rng_offset) = INTEGRATOR_STATE(state, path, rng_offset) - + PRNG_BOUNCE_NUM * transparent_bounce; + INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, rng_hash) = INTEGRATOR_STATE( + state, path, rng_hash); + INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, sample) = INTEGRATOR_STATE( + state, path, sample); + INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, flag) = shadow_flag; + INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, bounce) = bounce; + INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, transparent_bounce) = transparent_bounce; + INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, diffuse_bounce) = INTEGRATOR_STATE( + state, path, diffuse_bounce); + INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, glossy_bounce) = INTEGRATOR_STATE( + state, path, glossy_bounce); + INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, transmission_bounce) = INTEGRATOR_STATE( + state, path, transmission_bounce); + INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, throughput) = throughput; if (kernel_data.kernel_features & KERNEL_FEATURE_SHADOW_PASS) { - INTEGRATOR_STATE_WRITE(state, shadow_path, unshadowed_throughput) = throughput; + INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, unshadowed_throughput) = throughput; } - - /* Branch off shadow kernel. */ - INTEGRATOR_SHADOW_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW); } #endif diff --git a/intern/cycles/kernel/integrator/integrator_shade_volume.h b/intern/cycles/kernel/integrator/integrator_shade_volume.h index d0dde815b5c..13a5e7bda05 100644 --- a/intern/cycles/kernel/integrator/integrator_shade_volume.h +++ b/intern/cycles/kernel/integrator/integrator_shade_volume.h @@ -71,7 +71,7 @@ typedef struct VolumeShaderCoefficients { /* Evaluate shader to get extinction coefficient at P. */ ccl_device_inline bool shadow_volume_shader_sample(KernelGlobals kg, - IntegratorState state, + IntegratorShadowState state, ccl_private ShaderData *ccl_restrict sd, ccl_private float3 *ccl_restrict extinction) { @@ -187,7 +187,7 @@ ccl_device void volume_shadow_homogeneous(KernelGlobals kg, IntegratorState stat /* heterogeneous volume: integrate stepping through the volume until we * reach the end, get absorbed entirely, or run out of iterations */ ccl_device void volume_shadow_heterogeneous(KernelGlobals kg, - IntegratorState state, + IntegratorShadowState state, ccl_private Ray *ccl_restrict ray, ccl_private ShaderData *ccl_restrict sd, ccl_private float3 *ccl_restrict throughput, @@ -775,8 +775,11 @@ ccl_device_forceinline void integrate_volume_direct_light( light_sample_to_volume_shadow_ray(kg, sd, ls, P, &ray); const bool is_light = light_sample_is_light(ls); + /* Branch off shadow kernel. */ + INTEGRATOR_SHADOW_PATH_INIT(shadow_state, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW); + /* Write shadow ray and associated state to global memory. */ - integrator_state_write_shadow_ray(kg, state, &ray); + integrator_state_write_shadow_ray(kg, shadow_state, &ray); /* Copy state from main path to shadow path. */ const uint16_t bounce = INTEGRATOR_STATE(state, path, bounce); @@ -790,22 +793,34 @@ ccl_device_forceinline void integrate_volume_direct_light( const float3 diffuse_glossy_ratio = (bounce == 0) ? one_float3() : INTEGRATOR_STATE(state, path, diffuse_glossy_ratio); - INTEGRATOR_STATE_WRITE(state, shadow_path, diffuse_glossy_ratio) = diffuse_glossy_ratio; + INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, diffuse_glossy_ratio) = diffuse_glossy_ratio; } - INTEGRATOR_STATE_WRITE(state, shadow_path, flag) = shadow_flag; - INTEGRATOR_STATE_WRITE(state, shadow_path, bounce) = bounce; - INTEGRATOR_STATE_WRITE(state, shadow_path, transparent_bounce) = transparent_bounce; - INTEGRATOR_STATE_WRITE(state, shadow_path, throughput) = throughput_phase; + INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, render_pixel_index) = INTEGRATOR_STATE( + state, path, render_pixel_index); + INTEGRATOR_STATE_WRITE( + shadow_state, shadow_path, rng_offset) = INTEGRATOR_STATE(state, path, rng_offset) - + PRNG_BOUNCE_NUM * transparent_bounce; + INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, rng_hash) = INTEGRATOR_STATE( + state, path, rng_hash); + INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, sample) = INTEGRATOR_STATE( + state, path, sample); + INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, flag) = shadow_flag; + INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, bounce) = bounce; + INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, transparent_bounce) = transparent_bounce; + INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, diffuse_bounce) = INTEGRATOR_STATE( + state, path, diffuse_bounce); + INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, glossy_bounce) = INTEGRATOR_STATE( + state, path, glossy_bounce); + INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, transmission_bounce) = INTEGRATOR_STATE( + state, path, transmission_bounce); + INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, throughput) = throughput_phase; if (kernel_data.kernel_features & KERNEL_FEATURE_SHADOW_PASS) { - INTEGRATOR_STATE_WRITE(state, shadow_path, unshadowed_throughput) = throughput; + INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, unshadowed_throughput) = throughput; } - integrator_state_copy_volume_stack_to_shadow(kg, state); - - /* Branch off shadow kernel. */ - INTEGRATOR_SHADOW_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW); + integrator_state_copy_volume_stack_to_shadow(kg, shadow_state, state); } # endif @@ -902,7 +917,7 @@ ccl_device VolumeIntegrateEvent volume_integrate(KernelGlobals kg, /* Step through volume. */ const float step_size = volume_stack_step_size( - kg, state, [=](const int i) { return integrator_state_read_volume_stack(state, i); }); + kg, [=](const int i) { return integrator_state_read_volume_stack(state, i); }); /* TODO: expensive to zero closures? */ VolumeIntegrateResult result = {}; diff --git a/intern/cycles/kernel/integrator/integrator_shadow_state_template.h b/intern/cycles/kernel/integrator/integrator_shadow_state_template.h new file mode 100644 index 00000000000..bc35b644ee1 --- /dev/null +++ b/intern/cycles/kernel/integrator/integrator_shadow_state_template.h @@ -0,0 +1,83 @@ +/* + * Copyright 2011-2021 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/********************************* Shadow Path State **************************/ + +KERNEL_STRUCT_BEGIN(shadow_path) +/* Index of a pixel within the device render buffer. */ +KERNEL_STRUCT_MEMBER(shadow_path, uint32_t, render_pixel_index, KERNEL_FEATURE_PATH_TRACING) +/* Current sample number. */ +KERNEL_STRUCT_MEMBER(shadow_path, uint16_t, sample, KERNEL_FEATURE_PATH_TRACING) +/* Random number generator seed. */ +KERNEL_STRUCT_MEMBER(shadow_path, uint32_t, rng_hash, KERNEL_FEATURE_PATH_TRACING) +/* Random number dimension offset. */ +KERNEL_STRUCT_MEMBER(shadow_path, uint16_t, rng_offset, KERNEL_FEATURE_PATH_TRACING) +/* Current ray bounce depth. */ +KERNEL_STRUCT_MEMBER(shadow_path, uint16_t, bounce, KERNEL_FEATURE_PATH_TRACING) +/* Current transparent ray bounce depth. */ +KERNEL_STRUCT_MEMBER(shadow_path, uint16_t, transparent_bounce, KERNEL_FEATURE_PATH_TRACING) +/* Current diffuse ray bounce depth. */ +KERNEL_STRUCT_MEMBER(shadow_path, uint16_t, diffuse_bounce, KERNEL_FEATURE_PATH_TRACING) +/* Current glossy ray bounce depth. */ +KERNEL_STRUCT_MEMBER(shadow_path, uint16_t, glossy_bounce, KERNEL_FEATURE_PATH_TRACING) +/* Current transmission ray bounce depth. */ +KERNEL_STRUCT_MEMBER(shadow_path, uint16_t, transmission_bounce, KERNEL_FEATURE_PATH_TRACING) +/* DeviceKernel bit indicating queued kernels. */ +KERNEL_STRUCT_MEMBER(shadow_path, uint16_t, queued_kernel, KERNEL_FEATURE_PATH_TRACING) +/* enum PathRayFlag */ +KERNEL_STRUCT_MEMBER(shadow_path, uint32_t, flag, KERNEL_FEATURE_PATH_TRACING) +/* Throughput. */ +KERNEL_STRUCT_MEMBER(shadow_path, float3, throughput, KERNEL_FEATURE_PATH_TRACING) +/* Throughput for shadow pass. */ +KERNEL_STRUCT_MEMBER(shadow_path, float3, unshadowed_throughput, KERNEL_FEATURE_SHADOW_PASS) +/* Ratio of throughput to distinguish diffuse and glossy render passes. */ +KERNEL_STRUCT_MEMBER(shadow_path, float3, diffuse_glossy_ratio, KERNEL_FEATURE_LIGHT_PASSES) +/* Number of intersections found by ray-tracing. */ +KERNEL_STRUCT_MEMBER(shadow_path, uint16_t, num_hits, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_END(shadow_path) + +/********************************** Shadow Ray *******************************/ + +KERNEL_STRUCT_BEGIN(shadow_ray) +KERNEL_STRUCT_MEMBER(shadow_ray, float3, P, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_MEMBER(shadow_ray, float3, D, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_MEMBER(shadow_ray, float, t, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_MEMBER(shadow_ray, float, time, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_MEMBER(shadow_ray, float, dP, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_END(shadow_ray) + +/*********************** Shadow Intersection result **************************/ + +/* Result from scene intersection. */ +KERNEL_STRUCT_BEGIN(shadow_isect) +KERNEL_STRUCT_ARRAY_MEMBER(shadow_isect, float, t, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_ARRAY_MEMBER(shadow_isect, float, u, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_ARRAY_MEMBER(shadow_isect, float, v, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_ARRAY_MEMBER(shadow_isect, int, prim, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_ARRAY_MEMBER(shadow_isect, int, object, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_ARRAY_MEMBER(shadow_isect, int, type, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_END_ARRAY(shadow_isect, + INTEGRATOR_SHADOW_ISECT_SIZE_CPU, + INTEGRATOR_SHADOW_ISECT_SIZE_GPU) + +/**************************** Shadow Volume Stack *****************************/ + +KERNEL_STRUCT_BEGIN(shadow_volume_stack) +KERNEL_STRUCT_ARRAY_MEMBER(shadow_volume_stack, int, object, KERNEL_FEATURE_VOLUME) +KERNEL_STRUCT_ARRAY_MEMBER(shadow_volume_stack, int, shader, KERNEL_FEATURE_VOLUME) +KERNEL_STRUCT_END_ARRAY(shadow_volume_stack, + KERNEL_STRUCT_VOLUME_STACK_SIZE, + KERNEL_STRUCT_VOLUME_STACK_SIZE) diff --git a/intern/cycles/kernel/integrator/integrator_state.h b/intern/cycles/kernel/integrator/integrator_state.h index 3aab456a021..84f34c6b986 100644 --- a/intern/cycles/kernel/integrator/integrator_state.h +++ b/intern/cycles/kernel/integrator/integrator_state.h @@ -66,6 +66,25 @@ CCL_NAMESPACE_BEGIN /* Integrator State * * CPU rendering path state with AoS layout. */ +typedef struct IntegratorShadowStateCPU { +#define KERNEL_STRUCT_BEGIN(name) struct { +#define KERNEL_STRUCT_MEMBER(parent_struct, type, name, feature) type name; +#define KERNEL_STRUCT_ARRAY_MEMBER KERNEL_STRUCT_MEMBER +#define KERNEL_STRUCT_END(name) \ + } \ + name; +#define KERNEL_STRUCT_END_ARRAY(name, cpu_size, gpu_size) \ + } \ + name[cpu_size]; +#define KERNEL_STRUCT_VOLUME_STACK_SIZE MAX_VOLUME_STACK_SIZE +#include "kernel/integrator/integrator_shadow_state_template.h" +#undef KERNEL_STRUCT_BEGIN +#undef KERNEL_STRUCT_MEMBER +#undef KERNEL_STRUCT_ARRAY_MEMBER +#undef KERNEL_STRUCT_END +#undef KERNEL_STRUCT_END_ARRAY +} IntegratorShadowStateCPU; + typedef struct IntegratorStateCPU { #define KERNEL_STRUCT_BEGIN(name) struct { #define KERNEL_STRUCT_MEMBER(parent_struct, type, name, feature) type name; @@ -84,6 +103,8 @@ typedef struct IntegratorStateCPU { #undef KERNEL_STRUCT_END #undef KERNEL_STRUCT_END_ARRAY #undef KERNEL_STRUCT_VOLUME_STACK_SIZE + + IntegratorShadowStateCPU shadow; } IntegratorStateCPU; /* Path Queue @@ -108,7 +129,11 @@ typedef struct IntegratorStateGPU { } \ name[gpu_size]; #define KERNEL_STRUCT_VOLUME_STACK_SIZE MAX_VOLUME_STACK_SIZE + #include "kernel/integrator/integrator_state_template.h" + +#include "kernel/integrator/integrator_shadow_state_template.h" + #undef KERNEL_STRUCT_BEGIN #undef KERNEL_STRUCT_MEMBER #undef KERNEL_STRUCT_ARRAY_MEMBER @@ -122,7 +147,10 @@ typedef struct IntegratorStateGPU { /* Count number of kernels queued for specific shaders. */ ccl_global int *sort_key_counter[DEVICE_KERNEL_INTEGRATOR_NUM]; - /* Index of path which will be used by a next shadow catcher split. */ + /* Index of shadow path which will be used by a next shadow path. */ + ccl_global int *next_shadow_path_index; + + /* Index of main path which will be used by a next shadow catcher split. */ ccl_global int *next_shadow_catcher_path_index; } IntegratorStateGPU; @@ -140,6 +168,8 @@ typedef struct IntegratorStateGPU { typedef IntegratorStateCPU *ccl_restrict IntegratorState; typedef const IntegratorStateCPU *ccl_restrict ConstIntegratorState; +typedef IntegratorShadowStateCPU *ccl_restrict IntegratorShadowState; +typedef const IntegratorShadowStateCPU *ccl_restrict ConstIntegratorShadowState; # define INTEGRATOR_STATE_NULL nullptr @@ -157,6 +187,8 @@ typedef const IntegratorStateCPU *ccl_restrict ConstIntegratorState; typedef const int IntegratorState; typedef const int ConstIntegratorState; +typedef const int IntegratorShadowState; +typedef const int ConstIntegratorShadowState; # define INTEGRATOR_STATE_NULL -1 diff --git a/intern/cycles/kernel/integrator/integrator_state_flow.h b/intern/cycles/kernel/integrator/integrator_state_flow.h index 9829da875eb..df8fb5e0e46 100644 --- a/intern/cycles/kernel/integrator/integrator_state_flow.h +++ b/intern/cycles/kernel/integrator/integrator_state_flow.h @@ -63,10 +63,12 @@ CCL_NAMESPACE_BEGIN &kernel_integrator_state.queue_counter->num_queued[current_kernel], 1); \ INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = 0; -# define INTEGRATOR_SHADOW_PATH_INIT(next_kernel) \ +# define INTEGRATOR_SHADOW_PATH_INIT(shadow_state, state, next_kernel) \ + IntegratorShadowState shadow_state = atomic_fetch_and_add_uint32( \ + &kernel_integrator_state.next_shadow_path_index[0], 1); \ atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], \ 1); \ - INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = next_kernel; + INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, queued_kernel) = next_kernel; # define INTEGRATOR_SHADOW_PATH_NEXT(current_kernel, next_kernel) \ atomic_fetch_and_sub_uint32( \ &kernel_integrator_state.queue_counter->num_queued[current_kernel], 1); \ @@ -127,8 +129,9 @@ CCL_NAMESPACE_BEGIN (void)current_kernel; \ } -# define INTEGRATOR_SHADOW_PATH_INIT(next_kernel) \ - INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = next_kernel; +# define INTEGRATOR_SHADOW_PATH_INIT(shadow_state, state, next_kernel) \ + IntegratorShadowState shadow_state = &state->shadow; \ + INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, queued_kernel) = next_kernel; # define INTEGRATOR_SHADOW_PATH_NEXT(current_kernel, next_kernel) \ { \ INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = next_kernel; \ diff --git a/intern/cycles/kernel/integrator/integrator_state_template.h b/intern/cycles/kernel/integrator/integrator_state_template.h index d9801574d4f..b1a6fd36fae 100644 --- a/intern/cycles/kernel/integrator/integrator_state_template.h +++ b/intern/cycles/kernel/integrator/integrator_state_template.h @@ -28,6 +28,8 @@ KERNEL_STRUCT_MEMBER(path, uint32_t, render_pixel_index, KERNEL_FEATURE_PATH_TRA KERNEL_STRUCT_MEMBER(path, uint16_t, sample, KERNEL_FEATURE_PATH_TRACING) /* Current ray bounce depth. */ KERNEL_STRUCT_MEMBER(path, uint16_t, bounce, KERNEL_FEATURE_PATH_TRACING) +/* Current transparent ray bounce depth. */ +KERNEL_STRUCT_MEMBER(path, uint16_t, transparent_bounce, KERNEL_FEATURE_PATH_TRACING) /* Current diffuse ray bounce depth. */ KERNEL_STRUCT_MEMBER(path, uint16_t, diffuse_bounce, KERNEL_FEATURE_PATH_TRACING) /* Current glossy ray bounce depth. */ @@ -38,8 +40,6 @@ KERNEL_STRUCT_MEMBER(path, uint16_t, transmission_bounce, KERNEL_FEATURE_PATH_TR KERNEL_STRUCT_MEMBER(path, uint16_t, volume_bounce, KERNEL_FEATURE_PATH_TRACING) /* Current volume bounds ray bounce depth. */ KERNEL_STRUCT_MEMBER(path, uint16_t, volume_bounds_bounce, KERNEL_FEATURE_PATH_TRACING) -/* Current transparent ray bounce depth. */ -KERNEL_STRUCT_MEMBER(path, uint16_t, transparent_bounce, KERNEL_FEATURE_PATH_TRACING) /* DeviceKernel bit indicating queued kernels. */ KERNEL_STRUCT_MEMBER(path, uint16_t, queued_kernel, KERNEL_FEATURE_PATH_TRACING) /* Random number generator seed. */ @@ -107,57 +107,3 @@ KERNEL_STRUCT_ARRAY_MEMBER(volume_stack, int, shader, KERNEL_FEATURE_VOLUME) KERNEL_STRUCT_END_ARRAY(volume_stack, KERNEL_STRUCT_VOLUME_STACK_SIZE, KERNEL_STRUCT_VOLUME_STACK_SIZE) - -/********************************* Shadow Path State **************************/ - -KERNEL_STRUCT_BEGIN(shadow_path) -/* Current ray bounce depth. */ -KERNEL_STRUCT_MEMBER(shadow_path, uint16_t, bounce, KERNEL_FEATURE_PATH_TRACING) -/* Current transparent ray bounce depth. */ -KERNEL_STRUCT_MEMBER(shadow_path, uint16_t, transparent_bounce, KERNEL_FEATURE_PATH_TRACING) -/* DeviceKernel bit indicating queued kernels. */ -KERNEL_STRUCT_MEMBER(shadow_path, uint16_t, queued_kernel, KERNEL_FEATURE_PATH_TRACING) -/* enum PathRayFlag */ -KERNEL_STRUCT_MEMBER(shadow_path, uint32_t, flag, KERNEL_FEATURE_PATH_TRACING) -/* Throughput. */ -KERNEL_STRUCT_MEMBER(shadow_path, float3, throughput, KERNEL_FEATURE_PATH_TRACING) -/* Throughput for shadow pass. */ -KERNEL_STRUCT_MEMBER(shadow_path, float3, unshadowed_throughput, KERNEL_FEATURE_SHADOW_PASS) -/* Ratio of throughput to distinguish diffuse and glossy render passes. */ -KERNEL_STRUCT_MEMBER(shadow_path, float3, diffuse_glossy_ratio, KERNEL_FEATURE_LIGHT_PASSES) -/* Number of intersections found by ray-tracing. */ -KERNEL_STRUCT_MEMBER(shadow_path, uint16_t, num_hits, KERNEL_FEATURE_PATH_TRACING) -KERNEL_STRUCT_END(shadow_path) - -/********************************** Shadow Ray *******************************/ - -KERNEL_STRUCT_BEGIN(shadow_ray) -KERNEL_STRUCT_MEMBER(shadow_ray, float3, P, KERNEL_FEATURE_PATH_TRACING) -KERNEL_STRUCT_MEMBER(shadow_ray, float3, D, KERNEL_FEATURE_PATH_TRACING) -KERNEL_STRUCT_MEMBER(shadow_ray, float, t, KERNEL_FEATURE_PATH_TRACING) -KERNEL_STRUCT_MEMBER(shadow_ray, float, time, KERNEL_FEATURE_PATH_TRACING) -KERNEL_STRUCT_MEMBER(shadow_ray, float, dP, KERNEL_FEATURE_PATH_TRACING) -KERNEL_STRUCT_END(shadow_ray) - -/*********************** Shadow Intersection result **************************/ - -/* Result from scene intersection. */ -KERNEL_STRUCT_BEGIN(shadow_isect) -KERNEL_STRUCT_ARRAY_MEMBER(shadow_isect, float, t, KERNEL_FEATURE_PATH_TRACING) -KERNEL_STRUCT_ARRAY_MEMBER(shadow_isect, float, u, KERNEL_FEATURE_PATH_TRACING) -KERNEL_STRUCT_ARRAY_MEMBER(shadow_isect, float, v, KERNEL_FEATURE_PATH_TRACING) -KERNEL_STRUCT_ARRAY_MEMBER(shadow_isect, int, prim, KERNEL_FEATURE_PATH_TRACING) -KERNEL_STRUCT_ARRAY_MEMBER(shadow_isect, int, object, KERNEL_FEATURE_PATH_TRACING) -KERNEL_STRUCT_ARRAY_MEMBER(shadow_isect, int, type, KERNEL_FEATURE_PATH_TRACING) -KERNEL_STRUCT_END_ARRAY(shadow_isect, - INTEGRATOR_SHADOW_ISECT_SIZE_CPU, - INTEGRATOR_SHADOW_ISECT_SIZE_GPU) - -/**************************** Shadow Volume Stack *****************************/ - -KERNEL_STRUCT_BEGIN(shadow_volume_stack) -KERNEL_STRUCT_ARRAY_MEMBER(shadow_volume_stack, int, object, KERNEL_FEATURE_VOLUME) -KERNEL_STRUCT_ARRAY_MEMBER(shadow_volume_stack, int, shader, KERNEL_FEATURE_VOLUME) -KERNEL_STRUCT_END_ARRAY(shadow_volume_stack, - KERNEL_STRUCT_VOLUME_STACK_SIZE, - KERNEL_STRUCT_VOLUME_STACK_SIZE) diff --git a/intern/cycles/kernel/integrator/integrator_state_util.h b/intern/cycles/kernel/integrator/integrator_state_util.h index dacc21e6eeb..5bcb9cc2d67 100644 --- a/intern/cycles/kernel/integrator/integrator_state_util.h +++ b/intern/cycles/kernel/integrator/integrator_state_util.h @@ -50,7 +50,7 @@ ccl_device_forceinline void integrator_state_read_ray(KernelGlobals kg, /* Shadow Ray */ ccl_device_forceinline void integrator_state_write_shadow_ray( - KernelGlobals kg, IntegratorState state, ccl_private const Ray *ccl_restrict ray) + KernelGlobals kg, IntegratorShadowState state, ccl_private const Ray *ccl_restrict ray) { INTEGRATOR_STATE_WRITE(state, shadow_ray, P) = ray->P; INTEGRATOR_STATE_WRITE(state, shadow_ray, D) = ray->D; @@ -60,7 +60,7 @@ ccl_device_forceinline void integrator_state_write_shadow_ray( } ccl_device_forceinline void integrator_state_read_shadow_ray(KernelGlobals kg, - ConstIntegratorState state, + ConstIntegratorShadowState state, ccl_private Ray *ccl_restrict ray) { ray->P = INTEGRATOR_STATE(state, shadow_ray, P); @@ -122,7 +122,9 @@ ccl_device_forceinline bool integrator_state_volume_stack_is_empty(KernelGlobals /* Shadow Intersection */ ccl_device_forceinline void integrator_state_write_shadow_isect( - IntegratorState state, ccl_private const Intersection *ccl_restrict isect, const int index) + IntegratorShadowState state, + ccl_private const Intersection *ccl_restrict isect, + const int index) { INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, index, t) = isect->t; INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, index, u) = isect->u; @@ -133,7 +135,9 @@ ccl_device_forceinline void integrator_state_write_shadow_isect( } ccl_device_forceinline void integrator_state_read_shadow_isect( - ConstIntegratorState state, ccl_private Intersection *ccl_restrict isect, const int index) + ConstIntegratorShadowState state, + ccl_private Intersection *ccl_restrict isect, + const int index) { isect->prim = INTEGRATOR_STATE_ARRAY(state, shadow_isect, index, prim); isect->object = INTEGRATOR_STATE_ARRAY(state, shadow_isect, index, object); @@ -143,8 +147,8 @@ ccl_device_forceinline void integrator_state_read_shadow_isect( isect->t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, index, t); } -ccl_device_forceinline void integrator_state_copy_volume_stack_to_shadow(KernelGlobals kg, - IntegratorState state) +ccl_device_forceinline void integrator_state_copy_volume_stack_to_shadow( + KernelGlobals kg, IntegratorShadowState shadow_state, ConstIntegratorState state) { if (kernel_data.kernel_features & KERNEL_FEATURE_VOLUME) { int index = 0; @@ -152,9 +156,9 @@ ccl_device_forceinline void integrator_state_copy_volume_stack_to_shadow(KernelG do { shader = INTEGRATOR_STATE_ARRAY(state, volume_stack, index, shader); - INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_volume_stack, index, object) = + INTEGRATOR_STATE_ARRAY_WRITE(shadow_state, shadow_volume_stack, index, object) = INTEGRATOR_STATE_ARRAY(state, volume_stack, index, object); - INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_volume_stack, index, shader) = shader; + INTEGRATOR_STATE_ARRAY_WRITE(shadow_state, shadow_volume_stack, index, shader) = shader; ++index; } while (shader != OBJECT_NONE); @@ -181,7 +185,7 @@ ccl_device_forceinline void integrator_state_copy_volume_stack(KernelGlobals kg, } ccl_device_forceinline VolumeStack -integrator_state_read_shadow_volume_stack(ConstIntegratorState state, int i) +integrator_state_read_shadow_volume_stack(ConstIntegratorShadowState state, int i) { VolumeStack entry = {INTEGRATOR_STATE_ARRAY(state, shadow_volume_stack, i, object), INTEGRATOR_STATE_ARRAY(state, shadow_volume_stack, i, shader)}; @@ -189,14 +193,14 @@ integrator_state_read_shadow_volume_stack(ConstIntegratorState state, int i) } ccl_device_forceinline bool integrator_state_shadow_volume_stack_is_empty( - KernelGlobals kg, ConstIntegratorState state) + KernelGlobals kg, ConstIntegratorShadowState state) { return (kernel_data.kernel_features & KERNEL_FEATURE_VOLUME) ? INTEGRATOR_STATE_ARRAY(state, shadow_volume_stack, 0, shader) == SHADER_NONE : true; } -ccl_device_forceinline void integrator_state_write_shadow_volume_stack(IntegratorState state, +ccl_device_forceinline void integrator_state_write_shadow_volume_stack(IntegratorShadowState state, int i, VolumeStack entry) { @@ -259,7 +263,6 @@ ccl_device_inline void integrator_state_move(KernelGlobals kg, integrator_state_copy_only(kg, to_state, state); INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = 0; - INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = 0; } #endif @@ -270,12 +273,11 @@ ccl_device_inline void integrator_state_shadow_catcher_split(KernelGlobals kg, IntegratorState state) { #if defined(__KERNEL_GPU__) - const IntegratorState to_state = atomic_fetch_and_add_uint32( + ConstIntegratorState to_state = atomic_fetch_and_add_uint32( &kernel_integrator_state.next_shadow_catcher_path_index[0], 1); integrator_state_copy_only(kg, to_state, state); #else - IntegratorStateCPU *ccl_restrict to_state = state + 1; /* Only copy the required subset, since shadow intersections are big and irrelevant here. */ @@ -283,10 +285,99 @@ ccl_device_inline void integrator_state_shadow_catcher_split(KernelGlobals kg, to_state->ray = state->ray; to_state->isect = state->isect; integrator_state_copy_volume_stack(kg, to_state, state); - to_state->shadow_path = state->shadow_path; #endif INTEGRATOR_STATE_WRITE(to_state, path, flag) |= PATH_RAY_SHADOW_CATCHER_PASS; } +#ifdef __KERNEL_CPU__ +ccl_device_inline int integrator_state_bounce(ConstIntegratorState state, const int) +{ + return INTEGRATOR_STATE(state, path, bounce); +} + +ccl_device_inline int integrator_state_bounce(ConstIntegratorShadowState state, const int) +{ + return INTEGRATOR_STATE(state, shadow_path, bounce); +} + +ccl_device_inline int integrator_state_diffuse_bounce(ConstIntegratorState state, const int) +{ + return INTEGRATOR_STATE(state, path, diffuse_bounce); +} + +ccl_device_inline int integrator_state_diffuse_bounce(ConstIntegratorShadowState state, const int) +{ + return INTEGRATOR_STATE(state, shadow_path, diffuse_bounce); +} + +ccl_device_inline int integrator_state_glossy_bounce(ConstIntegratorState state, const int) +{ + return INTEGRATOR_STATE(state, path, glossy_bounce); +} + +ccl_device_inline int integrator_state_glossy_bounce(ConstIntegratorShadowState state, const int) +{ + return INTEGRATOR_STATE(state, shadow_path, glossy_bounce); +} + +ccl_device_inline int integrator_state_transmission_bounce(ConstIntegratorState state, const int) +{ + return INTEGRATOR_STATE(state, path, transmission_bounce); +} + +ccl_device_inline int integrator_state_transmission_bounce(ConstIntegratorShadowState state, + const int) +{ + return INTEGRATOR_STATE(state, shadow_path, transmission_bounce); +} + +ccl_device_inline int integrator_state_transparent_bounce(ConstIntegratorState state, const int) +{ + return INTEGRATOR_STATE(state, path, transparent_bounce); +} + +ccl_device_inline int integrator_state_transparent_bounce(ConstIntegratorShadowState state, + const int) +{ + return INTEGRATOR_STATE(state, shadow_path, transparent_bounce); +} +#else +ccl_device_inline int integrator_state_bounce(ConstIntegratorShadowState state, + const uint32_t path_flag) +{ + return (path_flag & PATH_RAY_SHADOW) ? INTEGRATOR_STATE(state, shadow_path, bounce) : + INTEGRATOR_STATE(state, path, bounce); +} + +ccl_device_inline int integrator_state_diffuse_bounce(ConstIntegratorShadowState state, + const uint32_t path_flag) +{ + return (path_flag & PATH_RAY_SHADOW) ? INTEGRATOR_STATE(state, shadow_path, diffuse_bounce) : + INTEGRATOR_STATE(state, path, diffuse_bounce); +} + +ccl_device_inline int integrator_state_glossy_bounce(ConstIntegratorShadowState state, + const uint32_t path_flag) +{ + return (path_flag & PATH_RAY_SHADOW) ? INTEGRATOR_STATE(state, shadow_path, glossy_bounce) : + INTEGRATOR_STATE(state, path, glossy_bounce); +} + +ccl_device_inline int integrator_state_transmission_bounce(ConstIntegratorShadowState state, + const uint32_t path_flag) +{ + return (path_flag & PATH_RAY_SHADOW) ? + INTEGRATOR_STATE(state, shadow_path, transmission_bounce) : + INTEGRATOR_STATE(state, path, transmission_bounce); +} + +ccl_device_inline int integrator_state_transparent_bounce(ConstIntegratorShadowState state, + const uint32_t path_flag) +{ + return (path_flag & PATH_RAY_SHADOW) ? INTEGRATOR_STATE(state, shadow_path, transparent_bounce) : + INTEGRATOR_STATE(state, path, transparent_bounce); +} +#endif + CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/integrator_volume_stack.h b/intern/cycles/kernel/integrator/integrator_volume_stack.h index e3a4546508f..cf69826ffff 100644 --- a/intern/cycles/kernel/integrator/integrator_volume_stack.h +++ b/intern/cycles/kernel/integrator/integrator_volume_stack.h @@ -98,7 +98,7 @@ ccl_device void volume_stack_enter_exit(KernelGlobals kg, } ccl_device void shadow_volume_stack_enter_exit(KernelGlobals kg, - IntegratorState state, + IntegratorShadowState state, ccl_private const ShaderData *sd) { volume_stack_enter_exit( @@ -136,9 +136,7 @@ ccl_device_inline void volume_stack_clean(KernelGlobals kg, IntegratorState stat } template<typename StackReadOp> -ccl_device float volume_stack_step_size(KernelGlobals kg, - IntegratorState state, - StackReadOp stack_read) +ccl_device float volume_stack_step_size(KernelGlobals kg, StackReadOp stack_read) { float step_size = FLT_MAX; diff --git a/intern/cycles/kernel/kernel_accumulate.h b/intern/cycles/kernel/kernel_accumulate.h index 5f32150d33c..848aaa18aae 100644 --- a/intern/cycles/kernel/kernel_accumulate.h +++ b/intern/cycles/kernel/kernel_accumulate.h @@ -393,17 +393,20 @@ ccl_device_inline void kernel_accum_emission_or_background_pass(KernelGlobals kg /* Write light contribution to render buffer. */ ccl_device_inline void kernel_accum_light(KernelGlobals kg, - ConstIntegratorState state, + ConstIntegratorShadowState state, ccl_global float *ccl_restrict render_buffer) { /* The throughput for shadow paths already contains the light shader evaluation. */ float3 contribution = INTEGRATOR_STATE(state, shadow_path, throughput); kernel_accum_clamp(kg, &contribution, INTEGRATOR_STATE(state, shadow_path, bounce)); - ccl_global float *buffer = kernel_accum_pixel_render_buffer(kg, state, render_buffer); + const uint32_t render_pixel_index = INTEGRATOR_STATE(state, shadow_path, render_pixel_index); + const uint64_t render_buffer_offset = (uint64_t)render_pixel_index * + kernel_data.film.pass_stride; + ccl_global float *buffer = render_buffer + render_buffer_offset; const uint32_t path_flag = INTEGRATOR_STATE(state, shadow_path, flag); - const int sample = INTEGRATOR_STATE(state, path, sample); + const int sample = INTEGRATOR_STATE(state, shadow_path, sample); kernel_accum_combined_pass(kg, path_flag, sample, contribution, buffer); diff --git a/intern/cycles/kernel/kernel_path_state.h b/intern/cycles/kernel/kernel_path_state.h index 66eb468fdca..fa8de14916e 100644 --- a/intern/cycles/kernel/kernel_path_state.h +++ b/intern/cycles/kernel/kernel_path_state.h @@ -26,7 +26,9 @@ CCL_NAMESPACE_BEGIN ccl_device_inline void path_state_init_queues(IntegratorState state) { INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = 0; - INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = 0; +#ifdef __KERNEL_CPU__ + INTEGRATOR_STATE_WRITE(&state->shadow, shadow_path, queued_kernel) = 0; +#endif } /* Minimalistic initialization of the path state, which is needed for early outputs in the @@ -293,16 +295,15 @@ ccl_device_inline void path_state_rng_load(ConstIntegratorState state, rng_state->sample = INTEGRATOR_STATE(state, path, sample); } -ccl_device_inline void shadow_path_state_rng_load(ConstIntegratorState state, +ccl_device_inline void shadow_path_state_rng_load(ConstIntegratorShadowState state, ccl_private RNGState *rng_state) { - const uint shadow_bounces = INTEGRATOR_STATE(state, shadow_path, transparent_bounce) - - INTEGRATOR_STATE(state, path, transparent_bounce); + const uint shadow_bounces = INTEGRATOR_STATE(state, shadow_path, transparent_bounce); - rng_state->rng_hash = INTEGRATOR_STATE(state, path, rng_hash); - rng_state->rng_offset = INTEGRATOR_STATE(state, path, rng_offset) + + rng_state->rng_hash = INTEGRATOR_STATE(state, shadow_path, rng_hash); + rng_state->rng_offset = INTEGRATOR_STATE(state, shadow_path, rng_offset) + PRNG_BOUNCE_NUM * shadow_bounces; - rng_state->sample = INTEGRATOR_STATE(state, path, sample); + rng_state->sample = INTEGRATOR_STATE(state, shadow_path, sample); } ccl_device_inline float path_state_rng_1D(KernelGlobals kg, diff --git a/intern/cycles/kernel/kernel_shader.h b/intern/cycles/kernel/kernel_shader.h index 4a5a5309c61..d25191b72cf 100644 --- a/intern/cycles/kernel/kernel_shader.h +++ b/intern/cycles/kernel/kernel_shader.h @@ -575,9 +575,9 @@ ccl_device float3 shader_holdout_apply(KernelGlobals kg, ccl_private ShaderData /* Surface Evaluation */ -template<uint node_feature_mask> +template<uint node_feature_mask, typename ConstIntegratorGenericState> ccl_device void shader_eval_surface(KernelGlobals kg, - ConstIntegratorState state, + ConstIntegratorGenericState state, ccl_private ShaderData *ccl_restrict sd, ccl_global float *ccl_restrict buffer, uint32_t path_flag) @@ -753,9 +753,9 @@ ccl_device int shader_phase_sample_closure(KernelGlobals kg, /* Volume Evaluation */ -template<const bool shadow, typename StackReadOp> +template<const bool shadow, typename StackReadOp, typename ConstIntegratorGenericState> ccl_device_inline void shader_eval_volume(KernelGlobals kg, - ConstIntegratorState state, + ConstIntegratorGenericState state, ccl_private ShaderData *ccl_restrict sd, const uint32_t path_flag, StackReadOp stack_read) @@ -831,8 +831,9 @@ ccl_device_inline void shader_eval_volume(KernelGlobals kg, /* Displacement Evaluation */ +template<typename ConstIntegratorGenericState> ccl_device void shader_eval_displacement(KernelGlobals kg, - ConstIntegratorState state, + ConstIntegratorGenericState state, ccl_private ShaderData *sd) { sd->num_closure = 0; diff --git a/intern/cycles/kernel/kernel_shadow_catcher.h b/intern/cycles/kernel/kernel_shadow_catcher.h index 00dddb5b198..9bed140b395 100644 --- a/intern/cycles/kernel/kernel_shadow_catcher.h +++ b/intern/cycles/kernel/kernel_shadow_catcher.h @@ -62,7 +62,7 @@ ccl_device_inline bool kernel_shadow_catcher_is_path_split_bounce(KernelGlobals ccl_device_inline bool kernel_shadow_catcher_path_can_split(KernelGlobals kg, ConstIntegratorState state) { - if (INTEGRATOR_PATH_IS_TERMINATED && INTEGRATOR_SHADOW_PATH_IS_TERMINATED) { + if (INTEGRATOR_PATH_IS_TERMINATED) { return false; } diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h index 3e276c24cdd..edae158f403 100644 --- a/intern/cycles/kernel/kernel_types.h +++ b/intern/cycles/kernel/kernel_types.h @@ -812,6 +812,7 @@ typedef struct ccl_align(16) ShaderData #ifdef __OSL__ const struct KernelGlobalsCPU *osl_globals; const struct IntegratorStateCPU *osl_path_state; + const struct IntegratorShadowStateCPU *osl_shadow_path_state; #endif /* LCG state for closures that require additional random numbers. */ diff --git a/intern/cycles/kernel/osl/osl_services.cpp b/intern/cycles/kernel/osl/osl_services.cpp index bb7655fbe9a..cbe1bf1bfc0 100644 --- a/intern/cycles/kernel/osl/osl_services.cpp +++ b/intern/cycles/kernel/osl/osl_services.cpp @@ -1015,31 +1015,44 @@ bool OSLRenderServices::get_background_attribute(const KernelGlobalsCPU *kg, else if (name == u_path_ray_depth) { /* Ray Depth */ const IntegratorStateCPU *state = sd->osl_path_state; - int f = state->path.bounce; + const IntegratorShadowStateCPU *shadow_state = sd->osl_shadow_path_state; + int f = (state) ? state->path.bounce : (shadow_state) ? shadow_state->shadow_path.bounce : 0; return set_attribute_int(f, type, derivatives, val); } else if (name == u_path_diffuse_depth) { /* Diffuse Ray Depth */ const IntegratorStateCPU *state = sd->osl_path_state; - int f = state->path.diffuse_bounce; + const IntegratorShadowStateCPU *shadow_state = sd->osl_shadow_path_state; + int f = (state) ? state->path.diffuse_bounce : + (shadow_state) ? shadow_state->shadow_path.diffuse_bounce : + 0; return set_attribute_int(f, type, derivatives, val); } else if (name == u_path_glossy_depth) { /* Glossy Ray Depth */ const IntegratorStateCPU *state = sd->osl_path_state; - int f = state->path.glossy_bounce; + const IntegratorShadowStateCPU *shadow_state = sd->osl_shadow_path_state; + int f = (state) ? state->path.glossy_bounce : + (shadow_state) ? shadow_state->shadow_path.glossy_bounce : + 0; return set_attribute_int(f, type, derivatives, val); } else if (name == u_path_transmission_depth) { /* Transmission Ray Depth */ const IntegratorStateCPU *state = sd->osl_path_state; - int f = state->path.transmission_bounce; + const IntegratorShadowStateCPU *shadow_state = sd->osl_shadow_path_state; + int f = (state) ? state->path.transmission_bounce : + (shadow_state) ? shadow_state->shadow_path.transmission_bounce : + 0; return set_attribute_int(f, type, derivatives, val); } else if (name == u_path_transparent_depth) { /* Transparent Ray Depth */ const IntegratorStateCPU *state = sd->osl_path_state; - int f = state->path.transparent_bounce; + const IntegratorShadowStateCPU *shadow_state = sd->osl_shadow_path_state; + int f = (state) ? state->path.transparent_bounce : + (shadow_state) ? shadow_state->shadow_path.transparent_bounce : + 0; return set_attribute_int(f, type, derivatives, val); } else if (name == u_ndc) { @@ -1228,34 +1241,38 @@ bool OSLRenderServices::texture(ustring filename, /* Bevel shader hack. */ if (nchannels >= 3) { const IntegratorStateCPU *state = sd->osl_path_state; - int num_samples = (int)s; - float radius = t; - float3 N = svm_bevel(kernel_globals, state, sd, radius, num_samples); - result[0] = N.x; - result[1] = N.y; - result[2] = N.z; - status = true; + if (state) { + int num_samples = (int)s; + float radius = t; + float3 N = svm_bevel(kernel_globals, state, sd, radius, num_samples); + result[0] = N.x; + result[1] = N.y; + result[2] = N.z; + status = true; + } } break; } case OSLTextureHandle::AO: { /* AO shader hack. */ const IntegratorStateCPU *state = sd->osl_path_state; - int num_samples = (int)s; - float radius = t; - float3 N = make_float3(dsdx, dtdx, dsdy); - int flags = 0; - if ((int)dtdy) { - flags |= NODE_AO_INSIDE; - } - if ((int)options.sblur) { - flags |= NODE_AO_ONLY_LOCAL; - } - if ((int)options.tblur) { - flags |= NODE_AO_GLOBAL_RADIUS; + if (state) { + int num_samples = (int)s; + float radius = t; + float3 N = make_float3(dsdx, dtdx, dsdy); + int flags = 0; + if ((int)dtdy) { + flags |= NODE_AO_INSIDE; + } + if ((int)options.sblur) { + flags |= NODE_AO_ONLY_LOCAL; + } + if ((int)options.tblur) { + flags |= NODE_AO_GLOBAL_RADIUS; + } + result[0] = svm_ao(kernel_globals, state, sd, N, radius, num_samples, flags); + status = true; } - result[0] = svm_ao(kernel_globals, state, sd, N, radius, num_samples, flags); - status = true; break; } case OSLTextureHandle::SVM: { diff --git a/intern/cycles/kernel/osl/osl_shader.cpp b/intern/cycles/kernel/osl/osl_shader.cpp index 4c067e88ab6..fba207e7230 100644 --- a/intern/cycles/kernel/osl/osl_shader.cpp +++ b/intern/cycles/kernel/osl/osl_shader.cpp @@ -89,7 +89,7 @@ void OSLShader::thread_free(KernelGlobalsCPU *kg) static void shaderdata_to_shaderglobals(const KernelGlobalsCPU *kg, ShaderData *sd, - const IntegratorStateCPU *state, + const void *state, uint32_t path_flag, OSLThreadData *tdata) { @@ -134,7 +134,12 @@ static void shaderdata_to_shaderglobals(const KernelGlobalsCPU *kg, /* Used by render-services. */ sd->osl_globals = kg; - sd->osl_path_state = state; + if (path_flag & PATH_RAY_SHADOW) { + sd->osl_shadow_path_state = (const IntegratorShadowStateCPU *)state; + } + else { + sd->osl_path_state = (const IntegratorStateCPU *)state; + } } /* Surface */ @@ -175,7 +180,7 @@ static void flatten_surface_closure_tree(ShaderData *sd, } void OSLShader::eval_surface(const KernelGlobalsCPU *kg, - const IntegratorStateCPU *state, + const void *state, ShaderData *sd, uint32_t path_flag) { @@ -283,7 +288,7 @@ static void flatten_background_closure_tree(ShaderData *sd, } void OSLShader::eval_background(const KernelGlobalsCPU *kg, - const IntegratorStateCPU *state, + const void *state, ShaderData *sd, uint32_t path_flag) { @@ -341,7 +346,7 @@ static void flatten_volume_closure_tree(ShaderData *sd, } void OSLShader::eval_volume(const KernelGlobalsCPU *kg, - const IntegratorStateCPU *state, + const void *state, ShaderData *sd, uint32_t path_flag) { @@ -366,9 +371,7 @@ void OSLShader::eval_volume(const KernelGlobalsCPU *kg, /* Displacement */ -void OSLShader::eval_displacement(const KernelGlobalsCPU *kg, - const IntegratorStateCPU *state, - ShaderData *sd) +void OSLShader::eval_displacement(const KernelGlobalsCPU *kg, const void *state, ShaderData *sd) { /* setup shader globals from shader data */ OSLThreadData *tdata = kg->osl_tdata; diff --git a/intern/cycles/kernel/osl/osl_shader.h b/intern/cycles/kernel/osl/osl_shader.h index 2b3810b0a33..037a18a1f19 100644 --- a/intern/cycles/kernel/osl/osl_shader.h +++ b/intern/cycles/kernel/osl/osl_shader.h @@ -55,20 +55,18 @@ class OSLShader { /* eval */ static void eval_surface(const KernelGlobalsCPU *kg, - const IntegratorStateCPU *state, + const void *state, ShaderData *sd, uint32_t path_flag); static void eval_background(const KernelGlobalsCPU *kg, - const IntegratorStateCPU *state, + const void *state, ShaderData *sd, uint32_t path_flag); static void eval_volume(const KernelGlobalsCPU *kg, - const IntegratorStateCPU *state, + const void *state, ShaderData *sd, uint32_t path_flag); - static void eval_displacement(const KernelGlobalsCPU *kg, - const IntegratorStateCPU *state, - ShaderData *sd); + static void eval_displacement(const KernelGlobalsCPU *kg, const void *state, ShaderData *sd); /* attributes */ static int find_attribute(const KernelGlobalsCPU *kg, diff --git a/intern/cycles/kernel/svm/svm.h b/intern/cycles/kernel/svm/svm.h index 57879dc238f..472f3517839 100644 --- a/intern/cycles/kernel/svm/svm.h +++ b/intern/cycles/kernel/svm/svm.h @@ -225,9 +225,9 @@ CCL_NAMESPACE_END CCL_NAMESPACE_BEGIN /* Main Interpreter Loop */ -template<uint node_feature_mask, ShaderType type> +template<uint node_feature_mask, ShaderType type, typename ConstIntegratorGenericState> ccl_device void svm_eval_nodes(KernelGlobals kg, - ConstIntegratorState state, + ConstIntegratorGenericState state, ShaderData *sd, ccl_global float *render_buffer, uint32_t path_flag) diff --git a/intern/cycles/kernel/svm/svm_ao.h b/intern/cycles/kernel/svm/svm_ao.h index a1efd2f0a43..4cfef7bc204 100644 --- a/intern/cycles/kernel/svm/svm_ao.h +++ b/intern/cycles/kernel/svm/svm_ao.h @@ -21,17 +21,17 @@ CCL_NAMESPACE_BEGIN #ifdef __SHADER_RAYTRACE__ # ifdef __KERNEL_OPTIX__ -extern "C" __device__ float __direct_callable__svm_node_ao(KernelGlobals kg, - ConstIntegratorState state, +extern "C" __device__ float __direct_callable__svm_node_ao( # else -ccl_device float svm_ao(KernelGlobals kg, - ConstIntegratorState state, +ccl_device float svm_ao( # endif - ccl_private ShaderData *sd, - float3 N, - float max_dist, - int num_samples, - int flags) + KernelGlobals kg, + ConstIntegratorState state, + ccl_private ShaderData *sd, + float3 N, + float max_dist, + int num_samples, + int flags) { if (flags & NODE_AO_GLOBAL_RADIUS) { max_dist = kernel_data.integrator.ao_bounces_distance; @@ -91,7 +91,7 @@ ccl_device float svm_ao(KernelGlobals kg, return ((float)unoccluded) / num_samples; } -template<uint node_feature_mask> +template<uint node_feature_mask, typename ConstIntegratorGenericState> # if defined(__KERNEL_OPTIX__) ccl_device_inline # else @@ -99,7 +99,7 @@ ccl_device_noinline # endif void svm_node_ao(KernelGlobals kg, - ConstIntegratorState state, + ConstIntegratorGenericState state, ccl_private ShaderData *sd, ccl_private float *stack, uint4 node) diff --git a/intern/cycles/kernel/svm/svm_aov.h b/intern/cycles/kernel/svm/svm_aov.h index 0d6395d52c0..833a6443b3c 100644 --- a/intern/cycles/kernel/svm/svm_aov.h +++ b/intern/cycles/kernel/svm/svm_aov.h @@ -26,9 +26,9 @@ ccl_device_inline bool svm_node_aov_check(const uint32_t path_flag, return ((render_buffer != NULL) && is_primary); } -template<uint node_feature_mask> +template<uint node_feature_mask, typename ConstIntegratorGenericState> ccl_device void svm_node_aov_color(KernelGlobals kg, - ConstIntegratorState state, + ConstIntegratorGenericState state, ccl_private ShaderData *sd, ccl_private float *stack, uint4 node, @@ -46,9 +46,9 @@ ccl_device void svm_node_aov_color(KernelGlobals kg, } } -template<uint node_feature_mask> +template<uint node_feature_mask, typename ConstIntegratorGenericState> ccl_device void svm_node_aov_value(KernelGlobals kg, - ConstIntegratorState state, + ConstIntegratorGenericState state, ccl_private ShaderData *sd, ccl_private float *stack, uint4 node, diff --git a/intern/cycles/kernel/svm/svm_bevel.h b/intern/cycles/kernel/svm/svm_bevel.h index 3ce3af20795..292887beedf 100644 --- a/intern/cycles/kernel/svm/svm_bevel.h +++ b/intern/cycles/kernel/svm/svm_bevel.h @@ -99,15 +99,15 @@ ccl_device void svm_bevel_cubic_sample(const float radius, */ # ifdef __KERNEL_OPTIX__ -extern "C" __device__ float3 __direct_callable__svm_node_bevel(KernelGlobals kg, - ConstIntegratorState state, +extern "C" __device__ float3 __direct_callable__svm_node_bevel( # else -ccl_device float3 svm_bevel(KernelGlobals kg, - ConstIntegratorState state, +ccl_device float3 svm_bevel( # endif - ccl_private ShaderData *sd, - float radius, - int num_samples) + KernelGlobals kg, + ConstIntegratorState state, + ccl_private ShaderData *sd, + float radius, + int num_samples) { /* Early out if no sampling needed. */ if (radius <= 0.0f || num_samples < 1 || sd->object == OBJECT_NONE) { @@ -282,7 +282,7 @@ ccl_device float3 svm_bevel(KernelGlobals kg, return is_zero(N) ? sd->N : (sd->flag & SD_BACKFACING) ? -N : N; } -template<uint node_feature_mask> +template<uint node_feature_mask, typename ConstIntegratorGenericState> # if defined(__KERNEL_OPTIX__) ccl_device_inline # else @@ -290,7 +290,7 @@ ccl_device_noinline # endif void svm_node_bevel(KernelGlobals kg, - ConstIntegratorState state, + ConstIntegratorGenericState state, ccl_private ShaderData *sd, ccl_private float *stack, uint4 node) diff --git a/intern/cycles/kernel/svm/svm_light_path.h b/intern/cycles/kernel/svm/svm_light_path.h index c61ace9757a..5e1fc4f671c 100644 --- a/intern/cycles/kernel/svm/svm_light_path.h +++ b/intern/cycles/kernel/svm/svm_light_path.h @@ -18,9 +18,9 @@ CCL_NAMESPACE_BEGIN /* Light Path Node */ -template<uint node_feature_mask> +template<uint node_feature_mask, typename ConstIntegratorGenericState> ccl_device_noinline void svm_node_light_path(KernelGlobals kg, - ConstIntegratorState state, + ConstIntegratorGenericState state, ccl_private const ShaderData *sd, ccl_private float *stack, uint type, @@ -64,48 +64,43 @@ ccl_device_noinline void svm_node_light_path(KernelGlobals kg, /* Read bounce from difference location depending if this is a shadow * path. It's a bit dubious to have integrate state details leak into * this function but hard to avoid currently. */ - int bounce = 0; IF_KERNEL_NODES_FEATURE(LIGHT_PATH) { - bounce = (path_flag & PATH_RAY_SHADOW) ? INTEGRATOR_STATE(state, shadow_path, bounce) : - INTEGRATOR_STATE(state, path, bounce); + info = (float)integrator_state_bounce(state, path_flag); } /* For background, light emission and shadow evaluation we from a * surface or volume we are effective one bounce further. */ if (path_flag & (PATH_RAY_SHADOW | PATH_RAY_EMISSION)) { - bounce++; + info += 1.0f; } - - info = (float)bounce; break; } - /* TODO */ case NODE_LP_ray_transparent: { - int bounce = 0; IF_KERNEL_NODES_FEATURE(LIGHT_PATH) { - bounce = (path_flag & PATH_RAY_SHADOW) ? - INTEGRATOR_STATE(state, shadow_path, transparent_bounce) : - INTEGRATOR_STATE(state, path, transparent_bounce); + info = (float)integrator_state_transparent_bounce(state, path_flag); } - - info = (float)bounce; break; } -#if 0 case NODE_LP_ray_diffuse: - info = (float)state->diffuse_bounce; + IF_KERNEL_NODES_FEATURE(LIGHT_PATH) + { + info = (float)integrator_state_diffuse_bounce(state, path_flag); + } break; case NODE_LP_ray_glossy: - info = (float)state->glossy_bounce; + IF_KERNEL_NODES_FEATURE(LIGHT_PATH) + { + info = (float)integrator_state_glossy_bounce(state, path_flag); + } break; -#endif -#if 0 case NODE_LP_ray_transmission: - info = (float)state->transmission_bounce; + IF_KERNEL_NODES_FEATURE(LIGHT_PATH) + { + info = (float)integrator_state_transmission_bounce(state, path_flag); + } break; -#endif } stack_store_float(stack, out_offset, info); |