diff options
author | Brecht Van Lommel <brecht> | 2021-10-17 17:22:20 +0300 |
---|---|---|
committer | Brecht Van Lommel <brecht@blender.org> | 2021-10-19 16:09:29 +0300 |
commit | 943e73b07e26d64c04ccb7d8f656e3818a57cca0 (patch) | |
tree | 870e21cb9b8c49878138e16360178ee293b78f6c /intern/cycles/kernel/svm | |
parent | 6e473a897ce563ad04224bdd731387b0dbd22235 (diff) |
Cycles: decouple shadow paths from main path on GPU
The motivation for this is twofold. It improves performance (5-10% on most
benchmark scenes), and will help to bring back transparency support for the
ambient occlusion pass.
* Duplicate some members from the main path state in the shadow path state.
* Add shadow paths incrementally to the array similar to what we do for
the shadow catchers.
* For the scheduling, allow running shade surface and shade volume kernels
as long as there is enough space in the shadow paths array. If not, execute
shadow kernels until it is empty.
* Add IntegratorShadowState and ConstIntegratorShadowState typedefs that
can be different between CPU and GPU. For GPU both main and shadow paths
juse have an integer for SoA access. Bt with CPU it's a different pointer
type so we get type safety checks in code shared between CPU and GPU.
* For CPU, add a separate IntegratorShadowStateCPU struct embedded in
IntegratorShadowState.
* Update various functions to take the shadow state, and make SVM take either
type of state using templates.
Differential Revision: https://developer.blender.org/D12889
Diffstat (limited to 'intern/cycles/kernel/svm')
-rw-r--r-- | intern/cycles/kernel/svm/svm.h | 4 | ||||
-rw-r--r-- | intern/cycles/kernel/svm/svm_ao.h | 22 | ||||
-rw-r--r-- | intern/cycles/kernel/svm/svm_aov.h | 8 | ||||
-rw-r--r-- | intern/cycles/kernel/svm/svm_bevel.h | 18 | ||||
-rw-r--r-- | intern/cycles/kernel/svm/svm_light_path.h | 39 |
5 files changed, 43 insertions, 48 deletions
diff --git a/intern/cycles/kernel/svm/svm.h b/intern/cycles/kernel/svm/svm.h index 57879dc238f..472f3517839 100644 --- a/intern/cycles/kernel/svm/svm.h +++ b/intern/cycles/kernel/svm/svm.h @@ -225,9 +225,9 @@ CCL_NAMESPACE_END CCL_NAMESPACE_BEGIN /* Main Interpreter Loop */ -template<uint node_feature_mask, ShaderType type> +template<uint node_feature_mask, ShaderType type, typename ConstIntegratorGenericState> ccl_device void svm_eval_nodes(KernelGlobals kg, - ConstIntegratorState state, + ConstIntegratorGenericState state, ShaderData *sd, ccl_global float *render_buffer, uint32_t path_flag) diff --git a/intern/cycles/kernel/svm/svm_ao.h b/intern/cycles/kernel/svm/svm_ao.h index a1efd2f0a43..4cfef7bc204 100644 --- a/intern/cycles/kernel/svm/svm_ao.h +++ b/intern/cycles/kernel/svm/svm_ao.h @@ -21,17 +21,17 @@ CCL_NAMESPACE_BEGIN #ifdef __SHADER_RAYTRACE__ # ifdef __KERNEL_OPTIX__ -extern "C" __device__ float __direct_callable__svm_node_ao(KernelGlobals kg, - ConstIntegratorState state, +extern "C" __device__ float __direct_callable__svm_node_ao( # else -ccl_device float svm_ao(KernelGlobals kg, - ConstIntegratorState state, +ccl_device float svm_ao( # endif - ccl_private ShaderData *sd, - float3 N, - float max_dist, - int num_samples, - int flags) + KernelGlobals kg, + ConstIntegratorState state, + ccl_private ShaderData *sd, + float3 N, + float max_dist, + int num_samples, + int flags) { if (flags & NODE_AO_GLOBAL_RADIUS) { max_dist = kernel_data.integrator.ao_bounces_distance; @@ -91,7 +91,7 @@ ccl_device float svm_ao(KernelGlobals kg, return ((float)unoccluded) / num_samples; } -template<uint node_feature_mask> +template<uint node_feature_mask, typename ConstIntegratorGenericState> # if defined(__KERNEL_OPTIX__) ccl_device_inline # else @@ -99,7 +99,7 @@ ccl_device_noinline # endif void svm_node_ao(KernelGlobals kg, - ConstIntegratorState state, + ConstIntegratorGenericState state, ccl_private ShaderData *sd, ccl_private float *stack, uint4 node) diff --git a/intern/cycles/kernel/svm/svm_aov.h b/intern/cycles/kernel/svm/svm_aov.h index 0d6395d52c0..833a6443b3c 100644 --- a/intern/cycles/kernel/svm/svm_aov.h +++ b/intern/cycles/kernel/svm/svm_aov.h @@ -26,9 +26,9 @@ ccl_device_inline bool svm_node_aov_check(const uint32_t path_flag, return ((render_buffer != NULL) && is_primary); } -template<uint node_feature_mask> +template<uint node_feature_mask, typename ConstIntegratorGenericState> ccl_device void svm_node_aov_color(KernelGlobals kg, - ConstIntegratorState state, + ConstIntegratorGenericState state, ccl_private ShaderData *sd, ccl_private float *stack, uint4 node, @@ -46,9 +46,9 @@ ccl_device void svm_node_aov_color(KernelGlobals kg, } } -template<uint node_feature_mask> +template<uint node_feature_mask, typename ConstIntegratorGenericState> ccl_device void svm_node_aov_value(KernelGlobals kg, - ConstIntegratorState state, + ConstIntegratorGenericState state, ccl_private ShaderData *sd, ccl_private float *stack, uint4 node, diff --git a/intern/cycles/kernel/svm/svm_bevel.h b/intern/cycles/kernel/svm/svm_bevel.h index 3ce3af20795..292887beedf 100644 --- a/intern/cycles/kernel/svm/svm_bevel.h +++ b/intern/cycles/kernel/svm/svm_bevel.h @@ -99,15 +99,15 @@ ccl_device void svm_bevel_cubic_sample(const float radius, */ # ifdef __KERNEL_OPTIX__ -extern "C" __device__ float3 __direct_callable__svm_node_bevel(KernelGlobals kg, - ConstIntegratorState state, +extern "C" __device__ float3 __direct_callable__svm_node_bevel( # else -ccl_device float3 svm_bevel(KernelGlobals kg, - ConstIntegratorState state, +ccl_device float3 svm_bevel( # endif - ccl_private ShaderData *sd, - float radius, - int num_samples) + KernelGlobals kg, + ConstIntegratorState state, + ccl_private ShaderData *sd, + float radius, + int num_samples) { /* Early out if no sampling needed. */ if (radius <= 0.0f || num_samples < 1 || sd->object == OBJECT_NONE) { @@ -282,7 +282,7 @@ ccl_device float3 svm_bevel(KernelGlobals kg, return is_zero(N) ? sd->N : (sd->flag & SD_BACKFACING) ? -N : N; } -template<uint node_feature_mask> +template<uint node_feature_mask, typename ConstIntegratorGenericState> # if defined(__KERNEL_OPTIX__) ccl_device_inline # else @@ -290,7 +290,7 @@ ccl_device_noinline # endif void svm_node_bevel(KernelGlobals kg, - ConstIntegratorState state, + ConstIntegratorGenericState state, ccl_private ShaderData *sd, ccl_private float *stack, uint4 node) diff --git a/intern/cycles/kernel/svm/svm_light_path.h b/intern/cycles/kernel/svm/svm_light_path.h index c61ace9757a..5e1fc4f671c 100644 --- a/intern/cycles/kernel/svm/svm_light_path.h +++ b/intern/cycles/kernel/svm/svm_light_path.h @@ -18,9 +18,9 @@ CCL_NAMESPACE_BEGIN /* Light Path Node */ -template<uint node_feature_mask> +template<uint node_feature_mask, typename ConstIntegratorGenericState> ccl_device_noinline void svm_node_light_path(KernelGlobals kg, - ConstIntegratorState state, + ConstIntegratorGenericState state, ccl_private const ShaderData *sd, ccl_private float *stack, uint type, @@ -64,48 +64,43 @@ ccl_device_noinline void svm_node_light_path(KernelGlobals kg, /* Read bounce from difference location depending if this is a shadow * path. It's a bit dubious to have integrate state details leak into * this function but hard to avoid currently. */ - int bounce = 0; IF_KERNEL_NODES_FEATURE(LIGHT_PATH) { - bounce = (path_flag & PATH_RAY_SHADOW) ? INTEGRATOR_STATE(state, shadow_path, bounce) : - INTEGRATOR_STATE(state, path, bounce); + info = (float)integrator_state_bounce(state, path_flag); } /* For background, light emission and shadow evaluation we from a * surface or volume we are effective one bounce further. */ if (path_flag & (PATH_RAY_SHADOW | PATH_RAY_EMISSION)) { - bounce++; + info += 1.0f; } - - info = (float)bounce; break; } - /* TODO */ case NODE_LP_ray_transparent: { - int bounce = 0; IF_KERNEL_NODES_FEATURE(LIGHT_PATH) { - bounce = (path_flag & PATH_RAY_SHADOW) ? - INTEGRATOR_STATE(state, shadow_path, transparent_bounce) : - INTEGRATOR_STATE(state, path, transparent_bounce); + info = (float)integrator_state_transparent_bounce(state, path_flag); } - - info = (float)bounce; break; } -#if 0 case NODE_LP_ray_diffuse: - info = (float)state->diffuse_bounce; + IF_KERNEL_NODES_FEATURE(LIGHT_PATH) + { + info = (float)integrator_state_diffuse_bounce(state, path_flag); + } break; case NODE_LP_ray_glossy: - info = (float)state->glossy_bounce; + IF_KERNEL_NODES_FEATURE(LIGHT_PATH) + { + info = (float)integrator_state_glossy_bounce(state, path_flag); + } break; -#endif -#if 0 case NODE_LP_ray_transmission: - info = (float)state->transmission_bounce; + IF_KERNEL_NODES_FEATURE(LIGHT_PATH) + { + info = (float)integrator_state_transmission_bounce(state, path_flag); + } break; -#endif } stack_store_float(stack, out_offset, info); |