Cycles: decouple shadow paths from main path on GPU

The motivation for this is twofold. It improves performance (5-10% on most benchmark scenes), and will help to bring back transparency support for the ambient occlusion pass. * Duplicate some members from the main path state in the shadow path state. * Add shadow paths incrementally to the array similar to what we do for the shadow catchers. * For the scheduling, allow running shade surface and shade volume kernels as long as there is enough space in the shadow paths array. If not, execute shadow kernels until it is empty. * Add IntegratorShadowState and ConstIntegratorShadowState typedefs that can be different between CPU and GPU. For GPU both main and shadow paths juse have an integer for SoA access. Bt with CPU it's a different pointer type so we get type safety checks in code shared between CPU and GPU. * For CPU, add a separate IntegratorShadowStateCPU struct embedded in IntegratorShadowState. * Update various functions to take the shadow state, and make SVM take either type of state using templates. Differential Revision: https://developer.blender.org/D12889
author: Brecht Van Lommel <brecht> 2021-10-17 17:22:20 +0300
committer: Brecht Van Lommel <brecht@blender.org> 2021-10-19 16:09:29 +0300
commit: 943e73b07e26d64c04ccb7d8f656e3818a57cca0 (patch)
tree: 870e21cb9b8c49878138e16360178ee293b78f6c /intern/cycles/kernel/svm
parent: 6e473a897ce563ad04224bdd731387b0dbd22235 (diff)
5 files changed, 43 insertions, 48 deletions
diff --git a/intern/cycles/kernel/svm/svm.h b/intern/cycles/kernel/svm/svm.h
index 57879dc238f..472f3517839 100644
--- a/intern/cycles/kernel/svm/svm.h
+++ b/intern/cycles/kernel/svm/svm.h
@@ -225,9 +225,9 @@ CCL_NAMESPACE_END
 CCL_NAMESPACE_BEGIN
 
 /* Main Interpreter Loop */
-template<uint node_feature_mask, ShaderType type>
+template<uint node_feature_mask, ShaderType type, typename ConstIntegratorGenericState>
 ccl_device void svm_eval_nodes(KernelGlobals kg,
-                               ConstIntegratorState state,
+                               ConstIntegratorGenericState state,
                                ShaderData *sd,
                                ccl_global float *render_buffer,
                                uint32_t path_flag)
diff --git a/intern/cycles/kernel/svm/svm_ao.h b/intern/cycles/kernel/svm/svm_ao.h
index a1efd2f0a43..4cfef7bc204 100644
--- a/intern/cycles/kernel/svm/svm_ao.h
+++ b/intern/cycles/kernel/svm/svm_ao.h
@@ -21,17 +21,17 @@ CCL_NAMESPACE_BEGIN
 #ifdef __SHADER_RAYTRACE__
 
 #  ifdef __KERNEL_OPTIX__
-extern "C" __device__ float __direct_callable__svm_node_ao(KernelGlobals kg,
-                                                           ConstIntegratorState state,
+extern "C" __device__ float __direct_callable__svm_node_ao(
 #  else
-ccl_device float svm_ao(KernelGlobals kg,
-                        ConstIntegratorState state,
+ccl_device float svm_ao(
 #  endif
-                                                           ccl_private ShaderData *sd,
-                                                           float3 N,
-                                                           float max_dist,
-                                                           int num_samples,
-                                                           int flags)
+    KernelGlobals kg,
+    ConstIntegratorState state,
+    ccl_private ShaderData *sd,
+    float3 N,
+    float max_dist,
+    int num_samples,
+    int flags)
 {
   if (flags & NODE_AO_GLOBAL_RADIUS) {
     max_dist = kernel_data.integrator.ao_bounces_distance;
@@ -91,7 +91,7 @@ ccl_device float svm_ao(KernelGlobals kg,
   return ((float)unoccluded) / num_samples;
 }
 
-template<uint node_feature_mask>
+template<uint node_feature_mask, typename ConstIntegratorGenericState>
 #  if defined(__KERNEL_OPTIX__)
 ccl_device_inline
 #  else
@@ -99,7 +99,7 @@ ccl_device_noinline
 #  endif
     void
     svm_node_ao(KernelGlobals kg,
-                ConstIntegratorState state,
+                ConstIntegratorGenericState state,
                 ccl_private ShaderData *sd,
                 ccl_private float *stack,
                 uint4 node)
diff --git a/intern/cycles/kernel/svm/svm_aov.h b/intern/cycles/kernel/svm/svm_aov.h
index 0d6395d52c0..833a6443b3c 100644
--- a/intern/cycles/kernel/svm/svm_aov.h
+++ b/intern/cycles/kernel/svm/svm_aov.h
@@ -26,9 +26,9 @@ ccl_device_inline bool svm_node_aov_check(const uint32_t path_flag,
   return ((render_buffer != NULL) && is_primary);
 }
 
-template<uint node_feature_mask>
+template<uint node_feature_mask, typename ConstIntegratorGenericState>
 ccl_device void svm_node_aov_color(KernelGlobals kg,
-                                   ConstIntegratorState state,
+                                   ConstIntegratorGenericState state,
                                    ccl_private ShaderData *sd,
                                    ccl_private float *stack,
                                    uint4 node,
@@ -46,9 +46,9 @@ ccl_device void svm_node_aov_color(KernelGlobals kg,
   }
 }
 
-template<uint node_feature_mask>
+template<uint node_feature_mask, typename ConstIntegratorGenericState>
 ccl_device void svm_node_aov_value(KernelGlobals kg,
-                                   ConstIntegratorState state,
+                                   ConstIntegratorGenericState state,
                                    ccl_private ShaderData *sd,
                                    ccl_private float *stack,
                                    uint4 node,
diff --git a/intern/cycles/kernel/svm/svm_bevel.h b/intern/cycles/kernel/svm/svm_bevel.h
index 3ce3af20795..292887beedf 100644
--- a/intern/cycles/kernel/svm/svm_bevel.h
+++ b/intern/cycles/kernel/svm/svm_bevel.h
@@ -99,15 +99,15 @@ ccl_device void svm_bevel_cubic_sample(const float radius,
  */
 
 #  ifdef __KERNEL_OPTIX__
-extern "C" __device__ float3 __direct_callable__svm_node_bevel(KernelGlobals kg,
-                                                               ConstIntegratorState state,
+extern "C" __device__ float3 __direct_callable__svm_node_bevel(
 #  else
-ccl_device float3 svm_bevel(KernelGlobals kg,
-                            ConstIntegratorState state,
+ccl_device float3 svm_bevel(
 #  endif
-                                                               ccl_private ShaderData *sd,
-                                                               float radius,
-                                                               int num_samples)
+    KernelGlobals kg,
+    ConstIntegratorState state,
+    ccl_private ShaderData *sd,
+    float radius,
+    int num_samples)
 {
   /* Early out if no sampling needed. */
   if (radius <= 0.0f || num_samples < 1 || sd->object == OBJECT_NONE) {
@@ -282,7 +282,7 @@ ccl_device float3 svm_bevel(KernelGlobals kg,
   return is_zero(N) ? sd->N : (sd->flag & SD_BACKFACING) ? -N : N;
 }
 
-template<uint node_feature_mask>
+template<uint node_feature_mask, typename ConstIntegratorGenericState>
 #  if defined(__KERNEL_OPTIX__)
 ccl_device_inline
 #  else
@@ -290,7 +290,7 @@ ccl_device_noinline
 #  endif
     void
     svm_node_bevel(KernelGlobals kg,
-                   ConstIntegratorState state,
+                   ConstIntegratorGenericState state,
                    ccl_private ShaderData *sd,
                    ccl_private float *stack,
                    uint4 node)
diff --git a/intern/cycles/kernel/svm/svm_light_path.h b/intern/cycles/kernel/svm/svm_light_path.h
index c61ace9757a..5e1fc4f671c 100644
--- a/intern/cycles/kernel/svm/svm_light_path.h
+++ b/intern/cycles/kernel/svm/svm_light_path.h
@@ -18,9 +18,9 @@ CCL_NAMESPACE_BEGIN
 
 /* Light Path Node */
 
-template<uint node_feature_mask>
+template<uint node_feature_mask, typename ConstIntegratorGenericState>
 ccl_device_noinline void svm_node_light_path(KernelGlobals kg,
-                                             ConstIntegratorState state,
+                                             ConstIntegratorGenericState state,
                                              ccl_private const ShaderData *sd,
                                              ccl_private float *stack,
                                              uint type,
@@ -64,48 +64,43 @@ ccl_device_noinline void svm_node_light_path(KernelGlobals kg,
       /* Read bounce from difference location depending if this is a shadow
        * path. It's a bit dubious to have integrate state details leak into
        * this function but hard to avoid currently. */
-      int bounce = 0;
       IF_KERNEL_NODES_FEATURE(LIGHT_PATH)
       {
-        bounce = (path_flag & PATH_RAY_SHADOW) ? INTEGRATOR_STATE(state, shadow_path, bounce) :
-                                                 INTEGRATOR_STATE(state, path, bounce);
+        info = (float)integrator_state_bounce(state, path_flag);
       }
 
       /* For background, light emission and shadow evaluation we from a
        * surface or volume we are effective one bounce further. */
       if (path_flag & (PATH_RAY_SHADOW | PATH_RAY_EMISSION)) {
-        bounce++;
+        info += 1.0f;
       }
-
-      info = (float)bounce;
       break;
     }
-      /* TODO */
     case NODE_LP_ray_transparent: {
-      int bounce = 0;
       IF_KERNEL_NODES_FEATURE(LIGHT_PATH)
       {
-        bounce = (path_flag & PATH_RAY_SHADOW) ?
-                     INTEGRATOR_STATE(state, shadow_path, transparent_bounce) :
-                     INTEGRATOR_STATE(state, path, transparent_bounce);
+        info = (float)integrator_state_transparent_bounce(state, path_flag);
       }
-
-      info = (float)bounce;
       break;
     }
-#if 0
     case NODE_LP_ray_diffuse:
-      info = (float)state->diffuse_bounce;
+      IF_KERNEL_NODES_FEATURE(LIGHT_PATH)
+      {
+        info = (float)integrator_state_diffuse_bounce(state, path_flag);
+      }
       break;
     case NODE_LP_ray_glossy:
-      info = (float)state->glossy_bounce;
+      IF_KERNEL_NODES_FEATURE(LIGHT_PATH)
+      {
+        info = (float)integrator_state_glossy_bounce(state, path_flag);
+      }
       break;
-#endif
-#if 0
     case NODE_LP_ray_transmission:
-      info = (float)state->transmission_bounce;
+      IF_KERNEL_NODES_FEATURE(LIGHT_PATH)
+      {
+        info = (float)integrator_state_transmission_bounce(state, path_flag);
+      }
       break;
-#endif
   }
 
   stack_store_float(stack, out_offset, info);
author	Brecht Van Lommel <brecht>	2021-10-17 17:22:20 +0300
committer	Brecht Van Lommel <brecht@blender.org>	2021-10-19 16:09:29 +0300
commit	943e73b07e26d64c04ccb7d8f656e3818a57cca0 (patch)
tree	870e21cb9b8c49878138e16360178ee293b78f6c /intern/cycles/kernel/svm
parent	6e473a897ce563ad04224bdd731387b0dbd22235 (diff)