1 files changed, 133 insertions, 98 deletions
diff --git a/intern/cycles/kernel/integrator/shade_surface.h b/intern/cycles/kernel/integrator/shade_surface.h
index ce1398859b7..c19f56a9b70 100644
--- a/intern/cycles/kernel/integrator/shade_surface.h
+++ b/intern/cycles/kernel/integrator/shade_surface.h
@@ -3,14 +3,15 @@
 
 #pragma once
 
-#include "kernel/film/accumulate.h"
-#include "kernel/film/passes.h"
+#include "kernel/film/data_passes.h"
+#include "kernel/film/denoising_passes.h"
+#include "kernel/film/light_passes.h"
 
 #include "kernel/integrator/mnee.h"
 
 #include "kernel/integrator/path_state.h"
-#include "kernel/integrator/shader_eval.h"
 #include "kernel/integrator/subsurface.h"
+#include "kernel/integrator/surface_shader.h"
 #include "kernel/integrator/volume_stack.h"
 
 #include "kernel/light/light.h"
@@ -31,7 +32,52 @@ ccl_device_forceinline void integrate_surface_shader_setup(KernelGlobals kg,
   shader_setup_from_ray(kg, sd, &ray, &isect);
 }
 
-#ifdef __HOLDOUT__
+ccl_device_forceinline float3 integrate_surface_ray_offset(KernelGlobals kg,
+                                                           const ccl_private ShaderData *sd,
+                                                           const float3 ray_P,
+                                                           const float3 ray_D)
+{
+  /* No ray offset needed for other primitive types. */
+  if (!(sd->type & PRIMITIVE_TRIANGLE)) {
+    return ray_P;
+  }
+
+  /* Self intersection tests already account for the case where a ray hits the
+   * same primitive. However precision issues can still cause neighboring
+   * triangles to be hit. Here we test if the ray-triangle intersection with
+   * the same primitive would miss, implying that a neighboring triangle would
+   * be hit instead.
+   *
+   * This relies on triangle intersection to be watertight, and the object inverse
+   * object transform to match the one used by ray intersection exactly.
+   *
+   * Potential improvements:
+   * - It appears this happens when either barycentric coordinates are small,
+   *   or dot(sd->Ng, ray_D)  is small. Detect such cases and skip test?
+   * - Instead of ray offset, can we tweak P to lie within the triangle?
+   */
+  const uint tri_vindex = kernel_data_fetch(tri_vindex, sd->prim).w;
+  const packed_float3 tri_a = kernel_data_fetch(tri_verts, tri_vindex + 0),
+                      tri_b = kernel_data_fetch(tri_verts, tri_vindex + 1),
+                      tri_c = kernel_data_fetch(tri_verts, tri_vindex + 2);
+
+  float3 local_ray_P = ray_P;
+  float3 local_ray_D = ray_D;
+
+  if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
+    const Transform itfm = object_get_inverse_transform(kg, sd);
+    local_ray_P = transform_point(&itfm, local_ray_P);
+    local_ray_D = transform_direction(&itfm, local_ray_D);
+  }
+
+  if (ray_triangle_intersect_self(local_ray_P, local_ray_D, tri_a, tri_b, tri_c)) {
+    return ray_P;
+  }
+  else {
+    return ray_offset(ray_P, sd->Ng);
+  }
+}
+
 ccl_device_forceinline bool integrate_surface_holdout(KernelGlobals kg,
                                                       ConstIntegratorState state,
                                                       ccl_private ShaderData *sd,
@@ -42,22 +88,18 @@ ccl_device_forceinline bool integrate_surface_holdout(KernelGlobals kg,
 
   if (((sd->flag & SD_HOLDOUT) || (sd->object_flag & SD_OBJECT_HOLDOUT_MASK)) &&
       (path_flag & PATH_RAY_TRANSPARENT_BACKGROUND)) {
-    const float3 holdout_weight = shader_holdout_apply(kg, sd);
-    if (kernel_data.background.transparent) {
-      const float3 throughput = INTEGRATOR_STATE(state, path, throughput);
-      const float transparent = average(holdout_weight * throughput);
-      kernel_accum_holdout(kg, state, path_flag, transparent, render_buffer);
-    }
-    if (isequal_float3(holdout_weight, one_float3())) {
+    const Spectrum holdout_weight = surface_shader_apply_holdout(kg, sd);
+    const Spectrum throughput = INTEGRATOR_STATE(state, path, throughput);
+    const float transparent = average(holdout_weight * throughput);
+    film_write_holdout(kg, state, path_flag, transparent, render_buffer);
+    if (isequal(holdout_weight, one_spectrum())) {
       return false;
     }
   }
 
   return true;
 }
-#endif /* __HOLDOUT__ */
 
-#ifdef __EMISSION__
 ccl_device_forceinline void integrate_surface_emission(KernelGlobals kg,
                                                        ConstIntegratorState state,
                                                        ccl_private const ShaderData *sd,
@@ -67,32 +109,29 @@ ccl_device_forceinline void integrate_surface_emission(KernelGlobals kg,
   const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag);
 
   /* Evaluate emissive closure. */
-  float3 L = shader_emissive_eval(sd);
+  Spectrum L = surface_shader_emission(sd);
+  float mis_weight = 1.0f;
 
-#  ifdef __HAIR__
+#ifdef __HAIR__
   if (!(path_flag & PATH_RAY_MIS_SKIP) && (sd->flag & SD_USE_MIS) &&
       (sd->type & PRIMITIVE_TRIANGLE))
-#  else
+#else
   if (!(path_flag & PATH_RAY_MIS_SKIP) && (sd->flag & SD_USE_MIS))
-#  endif
+#endif
   {
     const float bsdf_pdf = INTEGRATOR_STATE(state, path, mis_ray_pdf);
-    const float t = sd->ray_length + INTEGRATOR_STATE(state, path, mis_ray_t);
+    const float t = sd->ray_length;
 
     /* Multiple importance sampling, get triangle light pdf,
      * and compute weight with respect to BSDF pdf. */
     float pdf = triangle_light_pdf(kg, sd, t);
-    float mis_weight = light_sample_mis_weight_forward(kg, bsdf_pdf, pdf);
-    L *= mis_weight;
+    mis_weight = light_sample_mis_weight_forward(kg, bsdf_pdf, pdf);
   }
 
-  const float3 throughput = INTEGRATOR_STATE(state, path, throughput);
-  kernel_accum_emission(
-      kg, state, throughput * L, render_buffer, object_lightgroup(kg, sd->object));
+  film_write_surface_emission(
+      kg, state, L, mis_weight, render_buffer, object_lightgroup(kg, sd->object));
 }
-#endif /* __EMISSION__ */
 
-#ifdef __EMISSION__
 /* Path tracing: sample point on light and evaluate light shader, then
  * queue shadow ray to be traced. */
 template<uint node_feature_mask>
@@ -111,11 +150,10 @@ ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg,
   {
     const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag);
     const uint bounce = INTEGRATOR_STATE(state, path, bounce);
-    float light_u, light_v;
-    path_state_rng_2D(kg, rng_state, PRNG_LIGHT_U, &light_u, &light_v);
+    const float2 rand_light = path_state_rng_2D(kg, rng_state, PRNG_LIGHT);
 
     if (!light_distribution_sample_from_position(
-            kg, light_u, light_v, sd->time, sd->P, bounce, path_flag, &ls)) {
+            kg, rand_light.x, rand_light.y, sd->time, sd->P, bounce, path_flag, &ls)) {
       return;
     }
   }
@@ -133,15 +171,15 @@ ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg,
 
   Ray ray ccl_optional_struct_init;
   BsdfEval bsdf_eval ccl_optional_struct_init;
-  const bool is_transmission = shader_bsdf_is_transmission(sd, ls.D);
+  const bool is_transmission = surface_shader_is_transmission(sd, ls.D);
 
-#  ifdef __MNEE__
+#ifdef __MNEE__
   int mnee_vertex_count = 0;
   IF_KERNEL_FEATURE(MNEE)
   {
     if (ls.lamp != LAMP_NONE) {
       /* Is this a caustic light? */
-      const bool use_caustics = kernel_tex_fetch(__lights, ls.lamp).use_caustics;
+      const bool use_caustics = kernel_data_fetch(lights, ls.lamp).use_caustics;
       if (use_caustics) {
         /* Are we on a caustic caster? */
         if (is_transmission && (sd->object_flag & SD_OBJECT_CAUSTICS_CASTER))
@@ -161,16 +199,17 @@ ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg,
     light_sample_to_surface_shadow_ray(kg, emission_sd, &ls, &ray);
   }
   else
-#  endif /* __MNEE__ */
+#endif /* __MNEE__ */
   {
-    const float3 light_eval = light_sample_shader_eval(kg, state, emission_sd, &ls, sd->time);
+    const Spectrum light_eval = light_sample_shader_eval(kg, state, emission_sd, &ls, sd->time);
     if (is_zero(light_eval)) {
       return;
     }
 
     /* Evaluate BSDF. */
-    const float bsdf_pdf = shader_bsdf_eval(kg, sd, ls.D, is_transmission, &bsdf_eval, ls.shader);
-    bsdf_eval_mul3(&bsdf_eval, light_eval / ls.pdf);
+    const float bsdf_pdf = surface_shader_bsdf_eval(
+        kg, sd, ls.D, is_transmission, &bsdf_eval, ls.shader);
+    bsdf_eval_mul(&bsdf_eval, light_eval / ls.pdf);
 
     if (ls.shader & SHADER_USE_MIS) {
       const float mis_weight = light_sample_mis_weight_nee(kg, ls.pdf, bsdf_pdf);
@@ -190,16 +229,20 @@ ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg,
   const bool is_light = light_sample_is_light(&ls);
 
   /* Branch off shadow kernel. */
-  INTEGRATOR_SHADOW_PATH_INIT(
-      shadow_state, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW, shadow);
+  IntegratorShadowState shadow_state = integrator_shadow_path_init(
+      kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW, false);
 
   /* Copy volume stack and enter/exit volume. */
   integrator_state_copy_volume_stack_to_shadow(kg, shadow_state, state);
 
   if (is_transmission) {
-#  ifdef __VOLUME__
+#ifdef __VOLUME__
     shadow_volume_stack_enter_exit(kg, shadow_state, sd);
-#  endif
+#endif
+  }
+
+  if (ray.self.object != OBJECT_NONE) {
+    ray.P = integrate_surface_ray_offset(kg, sd, ray.P, ray.D);
   }
 
   /* Write shadow ray and associated state to global memory. */
@@ -213,11 +256,12 @@ ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg,
   /* Copy state from main path to shadow path. */
   uint32_t shadow_flag = INTEGRATOR_STATE(state, path, flag);
   shadow_flag |= (is_light) ? PATH_RAY_SHADOW_FOR_LIGHT : 0;
-  const float3 throughput = INTEGRATOR_STATE(state, path, throughput) * bsdf_eval_sum(&bsdf_eval);
+  const Spectrum throughput = INTEGRATOR_STATE(state, path, throughput) *
+                              bsdf_eval_sum(&bsdf_eval);
 
   if (kernel_data.kernel_features & KERNEL_FEATURE_LIGHT_PASSES) {
-    packed_float3 pass_diffuse_weight;
-    packed_float3 pass_glossy_weight;
+    PackedSpectrum pass_diffuse_weight;
+    PackedSpectrum pass_glossy_weight;
 
     if (shadow_flag & PATH_RAY_ANY_PASS) {
       /* Indirect bounce, use weights from earlier surface or volume bounce. */
@@ -227,8 +271,8 @@ ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg,
     else {
       /* Direct light, use BSDFs at this bounce. */
       shadow_flag |= PATH_RAY_SURFACE_PASS;
-      pass_diffuse_weight = packed_float3(bsdf_eval_pass_diffuse_weight(&bsdf_eval));
-      pass_glossy_weight = packed_float3(bsdf_eval_pass_glossy_weight(&bsdf_eval));
+      pass_diffuse_weight = PackedSpectrum(bsdf_eval_pass_diffuse_weight(&bsdf_eval));
+      pass_glossy_weight = PackedSpectrum(bsdf_eval_pass_glossy_weight(&bsdf_eval));
     }
 
     INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, pass_diffuse_weight) = pass_diffuse_weight;
@@ -250,7 +294,7 @@ ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg,
   INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, glossy_bounce) = INTEGRATOR_STATE(
       state, path, glossy_bounce);
 
-#  ifdef __MNEE__
+#ifdef __MNEE__
   if (mnee_vertex_count > 0) {
     INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, transmission_bounce) =
         INTEGRATOR_STATE(state, path, transmission_bounce) + mnee_vertex_count - 1;
@@ -262,7 +306,7 @@ ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg,
                            bounce) = INTEGRATOR_STATE(state, path, bounce) + mnee_vertex_count;
   }
   else
-#  endif
+#endif
   {
     INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, transmission_bounce) = INTEGRATOR_STATE(
         state, path, transmission_bounce);
@@ -284,7 +328,6 @@ ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg,
                                                    ls.group + 1 :
                                                    kernel_data.background.lightgroup + 1;
 }
-#endif
 
 /* Path tracing: bounce off or through surface with new direction. */
 ccl_device_forceinline int integrate_surface_bsdf_bssrdf_bounce(
@@ -298,9 +341,8 @@ ccl_device_forceinline int integrate_surface_bsdf_bssrdf_bounce(
     return LABEL_NONE;
   }
 
-  float bsdf_u, bsdf_v;
-  path_state_rng_2D(kg, rng_state, PRNG_BSDF_U, &bsdf_u, &bsdf_v);
-  ccl_private const ShaderClosure *sc = shader_bsdf_bssrdf_pick(sd, &bsdf_u);
+  float2 rand_bsdf = path_state_rng_2D(kg, rng_state, PRNG_SURFACE_BSDF);
+  ccl_private const ShaderClosure *sc = surface_shader_bsdf_bssrdf_pick(sd, &rand_bsdf);
 
 #ifdef __SUBSURFACE__
   /* BSSRDF closure, we schedule subsurface intersection kernel. */
@@ -313,29 +355,33 @@ ccl_device_forceinline int integrate_surface_bsdf_bssrdf_bounce(
   float bsdf_pdf;
   BsdfEval bsdf_eval ccl_optional_struct_init;
   float3 bsdf_omega_in ccl_optional_struct_init;
-  differential3 bsdf_domega_in ccl_optional_struct_init;
   int label;
 
-  label = shader_bsdf_sample_closure(
-      kg, sd, sc, bsdf_u, bsdf_v, &bsdf_eval, &bsdf_omega_in, &bsdf_domega_in, &bsdf_pdf);
+  label = surface_shader_bsdf_sample_closure(
+      kg, sd, sc, rand_bsdf, &bsdf_eval, &bsdf_omega_in, &bsdf_pdf);
 
   if (bsdf_pdf == 0.0f || bsdf_eval_is_zero(&bsdf_eval)) {
     return LABEL_NONE;
   }
 
-  /* Setup ray. Note that clipping works through transparent bounces. */
-  INTEGRATOR_STATE_WRITE(state, ray, P) = sd->P;
-  INTEGRATOR_STATE_WRITE(state, ray, D) = normalize(bsdf_omega_in);
-  INTEGRATOR_STATE_WRITE(state, ray, t) = (label & LABEL_TRANSPARENT) ?
-                                              INTEGRATOR_STATE(state, ray, t) - sd->ray_length :
-                                              FLT_MAX;
+  if (label & LABEL_TRANSPARENT) {
+    /* Only need to modify start distance for transparent. */
+    INTEGRATOR_STATE_WRITE(state, ray, tmin) = intersection_t_offset(sd->ray_length);
+  }
+  else {
+    /* Setup ray with changed origin and direction. */
+    const float3 D = normalize(bsdf_omega_in);
+    INTEGRATOR_STATE_WRITE(state, ray, P) = integrate_surface_ray_offset(kg, sd, sd->P, D);
+    INTEGRATOR_STATE_WRITE(state, ray, D) = D;
+    INTEGRATOR_STATE_WRITE(state, ray, tmin) = 0.0f;
+    INTEGRATOR_STATE_WRITE(state, ray, tmax) = FLT_MAX;
 #ifdef __RAY_DIFFERENTIALS__
-  INTEGRATOR_STATE_WRITE(state, ray, dP) = differential_make_compact(sd->dP);
-  INTEGRATOR_STATE_WRITE(state, ray, dD) = differential_make_compact(bsdf_domega_in);
+    INTEGRATOR_STATE_WRITE(state, ray, dP) = differential_make_compact(sd->dP);
 #endif
+  }
 
   /* Update throughput. */
-  float3 throughput = INTEGRATOR_STATE(state, path, throughput);
+  Spectrum throughput = INTEGRATOR_STATE(state, path, throughput);
   throughput *= bsdf_eval_sum(&bsdf_eval) / bsdf_pdf;
   INTEGRATOR_STATE_WRITE(state, path, throughput) = throughput;
 
@@ -349,12 +395,8 @@ ccl_device_forceinline int integrate_surface_bsdf_bssrdf_bounce(
   }
 
   /* Update path state */
-  if (label & LABEL_TRANSPARENT) {
-    INTEGRATOR_STATE_WRITE(state, path, mis_ray_t) += sd->ray_length;
-  }
-  else {
+  if (!(label & LABEL_TRANSPARENT)) {
     INTEGRATOR_STATE_WRITE(state, path, mis_ray_pdf) = bsdf_pdf;
-    INTEGRATOR_STATE_WRITE(state, path, mis_ray_t) = 0.0f;
     INTEGRATOR_STATE_WRITE(state, path, min_ray_pdf) = fminf(
         bsdf_pdf, INTEGRATOR_STATE(state, path, min_ray_pdf));
   }
@@ -371,17 +413,8 @@ ccl_device_forceinline int integrate_surface_volume_only_bounce(IntegratorState
     return LABEL_NONE;
   }
 
-  /* Setup ray position, direction stays unchanged. */
-  INTEGRATOR_STATE_WRITE(state, ray, P) = sd->P;
-
-  /* Clipping works through transparent. */
-  INTEGRATOR_STATE_WRITE(state, ray, t) -= sd->ray_length;
-
-#  ifdef __RAY_DIFFERENTIALS__
-  INTEGRATOR_STATE_WRITE(state, ray, dP) = differential_make_compact(sd->dP);
-#  endif
-
-  INTEGRATOR_STATE_WRITE(state, path, mis_ray_t) += sd->ray_length;
+  /* Only modify start distance. */
+  INTEGRATOR_STATE_WRITE(state, ray, tmin) = intersection_t_offset(sd->ray_length);
 
   return LABEL_TRANSMIT | LABEL_TRANSPARENT;
 }
@@ -416,23 +449,26 @@ ccl_device_forceinline void integrate_surface_ao(KernelGlobals kg,
     return;
   }
 
-  float bsdf_u, bsdf_v;
-  path_state_rng_2D(kg, rng_state, PRNG_BSDF_U, &bsdf_u, &bsdf_v);
+  const float2 rand_bsdf = path_state_rng_2D(kg, rng_state, PRNG_SURFACE_BSDF);
 
   float3 ao_N;
-  const float3 ao_weight = shader_bsdf_ao(
+  const Spectrum ao_weight = surface_shader_ao(
       kg, sd, kernel_data.integrator.ao_additive_factor, &ao_N);
 
   float3 ao_D;
   float ao_pdf;
-  sample_cos_hemisphere(ao_N, bsdf_u, bsdf_v, &ao_D, &ao_pdf);
+  sample_cos_hemisphere(ao_N, rand_bsdf.x, rand_bsdf.y, &ao_D, &ao_pdf);
 
   bool skip_self = true;
 
   Ray ray ccl_optional_struct_init;
   ray.P = shadow_ray_offset(kg, sd, ao_D, &skip_self);
   ray.D = ao_D;
-  ray.t = kernel_data.integrator.ao_bounces_distance;
+  if (skip_self) {
+    ray.P = integrate_surface_ray_offset(kg, sd, ray.P, ray.D);
+  }
+  ray.tmin = 0.0f;
+  ray.tmax = kernel_data.integrator.ao_bounces_distance;
   ray.time = sd->time;
   ray.self.object = (skip_self) ? sd->object : OBJECT_NONE;
   ray.self.prim = (skip_self) ? sd->prim : PRIM_NONE;
@@ -442,7 +478,8 @@ ccl_device_forceinline void integrate_surface_ao(KernelGlobals kg,
   ray.dD = differential_zero_compact();
 
   /* Branch off shadow kernel. */
-  INTEGRATOR_SHADOW_PATH_INIT(shadow_state, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW, ao);
+  IntegratorShadowState shadow_state = integrator_shadow_path_init(
+      kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW, true);
 
   /* Copy volume stack and enter/exit volume. */
   integrator_state_copy_volume_stack_to_shadow(kg, shadow_state, state);
@@ -458,7 +495,8 @@ ccl_device_forceinline void integrate_surface_ao(KernelGlobals kg,
   const uint16_t bounce = INTEGRATOR_STATE(state, path, bounce);
   const uint16_t transparent_bounce = INTEGRATOR_STATE(state, path, transparent_bounce);
   uint32_t shadow_flag = INTEGRATOR_STATE(state, path, flag) | PATH_RAY_SHADOW_FOR_AO;
-  const float3 throughput = INTEGRATOR_STATE(state, path, throughput) * shader_bsdf_alpha(kg, sd);
+  const Spectrum throughput = INTEGRATOR_STATE(state, path, throughput) *
+                              surface_shader_alpha(kg, sd);
 
   INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, render_pixel_index) = INTEGRATOR_STATE(
       state, path, render_pixel_index);
@@ -507,7 +545,7 @@ ccl_device bool integrate_surface(KernelGlobals kg,
     {
       /* Evaluate shader. */
       PROFILING_EVENT(PROFILING_SHADE_SURFACE_EVAL);
-      shader_eval_surface<node_feature_mask>(kg, state, &sd, render_buffer, path_flag);
+      surface_shader_eval<node_feature_mask>(kg, state, &sd, render_buffer, path_flag);
 
       /* Initialize additional RNG for BSDFs. */
       if (sd.flag & SD_BSDF_NEEDS_LCG) {
@@ -529,21 +567,17 @@ ccl_device bool integrate_surface(KernelGlobals kg,
 #endif
     {
       /* Filter closures. */
-      shader_prepare_surface_closures(kg, state, &sd, path_flag);
+      surface_shader_prepare_closures(kg, state, &sd, path_flag);
 
-#ifdef __HOLDOUT__
       /* Evaluate holdout. */
       if (!integrate_surface_holdout(kg, state, &sd, render_buffer)) {
         return false;
       }
-#endif
 
-#ifdef __EMISSION__
       /* Write emission. */
       if (sd.flag & SD_EMISSION) {
         integrate_surface_emission(kg, state, &sd, render_buffer);
       }
-#endif
 
       /* Perform path termination. Most paths have already been terminated in
        * the intersect_closest kernel, this is just for emission and for dividing
@@ -557,11 +591,11 @@ ccl_device bool integrate_surface(KernelGlobals kg,
       /* Write render passes. */
 #ifdef __PASSES__
       PROFILING_EVENT(PROFILING_SHADE_SURFACE_PASSES);
-      kernel_write_data_passes(kg, state, &sd, render_buffer);
+      film_write_data_passes(kg, state, &sd, render_buffer);
 #endif
 
 #ifdef __DENOISING_FEATURES__
-      kernel_write_denoising_features_surface(kg, state, &sd, render_buffer);
+      film_write_denoising_features_surface(kg, state, &sd, render_buffer);
 #endif
     }
 
@@ -604,22 +638,23 @@ ccl_device bool integrate_surface(KernelGlobals kg,
 }
 
 template<uint node_feature_mask = KERNEL_FEATURE_NODE_MASK_SURFACE & ~KERNEL_FEATURE_NODE_RAYTRACE,
-         int current_kernel = DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE>
+         DeviceKernel current_kernel = DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE>
 ccl_device_forceinline void integrator_shade_surface(KernelGlobals kg,
                                                      IntegratorState state,
                                                      ccl_global float *ccl_restrict render_buffer)
 {
   if (integrate_surface<node_feature_mask>(kg, state, render_buffer)) {
     if (INTEGRATOR_STATE(state, path, flag) & PATH_RAY_SUBSURFACE) {
-      INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE);
+      integrator_path_next(
+          kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE);
     }
     else {
-      kernel_assert(INTEGRATOR_STATE(state, ray, t) != 0.0f);
-      INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST);
+      kernel_assert(INTEGRATOR_STATE(state, ray, tmax) != 0.0f);
+      integrator_path_next(kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST);
     }
   }
   else {
-    INTEGRATOR_PATH_TERMINATE(current_kernel);
+    integrator_path_terminate(kg, state, current_kernel);
   }
 }