1 files changed, 526 insertions, 714 deletions
diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h
index f90701a8260..b0f53aef2d5 100644
--- a/intern/cycles/kernel/kernel_path.h
+++ b/intern/cycles/kernel/kernel_path.h
@@ -15,57 +15,350 @@
  */
 
 #ifdef __OSL__
-#  include "osl_shader.h"
+#  include "kernel/osl/osl_shader.h"
 #endif
 
-#include "kernel_random.h"
-#include "kernel_projection.h"
-#include "kernel_montecarlo.h"
-#include "kernel_differential.h"
-#include "kernel_camera.h"
+#include "kernel/kernel_random.h"
+#include "kernel/kernel_projection.h"
+#include "kernel/kernel_montecarlo.h"
+#include "kernel/kernel_differential.h"
+#include "kernel/kernel_camera.h"
 
-#include "geom/geom.h"
-#include "bvh/bvh.h"
+#include "kernel/geom/geom.h"
+#include "kernel/bvh/bvh.h"
 
-#include "kernel_accumulate.h"
-#include "kernel_shader.h"
-#include "kernel_light.h"
-#include "kernel_passes.h"
+#include "kernel/kernel_accumulate.h"
+#include "kernel/kernel_shader.h"
+#include "kernel/kernel_light.h"
+#include "kernel/kernel_passes.h"
 
-#ifdef __SUBSURFACE__
-#  include "kernel_subsurface.h"
+#if defined(__VOLUME__) || defined(__SUBSURFACE__)
+#  include "kernel/kernel_volume.h"
 #endif
 
-#ifdef __VOLUME__
-#  include "kernel_volume.h"
+#ifdef __SUBSURFACE__
+#  include "kernel/kernel_subsurface.h"
 #endif
 
-#include "kernel_path_state.h"
-#include "kernel_shadow.h"
-#include "kernel_emission.h"
-#include "kernel_path_common.h"
-#include "kernel_path_surface.h"
-#include "kernel_path_volume.h"
+#include "kernel/kernel_path_state.h"
+#include "kernel/kernel_shadow.h"
+#include "kernel/kernel_emission.h"
+#include "kernel/kernel_path_common.h"
+#include "kernel/kernel_path_surface.h"
+#include "kernel/kernel_path_volume.h"
+#include "kernel/kernel_path_subsurface.h"
+
+CCL_NAMESPACE_BEGIN
+
+ccl_device_forceinline bool kernel_path_scene_intersect(
+	KernelGlobals *kg,
+	ccl_addr_space PathState *state,
+	Ray *ray,
+	Intersection *isect,
+	PathRadiance *L)
+{  /* Intersect *ray with the scene, filling *isect; returns the scene_intersect() result. L is only touched under __KERNEL_DEBUG__. */
+	uint visibility = path_state_ray_visibility(kg, state);
+
+	if(path_state_ao_bounce(kg, state)) {  /* AO bounce: use shadow visibility and overwrite the caller's ray->t with the AO distance. */
+		visibility = PATH_RAY_SHADOW;
+		ray->t = kernel_data.background.ao_distance;
+	}
+
+#ifdef __HAIR__
+	float difl = 0.0f, extmax = 0.0f;  /* Both stay zero unless the BVH has curves. */
+	uint lcg_state = 0;
+
+	if(kernel_data.bvh.have_curves) {
+		if((kernel_data.cam.resolution == 1) && (state->flag & PATH_RAY_CAMERA)) {
+			float3 pixdiff = ray->dD.dx + ray->dD.dy;
+			/*pixdiff = pixdiff - dot(pixdiff, ray.D)*ray.D;*/
+			difl = kernel_data.curve.minimum_width * len(pixdiff) * 0.5f;
+		}
+
+		extmax = kernel_data.curve.maximum_width;
+		lcg_state = lcg_state_init_addrspace(state, 0x51633e2d);
+	}
+
+	bool hit = scene_intersect(kg, *ray, visibility, isect, &lcg_state, difl, extmax);
+#else
+	bool hit = scene_intersect(kg, *ray, visibility, isect, NULL, 0.0f, 0.0f);
+#endif  /* __HAIR__ */
 
 #ifdef __KERNEL_DEBUG__
-#  include "kernel_debug.h"
-#endif
+	if(state->flag & PATH_RAY_CAMERA) {  /* BVH traversal statistics are accumulated for camera rays only. */
+		L->debug_data.num_bvh_traversed_nodes += isect->num_traversed_nodes;
+		L->debug_data.num_bvh_traversed_instances += isect->num_traversed_instances;
+		L->debug_data.num_bvh_intersections += isect->num_intersections;
+	}
+	L->debug_data.num_ray_bounces++;  /* Counted for every ray, not just camera rays. */
+#endif  /* __KERNEL_DEBUG__ */
 
-CCL_NAMESPACE_BEGIN
+	return hit;
+}
+
+ccl_device_forceinline void kernel_path_lamp_emission(
+	KernelGlobals *kg,
+	ccl_addr_space PathState *state,
+	Ray *ray,
+	float3 throughput,
+	ccl_addr_space Intersection *isect,
+	ShaderData *emission_sd,
+	PathRadiance *L)
+{  /* Accumulate lamp emission along the ray into L for MIS; compiles to a no-op without __LAMP_MIS__. */
+#ifdef __LAMP_MIS__
+	if(kernel_data.integrator.use_lamp_mis && !(state->flag & PATH_RAY_CAMERA)) {
+		/* Ray starting from the previous non-transparent bounce: the origin is moved back along D by state->ray_t. */
+		Ray light_ray;
+
+		light_ray.P = ray->P - state->ray_t*ray->D;
+		state->ray_t += isect->t;  /* Side effect: extends the distance stored in the path state by this segment. */
+		light_ray.D = ray->D;
+		light_ray.t = state->ray_t;
+		light_ray.time = ray->time;
+		light_ray.dD = ray->dD;
+		light_ray.dP = ray->dP;
+
+		/* Intersect with lamp; emission is accumulated only when indirect_lamp_emission() returns true. */
+		float3 emission;
+
+		if(indirect_lamp_emission(kg, emission_sd, state, &light_ray, &emission))
+			path_radiance_accum_emission(L, state, throughput, emission);
+	}
+#endif  /* __LAMP_MIS__ */
+}
+
+ccl_device_forceinline void kernel_path_background(
+	KernelGlobals *kg,
+	ccl_addr_space PathState *state,
+	ccl_addr_space Ray *ray,
+	float3 throughput,
+	ShaderData *sd,
+	PathRadiance *L)
+{  /* Accumulate transparency and/or background radiance into L; throughput is a by-value copy, so changes below stay local. */
+	/* Eval background shader if nothing hit; for a transparent background first add the average throughput to L->transparent. */
+	if(kernel_data.background.transparent && (state->flag & PATH_RAY_TRANSPARENT_BACKGROUND)) {
+		L->transparent += average(throughput);
+
+#ifdef __PASSES__
+		if(!(kernel_data.film.light_pass_flag & PASSMASK(BACKGROUND)))
+#endif  /* __PASSES__ */
+			return;  /* Unconditional without __PASSES__; with it, taken only when the BACKGROUND bit is not set in light_pass_flag. */
+	}
+
+	/* When using the ao bounces approximation, adjust background
+	 * shader intensity with ao factor. */
+	if(path_state_ao_bounce(kg, state)) {
+		throughput *= kernel_data.background.ao_bounces_factor;
+	}
+
+#ifdef __BACKGROUND__
+	/* Sample the background shader and accumulate its result, weighted by throughput, into L. */
+	float3 L_background = indirect_background(kg, sd, state, ray);
+	path_radiance_accum_background(L, state, throughput, L_background);
+#endif  /* __BACKGROUND__ */
+}
+
+#ifndef __SPLIT_KERNEL__
+
+#ifdef __VOLUME__
+ccl_device_forceinline VolumeIntegrateResult kernel_path_volume(
+	KernelGlobals *kg,
+	ShaderData *sd,
+	PathState *state,
+	Ray *ray,
+	float3 *throughput,
+	ccl_addr_space Intersection *isect,
+	bool hit,
+	ShaderData *emission_sd,
+	PathRadiance *L)
+{  /* Integrate the volume along *ray up to the hit (or FLT_MAX on a miss); may update *throughput, *ray, *state and L. */
+	/* Sanitize volume stack. */
+	if(!hit) {
+		kernel_volume_clean_stack(kg, state->volume_stack);
+	}
+
+	if(state->volume_stack[0].shader == SHADER_NONE) {  /* First stack entry has no shader: skip integration entirely. */
+		return VOLUME_PATH_ATTENUATED;
+	}
+
+	/* Volume attenuation, emission, scatter; works on a copy of the ray clipped to the hit distance. */
+	Ray volume_ray = *ray;
+	volume_ray.t = (hit)? isect->t: FLT_MAX;
+
+	bool heterogeneous = volume_stack_is_heterogeneous(kg, state->volume_stack);
+
+#  ifdef __VOLUME_DECOUPLED__
+	int sampling_method = volume_stack_sampling_method(kg, state->volume_stack);
+	bool direct = (state->flag & PATH_RAY_CAMERA) != 0;  /* Camera rays are treated as direct. */
+	bool decoupled = kernel_volume_use_decoupled(kg, heterogeneous, direct, sampling_method);
+
+	if(decoupled) {
+		/* cache steps along volume for repeated sampling */
+		VolumeSegment volume_segment;
+
+		shader_setup_from_volume(kg, sd, &volume_ray);
+		kernel_volume_decoupled_record(kg, state,
+			&volume_ray, sd, &volume_segment, heterogeneous);
+
+		volume_segment.sampling_method = sampling_method;
+
+		/* emission */
+		if(volume_segment.closure_flag & SD_EMISSION)
+			path_radiance_accum_emission(L, state, *throughput, volume_segment.accum_emission);
+
+		/* Scattering; result stays VOLUME_PATH_ATTENUATED when the segment has no SD_SCATTER closure. */
+		VolumeIntegrateResult result = VOLUME_PATH_ATTENUATED;
+
+		if(volume_segment.closure_flag & SD_SCATTER) {
+			int all = kernel_data.integrator.sample_all_lights_indirect;
+
+			/* direct light sampling */
+			kernel_branched_path_volume_connect_light(kg, sd,
+				emission_sd, *throughput, state, L, all,
+				&volume_ray, &volume_segment);
+
+			/* indirect sample. if we use distance sampling and take just
+			 * one sample for direct and indirect light, we could share
+			 * this computation, but makes code a bit complex */
+			float rphase = path_state_rng_1D(kg, state, PRNG_PHASE_CHANNEL);
+			float rscatter = path_state_rng_1D(kg, state, PRNG_SCATTER_DISTANCE);
+
+			result = kernel_volume_decoupled_scatter(kg,
+				state, &volume_ray, sd, throughput,
+				rphase, rscatter, &volume_segment, NULL, true);
+		}
+
+		/* free cached steps */
+		kernel_volume_decoupled_free(kg, &volume_segment);
+
+		if(result == VOLUME_PATH_SCATTERED) {  /* SCATTERED if the bounce set up a new ray, MISSED if it did not. */
+			if(kernel_path_volume_bounce(kg, sd, throughput, state, &L->state, ray))
+				return VOLUME_PATH_SCATTERED;
+			else
+				return VOLUME_PATH_MISSED;
+		}
+		else {
+			*throughput *= volume_segment.accum_transmittance;  /* NOTE(review): read after kernel_volume_decoupled_free(); assumes only the cached steps are released -- confirm. */
+		}
+	}
+	else
+#  endif  /* __VOLUME_DECOUPLED__ */
+	{
+		/* integrate along volume segment with distance sampling */
+		VolumeIntegrateResult result = kernel_volume_integrate(
+			kg, state, sd, &volume_ray, L, throughput, heterogeneous);
+
+#  ifdef __VOLUME_SCATTER__
+		if(result == VOLUME_PATH_SCATTERED) {
+			/* direct lighting */
+			kernel_path_volume_connect_light(kg, sd, emission_sd, *throughput, state, L);
+
+			/* indirect light bounce */
+			if(kernel_path_volume_bounce(kg, sd, throughput, state, &L->state, ray))
+				return VOLUME_PATH_SCATTERED;
+			else
+				return VOLUME_PATH_MISSED;
+		}
+#  endif  /* __VOLUME_SCATTER__ */
+	}
+
+	return VOLUME_PATH_ATTENUATED;  /* No scatter event: the caller continues with surface/background shading. */
+}
+#endif  /* __VOLUME__ */
+
+#endif /* __SPLIT_KERNEL__ */
+
+ccl_device_forceinline bool kernel_path_shader_apply(
+	KernelGlobals *kg,
+	ShaderData *sd,
+	ccl_addr_space PathState *state,
+	ccl_addr_space Ray *ray,
+	float3 throughput,
+	ShaderData *emission_sd,
+	PathRadiance *L,
+	ccl_global float *buffer)
+{  /* Apply shadow catcher, holdout, data passes, BSDF blur and emission for sd; returns false only on the holdout-mask path below. */
+#ifdef __SHADOW_TRICKS__
+	if((sd->object_flag & SD_OBJECT_SHADOW_CATCHER)) {
+		if(state->flag & PATH_RAY_TRANSPARENT_BACKGROUND) {
+			state->flag |= (PATH_RAY_SHADOW_CATCHER |
+						   PATH_RAY_STORE_SHADOW_INFO);
+
+			float3 bg = make_float3(0.0f, 0.0f, 0.0f);  /* Stays black when the background is transparent. */
+			if(!kernel_data.background.transparent) {
+				bg = indirect_background(kg, emission_sd, state, ray);
+			}
+			path_radiance_accum_shadowcatcher(L, throughput, bg);
+		}
+	}
+	else if(state->flag & PATH_RAY_SHADOW_CATCHER) {
+		/* Only update transparency after shadow catcher bounce. */
+		L->shadow_transparency *=
+				average(shader_bsdf_transparency(kg, sd));
+	}
+#endif  /* __SHADOW_TRICKS__ */
+
+	/* Holdout: only considered for rays that still carry PATH_RAY_TRANSPARENT_BACKGROUND. */
+#ifdef __HOLDOUT__
+	if(((sd->flag & SD_HOLDOUT) ||
+		(sd->object_flag & SD_OBJECT_HOLDOUT_MASK)) &&
+	   (state->flag & PATH_RAY_TRANSPARENT_BACKGROUND))
+	{
+		if(kernel_data.background.transparent) {
+			float3 holdout_weight;
+			if(sd->object_flag & SD_OBJECT_HOLDOUT_MASK) {
+				holdout_weight = make_float3(1.0f, 1.0f, 1.0f);
+			}
+			else {
+				holdout_weight = shader_holdout_eval(kg, sd);
+			}
+			/* any throughput is ok, should all be identical here */
+			L->transparent += average(holdout_weight*throughput);
+		}
+
+		if(sd->object_flag & SD_OBJECT_HOLDOUT_MASK) {
+			return false;  /* The only false return: callers in this file break out of the path loop on it. */
+		}
+	}
+#endif  /* __HOLDOUT__ */
+
+	/* Holdout mask objects do not write data passes. NOTE(review): kernel_path_indirect passes buffer == NULL; presumably tolerated here -- confirm. */
+	kernel_write_data_passes(kg, buffer, L, sd, state, throughput);
+
+	/* blurring of bsdf after bounces, for rays that have a small likelihood
+	 * of following this particular path (diffuse, rough glossy) */
+	if(kernel_data.integrator.filter_glossy != FLT_MAX) {  /* FLT_MAX disables the blur. */
+		float blur_pdf = kernel_data.integrator.filter_glossy*state->min_ray_pdf;
+
+		if(blur_pdf < 1.0f) {
+			float blur_roughness = sqrtf(1.0f - blur_pdf)*0.5f;
+			shader_bsdf_blur(kg, sd, blur_roughness);
+		}
+	}
+
+#ifdef __EMISSION__
+	/* Emission: accumulated only for shaders flagged SD_EMISSION. */
+	if(sd->flag & SD_EMISSION) {
+		float3 emission = indirect_primitive_emission(kg, sd, sd->ray_length, state->flag, state->ray_pdf);
+		path_radiance_accum_emission(L, state, throughput, emission);
+	}
+#endif  /* __EMISSION__ */
+
+	return true;
+}
 
 ccl_device_noinline void kernel_path_ao(KernelGlobals *kg,
                                         ShaderData *sd,
                                         ShaderData *emission_sd,
                                         PathRadiance *L,
-                                        PathState *state,
-                                        RNG *rng,
+                                        ccl_addr_space PathState *state,
                                         float3 throughput,
                                         float3 ao_alpha)
 {
 	/* todo: solve correlation */
 	float bsdf_u, bsdf_v;
 
-	path_state_rng_2D(kg, rng, state, PRNG_BSDF_U, &bsdf_u, &bsdf_v);
+	path_state_rng_2D(kg, state, PRNG_BSDF_U, &bsdf_u, &bsdf_v);
 
 	float ao_factor = kernel_data.background.ao_factor;
 	float3 ao_N;
@@ -75,278 +368,118 @@ ccl_device_noinline void kernel_path_ao(KernelGlobals *kg,
 
 	sample_cos_hemisphere(ao_N, bsdf_u, bsdf_v, &ao_D, &ao_pdf);
 
-	if(dot(ccl_fetch(sd, Ng), ao_D) > 0.0f && ao_pdf != 0.0f) {
+	if(dot(sd->Ng, ao_D) > 0.0f && ao_pdf != 0.0f) {
 		Ray light_ray;
 		float3 ao_shadow;
 
-		light_ray.P = ray_offset(ccl_fetch(sd, P), ccl_fetch(sd, Ng));
+		light_ray.P = ray_offset(sd->P, sd->Ng);
 		light_ray.D = ao_D;
 		light_ray.t = kernel_data.background.ao_distance;
-#ifdef __OBJECT_MOTION__
-		light_ray.time = ccl_fetch(sd, time);
-#endif  /* __OBJECT_MOTION__ */
-		light_ray.dP = ccl_fetch(sd, dP);
+		light_ray.time = sd->time;
+		light_ray.dP = sd->dP;
 		light_ray.dD = differential3_zero();
 
-		if(!shadow_blocked(kg, emission_sd, state, &light_ray, &ao_shadow)) {
-			path_radiance_accum_ao(L, throughput, ao_alpha, ao_bsdf, ao_shadow, state->bounce);
+		if(!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &ao_shadow)) {
+			path_radiance_accum_ao(L, state, throughput, ao_alpha, ao_bsdf, ao_shadow);
+		}
+		else {
+			path_radiance_accum_total_ao(L, state, throughput, ao_bsdf);
 		}
 	}
 }
 
+#ifndef __SPLIT_KERNEL__
+
+#if defined(__BRANCHED_PATH__) || defined(__BAKING__)
+
 ccl_device void kernel_path_indirect(KernelGlobals *kg,
                                      ShaderData *sd,
                                      ShaderData *emission_sd,
-                                     RNG *rng,
                                      Ray *ray,
                                      float3 throughput,
-                                     int num_samples,
                                      PathState *state,
                                      PathRadiance *L)
 {
+#ifdef __SUBSURFACE__
+	SubsurfaceIndirectRays ss_indirect;
+	kernel_path_subsurface_init_indirect(&ss_indirect);
+
+	for(;;) {
+#endif  /* __SUBSURFACE__ */
+
 	/* path iteration */
 	for(;;) {
-		/* intersect scene */
+		/* Find intersection with objects in scene. */
 		Intersection isect;
-		uint visibility = path_state_ray_visibility(kg, state);
-		if(state->bounce > kernel_data.integrator.ao_bounces) {
-			visibility = PATH_RAY_SHADOW;
-			ray->t = kernel_data.background.ao_distance;
-		}
-		bool hit = scene_intersect(kg,
-		                           *ray,
-		                           visibility,
-		                           &isect,
-		                           NULL,
-		                           0.0f, 0.0f);
+		bool hit = kernel_path_scene_intersect(kg, state, ray, &isect, L);
 
-#ifdef __LAMP_MIS__
-		if(kernel_data.integrator.use_lamp_mis && !(state->flag & PATH_RAY_CAMERA)) {
-			/* ray starting from previous non-transparent bounce */
-			Ray light_ray;
-
-			light_ray.P = ray->P - state->ray_t*ray->D;
-			state->ray_t += isect.t;
-			light_ray.D = ray->D;
-			light_ray.t = state->ray_t;
-			light_ray.time = ray->time;
-			light_ray.dD = ray->dD;
-			light_ray.dP = ray->dP;
-
-			/* intersect with lamp */
-			float3 emission;
-			if(indirect_lamp_emission(kg, emission_sd, state, &light_ray, &emission)) {
-				path_radiance_accum_emission(L,
-				                             throughput,
-				                             emission,
-				                             state->bounce);
-			}
-		}
-#endif  /* __LAMP_MIS__ */
+		/* Find intersection with lamps and compute emission for MIS. */
+		kernel_path_lamp_emission(kg, state, ray, throughput, &isect, sd, L);
 
 #ifdef __VOLUME__
-		/* Sanitize volume stack. */
-		if(!hit) {
-			kernel_volume_clean_stack(kg, state->volume_stack);
-		}
-		/* volume attenuation, emission, scatter */
-		if(state->volume_stack[0].shader != SHADER_NONE) {
-			Ray volume_ray = *ray;
-			volume_ray.t = (hit)? isect.t: FLT_MAX;
-
-			bool heterogeneous =
-			        volume_stack_is_heterogeneous(kg,
-			                                      state->volume_stack);
-
-#  ifdef __VOLUME_DECOUPLED__
-			int sampling_method =
-			        volume_stack_sampling_method(kg,
-			                                     state->volume_stack);
-			bool decoupled = kernel_volume_use_decoupled(kg, heterogeneous, false, sampling_method);
-
-			if(decoupled) {
-				/* cache steps along volume for repeated sampling */
-				VolumeSegment volume_segment;
-
-				shader_setup_from_volume(kg,
-				                         sd,
-				                         &volume_ray);
-				kernel_volume_decoupled_record(kg,
-				                               state,
-				                               &volume_ray,
-				                               sd,
-				                               &volume_segment,
-				                               heterogeneous);
-
-				volume_segment.sampling_method = sampling_method;
-
-				/* emission */
-				if(volume_segment.closure_flag & SD_EMISSION) {
-					path_radiance_accum_emission(L,
-					                             throughput,
-					                             volume_segment.accum_emission,
-					                             state->bounce);
-				}
-
-				/* scattering */
-				VolumeIntegrateResult result = VOLUME_PATH_ATTENUATED;
-
-				if(volume_segment.closure_flag & SD_SCATTER) {
-					int all = kernel_data.integrator.sample_all_lights_indirect;
-
-					/* direct light sampling */
-					kernel_branched_path_volume_connect_light(kg,
-					                                          rng,
-					                                          sd,
-					                                          emission_sd,
-					                                          throughput,
-					                                          state,
-					                                          L,
-					                                          all,
-					                                          &volume_ray,
-					                                          &volume_segment);
-
-					/* indirect sample. if we use distance sampling and take just
-					 * one sample for direct and indirect light, we could share
-					 * this computation, but makes code a bit complex */
-					float rphase = path_state_rng_1D_for_decision(kg, rng, state, PRNG_PHASE);
-					float rscatter = path_state_rng_1D_for_decision(kg, rng, state, PRNG_SCATTER_DISTANCE);
-
-					result = kernel_volume_decoupled_scatter(kg,
-					                                         state,
-					                                         &volume_ray,
-					                                         sd,
-					                                         &throughput,
-					                                         rphase,
-					                                         rscatter,
-					                                         &volume_segment,
-					                                         NULL,
-					                                         true);
-				}
-
-				/* free cached steps */
-				kernel_volume_decoupled_free(kg, &volume_segment);
-
-				if(result == VOLUME_PATH_SCATTERED) {
-					if(kernel_path_volume_bounce(kg,
-					                             rng,
-					                             sd,
-					                             &throughput,
-					                             state,
-					                             L,
-					                             ray))
-					{
-						continue;
-					}
-					else {
-						break;
-					}
-				}
-				else {
-					throughput *= volume_segment.accum_transmittance;
-				}
-			}
-			else
-#  endif  /* __VOLUME_DECOUPLED__ */
-			{
-				/* integrate along volume segment with distance sampling */
-				VolumeIntegrateResult result = kernel_volume_integrate(
-					kg, state, sd, &volume_ray, L, &throughput, rng, heterogeneous);
-
-#  ifdef __VOLUME_SCATTER__
-				if(result == VOLUME_PATH_SCATTERED) {
-					/* direct lighting */
-					kernel_path_volume_connect_light(kg,
-					                                 rng,
-					                                 sd,
-					                                 emission_sd,
-					                                 throughput,
-					                                 state,
-					                                 L);
-
-					/* indirect light bounce */
-					if(kernel_path_volume_bounce(kg,
-					                             rng,
-					                             sd,
-					                             &throughput,
-					                             state,
-					                             L,
-					                             ray))
-					{
-						continue;
-					}
-					else {
-						break;
-					}
-				}
-#  endif  /* __VOLUME_SCATTER__ */
-			}
+		/* Volume integration. */
+		VolumeIntegrateResult result = kernel_path_volume(kg,
+		                                                   sd,
+		                                                   state,
+		                                                   ray,
+		                                                   &throughput,
+		                                                   &isect,
+		                                                   hit,
+		                                                   emission_sd,
+		                                                   L);
+
+		if(result == VOLUME_PATH_SCATTERED) {
+			continue;
+		}
+		else if(result == VOLUME_PATH_MISSED) {
+			break;
 		}
-#endif  /* __VOLUME__ */
+#endif /* __VOLUME__*/
 
+		/* Shade background. */
 		if(!hit) {
-#ifdef __BACKGROUND__
-			/* sample background shader */
-			float3 L_background = indirect_background(kg, emission_sd, state, ray);
-			path_radiance_accum_background(L,
-			                               throughput,
-			                               L_background,
-			                               state->bounce);
-#endif  /* __BACKGROUND__ */
-
+			kernel_path_background(kg, state, ray, throughput, sd, L);
 			break;
 		}
-		else if(state->bounce > kernel_data.integrator.ao_bounces) {
+		else if(path_state_ao_bounce(kg, state)) {
 			break;
 		}
 
-		/* setup shading */
-		shader_setup_from_ray(kg,
-		                      sd,
-		                      &isect,
-		                      ray);
-		float rbsdf = path_state_rng_1D_for_decision(kg, rng, state, PRNG_BSDF);
-		shader_eval_surface(kg, sd, rng, state, rbsdf, state->flag, SHADER_CONTEXT_INDIRECT);
-#ifdef __BRANCHED_PATH__
-		shader_merge_closures(sd);
-#endif  /* __BRANCHED_PATH__ */
-
-		/* blurring of bsdf after bounces, for rays that have a small likelihood
-		 * of following this particular path (diffuse, rough glossy) */
-		if(kernel_data.integrator.filter_glossy != FLT_MAX) {
-			float blur_pdf = kernel_data.integrator.filter_glossy*state->min_ray_pdf;
-
-			if(blur_pdf < 1.0f) {
-				float blur_roughness = sqrtf(1.0f - blur_pdf)*0.5f;
-				shader_bsdf_blur(kg, sd, blur_roughness);
-			}
-		}
+		/* Setup shader data. */
+		shader_setup_from_ray(kg, sd, &isect, ray);
 
-#ifdef __EMISSION__
-		/* emission */
-		if(sd->flag & SD_EMISSION) {
-			float3 emission = indirect_primitive_emission(kg,
-			                                              sd,
-			                                              isect.t,
-			                                              state->flag,
-			                                              state->ray_pdf);
-			path_radiance_accum_emission(L, throughput, emission, state->bounce);
+		/* Skip most work for volume bounding surface. */
+#ifdef __VOLUME__
+		if(!(sd->flag & SD_HAS_ONLY_VOLUME)) {
+#endif
+
+		/* Evaluate shader. */
+		shader_eval_surface(kg, sd, state, state->flag);
+		shader_prepare_closures(sd, state);
+
+		/* Apply shadow catcher, holdout, emission. */
+		if(!kernel_path_shader_apply(kg,
+		                             sd,
+		                             state,
+		                             ray,
+		                             throughput,
+		                             emission_sd,
+		                             L,
+		                             NULL))
+		{
+			break;
 		}
-#endif  /* __EMISSION__ */
 
 		/* path termination. this is a strange place to put the termination, it's
 		 * mainly due to the mixed in MIS that we use. gives too many unneeded
 		 * shader evaluations, only need emission if we are going to terminate */
-		float probability =
-		        path_state_terminate_probability(kg,
-		                                         state,
-		                                         throughput*num_samples);
+		float probability = path_state_continuation_probability(kg, state, throughput);
 
 		if(probability == 0.0f) {
 			break;
 		}
 		else if(probability != 1.0f) {
-			float terminate = path_state_rng_1D_for_decision(kg, rng, state, PRNG_TERMINATE);
+			float terminate = path_state_rng_1D(kg, state, PRNG_TERMINATE);
 
 			if(terminate >= probability)
 				break;
@@ -354,50 +487,39 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
 			throughput /= probability;
 		}
 
+		kernel_update_denoising_features(kg, sd, state, L);
+
 #ifdef __AO__
 		/* ambient occlusion */
 		if(kernel_data.integrator.use_ambient_occlusion || (sd->flag & SD_AO)) {
-			kernel_path_ao(kg, sd, emission_sd, L, state, rng, throughput, make_float3(0.0f, 0.0f, 0.0f));
+			kernel_path_ao(kg, sd, emission_sd, L, state, throughput, make_float3(0.0f, 0.0f, 0.0f));
 		}
 #endif  /* __AO__ */
 
+
 #ifdef __SUBSURFACE__
 		/* bssrdf scatter to a different location on the same object, replacing
 		 * the closures with a diffuse BSDF */
 		if(sd->flag & SD_BSSRDF) {
-			float bssrdf_probability;
-			ShaderClosure *sc = subsurface_scatter_pick_closure(kg, sd, &bssrdf_probability);
-
-			/* modify throughput for picking bssrdf or bsdf */
-			throughput *= bssrdf_probability;
-
-			/* do bssrdf scatter step if we picked a bssrdf closure */
-			if(sc) {
-				uint lcg_state = lcg_state_init(rng, state, 0x68bc21eb);
-
-				float bssrdf_u, bssrdf_v;
-				path_state_rng_2D(kg,
-				                  rng,
-				                  state,
-				                  PRNG_BSDF_U,
-				                  &bssrdf_u, &bssrdf_v);
-				subsurface_scatter_step(kg,
-				                        sd,
-				                        state,
-				                        state->flag,
-				                        sc,
-				                        &lcg_state,
-				                        bssrdf_u, bssrdf_v,
-				                        false);
+			if(kernel_path_subsurface_scatter(kg,
+			                                  sd,
+			                                  emission_sd,
+			                                  L,
+			                                  state,
+			                                  ray,
+			                                  &throughput,
+			                                  &ss_indirect))
+			{
+				break;
 			}
 		}
 #endif  /* __SUBSURFACE__ */
 
-#if defined(__EMISSION__) && defined(__BRANCHED_PATH__)
+#if defined(__EMISSION__)
 		if(kernel_data.integrator.use_direct_light) {
-			int all = kernel_data.integrator.sample_all_lights_indirect;
+			int all = (kernel_data.integrator.sample_all_lights_indirect) ||
+			          (state->flag & PATH_RAY_SHADOW_CATCHER);
 			kernel_branched_path_surface_connect_light(kg,
-			                                           rng,
 			                                           sd,
 			                                           emission_sd,
 			                                           state,
@@ -406,205 +528,48 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
 			                                           L,
 			                                           all);
 		}
-#endif  /* defined(__EMISSION__) && defined(__BRANCHED_PATH__) */
+#endif  /* defined(__EMISSION__) */
 
-		if(!kernel_path_surface_bounce(kg, rng, sd, &throughput, state, L, ray))
+#ifdef __VOLUME__
+		}
+#endif
+
+		if(!kernel_path_surface_bounce(kg, sd, &throughput, state, &L->state, ray))
 			break;
 	}
-}
 
 #ifdef __SUBSURFACE__
-#  ifndef __KERNEL_CUDA__
-ccl_device
-#  else
-ccl_device_inline
-#  endif
-bool kernel_path_subsurface_scatter(
-        KernelGlobals *kg,
-        ShaderData *sd,
-        ShaderData *emission_sd,
-        PathRadiance *L,
-        PathState *state,
-        RNG *rng,
-        Ray *ray,
-        float3 *throughput,
-        SubsurfaceIndirectRays *ss_indirect)
-{
-	float bssrdf_probability;
-	ShaderClosure *sc = subsurface_scatter_pick_closure(kg, sd, &bssrdf_probability);
-
-	/* modify throughput for picking bssrdf or bsdf */
-	*throughput *= bssrdf_probability;
-
-	/* do bssrdf scatter step if we picked a bssrdf closure */
-	if(sc) {
-		/* We should never have two consecutive BSSRDF bounces,
-		 * the second one should be converted to a diffuse BSDF to
-		 * avoid this.
+		/* Trace indirect subsurface rays by restarting the loop. This uses less
+		 * stack memory than invoking kernel_path_indirect.
 		 */
-		kernel_assert(!ss_indirect->tracing);
-
-		uint lcg_state = lcg_state_init(rng, state, 0x68bc21eb);
-
-		SubsurfaceIntersection ss_isect;
-		float bssrdf_u, bssrdf_v;
-		path_state_rng_2D(kg, rng, state, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v);
-		int num_hits = subsurface_scatter_multi_intersect(kg,
-		                                                  &ss_isect,
-		                                                  sd,
-		                                                  sc,
-		                                                  &lcg_state,
-		                                                  bssrdf_u, bssrdf_v,
-		                                                  false);
-#  ifdef __VOLUME__
-		ss_indirect->need_update_volume_stack =
-		        kernel_data.integrator.use_volumes &&
-		        ccl_fetch(sd, object_flag) & SD_OBJECT_INTERSECTS_VOLUME;
-#  endif  /* __VOLUME__ */
-
-		/* compute lighting with the BSDF closure */
-		for(int hit = 0; hit < num_hits; hit++) {
-			/* NOTE: We reuse the existing ShaderData, we assume the path
-			 * integration loop stops when this function returns true.
-			 */
-			subsurface_scatter_multi_setup(kg,
-			                               &ss_isect,
-			                               hit,
-			                               sd,
-			                               state,
-			                               state->flag,
-			                               sc,
-			                               false);
-
-			PathState *hit_state = &ss_indirect->state[ss_indirect->num_rays];
-			Ray *hit_ray = &ss_indirect->rays[ss_indirect->num_rays];
-			float3 *hit_tp = &ss_indirect->throughputs[ss_indirect->num_rays];
-			PathRadiance *hit_L = &ss_indirect->L[ss_indirect->num_rays];
-
-			*hit_state = *state;
-			*hit_ray = *ray;
-			*hit_tp = *throughput;
-
-			hit_state->rng_offset += PRNG_BOUNCE_NUM;
-
-			path_radiance_init(hit_L, kernel_data.film.use_light_pass);
-			hit_L->direct_throughput = L->direct_throughput;
-			path_radiance_copy_indirect(hit_L, L);
-
-			kernel_path_surface_connect_light(kg, rng, sd, emission_sd, *hit_tp, state, hit_L);
-
-			if(kernel_path_surface_bounce(kg,
-			                              rng,
-			                              sd,
-			                              hit_tp,
-			                              hit_state,
-			                              hit_L,
-			                              hit_ray))
-			{
-#  ifdef __LAMP_MIS__
-				hit_state->ray_t = 0.0f;
-#  endif  /* __LAMP_MIS__ */
-
-#  ifdef __VOLUME__
-				if(ss_indirect->need_update_volume_stack) {
-					Ray volume_ray = *ray;
-					/* Setup ray from previous surface point to the new one. */
-					volume_ray.D = normalize_len(hit_ray->P - volume_ray.P,
-					                             &volume_ray.t);
-
-					kernel_volume_stack_update_for_subsurface(
-					    kg,
-					    emission_sd,
-					    &volume_ray,
-					    hit_state->volume_stack);
-				}
-#  endif  /* __VOLUME__ */
-				path_radiance_reset_indirect(L);
-				ss_indirect->num_rays++;
-			}
-			else {
-				path_radiance_accum_sample(L, hit_L, 1);
-			}
+		if(ss_indirect.num_rays) {
+			kernel_path_subsurface_setup_indirect(kg,
+			                                      &ss_indirect,
+			                                      state,
+			                                      ray,
+			                                      L,
+			                                      &throughput);
 		}
-		return true;
-	}
-	return false;
-}
-
-ccl_device_inline void kernel_path_subsurface_init_indirect(
-        SubsurfaceIndirectRays *ss_indirect)
-{
-	ss_indirect->tracing = false;
-	ss_indirect->num_rays = 0;
-}
-
-ccl_device void kernel_path_subsurface_accum_indirect(
-        SubsurfaceIndirectRays *ss_indirect,
-        PathRadiance *L)
-{
-	if(ss_indirect->tracing) {
-		path_radiance_sum_indirect(L);
-		path_radiance_accum_sample(&ss_indirect->direct_L, L, 1);
-		if(ss_indirect->num_rays == 0) {
-			*L = ss_indirect->direct_L;
+		else {
+			break;
 		}
 	}
+#endif  /* __SUBSURFACE__ */
 }
 
-ccl_device void kernel_path_subsurface_setup_indirect(
-        KernelGlobals *kg,
-        SubsurfaceIndirectRays *ss_indirect,
-        PathState *state,
-        Ray *ray,
-        PathRadiance *L,
-        float3 *throughput)
-{
-	if(!ss_indirect->tracing) {
-		ss_indirect->direct_L = *L;
-	}
-	ss_indirect->tracing = true;
-
-	/* Setup state, ray and throughput for indirect SSS rays. */
-	ss_indirect->num_rays--;
-
-	Ray *indirect_ray = &ss_indirect->rays[ss_indirect->num_rays];
-	PathRadiance *indirect_L = &ss_indirect->L[ss_indirect->num_rays];
-
-	*state = ss_indirect->state[ss_indirect->num_rays];
-	*ray = *indirect_ray;
-	*L = *indirect_L;
-	*throughput = ss_indirect->throughputs[ss_indirect->num_rays];
-
-	state->rng_offset += ss_indirect->num_rays * PRNG_BOUNCE_NUM;
-}
-
-#endif  /* __SUBSURFACE__ */
+#endif /* defined(__BRANCHED_PATH__) || defined(__BAKING__) */
 
-ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
-                                               RNG *rng,
-                                               int sample,
-                                               Ray ray,
-                                               ccl_global float *buffer)
+ccl_device_forceinline void kernel_path_integrate(
+	KernelGlobals *kg,
+	PathState *state,
+	float3 throughput,
+	Ray *ray,
+	PathRadiance *L,
+	ccl_global float *buffer,
+	ShaderData *emission_sd)
 {
-	/* initialize */
-	PathRadiance L;
-	float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
-	float L_transparent = 0.0f;
-
-	path_radiance_init(&L, kernel_data.film.use_light_pass);
-
-	/* shader data memory used for both volumes and surfaces, saves stack space */
+	/* Shader data memory used for both volumes and surfaces, saves stack space. */
 	ShaderData sd;
-	/* shader data used by emission, shadows, volume stacks */
-	ShaderData emission_sd;
-
-	PathState state;
-	path_state_init(kg, &emission_sd, &state, rng, sample, &ray);
-
-#ifdef __KERNEL_DEBUG__
-	DebugData debug_data;
-	debug_data_init(&debug_data);
-#endif  /* __KERNEL_DEBUG__ */
 
 #ifdef __SUBSURFACE__
 	SubsurfaceIndirectRays ss_indirect;
@@ -615,248 +580,89 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
 
 	/* path iteration */
 	for(;;) {
-		/* intersect scene */
+		/* Find intersection with objects in scene. */
 		Intersection isect;
-		uint visibility = path_state_ray_visibility(kg, &state);
-
-#ifdef __HAIR__
-		float difl = 0.0f, extmax = 0.0f;
-		uint lcg_state = 0;
-
-		if(kernel_data.bvh.have_curves) {
-			if((kernel_data.cam.resolution == 1) && (state.flag & PATH_RAY_CAMERA)) {	
-				float3 pixdiff = ray.dD.dx + ray.dD.dy;
-				/*pixdiff = pixdiff - dot(pixdiff, ray.D)*ray.D;*/
-				difl = kernel_data.curve.minimum_width * len(pixdiff) * 0.5f;
-			}
+		bool hit = kernel_path_scene_intersect(kg, state, ray, &isect, L);
 
-			extmax = kernel_data.curve.maximum_width;
-			lcg_state = lcg_state_init(rng, &state, 0x51633e2d);
-		}
-
-		if(state.bounce > kernel_data.integrator.ao_bounces) {
-			visibility = PATH_RAY_SHADOW;
-			ray.t = kernel_data.background.ao_distance;
-		}
-
-		bool hit = scene_intersect(kg, ray, visibility, &isect, &lcg_state, difl, extmax);
-#else
-		bool hit = scene_intersect(kg, ray, visibility, &isect, NULL, 0.0f, 0.0f);
-#endif  /* __HAIR__ */
-
-#ifdef __KERNEL_DEBUG__
-		if(state.flag & PATH_RAY_CAMERA) {
-			debug_data.num_bvh_traversed_nodes += isect.num_traversed_nodes;
-			debug_data.num_bvh_traversed_instances += isect.num_traversed_instances;
-			debug_data.num_bvh_intersections += isect.num_intersections;
-		}
-		debug_data.num_ray_bounces++;
-#endif  /* __KERNEL_DEBUG__ */
-
-#ifdef __LAMP_MIS__
-		if(kernel_data.integrator.use_lamp_mis && !(state.flag & PATH_RAY_CAMERA)) {
-			/* ray starting from previous non-transparent bounce */
-			Ray light_ray;
-
-			light_ray.P = ray.P - state.ray_t*ray.D;
-			state.ray_t += isect.t;
-			light_ray.D = ray.D;
-			light_ray.t = state.ray_t;
-			light_ray.time = ray.time;
-			light_ray.dD = ray.dD;
-			light_ray.dP = ray.dP;
-
-			/* intersect with lamp */
-			float3 emission;
-
-			if(indirect_lamp_emission(kg, &emission_sd, &state, &light_ray, &emission))
-				path_radiance_accum_emission(&L, throughput, emission, state.bounce);
-		}
-#endif  /* __LAMP_MIS__ */
+		/* Find intersection with lamps and compute emission for MIS. */
+		kernel_path_lamp_emission(kg, state, ray, throughput, &isect, &sd, L);
 
 #ifdef __VOLUME__
-		/* Sanitize volume stack. */
-		if(!hit) {
-			kernel_volume_clean_stack(kg, state.volume_stack);
-		}
-		/* volume attenuation, emission, scatter */
-		if(state.volume_stack[0].shader != SHADER_NONE) {
-			Ray volume_ray = ray;
-			volume_ray.t = (hit)? isect.t: FLT_MAX;
-
-			bool heterogeneous = volume_stack_is_heterogeneous(kg, state.volume_stack);
-
-#  ifdef __VOLUME_DECOUPLED__
-			int sampling_method = volume_stack_sampling_method(kg, state.volume_stack);
-			bool decoupled = kernel_volume_use_decoupled(kg, heterogeneous, true, sampling_method);
-
-			if(decoupled) {
-				/* cache steps along volume for repeated sampling */
-				VolumeSegment volume_segment;
-
-				shader_setup_from_volume(kg, &sd, &volume_ray);
-				kernel_volume_decoupled_record(kg, &state,
-					&volume_ray, &sd, &volume_segment, heterogeneous);
-
-				volume_segment.sampling_method = sampling_method;
-
-				/* emission */
-				if(volume_segment.closure_flag & SD_EMISSION)
-					path_radiance_accum_emission(&L, throughput, volume_segment.accum_emission, state.bounce);
-
-				/* scattering */
-				VolumeIntegrateResult result = VOLUME_PATH_ATTENUATED;
-
-				if(volume_segment.closure_flag & SD_SCATTER) {
-					int all = false;
-
-					/* direct light sampling */
-					kernel_branched_path_volume_connect_light(kg, rng, &sd,
-						&emission_sd, throughput, &state, &L, all,
-						&volume_ray, &volume_segment);
-
-					/* indirect sample. if we use distance sampling and take just
-					 * one sample for direct and indirect light, we could share
-					 * this computation, but makes code a bit complex */
-					float rphase = path_state_rng_1D_for_decision(kg, rng, &state, PRNG_PHASE);
-					float rscatter = path_state_rng_1D_for_decision(kg, rng, &state, PRNG_SCATTER_DISTANCE);
-
-					result = kernel_volume_decoupled_scatter(kg,
-						&state, &volume_ray, &sd, &throughput,
-						rphase, rscatter, &volume_segment, NULL, true);
-				}
-
-				/* free cached steps */
-				kernel_volume_decoupled_free(kg, &volume_segment);
-
-				if(result == VOLUME_PATH_SCATTERED) {
-					if(kernel_path_volume_bounce(kg, rng, &sd, &throughput, &state, &L, &ray))
-						continue;
-					else
-						break;
-				}
-				else {
-					throughput *= volume_segment.accum_transmittance;
-				}
-			}
-			else
-#  endif  /* __VOLUME_DECOUPLED__ */
-			{
-				/* integrate along volume segment with distance sampling */
-				VolumeIntegrateResult result = kernel_volume_integrate(
-					kg, &state, &sd, &volume_ray, &L, &throughput, rng, heterogeneous);
-
-#  ifdef __VOLUME_SCATTER__
-				if(result == VOLUME_PATH_SCATTERED) {
-					/* direct lighting */
-					kernel_path_volume_connect_light(kg, rng, &sd, &emission_sd, throughput, &state, &L);
-
-					/* indirect light bounce */
-					if(kernel_path_volume_bounce(kg, rng, &sd, &throughput, &state, &L, &ray))
-						continue;
-					else
-						break;
-				}
-#  endif  /* __VOLUME_SCATTER__ */
-			}
+		/* Volume integration. */
+		VolumeIntegrateResult result = kernel_path_volume(kg,
+		                                                   &sd,
+		                                                   state,
+		                                                   ray,
+		                                                   &throughput,
+		                                                   &isect,
+		                                                   hit,
+		                                                   emission_sd,
+		                                                   L);
+
+		if(result == VOLUME_PATH_SCATTERED) {
+			continue;
+		}
+		else if(result == VOLUME_PATH_MISSED) {
+			break;
 		}
-#endif  /* __VOLUME__ */
+#endif  /* __VOLUME__ */
 
+		/* Shade background. */
 		if(!hit) {
-			/* eval background shader if nothing hit */
-			if(kernel_data.background.transparent && (state.flag & PATH_RAY_CAMERA)) {
-				L_transparent += average(throughput);
-
-#ifdef __PASSES__
-				if(!(kernel_data.film.pass_flag & PASS_BACKGROUND))
-#endif  /* __PASSES__ */
-					break;
-			}
-
-#ifdef __BACKGROUND__
-			/* sample background shader */
-			float3 L_background = indirect_background(kg, &emission_sd, &state, &ray);
-			path_radiance_accum_background(&L, throughput, L_background, state.bounce);
-#endif  /* __BACKGROUND__ */
-
+			kernel_path_background(kg, state, ray, throughput, &sd, L);
 			break;
 		}
-		else if(state.bounce > kernel_data.integrator.ao_bounces) {
+		else if(path_state_ao_bounce(kg, state)) {
 			break;
 		}
 
-		/* setup shading */
-		shader_setup_from_ray(kg, &sd, &isect, &ray);
-		float rbsdf = path_state_rng_1D_for_decision(kg, rng, &state, PRNG_BSDF);
-		shader_eval_surface(kg, &sd, rng, &state, rbsdf, state.flag, SHADER_CONTEXT_MAIN);
-
-		/* holdout */
-#ifdef __HOLDOUT__
-		if(((sd.flag & SD_HOLDOUT) ||
-		    (sd.object_flag & SD_OBJECT_HOLDOUT_MASK)) &&
-		   (state.flag & PATH_RAY_CAMERA))
-		{
-			if(kernel_data.background.transparent) {
-				float3 holdout_weight;
-				if(sd.object_flag & SD_OBJECT_HOLDOUT_MASK) {
-					holdout_weight = make_float3(1.0f, 1.0f, 1.0f);
-				}
-				else {
-					holdout_weight = shader_holdout_eval(kg, &sd);
-				}
-				/* any throughput is ok, should all be identical here */
-				L_transparent += average(holdout_weight*throughput);
-			}
+		/* Setup shader data. */
+		shader_setup_from_ray(kg, &sd, &isect, ray);
 
-			if(sd.object_flag & SD_OBJECT_HOLDOUT_MASK) {
-				break;
-			}
-		}
-#endif  /* __HOLDOUT__ */
-
-		/* holdout mask objects do not write data passes */
-		kernel_write_data_passes(kg, buffer, &L, &sd, sample, &state, throughput);
-
-		/* blurring of bsdf after bounces, for rays that have a small likelihood
-		 * of following this particular path (diffuse, rough glossy) */
-		if(kernel_data.integrator.filter_glossy != FLT_MAX) {
-			float blur_pdf = kernel_data.integrator.filter_glossy*state.min_ray_pdf;
-
-			if(blur_pdf < 1.0f) {
-				float blur_roughness = sqrtf(1.0f - blur_pdf)*0.5f;
-				shader_bsdf_blur(kg, &sd, blur_roughness);
-			}
-		}
+		/* Skip most of the surface work when the hit shader has only a volume. */
+#ifdef __VOLUME__
+		if(!(sd.flag & SD_HAS_ONLY_VOLUME)) {
+#endif
 
-#ifdef __EMISSION__
-		/* emission */
-		if(sd.flag & SD_EMISSION) {
-			/* todo: is isect.t wrong here for transparent surfaces? */
-			float3 emission = indirect_primitive_emission(kg, &sd, isect.t, state.flag, state.ray_pdf);
-			path_radiance_accum_emission(&L, throughput, emission, state.bounce);
+		/* Evaluate shader. */
+		shader_eval_surface(kg, &sd, state, state->flag);
+		shader_prepare_closures(&sd, state);
+
+		/* Apply shadow catcher, holdout, emission. */
+		if(!kernel_path_shader_apply(kg,
+		                             &sd,
+		                             state,
+		                             ray,
+		                             throughput,
+		                             emission_sd,
+		                             L,
+		                             buffer))
+		{
+			break;
 		}
-#endif  /* __EMISSION__ */
 
 		/* path termination. this is a strange place to put the termination, it's
 		 * mainly due to the mixed in MIS that we use. gives too many unneeded
 		 * shader evaluations, only need emission if we are going to terminate */
-		float probability = path_state_terminate_probability(kg, &state, throughput);
+		float probability = path_state_continuation_probability(kg, state, throughput);
 
 		if(probability == 0.0f) {
 			break;
 		}
 		else if(probability != 1.0f) {
-			float terminate = path_state_rng_1D_for_decision(kg, rng, &state, PRNG_TERMINATE);
+			float terminate = path_state_rng_1D(kg, state, PRNG_TERMINATE);
 			if(terminate >= probability)
 				break;
 
 			throughput /= probability;
 		}
 
+		kernel_update_denoising_features(kg, &sd, state, L);
+
 #ifdef __AO__
 		/* ambient occlusion */
 		if(kernel_data.integrator.use_ambient_occlusion || (sd.flag & SD_AO)) {
-			kernel_path_ao(kg, &sd, &emission_sd, &L, &state, rng, throughput, shader_bsdf_alpha(kg, &sd));
+			kernel_path_ao(kg, &sd, emission_sd, L, state, throughput, shader_bsdf_alpha(kg, &sd));
 		}
 #endif  /* __AO__ */
 
@@ -866,11 +672,10 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
 		if(sd.flag & SD_BSSRDF) {
 			if(kernel_path_subsurface_scatter(kg,
 			                                  &sd,
-			                                  &emission_sd,
-			                                  &L,
-			                                  &state,
-			                                  rng,
-			                                  &ray,
+			                                  emission_sd,
+			                                  L,
+			                                  state,
+			                                  ray,
 			                                  &throughput,
 			                                  &ss_indirect))
 			{
@@ -880,25 +685,27 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
 #endif  /* __SUBSURFACE__ */
 
 		/* direct lighting */
-		kernel_path_surface_connect_light(kg, rng, &sd, &emission_sd, throughput, &state, &L);
+		kernel_path_surface_connect_light(kg, &sd, emission_sd, throughput, state, L);
+
+#ifdef __VOLUME__
+		}
+#endif
 
 		/* compute direct lighting and next bounce */
-		if(!kernel_path_surface_bounce(kg, rng, &sd, &throughput, &state, &L, &ray))
+		if(!kernel_path_surface_bounce(kg, &sd, &throughput, state, &L->state, ray))
 			break;
 	}
 
 #ifdef __SUBSURFACE__
-		kernel_path_subsurface_accum_indirect(&ss_indirect, &L);
-
 		/* Trace indirect subsurface rays by restarting the loop. this uses less
 		 * stack memory than invoking kernel_path_indirect.
 		 */
 		if(ss_indirect.num_rays) {
 			kernel_path_subsurface_setup_indirect(kg,
 			                                      &ss_indirect,
-			                                      &state,
-			                                      &ray,
-			                                      &L,
+			                                      state,
+			                                      ray,
+			                                      L,
 			                                      &throughput);
 		}
 		else {
@@ -906,48 +713,53 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
 		}
 	}
 #endif  /* __SUBSURFACE__ */
-
-	float3 L_sum = path_radiance_clamp_and_sum(kg, &L);
-
-	kernel_write_light_passes(kg, buffer, &L, sample);
-
-#ifdef __KERNEL_DEBUG__
-	kernel_write_debug_passes(kg, buffer, &state, &debug_data, sample);
-#endif  /* __KERNEL_DEBUG__ */
-
-	return make_float4(L_sum.x, L_sum.y, L_sum.z, 1.0f - L_transparent);
 }
 
 ccl_device void kernel_path_trace(KernelGlobals *kg,
-	ccl_global float *buffer, ccl_global uint *rng_state,
+	ccl_global float *buffer,
 	int sample, int x, int y, int offset, int stride)
 {
 	/* buffer offset */
 	int index = offset + x + y*stride;
 	int pass_stride = kernel_data.film.pass_stride;
 
-	rng_state += index;
 	buffer += index*pass_stride;
 
-	/* initialize random numbers and ray */
-	RNG rng;
+	/* Initialize random numbers and sample ray. */
+	uint rng_hash;
 	Ray ray;
 
-	kernel_path_trace_setup(kg, rng_state, sample, x, y, &rng, &ray);
+	kernel_path_trace_setup(kg, sample, x, y, &rng_hash, &ray);
 
-	/* integrate */
-	float4 L;
+	if(ray.t == 0.0f) {
+		return;
+	}
 
-	if(ray.t != 0.0f)
-		L = kernel_path_integrate(kg, &rng, sample, ray, buffer);
-	else
-		L = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+	/* Initialize state. */
+	float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
 
-	/* accumulate result in output buffer */
-	kernel_write_pass_float4(buffer, sample, L);
+	PathRadiance L;
+	path_radiance_init(&L, kernel_data.film.use_light_pass);
+
+	ShaderDataTinyStorage emission_sd_storage;
+	ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage);
 
-	path_rng_end(kg, rng_state, rng);
+	PathState state;
+	path_state_init(kg, emission_sd, &state, rng_hash, sample, &ray);
+
+	/* Integrate. */
+	kernel_path_integrate(kg,
+	                      &state,
+	                      throughput,
+	                      &ray,
+	                      &L,
+	                      buffer,
+	                      emission_sd);
+
+	kernel_write_result(kg, buffer, sample, &L);
 }
 
+#endif  /* __SPLIT_KERNEL__ */
+
 CCL_NAMESPACE_END