git.blender.org/blender.git

author     Mai Lavelle <mai.lavelle@gmail.com>  2017-03-21 05:31:54 +0300
committer  Mai Lavelle <mai.lavelle@gmail.com>  2017-05-02 21:26:46 +0300
commit     915766f42df56a0fee47e8eb21f8b024f01afe18 (patch)
tree       52b29b9ff2385e42221e2d39741a220354937e63 /intern/cycles
parent     89b1805df624bf665336baaf51650928c556456b (diff)
Cycles: Branched path tracing for the split kernel
This implements branched path tracing for the split kernel.

The general approach is to store the ray state at a branch point, trace the branched ray as normal, then restore the state as necessary before iterating to the next part of the path. A state machine is used to advance the indirect loop state, which avoids the need to add any new kernels. Each iteration, the state machine recreates as much state as possible from the stored ray to keep overall storage down.

It's rather hard to keep all the different integration loops in sync, so this needs plenty of testing to make sure everything is working correctly. We should probably start trying to deduplicate the integration loops now.

Non-branched BMW is ~2% slower, while classroom is ~2% faster; other scenes could still use more testing.

Reviewers: sergey, nirved

Reviewed By: nirved

Subscribers: Blendify, bliblubli

Differential Revision: https://developer.blender.org/D2611
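A minimal sketch in plain C++ of the store-then-resume idea described above (this is not Cycles kernel code; PathState and the "tracing" step are stand-ins, and only the next_closure/next_sample counters mirror the patch). A snapshot taken at the branch point plus two small counters is enough to resume the indirect loop across kernel invocations without adding new kernels:

#include <cstdio>

struct PathState { int bounce; float throughput; };   // stand-in for the real per-ray state

struct SplitBranchedState {          // mirrors the per-ray storage idea in this patch
    PathState saved{};               // snapshot taken at the branch point
    int next_closure = 0;            // which closure to continue from
    int next_sample  = 0;            // which sample of that closure to continue from
};

// One "kernel invocation": restore state, advance one sample, remember the position.
bool indirect_iter(SplitBranchedState &bs, PathState &live, int num_closures, int num_samples)
{
    for (int i = bs.next_closure; i < num_closures; i++) {
        for (int j = bs.next_sample; j < num_samples; j++) {
            live = bs.saved;             // recreate state from the stored ray
            live.bounce += 1;            // pretend we traced one indirect bounce
            bs.next_closure = i;         // advance the state machine
            bs.next_sample  = j + 1;
            return true;                 // hand the ray back to the normal path kernels
        }
        bs.next_sample = 0;
    }
    return false;                        // loop finished; caller restores the original state
}

int main()
{
    PathState live{0, 1.0f};
    SplitBranchedState bs;
    bs.saved = live;
    int iterations = 0;
    while (indirect_iter(bs, live, /*num_closures=*/2, /*num_samples=*/3))
        iterations++;
    std::printf("ran %d branched indirect iterations\n", iterations);  // prints 6
}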
Diffstat (limited to 'intern/cycles')
-rw-r--r--  intern/cycles/blender/addon/ui.py | 3
-rw-r--r--  intern/cycles/device/device_split_kernel.cpp | 1
-rw-r--r--  intern/cycles/kernel/CMakeLists.txt | 1
-rw-r--r--  intern/cycles/kernel/kernel_path.h | 6
-rw-r--r--  intern/cycles/kernel/kernel_path_branched.h | 5
-rw-r--r--  intern/cycles/kernel/kernel_path_surface.h | 2
-rw-r--r--  intern/cycles/kernel/kernel_subsurface.h | 4
-rw-r--r--  intern/cycles/kernel/kernel_types.h | 59
-rw-r--r--  intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h | 2
-rw-r--r--  intern/cycles/kernel/kernels/cuda/kernel_split.cu | 2
-rw-r--r--  intern/cycles/kernel/kernels/opencl/kernel_subsurface_scatter.cl | 3
-rw-r--r--  intern/cycles/kernel/split/kernel_branched.h | 150
-rw-r--r--  intern/cycles/kernel/split/kernel_data_init.h | 19
-rw-r--r--  intern/cycles/kernel/split/kernel_direct_lighting.h | 66
-rw-r--r--  intern/cycles/kernel/split/kernel_do_volume.h | 180
-rw-r--r--  intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h | 105
-rw-r--r--  intern/cycles/kernel/split/kernel_indirect_background.h | 6
-rw-r--r--  intern/cycles/kernel/split/kernel_indirect_subsurface.h | 37
-rw-r--r--  intern/cycles/kernel/split/kernel_next_iteration_setup.h | 220
-rw-r--r--  intern/cycles/kernel/split/kernel_shader_eval.h | 25
-rw-r--r--  intern/cycles/kernel/split/kernel_shadow_blocked_ao.h | 42
-rw-r--r--  intern/cycles/kernel/split/kernel_shadow_blocked_dl.h | 79
-rw-r--r--  intern/cycles/kernel/split/kernel_split_common.h | 61
-rw-r--r--  intern/cycles/kernel/split/kernel_split_data_types.h | 45
-rw-r--r--  intern/cycles/kernel/split/kernel_subsurface_scatter.h | 309
25 files changed, 1043 insertions, 389 deletions
diff --git a/intern/cycles/blender/addon/ui.py b/intern/cycles/blender/addon/ui.py
index 19ab9053e9a..ab1fe8924bc 100644
--- a/intern/cycles/blender/addon/ui.py
+++ b/intern/cycles/blender/addon/ui.py
@@ -78,7 +78,7 @@ def use_cuda(context):
def use_branched_path(context):
cscene = context.scene.cycles
- return (cscene.progressive == 'BRANCHED_PATH' and not use_opencl(context))
+ return (cscene.progressive == 'BRANCHED_PATH')
def use_sample_all_lights(context):
@@ -156,7 +156,6 @@ class CyclesRender_PT_sampling(CyclesButtonsPanel, Panel):
row = layout.row()
sub = row.row()
- sub.active = get_device_type(context) != 'OPENCL' or use_cpu(context)
sub.prop(cscene, "progressive", text="")
row.prop(cscene, "use_square_samples")
diff --git a/intern/cycles/device/device_split_kernel.cpp b/intern/cycles/device/device_split_kernel.cpp
index 71d52bb8097..abd26f5be49 100644
--- a/intern/cycles/device/device_split_kernel.cpp
+++ b/intern/cycles/device/device_split_kernel.cpp
@@ -240,6 +240,7 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task,
ENQUEUE_SPLIT_KERNEL(shader_eval, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(holdout_emission_blurring_pathtermination_ao, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(subsurface_scatter, global_size, local_size);
+ ENQUEUE_SPLIT_KERNEL(queue_enqueue, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(direct_lighting, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(shadow_blocked_ao, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(shadow_blocked_dl, global_size, local_size);
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index 3750225571d..a92e8bc4aee 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -235,6 +235,7 @@ set(SRC_UTIL_HEADERS
)
set(SRC_SPLIT_HEADERS
+ split/kernel_branched.h
split/kernel_buffer_update.h
split/kernel_data_init.h
split/kernel_direct_lighting.h
diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h
index e7957042182..58da141aed3 100644
--- a/intern/cycles/kernel/kernel_path.h
+++ b/intern/cycles/kernel/kernel_path.h
@@ -58,7 +58,7 @@ ccl_device_noinline void kernel_path_ao(KernelGlobals *kg,
ShaderData *sd,
ShaderData *emission_sd,
PathRadiance *L,
- PathState *state,
+ ccl_addr_space PathState *state,
RNG *rng,
float3 throughput,
float3 ao_alpha)
@@ -98,6 +98,8 @@ ccl_device_noinline void kernel_path_ao(KernelGlobals *kg,
}
}
+#ifndef __SPLIT_KERNEL__
+
ccl_device void kernel_path_indirect(KernelGlobals *kg,
ShaderData *sd,
ShaderData *emission_sd,
@@ -818,5 +820,7 @@ ccl_device void kernel_path_trace(KernelGlobals *kg,
path_rng_end(kg, rng_state, rng);
}
+#endif /* __SPLIT_KERNEL__ */
+
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_path_branched.h b/intern/cycles/kernel/kernel_path_branched.h
index 085eb42325d..ddcb57161ea 100644
--- a/intern/cycles/kernel/kernel_path_branched.h
+++ b/intern/cycles/kernel/kernel_path_branched.h
@@ -22,7 +22,7 @@ ccl_device_inline void kernel_branched_path_ao(KernelGlobals *kg,
ShaderData *sd,
ShaderData *emission_sd,
PathRadiance *L,
- PathState *state,
+ ccl_addr_space PathState *state,
RNG *rng,
float3 throughput)
{
@@ -65,6 +65,7 @@ ccl_device_inline void kernel_branched_path_ao(KernelGlobals *kg,
}
}
+#ifndef __SPLIT_KERNEL__
/* bounce off surface and integrate indirect light */
ccl_device_noinline void kernel_branched_path_surface_indirect_light(KernelGlobals *kg,
@@ -648,6 +649,8 @@ ccl_device void kernel_branched_path_trace(KernelGlobals *kg,
path_rng_end(kg, rng_state, rng);
}
+#endif /* __SPLIT_KERNEL__ */
+
#endif /* __BRANCHED_PATH__ */
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_path_surface.h b/intern/cycles/kernel/kernel_path_surface.h
index 076c82f3853..bd4ba775b4d 100644
--- a/intern/cycles/kernel/kernel_path_surface.h
+++ b/intern/cycles/kernel/kernel_path_surface.h
@@ -155,7 +155,7 @@ ccl_device bool kernel_branched_path_surface_bounce(
ccl_addr_space float3 *throughput,
ccl_addr_space PathState *state,
PathRadiance *L,
- Ray *ray)
+ ccl_addr_space Ray *ray)
{
/* sample BSDF */
float bsdf_pdf;
diff --git a/intern/cycles/kernel/kernel_subsurface.h b/intern/cycles/kernel/kernel_subsurface.h
index baf629342b9..274713addc2 100644
--- a/intern/cycles/kernel/kernel_subsurface.h
+++ b/intern/cycles/kernel/kernel_subsurface.h
@@ -417,9 +417,8 @@ ccl_device_noinline void subsurface_scatter_multi_setup(
subsurface_scatter_setup_diffuse_bsdf(sd, sc, weight, true, N);
}
-#ifndef __SPLIT_KERNEL__
/* subsurface scattering step, from a point on the surface to another nearby point on the same object */
-ccl_device void subsurface_scatter_step(KernelGlobals *kg, ShaderData *sd, PathState *state,
+ccl_device void subsurface_scatter_step(KernelGlobals *kg, ShaderData *sd, ccl_global PathState *state,
int state_flag, ShaderClosure *sc, uint *lcg_state, float disk_u, float disk_v, bool all)
{
float3 eval = make_float3(0.0f, 0.0f, 0.0f);
@@ -507,7 +506,6 @@ ccl_device void subsurface_scatter_step(KernelGlobals *kg, ShaderData *sd, PathS
/* setup diffuse bsdf */
subsurface_scatter_setup_diffuse_bsdf(sd, sc, eval, (ss_isect.num_hits > 0), N);
}
-#endif /* ! __SPLIT_KERNEL__ */
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h
index b1269cdb6b4..6417f621c8f 100644
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ -71,22 +71,18 @@ CCL_NAMESPACE_BEGIN
# endif
# define __KERNEL_SHADING__
# define __KERNEL_ADV_SHADING__
-# ifndef __SPLIT_KERNEL__
-# define __BRANCHED_PATH__
-# endif
+# define __BRANCHED_PATH__
# ifdef WITH_OSL
# define __OSL__
# endif
-# define __SUBSURFACE__
# define __PRINCIPLED__
+# define __SUBSURFACE__
# define __CMJ__
# define __VOLUME__
# define __VOLUME_SCATTER__
# define __SHADOW_RECORD_ALL__
-# ifndef __SPLIT_KERNEL__
-# define __VOLUME_DECOUPLED__
-# define __VOLUME_RECORD_ALL__
-# endif
+# define __VOLUME_DECOUPLED__
+# define __VOLUME_RECORD_ALL__
#endif /* __KERNEL_CPU__ */
#ifdef __KERNEL_CUDA__
@@ -138,6 +134,7 @@ CCL_NAMESPACE_BEGIN
# define __VOLUME_SCATTER__
# define __SHADOW_RECORD_ALL__
# define __CMJ__
+# define __BRANCHED_PATH__
# endif /* __KERNEL_OPENCL_AMD__ */
# ifdef __KERNEL_OPENCL_INTEL_CPU__
@@ -1300,7 +1297,6 @@ typedef ccl_addr_space struct DebugData {
* Queue 3 - Shadow ray cast kernel - AO
* Queeu 4 - Shadow ray cast kernel - direct lighting
*/
-#define NUM_QUEUES 4
/* Queue names */
enum QueueNumber {
@@ -1313,22 +1309,37 @@ enum QueueNumber {
* 3. Rays to be regenerated
* are enqueued here.
*/
- QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS = 1,
+ QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
/* All rays for which a shadow ray should be cast to determine radiance
* contribution for AO are enqueued here.
*/
- QUEUE_SHADOW_RAY_CAST_AO_RAYS = 2,
+ QUEUE_SHADOW_RAY_CAST_AO_RAYS,
/* All rays for which a shadow ray should be cast to determine radiance
* contributing for direct lighting are enqueued here.
*/
- QUEUE_SHADOW_RAY_CAST_DL_RAYS = 3,
+ QUEUE_SHADOW_RAY_CAST_DL_RAYS,
+
+#ifdef __BRANCHED_PATH__
+ /* All rays moving to next iteration of the indirect loop for light */
+ QUEUE_LIGHT_INDIRECT_ITER,
+# ifdef __VOLUME__
+ /* All rays moving to next iteration of the indirect loop for volumes */
+ QUEUE_VOLUME_INDIRECT_ITER,
+# endif
+# ifdef __SUBSURFACE__
+ /* All rays moving to next iteration of the indirect loop for subsurface */
+ QUEUE_SUBSURFACE_INDIRECT_ITER,
+# endif
+#endif /* __BRANCHED_PATH__ */
+
+ NUM_QUEUES
};
-/* We use RAY_STATE_MASK to get ray_state (enums 0 to 5) */
-#define RAY_STATE_MASK 0x007
-#define RAY_FLAG_MASK 0x0F8
+/* We use RAY_STATE_MASK to get ray_state */
+#define RAY_STATE_MASK 0x0F
+#define RAY_FLAG_MASK 0xF0
enum RayState {
RAY_INVALID = 0,
/* Denotes ray is actively involved in path-iteration. */
@@ -1343,14 +1354,22 @@ enum RayState {
RAY_TO_REGENERATE,
/* Denotes ray has been regenerated */
RAY_REGENERATED,
- /* Flag's ray has to execute shadow blocked function in AO part */
- RAY_SHADOW_RAY_CAST_AO = 16,
- /* Flag's ray has to execute shadow blocked function in direct lighting part. */
- RAY_SHADOW_RAY_CAST_DL = 32,
+ /* Denotes ray is moving to next iteration of the branched indirect loop */
+ RAY_LIGHT_INDIRECT_NEXT_ITER,
+ RAY_VOLUME_INDIRECT_NEXT_ITER,
+ RAY_SUBSURFACE_INDIRECT_NEXT_ITER,
+
+ /* Ray flags */
+
+ /* Flags to denote that the ray is currently evaluating the branched indirect loop */
+ RAY_BRANCHED_LIGHT_INDIRECT = (1 << 4),
+ RAY_BRANCHED_VOLUME_INDIRECT = (1 << 5),
+ RAY_BRANCHED_SUBSURFACE_INDIRECT = (1 << 6),
+ RAY_BRANCHED_INDIRECT = (RAY_BRANCHED_LIGHT_INDIRECT | RAY_BRANCHED_VOLUME_INDIRECT | RAY_BRANCHED_SUBSURFACE_INDIRECT),
};
#define ASSIGN_RAY_STATE(ray_state, ray_index, state) (ray_state[ray_index] = ((ray_state[ray_index] & RAY_FLAG_MASK) | state))
-#define IS_STATE(ray_state, ray_index, state) ((ray_state[ray_index] & RAY_STATE_MASK) == state)
+#define IS_STATE(ray_state, ray_index, state) ((ray_index) != QUEUE_EMPTY_SLOT && ((ray_state)[(ray_index)] & RAY_STATE_MASK) == (state))
#define ADD_RAY_FLAG(ray_state, ray_index, flag) (ray_state[ray_index] = (ray_state[ray_index] | flag))
#define REMOVE_RAY_FLAG(ray_state, ray_index, flag) (ray_state[ray_index] = (ray_state[ray_index] & (~flag)))
#define IS_FLAG(ray_state, ray_index, flag) (ray_state[ray_index] & flag)
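A small plain-C++ sketch of the widened ray_state layout in the hunk above (enum values here are illustrative only, and the macros are rewritten as functions over a single byte): the low nibble holds one of up to 16 exclusive states, which makes room for the new *_INDIRECT_NEXT_ITER states, while the high nibble holds the independent RAY_BRANCHED_* flags, so assigning a new state preserves the flags.

#include <cassert>
#include <cstdint>

constexpr uint8_t RAY_STATE_MASK = 0x0F;
constexpr uint8_t RAY_FLAG_MASK  = 0xF0;

enum : uint8_t {
    RAY_ACTIVE = 1,                          // example values only
    RAY_LIGHT_INDIRECT_NEXT_ITER = 9,
    RAY_BRANCHED_LIGHT_INDIRECT  = 1 << 4,   // flag bits live in the high nibble
};

// Plain-function equivalents of ASSIGN_RAY_STATE / ADD_RAY_FLAG / IS_STATE / IS_FLAG,
// operating on a single byte instead of the per-ray array.
void assign_state(uint8_t &rs, uint8_t state) { rs = (rs & RAY_FLAG_MASK) | state; }
void add_flag(uint8_t &rs, uint8_t flag)      { rs |= flag; }
bool is_state(uint8_t rs, uint8_t state)      { return (rs & RAY_STATE_MASK) == state; }
bool is_flag(uint8_t rs, uint8_t flag)        { return (rs & flag) != 0; }

int main()
{
    uint8_t rs = 0;
    assign_state(rs, RAY_ACTIVE);
    add_flag(rs, RAY_BRANCHED_LIGHT_INDIRECT);       // enter the branched light-indirect loop
    assign_state(rs, RAY_LIGHT_INDIRECT_NEXT_ITER);  // state changes, flag survives
    assert(is_state(rs, RAY_LIGHT_INDIRECT_NEXT_ITER));
    assert(is_flag(rs, RAY_BRANCHED_LIGHT_INDIRECT));
}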
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
index 148b2eef568..96f54bb427e 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
@@ -183,7 +183,7 @@ DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(queue_enqueue, QueueEnqueueLocals)
DEFINE_SPLIT_KERNEL_FUNCTION(indirect_background)
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(shader_eval, uint)
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(holdout_emission_blurring_pathtermination_ao, BackgroundAOLocals)
-DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(subsurface_scatter, uint)
+DEFINE_SPLIT_KERNEL_FUNCTION(subsurface_scatter)
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(direct_lighting, uint)
DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_ao)
DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_dl)
diff --git a/intern/cycles/kernel/kernels/cuda/kernel_split.cu b/intern/cycles/kernel/kernels/cuda/kernel_split.cu
index a679eff8409..585b91876a9 100644
--- a/intern/cycles/kernel/kernels/cuda/kernel_split.cu
+++ b/intern/cycles/kernel/kernels/cuda/kernel_split.cu
@@ -110,7 +110,7 @@ DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(queue_enqueue, QueueEnqueueLocals)
DEFINE_SPLIT_KERNEL_FUNCTION(indirect_background)
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(shader_eval, uint)
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(holdout_emission_blurring_pathtermination_ao, BackgroundAOLocals)
-DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(subsurface_scatter, uint)
+DEFINE_SPLIT_KERNEL_FUNCTION(subsurface_scatter)
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(direct_lighting, uint)
DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_ao)
DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_dl)
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_subsurface_scatter.cl b/intern/cycles/kernel/kernels/opencl/kernel_subsurface_scatter.cl
index 7a1838e485f..99b74a1802b 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_subsurface_scatter.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_subsurface_scatter.cl
@@ -22,6 +22,5 @@ __kernel void kernel_ocl_path_trace_subsurface_scatter(
ccl_global char *kg,
ccl_constant KernelData *data)
{
- ccl_local unsigned int local_queue_atomics;
- kernel_subsurface_scatter((KernelGlobals*)kg, &local_queue_atomics);
+ kernel_subsurface_scatter((KernelGlobals*)kg);
}
diff --git a/intern/cycles/kernel/split/kernel_branched.h b/intern/cycles/kernel/split/kernel_branched.h
new file mode 100644
index 00000000000..c7bc1b4df0a
--- /dev/null
+++ b/intern/cycles/kernel/split/kernel_branched.h
@@ -0,0 +1,150 @@
+/*
+ * Copyright 2011-2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CCL_NAMESPACE_BEGIN
+
+#ifdef __BRANCHED_PATH__
+
+/* sets up the various state needed to do an indirect loop */
+ccl_device_inline void kernel_split_branched_path_indirect_loop_init(KernelGlobals *kg, int ray_index)
+{
+ SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
+
+ /* save a copy of the state to restore later */
+#define BRANCHED_STORE(name) \
+ branched_state->name = kernel_split_state.name[ray_index];
+
+ BRANCHED_STORE(path_state);
+ BRANCHED_STORE(throughput);
+ BRANCHED_STORE(ray);
+ BRANCHED_STORE(sd);
+ BRANCHED_STORE(isect);
+ BRANCHED_STORE(ray_state);
+
+#undef BRANCHED_STORE
+
+ /* set loop counters to initial position */
+ branched_state->next_closure = 0;
+ branched_state->next_sample = 0;
+}
+
+/* ends an indirect loop and restores the previous state */
+ccl_device_inline void kernel_split_branched_path_indirect_loop_end(KernelGlobals *kg, int ray_index)
+{
+ SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
+
+ /* restore state */
+#define BRANCHED_RESTORE(name) \
+ kernel_split_state.name[ray_index] = branched_state->name;
+
+ BRANCHED_RESTORE(path_state);
+ BRANCHED_RESTORE(throughput);
+ BRANCHED_RESTORE(ray);
+ BRANCHED_RESTORE(sd);
+ BRANCHED_RESTORE(isect);
+ BRANCHED_RESTORE(ray_state);
+
+#undef BRANCHED_RESTORE
+
+ /* leave indirect loop */
+ REMOVE_RAY_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_INDIRECT);
+}
+
+/* bounce off surface and integrate indirect light */
+ccl_device_noinline bool kernel_split_branched_path_surface_indirect_light_iter(KernelGlobals *kg,
+ int ray_index,
+ float num_samples_adjust,
+ ShaderData *saved_sd,
+ bool reset_path_state)
+{
+ SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
+
+ ShaderData *sd = saved_sd;
+ RNG rng = kernel_split_state.rng[ray_index];
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+ float3 throughput = branched_state->throughput;
+
+ for(int i = branched_state->next_closure; i < sd->num_closure; i++) {
+ const ShaderClosure *sc = &sd->closure[i];
+
+ if(!CLOSURE_IS_BSDF(sc->type))
+ continue;
+ /* transparency is not handled here, but in outer loop */
+ if(sc->type == CLOSURE_BSDF_TRANSPARENT_ID)
+ continue;
+
+ int num_samples;
+
+ if(CLOSURE_IS_BSDF_DIFFUSE(sc->type))
+ num_samples = kernel_data.integrator.diffuse_samples;
+ else if(CLOSURE_IS_BSDF_BSSRDF(sc->type))
+ num_samples = 1;
+ else if(CLOSURE_IS_BSDF_GLOSSY(sc->type))
+ num_samples = kernel_data.integrator.glossy_samples;
+ else
+ num_samples = kernel_data.integrator.transmission_samples;
+
+ num_samples = ceil_to_int(num_samples_adjust*num_samples);
+
+ float num_samples_inv = num_samples_adjust/num_samples;
+ RNG bsdf_rng = cmj_hash(rng, i);
+
+ for(int j = branched_state->next_sample; j < num_samples; j++) {
+ ccl_global PathState *ps = &kernel_split_state.path_state[ray_index];
+ if(reset_path_state) {
+ *ps = branched_state->path_state;
+ }
+
+ ccl_global float3 *tp = &kernel_split_state.throughput[ray_index];
+ *tp = throughput;
+
+ ccl_global Ray *bsdf_ray = &kernel_split_state.ray[ray_index];
+
+ if(!kernel_branched_path_surface_bounce(kg,
+ &bsdf_rng,
+ sd,
+ sc,
+ j,
+ num_samples,
+ tp,
+ ps,
+ L,
+ bsdf_ray))
+ {
+ continue;
+ }
+
+ /* update state for next iteration */
+ branched_state->next_closure = i;
+ branched_state->next_sample = j+1;
+ branched_state->num_samples = num_samples;
+
+ /* start the indirect path */
+ *tp *= num_samples_inv;
+
+ return true;
+ }
+
+ branched_state->next_sample = 0;
+ }
+
+ return false;
+}
+
+#endif /* __BRANCHED_PATH__ */
+
+CCL_NAMESPACE_END
+
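The store/restore helpers in the new file above rely on a short-lived macro to copy each named field between the structure-of-arrays split state and the per-ray snapshot. A self-contained sketch of that pattern in plain C++ (a simplified two-field state, not the actual kernel_split_state layout):

#include <cassert>

struct Snapshot   { int path_state; float throughput; };
struct SplitState { int path_state[4]; float throughput[4]; };  // one slot per ray

void store(SplitState &s, Snapshot &snap, int ray_index)
{
// The macro names each field once; it is #undef'd immediately so it cannot leak.
#define BRANCHED_STORE(name) snap.name = s.name[ray_index];
    BRANCHED_STORE(path_state)
    BRANCHED_STORE(throughput)
#undef BRANCHED_STORE
}

void restore(SplitState &s, const Snapshot &snap, int ray_index)
{
#define BRANCHED_RESTORE(name) s.name[ray_index] = snap.name;
    BRANCHED_RESTORE(path_state)
    BRANCHED_RESTORE(throughput)
#undef BRANCHED_RESTORE
}

int main()
{
    SplitState s{};
    s.path_state[2] = 7; s.throughput[2] = 0.5f;
    Snapshot snap{};
    store(s, snap, 2);                            // snapshot at the branch point
    s.path_state[2] = 0; s.throughput[2] = 1.0f;  // the slot gets reused for the branch
    restore(s, snap, 2);                          // put the original path back
    assert(s.path_state[2] == 7 && s.throughput[2] == 0.5f);
}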
diff --git a/intern/cycles/kernel/split/kernel_data_init.h b/intern/cycles/kernel/split/kernel_data_init.h
index 9d3d01fff75..642ccac8239 100644
--- a/intern/cycles/kernel/split/kernel_data_init.h
+++ b/intern/cycles/kernel/split/kernel_data_init.h
@@ -105,21 +105,16 @@ void KERNEL_FUNCTION_FULL_NAME(data_init)(
/* Initialize queue data and queue index. */
if(thread_index < queuesize) {
- /* Initialize active ray queue. */
- kernel_split_state.queue_data[QUEUE_ACTIVE_AND_REGENERATED_RAYS * queuesize + thread_index] = QUEUE_EMPTY_SLOT;
- /* Initialize background and buffer update queue. */
- kernel_split_state.queue_data[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS * queuesize + thread_index] = QUEUE_EMPTY_SLOT;
- /* Initialize shadow ray cast of AO queue. */
- kernel_split_state.queue_data[QUEUE_SHADOW_RAY_CAST_AO_RAYS * queuesize + thread_index] = QUEUE_EMPTY_SLOT;
- /* Initialize shadow ray cast of direct lighting queue. */
- kernel_split_state.queue_data[QUEUE_SHADOW_RAY_CAST_DL_RAYS * queuesize + thread_index] = QUEUE_EMPTY_SLOT;
+ for(int i = 0; i < NUM_QUEUES; i++) {
+ kernel_split_state.queue_data[i * queuesize + thread_index] = QUEUE_EMPTY_SLOT;
+ }
}
if(thread_index == 0) {
- Queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0;
- Queue_index[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = 0;
- Queue_index[QUEUE_SHADOW_RAY_CAST_AO_RAYS] = 0;
- Queue_index[QUEUE_SHADOW_RAY_CAST_DL_RAYS] = 0;
+ for(int i = 0; i < NUM_QUEUES; i++) {
+ Queue_index[i] = 0;
+ }
+
/* The scene-intersect kernel should not use the queues very first time.
* since the queue would be empty.
*/
diff --git a/intern/cycles/kernel/split/kernel_direct_lighting.h b/intern/cycles/kernel/split/kernel_direct_lighting.h
index bdbf7387b95..3336c968a44 100644
--- a/intern/cycles/kernel/split/kernel_direct_lighting.h
+++ b/intern/cycles/kernel/split/kernel_direct_lighting.h
@@ -56,23 +56,6 @@ ccl_device void kernel_direct_lighting(KernelGlobals *kg,
kernel_split_params.queue_size,
0);
-#ifdef __COMPUTE_DEVICE_GPU__
- /* If we are executing on a GPU device, we exit all threads that are not
- * required.
- *
- * If we are executing on a CPU device, then we need to keep all threads
- * active since we have barrier() calls later in the kernel. CPU devices,
- * expect all threads to execute barrier statement.
- */
- if(ray_index == QUEUE_EMPTY_SLOT) {
- return;
- }
-#endif
-
-#ifndef __COMPUTE_DEVICE_GPU__
- if(ray_index != QUEUE_EMPTY_SLOT) {
-#endif
-
if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE)) {
ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
ShaderData *sd = &kernel_split_state.sd[ray_index];
@@ -80,25 +63,24 @@ ccl_device void kernel_direct_lighting(KernelGlobals *kg,
/* direct lighting */
#ifdef __EMISSION__
RNG rng = kernel_split_state.rng[ray_index];
+
bool flag = (kernel_data.integrator.use_direct_light &&
(sd->flag & SD_BSDF_HAS_EVAL));
+
+# ifdef __BRANCHED_PATH__
+ if(flag && kernel_data.integrator.branched) {
+ flag = false;
+ enqueue_flag = 1;
+ }
+# endif /* __BRANCHED_PATH__ */
+
# ifdef __SHADOW_TRICKS__
if(flag && state->flag & PATH_RAY_SHADOW_CATCHER) {
flag = false;
- ShaderData *emission_sd = &kernel_split_state.sd_DL_shadow[ray_index];
- float3 throughput = kernel_split_state.throughput[ray_index];
- PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
- kernel_branched_path_surface_connect_light(kg,
- &rng,
- sd,
- emission_sd,
- state,
- throughput,
- 1.0f,
- L,
- 1);
+ enqueue_flag = 1;
}
# endif /* __SHADOW_TRICKS__ */
+
if(flag) {
/* Sample illumination from lights to find path contribution. */
float light_t = path_state_rng_1D(kg, &rng, state, PRNG_LIGHT);
@@ -129,7 +111,6 @@ ccl_device void kernel_direct_lighting(KernelGlobals *kg,
kernel_split_state.bsdf_eval[ray_index] = L_light;
kernel_split_state.is_lamp[ray_index] = is_lamp;
/* Mark ray state for next shadow kernel. */
- ADD_RAY_FLAG(kernel_split_state.ray_state, ray_index, RAY_SHADOW_RAY_CAST_DL);
enqueue_flag = 1;
}
}
@@ -138,10 +119,6 @@ ccl_device void kernel_direct_lighting(KernelGlobals *kg,
#endif /* __EMISSION__ */
}
-#ifndef __COMPUTE_DEVICE_GPU__
- }
-#endif
-
#ifdef __EMISSION__
/* Enqueue RAY_SHADOW_RAY_CAST_DL rays. */
enqueue_ray_index_local(ray_index,
@@ -152,6 +129,27 @@ ccl_device void kernel_direct_lighting(KernelGlobals *kg,
kernel_split_state.queue_data,
kernel_split_params.queue_index);
#endif
+
+#ifdef __BRANCHED_PATH__
+ /* Enqueue RAY_LIGHT_INDIRECT_NEXT_ITER rays
+ * this is the last kernel before next_iteration_setup that uses local atomics so we do this here
+ */
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
+ if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
+ *local_queue_atomics = 0;
+ }
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
+
+ ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ enqueue_ray_index_local(ray_index,
+ QUEUE_LIGHT_INDIRECT_ITER,
+ IS_STATE(kernel_split_state.ray_state, ray_index, RAY_LIGHT_INDIRECT_NEXT_ITER),
+ kernel_split_params.queue_size,
+ local_queue_atomics,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_index);
+
+#endif /* __BRANCHED_PATH__ */
}
CCL_NAMESPACE_END
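The new QUEUE_LIGHT_INDIRECT_ITER enqueue above follows the usual split-kernel pattern: zero a work-group-local counter behind a barrier, let each qualifying ray claim a local slot, then reserve a contiguous block in the global queue with a single atomic add. A CPU-side sketch of that idea (plain C++ with std::atomic, not OpenCL; enqueue_workgroup is a hypothetical stand-in for enqueue_ray_index_local):

#include <atomic>
#include <cassert>
#include <vector>

std::atomic<int> global_queue_index{0};
std::vector<int> queue_data(64, -1);

// One simulated work-group: rays[] holds candidate ray indices, mask[] says
// which of them should be enqueued this pass.
void enqueue_workgroup(const std::vector<int> &rays, const std::vector<bool> &mask)
{
    // "Local" pass: count how many slots this group needs (a local atomic on the GPU).
    int local_count = 0;
    std::vector<int> local_slot(rays.size(), -1);
    for (size_t i = 0; i < rays.size(); i++)
        if (mask[i])
            local_slot[i] = local_count++;

    // One global atomic per work-group instead of one per ray.
    int base = global_queue_index.fetch_add(local_count);

    for (size_t i = 0; i < rays.size(); i++)
        if (mask[i])
            queue_data[base + local_slot[i]] = rays[i];
}

int main()
{
    enqueue_workgroup({0, 1, 2, 3}, {true, false, true, true});
    enqueue_workgroup({4, 5, 6, 7}, {false, true, false, true});
    assert(global_queue_index.load() == 5);  // 3 rays from the first group + 2 from the second
}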
diff --git a/intern/cycles/kernel/split/kernel_do_volume.h b/intern/cycles/kernel/split/kernel_do_volume.h
index 47d3c280831..182e6c6e4fa 100644
--- a/intern/cycles/kernel/split/kernel_do_volume.h
+++ b/intern/cycles/kernel/split/kernel_do_volume.h
@@ -16,6 +16,81 @@
CCL_NAMESPACE_BEGIN
+#if defined(__BRANCHED_PATH__) && defined(__VOLUME__)
+
+ccl_device_inline void kernel_split_branched_path_volume_indirect_light_init(KernelGlobals *kg, int ray_index)
+{
+ kernel_split_branched_path_indirect_loop_init(kg, ray_index);
+
+ ADD_RAY_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_VOLUME_INDIRECT);
+}
+
+ccl_device_noinline bool kernel_split_branched_path_volume_indirect_light_iter(KernelGlobals *kg, int ray_index)
+{
+ SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
+
+ ShaderData *sd = &kernel_split_state.sd[ray_index];
+ RNG rng = kernel_split_state.rng[ray_index];
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+ ShaderData *emission_sd = &kernel_split_state.sd_DL_shadow[ray_index];
+
+ /* GPU: no decoupled ray marching, scatter probabilistically */
+ int num_samples = kernel_data.integrator.volume_samples;
+ float num_samples_inv = 1.0f/num_samples;
+
+ Ray volume_ray = branched_state->ray;
+ volume_ray.t = (!IS_STATE(&branched_state->ray_state, 0, RAY_HIT_BACKGROUND)) ? branched_state->isect.t : FLT_MAX;
+
+ bool heterogeneous = volume_stack_is_heterogeneous(kg, branched_state->path_state.volume_stack);
+
+ for(int j = branched_state->next_sample; j < num_samples; j++) {
+ ccl_global PathState *ps = &kernel_split_state.path_state[ray_index];
+ *ps = branched_state->path_state;
+
+ ccl_global Ray *pray = &kernel_split_state.ray[ray_index];
+ *pray = branched_state->ray;
+
+ ccl_global float3 *tp = &kernel_split_state.throughput[ray_index];
+ *tp = branched_state->throughput * num_samples_inv;
+
+ /* branch RNG state */
+ path_state_branch(ps, j, num_samples);
+
+ /* integrate along volume segment with distance sampling */
+ VolumeIntegrateResult result = kernel_volume_integrate(
+ kg, ps, sd, &volume_ray, L, tp, &rng, heterogeneous);
+
+# ifdef __VOLUME_SCATTER__
+ if(result == VOLUME_PATH_SCATTERED) {
+ /* direct lighting */
+ kernel_path_volume_connect_light(kg, &rng, sd, emission_sd, *tp, &branched_state->path_state, L);
+
+ /* indirect light bounce */
+ if(!kernel_path_volume_bounce(kg, &rng, sd, tp, ps, L, pray)) {
+ continue;
+ }
+
+ /* start the indirect path */
+ branched_state->next_closure = 0;
+ branched_state->next_sample = j+1;
+ branched_state->num_samples = num_samples;
+
+ return true;
+ }
+# endif
+ }
+
+ kernel_split_branched_path_indirect_loop_end(kg, ray_index);
+
+ /* todo: avoid this calculation using decoupled ray marching */
+ float3 throughput = kernel_split_state.throughput[ray_index];
+ kernel_volume_shadow(kg, emission_sd, &kernel_split_state.path_state[ray_index], &volume_ray, &throughput);
+ kernel_split_state.throughput[ray_index] = throughput;
+
+ return false;
+}
+
+#endif /* __BRANCHED_PATH__ && __VOLUME__ */
ccl_device void kernel_do_volume(KernelGlobals *kg)
{
@@ -23,37 +98,37 @@ ccl_device void kernel_do_volume(KernelGlobals *kg)
/* We will empty this queue in this kernel. */
if(ccl_global_id(0) == 0 && ccl_global_id(1) == 0) {
kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0;
+# ifdef __BRANCHED_PATH__
+ kernel_split_params.queue_index[QUEUE_VOLUME_INDIRECT_ITER] = 0;
+# endif /* __BRANCHED_PATH__ */
}
- /* Fetch use_queues_flag. */
- char local_use_queues_flag = *kernel_split_params.use_queues_flag;
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
- if(local_use_queues_flag) {
+
+ if(*kernel_split_params.use_queues_flag) {
ray_index = get_ray_index(kg, ray_index,
QUEUE_ACTIVE_AND_REGENERATED_RAYS,
kernel_split_state.queue_data,
kernel_split_params.queue_size,
1);
- if(ray_index == QUEUE_EMPTY_SLOT) {
- return;
- }
}
- if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE) ||
- IS_STATE(kernel_split_state.ray_state, ray_index, RAY_HIT_BACKGROUND)) {
+ ccl_global char *ray_state = kernel_split_state.ray_state;
- bool hit = ! IS_STATE(kernel_split_state.ray_state, ray_index, RAY_HIT_BACKGROUND);
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+ ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
- PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
- ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+ ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
+ ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
+ RNG rng = kernel_split_state.rng[ray_index];
+ ccl_global Intersection *isect = &kernel_split_state.isect[ray_index];
+ ShaderData *sd = &kernel_split_state.sd[ray_index];
+ ShaderData *emission_sd = &kernel_split_state.sd_DL_shadow[ray_index];
- ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
- ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
- RNG rng = kernel_split_state.rng[ray_index];
- ccl_global Intersection *isect = &kernel_split_state.isect[ray_index];
- ShaderData *sd = &kernel_split_state.sd[ray_index];
- ShaderData *sd_input = &kernel_split_state.sd_DL_shadow[ray_index];
+ if(IS_STATE(ray_state, ray_index, RAY_ACTIVE) ||
+ IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) {
+
+ bool hit = ! IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND);
/* Sanitize volume stack. */
if(!hit) {
@@ -64,31 +139,68 @@ ccl_device void kernel_do_volume(KernelGlobals *kg)
Ray volume_ray = *ray;
volume_ray.t = (hit)? isect->t: FLT_MAX;
- bool heterogeneous = volume_stack_is_heterogeneous(kg, state->volume_stack);
+# ifdef __BRANCHED_PATH__
+ if(!kernel_data.integrator.branched || IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT)) {
+# endif /* __BRANCHED_PATH__ */
+ bool heterogeneous = volume_stack_is_heterogeneous(kg, state->volume_stack);
- {
- /* integrate along volume segment with distance sampling */
- VolumeIntegrateResult result = kernel_volume_integrate(
- kg, state, sd, &volume_ray, L, throughput, &rng, heterogeneous);
+ {
+ /* integrate along volume segment with distance sampling */
+ VolumeIntegrateResult result = kernel_volume_integrate(
+ kg, state, sd, &volume_ray, L, throughput, &rng, heterogeneous);
# ifdef __VOLUME_SCATTER__
- if(result == VOLUME_PATH_SCATTERED) {
- /* direct lighting */
- kernel_path_volume_connect_light(kg, &rng, sd, sd_input, *throughput, state, L);
-
- /* indirect light bounce */
- if(kernel_path_volume_bounce(kg, &rng, sd, throughput, state, L, ray))
- ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_REGENERATED);
- else
- ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_UPDATE_BUFFER);
+ if(result == VOLUME_PATH_SCATTERED) {
+ /* direct lighting */
+ kernel_path_volume_connect_light(kg, &rng, sd, emission_sd, *throughput, state, L);
+
+ /* indirect light bounce */
+ if(kernel_path_volume_bounce(kg, &rng, sd, throughput, state, L, ray)) {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
+ }
+ else {
+ kernel_split_path_end(kg, ray_index);
+ }
+ }
+# endif /* __VOLUME_SCATTER__ */
+ }
+
+# ifdef __BRANCHED_PATH__
+ }
+ else {
+ kernel_split_branched_path_volume_indirect_light_init(kg, ray_index);
+
+ if(kernel_split_branched_path_volume_indirect_light_iter(kg, ray_index)) {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
}
-# endif
}
+# endif /* __BRANCHED_PATH__ */
}
+
kernel_split_state.rng[ray_index] = rng;
}
-#endif
+# ifdef __BRANCHED_PATH__
+ /* iter loop */
+ ray_index = get_ray_index(kg, ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0),
+ QUEUE_VOLUME_INDIRECT_ITER,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
+ 1);
+
+ if(IS_STATE(ray_state, ray_index, RAY_VOLUME_INDIRECT_NEXT_ITER)) {
+ /* for render passes, sum and reset indirect light pass variables
+ * for the next samples */
+ path_radiance_sum_indirect(&kernel_split_state.path_radiance[ray_index]);
+ path_radiance_reset_indirect(&kernel_split_state.path_radiance[ray_index]);
+
+ if(kernel_split_branched_path_volume_indirect_light_iter(kg, ray_index)) {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
+ }
+ }
+# endif /* __BRANCHED_PATH__ */
+
+#endif /* __VOLUME__ */
}
diff --git a/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h b/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
index 89adeb64c8a..87498910d38 100644
--- a/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
+++ b/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
@@ -52,6 +52,7 @@ CCL_NAMESPACE_BEGIN
* - QUEUE_SHADOW_RAY_CAST_AO_RAYS will be filled with rays marked with
* flag RAY_SHADOW_RAY_CAST_AO
*/
+
ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(
KernelGlobals *kg,
ccl_local_param BackgroundAOLocals *locals)
@@ -62,8 +63,9 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(
}
ccl_barrier(CCL_LOCAL_MEM_FENCE);
+#ifdef __AO__
char enqueue_flag = 0;
- char enqueue_flag_AO_SHADOW_RAY_CAST = 0;
+#endif
int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
ray_index = get_ray_index(kg, ray_index,
QUEUE_ACTIVE_AND_REGENERATED_RAYS,
@@ -155,8 +157,7 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(
kernel_split_state.L_transparent[ray_index] += average(holdout_weight*throughput);
}
if(sd->object_flag & SD_OBJECT_HOLDOUT_MASK) {
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
- enqueue_flag = 1;
+ kernel_split_path_end(kg, ray_index);
}
}
#endif /* __HOLDOUT__ */
@@ -164,18 +165,31 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(
if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
- /* Holdout mask objects do not write data passes. */
- kernel_write_data_passes(kg,
- buffer,
- L,
- sd,
- sample,
- state,
- throughput);
+
+#ifdef __BRANCHED_PATH__
+ if(!IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT))
+#endif /* __BRANCHED_PATH__ */
+ {
+ /* Holdout mask objects do not write data passes. */
+ kernel_write_data_passes(kg,
+ buffer,
+ L,
+ sd,
+ sample,
+ state,
+ throughput);
+ }
+
/* Blurring of bsdf after bounces, for rays that have a small likelihood
* of following this particular path (diffuse, rough glossy.
*/
- if(kernel_data.integrator.filter_glossy != FLT_MAX) {
+#ifndef __BRANCHED_PATH__
+ if(kernel_data.integrator.filter_glossy != FLT_MAX)
+#else
+ if(kernel_data.integrator.filter_glossy != FLT_MAX &&
+ (!kernel_data.integrator.branched || IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT)))
+#endif /* __BRANCHED_PATH__ */
+ {
float blur_pdf = kernel_data.integrator.filter_glossy*state->min_ray_pdf;
if(blur_pdf < 1.0f) {
float blur_roughness = sqrtf(1.0f - blur_pdf)*0.5f;
@@ -201,19 +215,32 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(
* mainly due to the mixed in MIS that we use. gives too many unneeded
* shader evaluations, only need emission if we are going to terminate.
*/
+#ifndef __BRANCHED_PATH__
float probability = path_state_terminate_probability(kg, state, throughput);
+#else
+ float probability = 1.0f;
+
+ if(!kernel_data.integrator.branched) {
+ probability = path_state_terminate_probability(kg, state, throughput);
+ }
+ else if(IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT)) {
+ int num_samples = kernel_split_state.branched_state[ray_index].num_samples;
+ probability = path_state_terminate_probability(kg, state, throughput*num_samples);
+ }
+ else if(state->flag & PATH_RAY_TRANSPARENT) {
+ probability = path_state_terminate_probability(kg, state, throughput);
+ }
+#endif
if(probability == 0.0f) {
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
- enqueue_flag = 1;
+ kernel_split_path_end(kg, ray_index);
}
if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
if(probability != 1.0f) {
float terminate = path_state_rng_1D_for_decision(kg, &rng, state, PRNG_TERMINATE);
if(terminate >= probability) {
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
- enqueue_flag = 1;
+ kernel_split_path_end(kg, ray_index);
}
else {
kernel_split_state.throughput[ray_index] = throughput/probability;
@@ -225,61 +252,23 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(
#ifdef __AO__
if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
/* ambient occlusion */
- if(kernel_data.integrator.use_ambient_occlusion ||
- (sd->flag & SD_AO))
- {
- /* todo: solve correlation */
- float bsdf_u, bsdf_v;
- path_state_rng_2D(kg, &rng, state, PRNG_BSDF_U, &bsdf_u, &bsdf_v);
-
- float ao_factor = kernel_data.background.ao_factor;
- float3 ao_N;
- kernel_split_state.ao_bsdf[ray_index] = shader_bsdf_ao(kg, sd, ao_factor, &ao_N);
- kernel_split_state.ao_alpha[ray_index] = shader_bsdf_alpha(kg, sd);
-
- float3 ao_D;
- float ao_pdf;
- sample_cos_hemisphere(ao_N, bsdf_u, bsdf_v, &ao_D, &ao_pdf);
-
- if(dot(sd->Ng, ao_D) > 0.0f && ao_pdf != 0.0f) {
- Ray _ray;
- _ray.P = ray_offset(sd->P, sd->Ng);
- _ray.D = ao_D;
- _ray.t = kernel_data.background.ao_distance;
-#ifdef __OBJECT_MOTION__
- _ray.time = sd->time;
-#endif
- _ray.dP = sd->dP;
- _ray.dD = differential3_zero();
- kernel_split_state.ao_light_ray[ray_index] = _ray;
-
- ADD_RAY_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_AO);
- enqueue_flag_AO_SHADOW_RAY_CAST = 1;
- }
+ if(kernel_data.integrator.use_ambient_occlusion || (sd->flag & SD_AO)) {
+ enqueue_flag = 1;
}
}
#endif /* __AO__ */
- kernel_split_state.rng[ray_index] = rng;
+ kernel_split_state.rng[ray_index] = rng;
#ifndef __COMPUTE_DEVICE_GPU__
}
#endif
- /* Enqueue RAY_UPDATE_BUFFER rays. */
- enqueue_ray_index_local(ray_index,
- QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
- enqueue_flag,
- kernel_split_params.queue_size,
- &locals->queue_atomics_bg,
- kernel_split_state.queue_data,
- kernel_split_params.queue_index);
-
#ifdef __AO__
/* Enqueue to-shadow-ray-cast rays. */
enqueue_ray_index_local(ray_index,
QUEUE_SHADOW_RAY_CAST_AO_RAYS,
- enqueue_flag_AO_SHADOW_RAY_CAST,
+ enqueue_flag,
kernel_split_params.queue_size,
&locals->queue_atomics_ao,
kernel_split_state.queue_data,
diff --git a/intern/cycles/kernel/split/kernel_indirect_background.h b/intern/cycles/kernel/split/kernel_indirect_background.h
index 8192528622e..6fbc888e358 100644
--- a/intern/cycles/kernel/split/kernel_indirect_background.h
+++ b/intern/cycles/kernel/split/kernel_indirect_background.h
@@ -34,7 +34,7 @@ ccl_device void kernel_indirect_background(KernelGlobals *kg)
if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
if(state->bounce > kernel_data.integrator.ao_bounces) {
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
+ kernel_split_path_end(kg, ray_index);
}
}
}
@@ -63,7 +63,7 @@ ccl_device void kernel_indirect_background(KernelGlobals *kg)
#ifdef __PASSES__
if(!(kernel_data.film.pass_flag & PASS_BACKGROUND))
#endif
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
+ kernel_split_path_end(kg, ray_index);
}
if(IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) {
@@ -72,7 +72,7 @@ ccl_device void kernel_indirect_background(KernelGlobals *kg)
float3 L_background = indirect_background(kg, &kernel_split_state.sd_DL_shadow[ray_index], state, ray);
path_radiance_accum_background(L, state, (*throughput), L_background);
#endif
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
+ kernel_split_path_end(kg, ray_index);
}
}
diff --git a/intern/cycles/kernel/split/kernel_indirect_subsurface.h b/intern/cycles/kernel/split/kernel_indirect_subsurface.h
index a56e85abeb9..82bc2f01fd7 100644
--- a/intern/cycles/kernel/split/kernel_indirect_subsurface.h
+++ b/intern/cycles/kernel/split/kernel_indirect_subsurface.h
@@ -49,26 +49,29 @@ ccl_device void kernel_indirect_subsurface(KernelGlobals *kg)
ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
- if(IS_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER)) {
- ccl_addr_space SubsurfaceIndirectRays *ss_indirect = &kernel_split_state.ss_rays[ray_index];
- kernel_path_subsurface_accum_indirect(ss_indirect, L);
+#ifdef __BRANCHED_PATH__
+ if(!kernel_data.integrator.branched) {
+#endif
+ if(IS_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER)) {
+ ccl_addr_space SubsurfaceIndirectRays *ss_indirect = &kernel_split_state.ss_rays[ray_index];
+ kernel_path_subsurface_accum_indirect(ss_indirect, L);
- /* Trace indirect subsurface rays by restarting the loop. this uses less
- * stack memory than invoking kernel_path_indirect.
- */
- if(ss_indirect->num_rays) {
- kernel_path_subsurface_setup_indirect(kg,
- ss_indirect,
- state,
- ray,
- L,
- throughput);
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
- }
- else {
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
+ /* Trace indirect subsurface rays by restarting the loop. this uses less
+ * stack memory than invoking kernel_path_indirect.
+ */
+ if(ss_indirect->num_rays) {
+ kernel_path_subsurface_setup_indirect(kg,
+ ss_indirect,
+ state,
+ ray,
+ L,
+ throughput);
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
+ }
}
+#ifdef __BRANCHED_PATH__
}
+#endif
#endif /* __SUBSURFACE__ */
diff --git a/intern/cycles/kernel/split/kernel_next_iteration_setup.h b/intern/cycles/kernel/split/kernel_next_iteration_setup.h
index 1bebc16e25b..71017fed19e 100644
--- a/intern/cycles/kernel/split/kernel_next_iteration_setup.h
+++ b/intern/cycles/kernel/split/kernel_next_iteration_setup.h
@@ -44,6 +44,52 @@ CCL_NAMESPACE_BEGIN
* - QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be filled with
* RAY_TO_REGENERATE and more RAY_UPDATE_BUFFER rays.
*/
+
+#ifdef __BRANCHED_PATH__
+ccl_device_inline void kernel_split_branched_indirect_light_init(KernelGlobals *kg, int ray_index)
+{
+ kernel_split_branched_path_indirect_loop_init(kg, ray_index);
+
+ ADD_RAY_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_LIGHT_INDIRECT);
+}
+
+ccl_device void kernel_split_branched_indirect_light_end(KernelGlobals *kg, int ray_index)
+{
+ kernel_split_branched_path_indirect_loop_end(kg, ray_index);
+
+ ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
+ ShaderData *sd = &kernel_split_state.sd[ray_index];
+ ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+ ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
+
+ /* continue in case of transparency */
+ *throughput *= shader_bsdf_transparency(kg, sd);
+
+ if(is_zero(*throughput)) {
+ kernel_split_path_end(kg, ray_index);
+ }
+ else {
+ /* Update Path State */
+ state->flag |= PATH_RAY_TRANSPARENT;
+ state->transparent_bounce++;
+
+ ray->P = ray_offset(sd->P, -sd->Ng);
+ ray->t -= sd->ray_length; /* clipping works through transparent */
+
+# ifdef __RAY_DIFFERENTIALS__
+ ray->dP = sd->dP;
+ ray->dD.dx = -sd->dI.dx;
+ ray->dD.dy = -sd->dI.dy;
+# endif /* __RAY_DIFFERENTIALS__ */
+
+# ifdef __VOLUME__
+ /* enter/exit volume */
+ kernel_volume_stack_enter_exit(kg, sd, state->volume_stack);
+# endif /* __VOLUME__ */
+ }
+}
+#endif /* __BRANCHED_PATH__ */
+
ccl_device void kernel_next_iteration_setup(KernelGlobals *kg,
ccl_local_param unsigned int *local_queue_atomics)
{
@@ -67,7 +113,6 @@ ccl_device void kernel_next_iteration_setup(KernelGlobals *kg,
kernel_split_params.queue_index[QUEUE_SHADOW_RAY_CAST_DL_RAYS] = 0;
}
- char enqueue_flag = 0;
int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
ray_index = get_ray_index(kg, ray_index,
QUEUE_ACTIVE_AND_REGENERATED_RAYS,
@@ -75,102 +120,125 @@ ccl_device void kernel_next_iteration_setup(KernelGlobals *kg,
kernel_split_params.queue_size,
0);
-#ifdef __COMPUTE_DEVICE_GPU__
- /* If we are executing on a GPU device, we exit all threads that are not
- * required.
- *
- * If we are executing on a CPU device, then we need to keep all threads
- * active since we have barrier() calls later in the kernel. CPU devices,
- * expect all threads to execute barrier statement.
- */
- if(ray_index == QUEUE_EMPTY_SLOT) {
- return;
- }
-#endif
-
-#ifndef __COMPUTE_DEVICE_GPU__
- if(ray_index != QUEUE_EMPTY_SLOT) {
-#endif
-
- /* Load ShaderData structure. */
- PathRadiance *L = NULL;
- ccl_global PathState *state = NULL;
ccl_global char *ray_state = kernel_split_state.ray_state;
- /* Path radiance update for AO/Direct_lighting's shadow blocked. */
- if(IS_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_DL) ||
- IS_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_AO))
- {
- state = &kernel_split_state.path_state[ray_index];
- L = &kernel_split_state.path_radiance[ray_index];
- float3 _throughput = kernel_split_state.throughput[ray_index];
-
- if(IS_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_AO)) {
- float3 shadow = kernel_split_state.ao_light_ray[ray_index].P;
- // TODO(mai): investigate correctness here
- char update_path_radiance = (char)kernel_split_state.ao_light_ray[ray_index].t;
- if(update_path_radiance) {
- path_radiance_accum_ao(L,
- _throughput,
- kernel_split_state.ao_alpha[ray_index],
- kernel_split_state.ao_bsdf[ray_index],
- shadow,
- state->bounce);
- }
- else {
- path_radiance_accum_total_ao(L, _throughput, kernel_split_state.ao_bsdf[ray_index]);
+ bool active = IS_STATE(ray_state, ray_index, RAY_ACTIVE);
+ if(active) {
+ ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
+ ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
+ RNG rng = kernel_split_state.rng[ray_index];
+ ShaderData *sd = &kernel_split_state.sd[ray_index];
+ ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+
+#ifdef __BRANCHED_PATH__
+ if(!kernel_data.integrator.branched || IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT)) {
+#endif
+ /* Compute direct lighting and next bounce. */
+ if(!kernel_path_surface_bounce(kg, &rng, sd, throughput, state, L, ray)) {
+ kernel_split_path_end(kg, ray_index);
}
- REMOVE_RAY_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_AO);
+#ifdef __BRANCHED_PATH__
}
-
- if(IS_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_DL)) {
- float3 shadow = kernel_split_state.light_ray[ray_index].P;
- // TODO(mai): investigate correctness here
- char update_path_radiance = (char)kernel_split_state.light_ray[ray_index].t;
- BsdfEval L_light = kernel_split_state.bsdf_eval[ray_index];
- if(update_path_radiance) {
- path_radiance_accum_light(L,
- _throughput,
- &L_light,
- shadow,
- 1.0f,
- state->bounce,
- kernel_split_state.is_lamp[ray_index]);
+ else {
+ kernel_split_branched_indirect_light_init(kg, ray_index);
+
+ if(kernel_split_branched_path_surface_indirect_light_iter(kg,
+ ray_index,
+ 1.0f,
+ &kernel_split_state.branched_state[ray_index].sd,
+ true))
+ {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
}
else {
- path_radiance_accum_total_light(L, _throughput, &L_light);
+ kernel_split_branched_indirect_light_end(kg, ray_index);
}
- REMOVE_RAY_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_DL);
}
+#endif /* __BRANCHED_PATH__ */
+
+ kernel_split_state.rng[ray_index] = rng;
}
- if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
- ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
- ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
- RNG rng = kernel_split_state.rng[ray_index];
- state = &kernel_split_state.path_state[ray_index];
- L = &kernel_split_state.path_radiance[ray_index];
+ /* Enqueue RAY_UPDATE_BUFFER rays. */
+ enqueue_ray_index_local(ray_index,
+ QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
+ IS_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER) && active,
+ kernel_split_params.queue_size,
+ local_queue_atomics,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_index);
+
+#ifdef __BRANCHED_PATH__
+ /* iter loop */
+ if(ccl_global_id(0) == 0 && ccl_global_id(1) == 0) {
+ kernel_split_params.queue_index[QUEUE_LIGHT_INDIRECT_ITER] = 0;
+ }
- /* Compute direct lighting and next bounce. */
- if(!kernel_path_surface_bounce(kg, &rng, &kernel_split_state.sd[ray_index], throughput, state, L, ray)) {
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
- enqueue_flag = 1;
+ ray_index = get_ray_index(kg, ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0),
+ QUEUE_LIGHT_INDIRECT_ITER,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
+ 1);
+
+ if(IS_STATE(ray_state, ray_index, RAY_LIGHT_INDIRECT_NEXT_ITER)) {
+ /* for render passes, sum and reset indirect light pass variables
+ * for the next samples */
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+
+ path_radiance_sum_indirect(L);
+ path_radiance_reset_indirect(L);
+
+ if(kernel_split_branched_path_surface_indirect_light_iter(kg,
+ ray_index,
+ 1.0f,
+ &kernel_split_state.branched_state[ray_index].sd,
+ true))
+ {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
+ }
+ else {
+ kernel_split_branched_indirect_light_end(kg, ray_index);
}
- kernel_split_state.rng[ray_index] = rng;
}
-#ifndef __COMPUTE_DEVICE_GPU__
+# ifdef __VOLUME__
+ /* Enqueue RAY_VOLUME_INDIRECT_NEXT_ITER rays */
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
+ if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
+ *local_queue_atomics = 0;
}
-#endif
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
- /* Enqueue RAY_UPDATE_BUFFER rays. */
+ ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
enqueue_ray_index_local(ray_index,
- QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
- enqueue_flag,
+ QUEUE_VOLUME_INDIRECT_ITER,
+ IS_STATE(kernel_split_state.ray_state, ray_index, RAY_VOLUME_INDIRECT_NEXT_ITER),
+ kernel_split_params.queue_size,
+ local_queue_atomics,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_index);
+
+# endif /* __VOLUME__ */
+
+# ifdef __SUBSURFACE__
+ /* Enqueue RAY_SUBSURFACE_INDIRECT_NEXT_ITER rays */
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
+ if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
+ *local_queue_atomics = 0;
+ }
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
+
+ ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ enqueue_ray_index_local(ray_index,
+ QUEUE_SUBSURFACE_INDIRECT_ITER,
+ IS_STATE(kernel_split_state.ray_state, ray_index, RAY_SUBSURFACE_INDIRECT_NEXT_ITER),
kernel_split_params.queue_size,
local_queue_atomics,
kernel_split_state.queue_data,
kernel_split_params.queue_index);
+# endif /* __SUBSURFACE__ */
+#endif /* __BRANCHED_PATH__ */
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_shader_eval.h b/intern/cycles/kernel/split/kernel_shader_eval.h
index 0f1696e34a0..957d70958d9 100644
--- a/intern/cycles/kernel/split/kernel_shader_eval.h
+++ b/intern/cycles/kernel/split/kernel_shader_eval.h
@@ -38,8 +38,10 @@ ccl_device void kernel_shader_eval(KernelGlobals *kg,
kernel_split_params.queue_size,
0);
+ ccl_global char *ray_state = kernel_split_state.ray_state;
+
char enqueue_flag = 0;
- if((ray_index != QUEUE_EMPTY_SLOT) && IS_STATE(kernel_split_state.ray_state, ray_index, RAY_TO_REGENERATE)) {
+ if(IS_STATE(ray_state, ray_index, RAY_TO_REGENERATE)) {
enqueue_flag = 1;
}
@@ -52,7 +54,7 @@ ccl_device void kernel_shader_eval(KernelGlobals *kg,
kernel_split_params.queue_index);
/* Continue on with shader evaluation. */
- if((ray_index != QUEUE_EMPTY_SLOT) && IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE)) {
+ if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
Intersection isect = kernel_split_state.isect[ray_index];
RNG rng = kernel_split_state.rng[ray_index];
ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
@@ -62,8 +64,27 @@ ccl_device void kernel_shader_eval(KernelGlobals *kg,
&kernel_split_state.sd[ray_index],
&isect,
&ray);
+
+#ifndef __BRANCHED_PATH__
float rbsdf = path_state_rng_1D_for_decision(kg, &rng, state, PRNG_BSDF);
shader_eval_surface(kg, &kernel_split_state.sd[ray_index], &rng, state, rbsdf, state->flag, SHADER_CONTEXT_MAIN);
+#else
+ ShaderContext ctx = SHADER_CONTEXT_MAIN;
+ float rbsdf = 0.0f;
+
+ if(!kernel_data.integrator.branched || IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT)) {
+ rbsdf = path_state_rng_1D_for_decision(kg, &rng, state, PRNG_BSDF);
+
+ }
+
+ if(IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT)) {
+ ctx = SHADER_CONTEXT_INDIRECT;
+ }
+
+ shader_eval_surface(kg, &kernel_split_state.sd[ray_index], &rng, state, rbsdf, state->flag, ctx);
+ shader_merge_closures(&kernel_split_state.sd[ray_index]);
+#endif /* __BRANCHED_PATH__ */
+
kernel_split_state.rng[ray_index] = rng;
}
}
diff --git a/intern/cycles/kernel/split/kernel_shadow_blocked_ao.h b/intern/cycles/kernel/split/kernel_shadow_blocked_ao.h
index 4243e18de72..474286285a9 100644
--- a/intern/cycles/kernel/split/kernel_shadow_blocked_ao.h
+++ b/intern/cycles/kernel/split/kernel_shadow_blocked_ao.h
@@ -29,31 +29,29 @@ ccl_device void kernel_shadow_blocked_ao(KernelGlobals *kg)
kernel_split_state.queue_data, kernel_split_params.queue_size, 1);
}
- if(ray_index == QUEUE_EMPTY_SLOT)
+ if(ray_index == QUEUE_EMPTY_SLOT) {
return;
+ }
- /* Flag determining if we need to update L. */
- char update_path_radiance = 0;
-
- if(IS_FLAG(kernel_split_state.ray_state, ray_index, RAY_SHADOW_RAY_CAST_AO)) {
- ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
- ccl_global Ray *light_ray_global = &kernel_split_state.ao_light_ray[ray_index];
-
- float3 shadow;
- Ray ray = *light_ray_global;
- update_path_radiance = !(shadow_blocked(kg,
- &kernel_split_state.sd_DL_shadow[ray_index],
- state,
- &ray,
- &shadow));
-
- *light_ray_global = ray;
- /* We use light_ray_global's P and t to store shadow and
- * update_path_radiance.
- */
- light_ray_global->P = shadow;
- light_ray_global->t = update_path_radiance;
+ ShaderData *sd = &kernel_split_state.sd[ray_index];
+ ShaderData *emission_sd = &kernel_split_state.sd_DL_shadow[ray_index];
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+ ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+ RNG rng = kernel_split_state.rng[ray_index];
+ float3 throughput = kernel_split_state.throughput[ray_index];
+
+#ifdef __BRANCHED_PATH__
+ if(!kernel_data.integrator.branched || IS_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_INDIRECT)) {
+#endif
+ kernel_path_ao(kg, sd, emission_sd, L, state, &rng, throughput, shader_bsdf_alpha(kg, sd));
+#ifdef __BRANCHED_PATH__
+ }
+ else {
+ kernel_branched_path_ao(kg, sd, emission_sd, L, state, &rng, throughput);
}
+#endif
+
+ kernel_split_state.rng[ray_index] = rng;
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h b/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h
index bb8f0157965..452b6e45a36 100644
--- a/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h
+++ b/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h
@@ -32,28 +32,71 @@ ccl_device void kernel_shadow_blocked_dl(KernelGlobals *kg)
if(ray_index == QUEUE_EMPTY_SLOT)
return;
- /* Flag determining if we need to update L. */
- char update_path_radiance = 0;
+ ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+ Ray ray = kernel_split_state.light_ray[ray_index];
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+ ShaderData *sd = &kernel_split_state.sd[ray_index];
+ float3 throughput = kernel_split_state.throughput[ray_index];
+ RNG rng = kernel_split_state.rng[ray_index];
- if(IS_FLAG(kernel_split_state.ray_state, ray_index, RAY_SHADOW_RAY_CAST_DL)) {
- ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
- ccl_global Ray *light_ray_global = &kernel_split_state.light_ray[ray_index];
+ BsdfEval L_light = kernel_split_state.bsdf_eval[ray_index];
+ ShaderData *emission_sd = &kernel_split_state.sd_DL_shadow[ray_index];
+ bool is_lamp = kernel_split_state.is_lamp[ray_index];
+# if defined(__BRANCHED_PATH__) || defined(__SHADOW_TRICKS__)
+ bool use_branched = false;
+ int all = 0;
+
+ if(state->flag & PATH_RAY_SHADOW_CATCHER) {
+ use_branched = true;
+ all = 1;
+ }
+# if defined(__BRANCHED_PATH__)
+ else if(kernel_data.integrator.branched) {
+ use_branched = true;
+
+ if(IS_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_INDIRECT)) {
+ all = (kernel_data.integrator.sample_all_lights_indirect);
+ }
+ else
+ {
+ all = (kernel_data.integrator.sample_all_lights_direct);
+ }
+ }
+# endif /* __BRANCHED_PATH__ */
+
+ if(use_branched) {
+ kernel_branched_path_surface_connect_light(kg,
+ &rng,
+ sd,
+ emission_sd,
+ state,
+ throughput,
+ 1.0f,
+ L,
+ all);
+ }
+ else
+# endif /* defined(__BRANCHED_PATH__) || defined(__SHADOW_TRICKS__) */
+ {
+ /* trace shadow ray */
float3 shadow;
- Ray ray = *light_ray_global;
- update_path_radiance = !(shadow_blocked(kg,
- &kernel_split_state.sd_DL_shadow[ray_index],
- state,
- &ray,
- &shadow));
-
- *light_ray_global = ray;
- /* We use light_ray_global's P and t to store shadow and
- * update_path_radiance.
- */
- light_ray_global->P = shadow;
- light_ray_global->t = update_path_radiance;
+
+ if(!shadow_blocked(kg,
+ emission_sd,
+ state,
+ &ray,
+ &shadow))
+ {
+ /* accumulate */
+ path_radiance_accum_light(L, throughput, &L_light, shadow, 1.0f, state->bounce, is_lamp);
+ }
+ else {
+ path_radiance_accum_total_light(L, throughput, &L_light);
+ }
}
+
+ kernel_split_state.rng[ray_index] = rng;
}
CCL_NAMESPACE_END
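
Note for readers skimming the diff: the hunk above replaces the old shadow_blocked()-only path with a decision between the branched connect-light routine and plain shadow accumulation. A minimal, self-contained C sketch of that decision follows; IntegratorSettings, choose_light_sampling() and the boolean parameters are illustrative stand-ins rather than Cycles types, and only the branching logic mirrors the patch.

#include <stdbool.h>
#include <stdio.h>

/* illustrative stand-ins for the kernel's integrator settings and ray flags */
struct IntegratorSettings {
	bool branched;
	bool sample_all_lights_direct;
	bool sample_all_lights_indirect;
};

static void choose_light_sampling(const struct IntegratorSettings *integrator,
                                  bool shadow_catcher, bool branched_indirect,
                                  bool *use_branched, bool *all)
{
	*use_branched = false;
	*all = false;

	if(shadow_catcher) {
		/* shadow catchers always take the branched connect-light route
		 * and sample all lights */
		*use_branched = true;
		*all = true;
	}
	else if(integrator->branched) {
		*use_branched = true;
		*all = branched_indirect ? integrator->sample_all_lights_indirect
		                         : integrator->sample_all_lights_direct;
	}
}

int main(void)
{
	struct IntegratorSettings integrator = {true, true, false};
	bool use_branched, all;

	choose_light_sampling(&integrator, false, true, &use_branched, &all);
	printf("branched=%d all=%d\n", use_branched, all); /* branched=1 all=0 */
	return 0;
}
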
diff --git a/intern/cycles/kernel/split/kernel_split_common.h b/intern/cycles/kernel/split/kernel_split_common.h
index 4303ba0a905..57f070d51e0 100644
--- a/intern/cycles/kernel/split/kernel_split_common.h
+++ b/intern/cycles/kernel/split/kernel_split_common.h
@@ -37,41 +37,42 @@
#include "util/util_atomic.h"
-#include "kernel/kernel_random.h"
-#include "kernel/kernel_projection.h"
-#include "kernel/kernel_montecarlo.h"
-#include "kernel/kernel_differential.h"
-#include "kernel/kernel_camera.h"
-
-#include "kernel/geom/geom.h"
-#include "kernel/bvh/bvh.h"
-
-#include "kernel/kernel_accumulate.h"
-#include "kernel/kernel_shader.h"
-#include "kernel/kernel_light.h"
-#include "kernel/kernel_passes.h"
-
-#ifdef __SUBSURFACE__
-# include "kernel/kernel_subsurface.h"
+#include "kernel/kernel_path.h"
+#ifdef __BRANCHED_PATH__
+# include "kernel/kernel_path_branched.h"
#endif
-#ifdef __VOLUME__
-# include "kernel/kernel_volume.h"
-#endif
+#include "kernel/kernel_queues.h"
+#include "kernel/kernel_work_stealing.h"
-#include "kernel/kernel_path_state.h"
-#include "kernel/kernel_shadow.h"
-#include "kernel/kernel_emission.h"
-#include "kernel/kernel_path_common.h"
-#include "kernel/kernel_path_surface.h"
-#include "kernel/kernel_path_volume.h"
-#include "kernel/kernel_path_subsurface.h"
+#ifdef __BRANCHED_PATH__
+# include "kernel/split/kernel_branched.h"
+#endif
-#ifdef __KERNEL_DEBUG__
-# include "kernel/kernel_debug.h"
+CCL_NAMESPACE_BEGIN
+
+ccl_device_inline void kernel_split_path_end(KernelGlobals *kg, int ray_index)
+{
+ ccl_global char *ray_state = kernel_split_state.ray_state;
+
+#ifdef __BRANCHED_PATH__
+ if(IS_FLAG(ray_state, ray_index, RAY_BRANCHED_LIGHT_INDIRECT)) {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_LIGHT_INDIRECT_NEXT_ITER);
+ }
+ else if(IS_FLAG(ray_state, ray_index, RAY_BRANCHED_VOLUME_INDIRECT)) {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_VOLUME_INDIRECT_NEXT_ITER);
+ }
+ else if(IS_FLAG(ray_state, ray_index, RAY_BRANCHED_SUBSURFACE_INDIRECT)) {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_SUBSURFACE_INDIRECT_NEXT_ITER);
+ }
+ else {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
+ }
+#else
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
#endif
+}
-#include "kernel/kernel_queues.h"
-#include "kernel/kernel_work_stealing.h"
+CCL_NAMESPACE_END
#endif /* __KERNEL_SPLIT_H__ */
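
The new kernel_split_path_end() above is the heart of the state machine mentioned in the commit message: a finished ray only goes to the buffer-update stage if it is not inside a branched indirect loop; otherwise it is flagged for the next iteration of that loop. Below is a small standalone sketch of the dispatch, assuming hypothetical numeric encodings for the state and flag names taken from the patch; the is_flag()-style bit test stands in for the kernel's IS_FLAG()/ASSIGN_RAY_STATE() macros.

#include <stdio.h>

/* hypothetical encodings: low values are states, high bits are branch flags */
enum {
	RAY_UPDATE_BUFFER                 = 1,
	RAY_LIGHT_INDIRECT_NEXT_ITER      = 2,
	RAY_VOLUME_INDIRECT_NEXT_ITER     = 3,
	RAY_SUBSURFACE_INDIRECT_NEXT_ITER = 4,

	RAY_BRANCHED_LIGHT_INDIRECT      = 1 << 4,
	RAY_BRANCHED_VOLUME_INDIRECT     = 1 << 5,
	RAY_BRANCHED_SUBSURFACE_INDIRECT = 1 << 6,
};

static int path_end_state(int ray_state)
{
	/* mirrors the if/else chain in kernel_split_path_end() */
	if(ray_state & RAY_BRANCHED_LIGHT_INDIRECT)
		return RAY_LIGHT_INDIRECT_NEXT_ITER;
	else if(ray_state & RAY_BRANCHED_VOLUME_INDIRECT)
		return RAY_VOLUME_INDIRECT_NEXT_ITER;
	else if(ray_state & RAY_BRANCHED_SUBSURFACE_INDIRECT)
		return RAY_SUBSURFACE_INDIRECT_NEXT_ITER;
	else
		return RAY_UPDATE_BUFFER;
}

int main(void)
{
	/* a ray inside a branched light-indirect loop is sent back for another
	 * iteration instead of writing to the buffer */
	printf("%d\n", path_end_state(RAY_BRANCHED_LIGHT_INDIRECT)); /* 2 */
	printf("%d\n", path_end_state(0));                           /* 1 */
	return 0;
}
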
diff --git a/intern/cycles/kernel/split/kernel_split_data_types.h b/intern/cycles/kernel/split/kernel_split_data_types.h
index 0af8bfc89d5..a2cb4f6ae98 100644
--- a/intern/cycles/kernel/split/kernel_split_data_types.h
+++ b/intern/cycles/kernel/split/kernel_split_data_types.h
@@ -62,7 +62,46 @@ typedef struct SplitParams {
SPLIT_DATA_ENTRY(DebugData, debug_data, 1)
#else
# define SPLIT_DATA_DEBUG_ENTRIES
-#endif
+#endif /* DEBUG */
+
+#ifdef __BRANCHED_PATH__
+
+typedef ccl_global struct SplitBranchedState {
+ /* various state that must be kept and restored after an indirect loop */
+ PathState path_state;
+ float3 throughput;
+ Ray ray;
+
+ struct ShaderData sd;
+ Intersection isect;
+
+ char ray_state;
+
+ /* indirect loop state */
+ int next_closure;
+ int next_sample;
+ int num_samples;
+
+#ifdef __SUBSURFACE__
+ int ss_next_closure;
+ int ss_next_sample;
+ int next_hit;
+ int num_hits;
+
+ uint lcg_state;
+ SubsurfaceIntersection ss_isect;
+
+# ifdef __VOLUME__
+ VolumeStack volume_stack[VOLUME_STACK_SIZE];
+# endif /* __VOLUME__ */
+#endif /* __SUBSURFACE__ */
+} SplitBranchedState;
+
+#define SPLIT_DATA_BRANCHED_ENTRIES \
+	SPLIT_DATA_ENTRY(SplitBranchedState, branched_state, 1)
+#else
+# define SPLIT_DATA_BRANCHED_ENTRIES
+#endif /* __BRANCHED_PATH__ */
#define SPLIT_DATA_ENTRIES \
SPLIT_DATA_ENTRY(ccl_global RNG, rng, 1) \
@@ -72,9 +111,6 @@ typedef struct SplitParams {
SPLIT_DATA_ENTRY(ccl_global Ray, ray, 1) \
SPLIT_DATA_ENTRY(ccl_global PathState, path_state, 1) \
SPLIT_DATA_ENTRY(ccl_global Intersection, isect, 1) \
- SPLIT_DATA_ENTRY(ccl_global float3, ao_alpha, 1) \
- SPLIT_DATA_ENTRY(ccl_global float3, ao_bsdf, 1) \
- SPLIT_DATA_ENTRY(ccl_global Ray, ao_light_ray, 1) \
SPLIT_DATA_ENTRY(ccl_global BsdfEval, bsdf_eval, 1) \
SPLIT_DATA_ENTRY(ccl_global int, is_lamp, 1) \
SPLIT_DATA_ENTRY(ccl_global Ray, light_ray, 1) \
@@ -82,6 +118,7 @@ typedef struct SplitParams {
SPLIT_DATA_ENTRY(ccl_global uint, work_array, 1) \
SPLIT_DATA_ENTRY(ShaderData, sd, 1) \
SPLIT_DATA_ENTRY(ShaderData, sd_DL_shadow, 1) \
+ SPLIT_DATA_BRANCHED_ENTRIES \
SPLIT_DATA_DEBUG_ENTRIES \
/* struct that holds pointers to data in the shared state buffer */
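
SplitBranchedState above is the per-ray storage that makes those indirect loops resumable: state that the indirect bounce will clobber (path state, throughput, ray, shader data) is saved at the branch point, and the loop counters record where to pick up again. A toy model of that save-and-recreate pattern follows, with a deliberately tiny state and invented helper names standing in for kernel_split_branched_path_indirect_loop_init()/_end().

#include <stdio.h>

/* toy stand-ins for PathState/throughput; the real struct also stores the
 * ray, shader data, intersection and subsurface state */
typedef struct ToyPathState {
	int bounce;
	float throughput;
} ToyPathState;

typedef struct ToyBranchedState {
	ToyPathState path_state; /* saved copy, restored every iteration */
	int next_sample;         /* where to resume the indirect loop */
	int num_samples;
} ToyBranchedState;

static void branched_loop_init(ToyBranchedState *bs, const ToyPathState *live, int num_samples)
{
	bs->path_state = *live; /* save the state the loop will clobber */
	bs->next_sample = 0;
	bs->num_samples = num_samples;
}

/* returns 1 while iterations remain, recreating the live state each time */
static int branched_loop_iter(ToyBranchedState *bs, ToyPathState *live)
{
	if(bs->next_sample >= bs->num_samples)
		return 0;

	*live = bs->path_state;            /* recreate state from the stored copy */
	live->throughput /= bs->num_samples;
	bs->next_sample++;
	return 1;
}

int main(void)
{
	ToyPathState live = {0, 1.0f};
	ToyBranchedState bs;

	branched_loop_init(&bs, &live, 4);
	while(branched_loop_iter(&bs, &live))
		printf("sample %d, throughput %.2f\n", bs.next_sample, live.throughput);
	return 0;
}
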
diff --git a/intern/cycles/kernel/split/kernel_subsurface_scatter.h b/intern/cycles/kernel/split/kernel_subsurface_scatter.h
index 0b4d50c70ee..8364f185d75 100644
--- a/intern/cycles/kernel/split/kernel_subsurface_scatter.h
+++ b/intern/cycles/kernel/split/kernel_subsurface_scatter.h
@@ -16,42 +16,206 @@
CCL_NAMESPACE_BEGIN
+#if defined(__BRANCHED_PATH__) && defined(__SUBSURFACE__)
-ccl_device void kernel_subsurface_scatter(KernelGlobals *kg,
- ccl_local_param unsigned int* local_queue_atomics)
+ccl_device_inline void kernel_split_branched_path_subsurface_indirect_light_init(KernelGlobals *kg, int ray_index)
{
-#ifdef __SUBSURFACE__
- if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
- *local_queue_atomics = 0;
+ kernel_split_branched_path_indirect_loop_init(kg, ray_index);
+
+ SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
+
+ branched_state->ss_next_closure = 0;
+ branched_state->ss_next_sample = 0;
+
+ branched_state->num_hits = 0;
+ branched_state->next_hit = 0;
+
+ ADD_RAY_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_SUBSURFACE_INDIRECT);
+}
+
+ccl_device_noinline bool kernel_split_branched_path_subsurface_indirect_light_iter(KernelGlobals *kg, int ray_index)
+{
+ SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
+
+ ShaderData *sd = &branched_state->sd;
+ RNG rng = kernel_split_state.rng[ray_index];
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+ ShaderData *emission_sd = &kernel_split_state.sd_DL_shadow[ray_index];
+
+ for(int i = branched_state->ss_next_closure; i < sd->num_closure; i++) {
+ ShaderClosure *sc = &sd->closure[i];
+
+ if(!CLOSURE_IS_BSSRDF(sc->type))
+ continue;
+
+ /* set up random number generator */
+ if(branched_state->ss_next_sample == 0 && branched_state->next_hit == 0 &&
+ branched_state->next_closure == 0 && branched_state->next_sample == 0)
+ {
+ branched_state->lcg_state = lcg_state_init(&rng,
+ branched_state->path_state.rng_offset,
+ branched_state->path_state.sample,
+ 0x68bc21eb);
+ }
+ int num_samples = kernel_data.integrator.subsurface_samples;
+ float num_samples_inv = 1.0f/num_samples;
+ RNG bssrdf_rng = cmj_hash(rng, i);
+
+		/* do the subsurface scatter step with a copy of the shader data; this
+		 * will replace the BSSRDF with a diffuse BSDF closure */
+ for(int j = branched_state->ss_next_sample; j < num_samples; j++) {
+ ccl_global SubsurfaceIntersection *ss_isect = &branched_state->ss_isect;
+ float bssrdf_u, bssrdf_v;
+ path_branched_rng_2D(kg,
+ &bssrdf_rng,
+ &branched_state->path_state,
+ j,
+ num_samples,
+ PRNG_BSDF_U,
+ &bssrdf_u,
+ &bssrdf_v);
+
+			/* intersection is expensive, so avoid doing it multiple times for the same input */
+ if(branched_state->next_hit == 0 && branched_state->next_closure == 0 && branched_state->next_sample == 0) {
+ RNG lcg_state = branched_state->lcg_state;
+ SubsurfaceIntersection ss_isect_private;
+
+ branched_state->num_hits = subsurface_scatter_multi_intersect(kg,
+ &ss_isect_private,
+ sd,
+ sc,
+ &lcg_state,
+ bssrdf_u, bssrdf_v,
+ true);
+
+ branched_state->lcg_state = lcg_state;
+ *ss_isect = ss_isect_private;
+ }
+
+#ifdef __VOLUME__
+ Ray volume_ray = branched_state->ray;
+ bool need_update_volume_stack =
+ kernel_data.integrator.use_volumes &&
+			        (sd->object_flag & SD_OBJECT_INTERSECTS_VOLUME);
+#endif /* __VOLUME__ */
+
+ /* compute lighting with the BSDF closure */
+ for(int hit = branched_state->next_hit; hit < branched_state->num_hits; hit++) {
+ ShaderData *bssrdf_sd = &kernel_split_state.sd[ray_index];
+				*bssrdf_sd = *sd; /* note: the copy happens on each inner loop iteration; this
+				                   * is important as the indirect path will write into bssrdf_sd */
+
+ SubsurfaceIntersection ss_isect_private = *ss_isect;
+ subsurface_scatter_multi_setup(kg,
+ &ss_isect_private,
+ hit,
+ bssrdf_sd,
+ &branched_state->path_state,
+ branched_state->path_state.flag,
+ sc,
+ true);
+ *ss_isect = ss_isect_private;
+
+ ccl_global PathState *hit_state = &kernel_split_state.path_state[ray_index];
+ *hit_state = branched_state->path_state;
+
+ path_state_branch(hit_state, j, num_samples);
+
+#ifdef __VOLUME__
+ if(need_update_volume_stack) {
+					/* Set up the ray from the previous surface point to the new one. */
+ float3 P = ray_offset(bssrdf_sd->P, -bssrdf_sd->Ng);
+ volume_ray.D = normalize_len(P - volume_ray.P, &volume_ray.t);
+
+					/* this next part is expensive as it does a scene intersection, so only do it once */
+ if(branched_state->next_closure == 0 && branched_state->next_sample == 0) {
+ for(int k = 0; k < VOLUME_STACK_SIZE; k++) {
+ branched_state->volume_stack[k] = hit_state->volume_stack[k];
+ }
+
+ kernel_volume_stack_update_for_subsurface(kg,
+ emission_sd,
+ &volume_ray,
+ branched_state->volume_stack);
+ }
+
+ for(int k = 0; k < VOLUME_STACK_SIZE; k++) {
+ hit_state->volume_stack[k] = branched_state->volume_stack[k];
+ }
+ }
+#endif /* __VOLUME__ */
+
+#ifdef __EMISSION__
+ if(branched_state->next_closure == 0 && branched_state->next_sample == 0) {
+ /* direct light */
+ if(kernel_data.integrator.use_direct_light) {
+ int all = (kernel_data.integrator.sample_all_lights_direct) ||
+ (branched_state->path_state.flag & PATH_RAY_SHADOW_CATCHER);
+ kernel_branched_path_surface_connect_light(kg,
+ &rng,
+ bssrdf_sd,
+ emission_sd,
+ hit_state,
+ branched_state->throughput,
+ num_samples_inv,
+ L,
+ all);
+ }
+ }
+#endif /* __EMISSION__ */
+
+ /* indirect light */
+ if(kernel_split_branched_path_surface_indirect_light_iter(kg,
+ ray_index,
+ num_samples_inv,
+ bssrdf_sd,
+ false))
+ {
+ branched_state->ss_next_closure = i;
+ branched_state->ss_next_sample = j;
+ branched_state->next_hit = hit;
+
+ return true;
+ }
+
+ branched_state->next_closure = 0;
+ }
+
+ branched_state->next_hit = 0;
+ }
+
+ branched_state->ss_next_sample = 0;
+ }
+
+ kernel_split_branched_path_indirect_loop_end(kg, ray_index);
+
+ return false;
+}
+
+#endif /* __BRANCHED_PATH__ && __SUBSURFACE__ */
+
+ccl_device void kernel_subsurface_scatter(KernelGlobals *kg)
+{
+ int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ if(thread_index == 0) {
+ /* We will empty both queues in this kernel. */
+ kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0;
+ kernel_split_params.queue_index[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = 0;
}
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
ray_index = get_ray_index(kg, ray_index,
QUEUE_ACTIVE_AND_REGENERATED_RAYS,
kernel_split_state.queue_data,
kernel_split_params.queue_size,
- 0);
-
-#ifdef __COMPUTE_DEVICE_GPU__
- /* If we are executing on a GPU device, we exit all threads that are not
- * required.
- *
- * If we are executing on a CPU device, then we need to keep all threads
- * active since we have barrier() calls later in the kernel. CPU devices,
- * expect all threads to execute barrier statement.
- */
- if(ray_index == QUEUE_EMPTY_SLOT) {
- return;
- }
-#endif
-
- char enqueue_flag = 0;
-
-#ifndef __COMPUTE_DEVICE_GPU__
- if(ray_index != QUEUE_EMPTY_SLOT) {
-#endif
+ 1);
+ get_ray_index(kg, thread_index,
+ QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
+ 1);
+#ifdef __SUBSURFACE__
ccl_global char *ray_state = kernel_split_state.ray_state;
ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
@@ -64,34 +228,85 @@ ccl_device void kernel_subsurface_scatter(KernelGlobals *kg,
if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
if(sd->flag & SD_BSSRDF) {
- if(kernel_path_subsurface_scatter(kg,
- sd,
- emission_sd,
- L,
- state,
- &rng,
- ray,
- throughput,
- ss_indirect)) {
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
- enqueue_flag = 1;
+
+#ifdef __BRANCHED_PATH__
+ if(!kernel_data.integrator.branched) {
+#endif
+ if(kernel_path_subsurface_scatter(kg,
+ sd,
+ emission_sd,
+ L,
+ state,
+ &rng,
+ ray,
+ throughput,
+ ss_indirect)) {
+ kernel_split_path_end(kg, ray_index);
+ }
+#ifdef __BRANCHED_PATH__
+ }
+ else if(IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT)) {
+ float bssrdf_probability;
+ ShaderClosure *sc = subsurface_scatter_pick_closure(kg, sd, &bssrdf_probability);
+
+ /* modify throughput for picking bssrdf or bsdf */
+ *throughput *= bssrdf_probability;
+
+ /* do bssrdf scatter step if we picked a bssrdf closure */
+ if(sc) {
+ uint lcg_state = lcg_state_init(&rng, state->rng_offset, state->sample, 0x68bc21eb);
+
+ float bssrdf_u, bssrdf_v;
+ path_state_rng_2D(kg,
+ &rng,
+ state,
+ PRNG_BSDF_U,
+ &bssrdf_u, &bssrdf_v);
+ subsurface_scatter_step(kg,
+ sd,
+ state,
+ state->flag,
+ sc,
+ &lcg_state,
+ bssrdf_u, bssrdf_v,
+ false);
+ }
+ }
+ else {
+ kernel_split_branched_path_subsurface_indirect_light_init(kg, ray_index);
+
+ if(kernel_split_branched_path_subsurface_indirect_light_iter(kg, ray_index)) {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
+ }
}
+#endif
}
kernel_split_state.rng[ray_index] = rng;
}
-#ifndef __COMPUTE_DEVICE_GPU__
+# ifdef __BRANCHED_PATH__
+ if(ccl_global_id(0) == 0 && ccl_global_id(1) == 0) {
+ kernel_split_params.queue_index[QUEUE_SUBSURFACE_INDIRECT_ITER] = 0;
}
-#endif
- /* Enqueue RAY_UPDATE_BUFFER rays. */
- enqueue_ray_index_local(ray_index,
- QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
- enqueue_flag,
- kernel_split_params.queue_size,
- local_queue_atomics,
- kernel_split_state.queue_data,
- kernel_split_params.queue_index);
+ /* iter loop */
+ ray_index = get_ray_index(kg, ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0),
+ QUEUE_SUBSURFACE_INDIRECT_ITER,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
+ 1);
+
+ if(IS_STATE(ray_state, ray_index, RAY_SUBSURFACE_INDIRECT_NEXT_ITER)) {
+ /* for render passes, sum and reset indirect light pass variables
+ * for the next samples */
+ path_radiance_sum_indirect(&kernel_split_state.path_radiance[ray_index]);
+ path_radiance_reset_indirect(&kernel_split_state.path_radiance[ray_index]);
+
+ if(kernel_split_branched_path_subsurface_indirect_light_iter(kg, ray_index)) {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
+ }
+ }
+# endif /* __BRANCHED_PATH__ */
#endif /* __SUBSURFACE__ */
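
The subsurface iterator in this file is the most involved use of the pattern: three nested loops (closure, sample, hit) keep their indices in SplitBranchedState, and the function returns as soon as an indirect ray has to be traced by the other split kernels, resuming from the stored indices on the next call. The toy below models only that control flow; every name is invented, and unlike the real kernel (which resumes within the same hit via the surface indirect iterator) it simply advances to the next hit after each suspension.

#include <stdio.h>

typedef struct ResumeState {
	int next_closure;
	int next_sample;
	int next_hit;
} ResumeState;

/* pretend every (closure, sample, hit) combination spawns one indirect ray */
static int needs_indirect_ray(int closure, int sample, int hit)
{
	(void)closure; (void)sample; (void)hit;
	return 1;
}

/* returns 1 when suspended (an indirect ray is in flight), 0 when finished */
static int subsurface_iter(ResumeState *rs, int num_closures, int num_samples, int num_hits)
{
	for(int i = rs->next_closure; i < num_closures; i++) {
		for(int j = rs->next_sample; j < num_samples; j++) {
			for(int hit = rs->next_hit; hit < num_hits; hit++) {
				if(needs_indirect_ray(i, j, hit)) {
					/* store where to resume, then yield to the indirect path */
					rs->next_closure = i;
					rs->next_sample = j;
					rs->next_hit = hit + 1;
					return 1;
				}
			}
			rs->next_hit = 0;
		}
		rs->next_sample = 0;
	}
	return 0;
}

int main(void)
{
	ResumeState rs = {0, 0, 0};
	int iterations = 0;

	while(subsurface_iter(&rs, 2, 2, 2))
		iterations++;
	printf("suspended %d times\n", iterations); /* 8 */
	return 0;
}
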