git.blender.org/blender.git
Diffstat (limited to 'intern/cycles/kernel/split')
-rw-r--r--  intern/cycles/kernel/split/kernel_branched.h | 350
-rw-r--r--  intern/cycles/kernel/split/kernel_buffer_update.h | 227
-rw-r--r--  intern/cycles/kernel/split/kernel_data_init.h | 122
-rw-r--r--  intern/cycles/kernel/split/kernel_direct_lighting.h | 195
-rw-r--r--  intern/cycles/kernel/split/kernel_do_volume.h | 325
-rw-r--r--  intern/cycles/kernel/split/kernel_enqueue_inactive.h | 36
-rw-r--r--  intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h | 186
-rw-r--r--  intern/cycles/kernel/split/kernel_indirect_background.h | 72
-rw-r--r--  intern/cycles/kernel/split/kernel_indirect_subsurface.h | 79
-rw-r--r--  intern/cycles/kernel/split/kernel_lamp_emission.h | 62
-rw-r--r--  intern/cycles/kernel/split/kernel_next_iteration_setup.h | 350
-rw-r--r--  intern/cycles/kernel/split/kernel_path_init.h | 88
-rw-r--r--  intern/cycles/kernel/split/kernel_queue_enqueue.h | 85
-rw-r--r--  intern/cycles/kernel/split/kernel_scene_intersect.h | 83
-rw-r--r--  intern/cycles/kernel/split/kernel_shader_eval.h | 57
-rw-r--r--  intern/cycles/kernel/split/kernel_shader_setup.h | 78
-rw-r--r--  intern/cycles/kernel/split/kernel_shader_sort.h | 134
-rw-r--r--  intern/cycles/kernel/split/kernel_shadow_blocked_ao.h | 49
-rw-r--r--  intern/cycles/kernel/split/kernel_shadow_blocked_dl.h | 135
-rw-r--r--  intern/cycles/kernel/split/kernel_split_common.h | 75
-rw-r--r--  intern/cycles/kernel/split/kernel_split_data.h | 39
-rw-r--r--  intern/cycles/kernel/split/kernel_split_data_types.h | 170
-rw-r--r--  intern/cycles/kernel/split/kernel_subsurface_scatter.h | 475
23 files changed, 1713 insertions(+), 1759 deletions(-)
diff --git a/intern/cycles/kernel/split/kernel_branched.h b/intern/cycles/kernel/split/kernel_branched.h
index ed0a82067f1..e08d87ab618 100644
--- a/intern/cycles/kernel/split/kernel_branched.h
+++ b/intern/cycles/kernel/split/kernel_branched.h
@@ -19,215 +19,213 @@ CCL_NAMESPACE_BEGIN
#ifdef __BRANCHED_PATH__
/* sets up the various state needed to do an indirect loop */
-ccl_device_inline void kernel_split_branched_path_indirect_loop_init(KernelGlobals *kg, int ray_index)
+ccl_device_inline void kernel_split_branched_path_indirect_loop_init(KernelGlobals *kg,
+ int ray_index)
{
- SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
+ SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
- /* save a copy of the state to restore later */
-#define BRANCHED_STORE(name) \
- branched_state->name = kernel_split_state.name[ray_index];
+ /* save a copy of the state to restore later */
+# define BRANCHED_STORE(name) branched_state->name = kernel_split_state.name[ray_index];
- BRANCHED_STORE(path_state);
- BRANCHED_STORE(throughput);
- BRANCHED_STORE(ray);
- BRANCHED_STORE(isect);
- BRANCHED_STORE(ray_state);
+ BRANCHED_STORE(path_state);
+ BRANCHED_STORE(throughput);
+ BRANCHED_STORE(ray);
+ BRANCHED_STORE(isect);
+ BRANCHED_STORE(ray_state);
- *kernel_split_sd(branched_state_sd, ray_index) = *kernel_split_sd(sd, ray_index);
- for(int i = 0; i < kernel_split_sd(branched_state_sd, ray_index)->num_closure; i++) {
- kernel_split_sd(branched_state_sd, ray_index)->closure[i] = kernel_split_sd(sd, ray_index)->closure[i];
- }
+ *kernel_split_sd(branched_state_sd, ray_index) = *kernel_split_sd(sd, ray_index);
+ for (int i = 0; i < kernel_split_sd(branched_state_sd, ray_index)->num_closure; i++) {
+ kernel_split_sd(branched_state_sd, ray_index)->closure[i] =
+ kernel_split_sd(sd, ray_index)->closure[i];
+ }
-#undef BRANCHED_STORE
+# undef BRANCHED_STORE
- /* set loop counters to intial position */
- branched_state->next_closure = 0;
- branched_state->next_sample = 0;
+  /* set loop counters to initial position */
+ branched_state->next_closure = 0;
+ branched_state->next_sample = 0;
}
/* ends an indirect loop and restores the previous state */
-ccl_device_inline void kernel_split_branched_path_indirect_loop_end(KernelGlobals *kg, int ray_index)
+ccl_device_inline void kernel_split_branched_path_indirect_loop_end(KernelGlobals *kg,
+ int ray_index)
{
- SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
+ SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
- /* restore state */
-#define BRANCHED_RESTORE(name) \
- kernel_split_state.name[ray_index] = branched_state->name;
+ /* restore state */
+# define BRANCHED_RESTORE(name) kernel_split_state.name[ray_index] = branched_state->name;
- BRANCHED_RESTORE(path_state);
- BRANCHED_RESTORE(throughput);
- BRANCHED_RESTORE(ray);
- BRANCHED_RESTORE(isect);
- BRANCHED_RESTORE(ray_state);
+ BRANCHED_RESTORE(path_state);
+ BRANCHED_RESTORE(throughput);
+ BRANCHED_RESTORE(ray);
+ BRANCHED_RESTORE(isect);
+ BRANCHED_RESTORE(ray_state);
- *kernel_split_sd(sd, ray_index) = *kernel_split_sd(branched_state_sd, ray_index);
- for(int i = 0; i < kernel_split_sd(branched_state_sd, ray_index)->num_closure; i++) {
- kernel_split_sd(sd, ray_index)->closure[i] = kernel_split_sd(branched_state_sd, ray_index)->closure[i];
- }
+ *kernel_split_sd(sd, ray_index) = *kernel_split_sd(branched_state_sd, ray_index);
+ for (int i = 0; i < kernel_split_sd(branched_state_sd, ray_index)->num_closure; i++) {
+ kernel_split_sd(sd, ray_index)->closure[i] =
+ kernel_split_sd(branched_state_sd, ray_index)->closure[i];
+ }
-#undef BRANCHED_RESTORE
+# undef BRANCHED_RESTORE
- /* leave indirect loop */
- REMOVE_RAY_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_INDIRECT);
+ /* leave indirect loop */
+ REMOVE_RAY_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_INDIRECT);
}
-ccl_device_inline bool kernel_split_branched_indirect_start_shared(KernelGlobals *kg, int ray_index)
+ccl_device_inline bool kernel_split_branched_indirect_start_shared(KernelGlobals *kg,
+ int ray_index)
{
- ccl_global char *ray_state = kernel_split_state.ray_state;
+ ccl_global char *ray_state = kernel_split_state.ray_state;
- int inactive_ray = dequeue_ray_index(QUEUE_INACTIVE_RAYS,
- kernel_split_state.queue_data, kernel_split_params.queue_size, kernel_split_params.queue_index);
+ int inactive_ray = dequeue_ray_index(QUEUE_INACTIVE_RAYS,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
+ kernel_split_params.queue_index);
- if(!IS_STATE(ray_state, inactive_ray, RAY_INACTIVE)) {
- return false;
- }
+ if (!IS_STATE(ray_state, inactive_ray, RAY_INACTIVE)) {
+ return false;
+ }
-#define SPLIT_DATA_ENTRY(type, name, num) \
- if(num) { \
- kernel_split_state.name[inactive_ray] = kernel_split_state.name[ray_index]; \
- }
- SPLIT_DATA_ENTRIES_BRANCHED_SHARED
-#undef SPLIT_DATA_ENTRY
+# define SPLIT_DATA_ENTRY(type, name, num) \
+ if (num) { \
+ kernel_split_state.name[inactive_ray] = kernel_split_state.name[ray_index]; \
+ }
+ SPLIT_DATA_ENTRIES_BRANCHED_SHARED
+# undef SPLIT_DATA_ENTRY
- *kernel_split_sd(sd, inactive_ray) = *kernel_split_sd(sd, ray_index);
- for(int i = 0; i < kernel_split_sd(sd, ray_index)->num_closure; i++) {
- kernel_split_sd(sd, inactive_ray)->closure[i] = kernel_split_sd(sd, ray_index)->closure[i];
- }
+ *kernel_split_sd(sd, inactive_ray) = *kernel_split_sd(sd, ray_index);
+ for (int i = 0; i < kernel_split_sd(sd, ray_index)->num_closure; i++) {
+ kernel_split_sd(sd, inactive_ray)->closure[i] = kernel_split_sd(sd, ray_index)->closure[i];
+ }
- kernel_split_state.branched_state[inactive_ray].shared_sample_count = 0;
- kernel_split_state.branched_state[inactive_ray].original_ray = ray_index;
- kernel_split_state.branched_state[inactive_ray].waiting_on_shared_samples = false;
+ kernel_split_state.branched_state[inactive_ray].shared_sample_count = 0;
+ kernel_split_state.branched_state[inactive_ray].original_ray = ray_index;
+ kernel_split_state.branched_state[inactive_ray].waiting_on_shared_samples = false;
- PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
- PathRadiance *inactive_L = &kernel_split_state.path_radiance[inactive_ray];
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+ PathRadiance *inactive_L = &kernel_split_state.path_radiance[inactive_ray];
- path_radiance_init(inactive_L, kernel_data.film.use_light_pass);
- path_radiance_copy_indirect(inactive_L, L);
+ path_radiance_init(inactive_L, kernel_data.film.use_light_pass);
+ path_radiance_copy_indirect(inactive_L, L);
- ray_state[inactive_ray] = RAY_REGENERATED;
- ADD_RAY_FLAG(ray_state, inactive_ray, RAY_BRANCHED_INDIRECT_SHARED);
- ADD_RAY_FLAG(ray_state, inactive_ray, IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT));
+ ray_state[inactive_ray] = RAY_REGENERATED;
+ ADD_RAY_FLAG(ray_state, inactive_ray, RAY_BRANCHED_INDIRECT_SHARED);
+ ADD_RAY_FLAG(ray_state, inactive_ray, IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT));
- atomic_fetch_and_inc_uint32((ccl_global uint*)&kernel_split_state.branched_state[ray_index].shared_sample_count);
+ atomic_fetch_and_inc_uint32(
+ (ccl_global uint *)&kernel_split_state.branched_state[ray_index].shared_sample_count);
- return true;
+ return true;
}
/* bounce off surface and integrate indirect light */
-ccl_device_noinline bool kernel_split_branched_path_surface_indirect_light_iter(KernelGlobals *kg,
- int ray_index,
- float num_samples_adjust,
- ShaderData *saved_sd,
- bool reset_path_state,
- bool wait_for_shared)
+ccl_device_noinline bool kernel_split_branched_path_surface_indirect_light_iter(
+ KernelGlobals *kg,
+ int ray_index,
+ float num_samples_adjust,
+ ShaderData *saved_sd,
+ bool reset_path_state,
+ bool wait_for_shared)
{
- SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
-
- ShaderData *sd = saved_sd;
- PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
- float3 throughput = branched_state->throughput;
- ccl_global PathState *ps = &kernel_split_state.path_state[ray_index];
-
- float sum_sample_weight = 0.0f;
-#ifdef __DENOISING_FEATURES__
- if(ps->denoising_feature_weight > 0.0f) {
- for(int i = 0; i < sd->num_closure; i++) {
- const ShaderClosure *sc = &sd->closure[i];
-
- /* transparency is not handled here, but in outer loop */
- if(!CLOSURE_IS_BSDF(sc->type) || CLOSURE_IS_BSDF_TRANSPARENT(sc->type)) {
- continue;
- }
-
- sum_sample_weight += sc->sample_weight;
- }
- }
- else {
- sum_sample_weight = 1.0f;
- }
-#endif /* __DENOISING_FEATURES__ */
-
- for(int i = branched_state->next_closure; i < sd->num_closure; i++) {
- const ShaderClosure *sc = &sd->closure[i];
-
- if(!CLOSURE_IS_BSDF(sc->type))
- continue;
- /* transparency is not handled here, but in outer loop */
- if(sc->type == CLOSURE_BSDF_TRANSPARENT_ID)
- continue;
-
- int num_samples;
-
- if(CLOSURE_IS_BSDF_DIFFUSE(sc->type))
- num_samples = kernel_data.integrator.diffuse_samples;
- else if(CLOSURE_IS_BSDF_BSSRDF(sc->type))
- num_samples = 1;
- else if(CLOSURE_IS_BSDF_GLOSSY(sc->type))
- num_samples = kernel_data.integrator.glossy_samples;
- else
- num_samples = kernel_data.integrator.transmission_samples;
-
- num_samples = ceil_to_int(num_samples_adjust*num_samples);
-
- float num_samples_inv = num_samples_adjust/num_samples;
-
- for(int j = branched_state->next_sample; j < num_samples; j++) {
- if(reset_path_state) {
- *ps = branched_state->path_state;
- }
-
- ps->rng_hash = cmj_hash(branched_state->path_state.rng_hash, i);
-
- ccl_global float3 *tp = &kernel_split_state.throughput[ray_index];
- *tp = throughput;
-
- ccl_global Ray *bsdf_ray = &kernel_split_state.ray[ray_index];
-
- if(!kernel_branched_path_surface_bounce(kg,
- sd,
- sc,
- j,
- num_samples,
- tp,
- ps,
- &L->state,
- bsdf_ray,
- sum_sample_weight))
- {
- continue;
- }
-
- ps->rng_hash = branched_state->path_state.rng_hash;
-
- /* update state for next iteration */
- branched_state->next_closure = i;
- branched_state->next_sample = j+1;
-
- /* start the indirect path */
- *tp *= num_samples_inv;
-
- if(kernel_split_branched_indirect_start_shared(kg, ray_index)) {
- continue;
- }
-
- return true;
- }
-
- branched_state->next_sample = 0;
- }
-
- branched_state->next_closure = sd->num_closure;
-
- if(wait_for_shared) {
- branched_state->waiting_on_shared_samples = (branched_state->shared_sample_count > 0);
- if(branched_state->waiting_on_shared_samples) {
- return true;
- }
- }
-
- return false;
+ SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
+
+ ShaderData *sd = saved_sd;
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+ float3 throughput = branched_state->throughput;
+ ccl_global PathState *ps = &kernel_split_state.path_state[ray_index];
+
+ float sum_sample_weight = 0.0f;
+# ifdef __DENOISING_FEATURES__
+ if (ps->denoising_feature_weight > 0.0f) {
+ for (int i = 0; i < sd->num_closure; i++) {
+ const ShaderClosure *sc = &sd->closure[i];
+
+ /* transparency is not handled here, but in outer loop */
+ if (!CLOSURE_IS_BSDF(sc->type) || CLOSURE_IS_BSDF_TRANSPARENT(sc->type)) {
+ continue;
+ }
+
+ sum_sample_weight += sc->sample_weight;
+ }
+ }
+ else {
+ sum_sample_weight = 1.0f;
+ }
+# endif /* __DENOISING_FEATURES__ */
+
+ for (int i = branched_state->next_closure; i < sd->num_closure; i++) {
+ const ShaderClosure *sc = &sd->closure[i];
+
+ if (!CLOSURE_IS_BSDF(sc->type))
+ continue;
+ /* transparency is not handled here, but in outer loop */
+ if (sc->type == CLOSURE_BSDF_TRANSPARENT_ID)
+ continue;
+
+ int num_samples;
+
+ if (CLOSURE_IS_BSDF_DIFFUSE(sc->type))
+ num_samples = kernel_data.integrator.diffuse_samples;
+ else if (CLOSURE_IS_BSDF_BSSRDF(sc->type))
+ num_samples = 1;
+ else if (CLOSURE_IS_BSDF_GLOSSY(sc->type))
+ num_samples = kernel_data.integrator.glossy_samples;
+ else
+ num_samples = kernel_data.integrator.transmission_samples;
+
+ num_samples = ceil_to_int(num_samples_adjust * num_samples);
+
+ float num_samples_inv = num_samples_adjust / num_samples;
+
+ for (int j = branched_state->next_sample; j < num_samples; j++) {
+ if (reset_path_state) {
+ *ps = branched_state->path_state;
+ }
+
+ ps->rng_hash = cmj_hash(branched_state->path_state.rng_hash, i);
+
+ ccl_global float3 *tp = &kernel_split_state.throughput[ray_index];
+ *tp = throughput;
+
+ ccl_global Ray *bsdf_ray = &kernel_split_state.ray[ray_index];
+
+ if (!kernel_branched_path_surface_bounce(
+ kg, sd, sc, j, num_samples, tp, ps, &L->state, bsdf_ray, sum_sample_weight)) {
+ continue;
+ }
+
+ ps->rng_hash = branched_state->path_state.rng_hash;
+
+ /* update state for next iteration */
+ branched_state->next_closure = i;
+ branched_state->next_sample = j + 1;
+
+ /* start the indirect path */
+ *tp *= num_samples_inv;
+
+ if (kernel_split_branched_indirect_start_shared(kg, ray_index)) {
+ continue;
+ }
+
+ return true;
+ }
+
+ branched_state->next_sample = 0;
+ }
+
+ branched_state->next_closure = sd->num_closure;
+
+ if (wait_for_shared) {
+ branched_state->waiting_on_shared_samples = (branched_state->shared_sample_count > 0);
+ if (branched_state->waiting_on_shared_samples) {
+ return true;
+ }
+ }
+
+ return false;
}
-#endif /* __BRANCHED_PATH__ */
+#endif /* __BRANCHED_PATH__ */
CCL_NAMESPACE_END
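
The BRANCHED_STORE/BRANCHED_RESTORE pair above is an X-macro: one #define turns each field name into an assignment, so the field list is written once per direction and the save and restore sides cannot drift apart. A minimal standalone sketch of the same pattern, with hypothetical state fields standing in for the Cycles split-state arrays:

#include <cstdio>

/* Hypothetical per-ray state standing in for the kernel_split_state arrays. */
struct RayState {
  float throughput;
  int bounce;
};

static RayState state[4]; /* live state, one slot per ray */
static RayState saved[4]; /* copy taken at loop init */

static void indirect_loop_init(int ray_index)
{
/* save a copy of the state to restore later */
#define BRANCHED_STORE(name) saved[ray_index].name = state[ray_index].name;
  BRANCHED_STORE(throughput);
  BRANCHED_STORE(bounce);
#undef BRANCHED_STORE
}

static void indirect_loop_end(int ray_index)
{
/* restore state */
#define BRANCHED_RESTORE(name) state[ray_index].name = saved[ray_index].name;
  BRANCHED_RESTORE(throughput);
  BRANCHED_RESTORE(bounce);
#undef BRANCHED_RESTORE
}

int main()
{
  state[0].throughput = 1.0f;
  state[0].bounce = 0;
  indirect_loop_init(0);
  state[0].throughput = 0.25f; /* the indirect loop mutates the live state */
  state[0].bounce = 3;
  indirect_loop_end(0); /* original values come back */
  printf("%.2f %d\n", state[0].throughput, state[0].bounce); /* prints: 1.00 0 */
  return 0;
}

Compiled as plain C++, this prints the pre-branch values, showing that the live state survives whatever the indirect loop did to it in between.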
diff --git a/intern/cycles/kernel/split/kernel_buffer_update.h b/intern/cycles/kernel/split/kernel_buffer_update.h
index 18eec6372f1..e77743350dc 100644
--- a/intern/cycles/kernel/split/kernel_buffer_update.h
+++ b/intern/cycles/kernel/split/kernel_buffer_update.h
@@ -41,132 +41,133 @@ CCL_NAMESPACE_BEGIN
ccl_device void kernel_buffer_update(KernelGlobals *kg,
ccl_local_param unsigned int *local_queue_atomics)
{
- if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
- *local_queue_atomics = 0;
- }
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
-
- int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
- if(ray_index == 0) {
- /* We will empty this queue in this kernel. */
- kernel_split_params.queue_index[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = 0;
- }
- char enqueue_flag = 0;
- ray_index = get_ray_index(kg, ray_index,
- QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
- kernel_split_state.queue_data,
- kernel_split_params.queue_size,
- 1);
+ if (ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
+ *local_queue_atomics = 0;
+ }
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
+
+ int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ if (ray_index == 0) {
+ /* We will empty this queue in this kernel. */
+ kernel_split_params.queue_index[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = 0;
+ }
+ char enqueue_flag = 0;
+ ray_index = get_ray_index(kg,
+ ray_index,
+ QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
+ 1);
#ifdef __COMPUTE_DEVICE_GPU__
- /* If we are executing on a GPU device, we exit all threads that are not
- * required.
- *
- * If we are executing on a CPU device, then we need to keep all threads
- * active since we have barrier() calls later in the kernel. CPU devices,
- * expect all threads to execute barrier statement.
- */
- if(ray_index == QUEUE_EMPTY_SLOT) {
- return;
- }
+ /* If we are executing on a GPU device, we exit all threads that are not
+ * required.
+ *
+ * If we are executing on a CPU device, then we need to keep all threads
+   * active since we have barrier() calls later in the kernel. CPU devices
+   * expect all threads to execute the barrier statement.
+ */
+ if (ray_index == QUEUE_EMPTY_SLOT) {
+ return;
+ }
#endif
#ifndef __COMPUTE_DEVICE_GPU__
- if(ray_index != QUEUE_EMPTY_SLOT) {
+ if (ray_index != QUEUE_EMPTY_SLOT) {
#endif
- ccl_global char *ray_state = kernel_split_state.ray_state;
- ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
- PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
- ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
- ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
- bool ray_was_updated = false;
-
- if(IS_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER)) {
- ray_was_updated = true;
- uint sample = state->sample;
- uint buffer_offset = kernel_split_state.buffer_offset[ray_index];
- ccl_global float *buffer = kernel_split_params.tile.buffer + buffer_offset;
-
- /* accumulate result in output buffer */
- kernel_write_result(kg, buffer, sample, L);
-
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_TO_REGENERATE);
- }
-
- if(kernel_data.film.cryptomatte_passes) {
- /* Make sure no thread is writing to the buffers. */
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
- if(ray_was_updated && state->sample - 1 == kernel_data.integrator.aa_samples) {
- uint buffer_offset = kernel_split_state.buffer_offset[ray_index];
- ccl_global float *buffer = kernel_split_params.tile.buffer + buffer_offset;
- ccl_global float *cryptomatte_buffer = buffer + kernel_data.film.pass_cryptomatte;
- kernel_sort_id_slots(cryptomatte_buffer, 2 * kernel_data.film.cryptomatte_depth);
- }
- }
-
- if(IS_STATE(ray_state, ray_index, RAY_TO_REGENERATE)) {
- /* We have completed current work; So get next work */
- ccl_global uint *work_pools = kernel_split_params.work_pools;
- uint total_work_size = kernel_split_params.total_work_size;
- uint work_index;
-
- if(!get_next_work(kg, work_pools, total_work_size, ray_index, &work_index)) {
- /* If work is invalid, this means no more work is available and the thread may exit */
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_INACTIVE);
- }
-
- if(IS_STATE(ray_state, ray_index, RAY_TO_REGENERATE)) {
- ccl_global WorkTile *tile = &kernel_split_params.tile;
- uint x, y, sample;
- get_work_pixel(tile, work_index, &x, &y, &sample);
-
- /* Store buffer offset for writing to passes. */
- uint buffer_offset = (tile->offset + x + y*tile->stride) * kernel_data.film.pass_stride;
- kernel_split_state.buffer_offset[ray_index] = buffer_offset;
-
- /* Initialize random numbers and ray. */
- uint rng_hash;
- kernel_path_trace_setup(kg, sample, x, y, &rng_hash, ray);
-
- if(ray->t != 0.0f) {
- /* Initialize throughput, path radiance, Ray, PathState;
- * These rays proceed with path-iteration.
- */
- *throughput = make_float3(1.0f, 1.0f, 1.0f);
- path_radiance_init(L, kernel_data.film.use_light_pass);
- path_state_init(kg,
- AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]),
- state,
- rng_hash,
- sample,
- ray);
+ ccl_global char *ray_state = kernel_split_state.ray_state;
+ ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+ ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
+ ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
+ bool ray_was_updated = false;
+
+ if (IS_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER)) {
+ ray_was_updated = true;
+ uint sample = state->sample;
+ uint buffer_offset = kernel_split_state.buffer_offset[ray_index];
+ ccl_global float *buffer = kernel_split_params.tile.buffer + buffer_offset;
+
+ /* accumulate result in output buffer */
+ kernel_write_result(kg, buffer, sample, L);
+
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_TO_REGENERATE);
+ }
+
+ if (kernel_data.film.cryptomatte_passes) {
+ /* Make sure no thread is writing to the buffers. */
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
+ if (ray_was_updated && state->sample - 1 == kernel_data.integrator.aa_samples) {
+ uint buffer_offset = kernel_split_state.buffer_offset[ray_index];
+ ccl_global float *buffer = kernel_split_params.tile.buffer + buffer_offset;
+ ccl_global float *cryptomatte_buffer = buffer + kernel_data.film.pass_cryptomatte;
+ kernel_sort_id_slots(cryptomatte_buffer, 2 * kernel_data.film.cryptomatte_depth);
+ }
+ }
+
+ if (IS_STATE(ray_state, ray_index, RAY_TO_REGENERATE)) {
+    /* We have completed the current work, so get the next work */
+ ccl_global uint *work_pools = kernel_split_params.work_pools;
+ uint total_work_size = kernel_split_params.total_work_size;
+ uint work_index;
+
+ if (!get_next_work(kg, work_pools, total_work_size, ray_index, &work_index)) {
+ /* If work is invalid, this means no more work is available and the thread may exit */
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_INACTIVE);
+ }
+
+ if (IS_STATE(ray_state, ray_index, RAY_TO_REGENERATE)) {
+ ccl_global WorkTile *tile = &kernel_split_params.tile;
+ uint x, y, sample;
+ get_work_pixel(tile, work_index, &x, &y, &sample);
+
+ /* Store buffer offset for writing to passes. */
+ uint buffer_offset = (tile->offset + x + y * tile->stride) * kernel_data.film.pass_stride;
+ kernel_split_state.buffer_offset[ray_index] = buffer_offset;
+
+ /* Initialize random numbers and ray. */
+ uint rng_hash;
+ kernel_path_trace_setup(kg, sample, x, y, &rng_hash, ray);
+
+ if (ray->t != 0.0f) {
+ /* Initialize throughput, path radiance, Ray, PathState;
+ * These rays proceed with path-iteration.
+ */
+ *throughput = make_float3(1.0f, 1.0f, 1.0f);
+ path_radiance_init(L, kernel_data.film.use_light_pass);
+ path_state_init(kg,
+ AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]),
+ state,
+ rng_hash,
+ sample,
+ ray);
#ifdef __SUBSURFACE__
- kernel_path_subsurface_init_indirect(&kernel_split_state.ss_rays[ray_index]);
+ kernel_path_subsurface_init_indirect(&kernel_split_state.ss_rays[ray_index]);
#endif
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
- enqueue_flag = 1;
- }
- else {
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_TO_REGENERATE);
- }
- }
- }
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
+ enqueue_flag = 1;
+ }
+ else {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_TO_REGENERATE);
+ }
+ }
+ }
#ifndef __COMPUTE_DEVICE_GPU__
- }
+ }
#endif
- /* Enqueue RAY_REGENERATED rays into QUEUE_ACTIVE_AND_REGENERATED_RAYS;
- * These rays will be made active during next SceneIntersectkernel.
- */
- enqueue_ray_index_local(ray_index,
- QUEUE_ACTIVE_AND_REGENERATED_RAYS,
- enqueue_flag,
- kernel_split_params.queue_size,
- local_queue_atomics,
- kernel_split_state.queue_data,
- kernel_split_params.queue_index);
+ /* Enqueue RAY_REGENERATED rays into QUEUE_ACTIVE_AND_REGENERATED_RAYS;
+   * These rays will be made active during the next SceneIntersect kernel.
+ */
+ enqueue_ray_index_local(ray_index,
+ QUEUE_ACTIVE_AND_REGENERATED_RAYS,
+ enqueue_flag,
+ kernel_split_params.queue_size,
+ local_queue_atomics,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_index);
}
CCL_NAMESPACE_END
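
kernel_buffer_update regenerates a finished ray by decoding a flat work index into a pixel and sample via get_work_pixel, then deriving where that pixel's passes live as (tile->offset + x + y * tile->stride) * pass_stride. A sketch of that arithmetic under one plausible sample-major ordering (the decode layout is an assumption here, not necessarily the exact get_work_pixel implementation):

#include <cstdio>

/* Hypothetical tile description mirroring the WorkTile fields used above. */
struct Tile {
  unsigned x, y, w, h;     /* tile rectangle inside the frame */
  unsigned offset, stride; /* placement inside the full render buffer */
};

/* Decode a flat work index into (x, y, sample); sample-major order assumed. */
static void get_work_pixel(
    const Tile *tile, unsigned work_index, unsigned *x, unsigned *y, unsigned *sample)
{
  unsigned pixels = tile->w * tile->h;
  *sample = work_index / pixels; /* all pixels of sample s come before s + 1 */
  unsigned pixel = work_index % pixels;
  *x = tile->x + pixel % tile->w;
  *y = tile->y + pixel / tile->w;
}

int main()
{
  Tile tile = {16, 32, 8, 8, 0, 1024};
  const unsigned pass_stride = 4; /* floats stored per pixel */

  unsigned x, y, sample;
  get_work_pixel(&tile, 70, &x, &y, &sample); /* 70 = 1 * 64 + 6 -> sample 1 */

  /* Same offset arithmetic as in kernel_buffer_update above. */
  unsigned buffer_offset = (tile.offset + x + y * tile.stride) * pass_stride;
  printf("x=%u y=%u sample=%u offset=%u\n", x, y, sample, buffer_offset);
  return 0;
}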
diff --git a/intern/cycles/kernel/split/kernel_data_init.h b/intern/cycles/kernel/split/kernel_data_init.h
index 77fb61b80a8..52930843f56 100644
--- a/intern/cycles/kernel/split/kernel_data_init.h
+++ b/intern/cycles/kernel/split/kernel_data_init.h
@@ -28,82 +28,88 @@ ccl_device void kernel_data_init(
#else
void KERNEL_FUNCTION_FULL_NAME(data_init)(
#endif
- KernelGlobals *kg,
- ccl_constant KernelData *data,
- ccl_global void *split_data_buffer,
- int num_elements,
- ccl_global char *ray_state,
+ KernelGlobals *kg,
+ ccl_constant KernelData *data,
+ ccl_global void *split_data_buffer,
+ int num_elements,
+ ccl_global char *ray_state,
#ifdef __KERNEL_OPENCL__
- KERNEL_BUFFER_PARAMS,
+ KERNEL_BUFFER_PARAMS,
#endif
- int start_sample,
- int end_sample,
- int sx, int sy, int sw, int sh, int offset, int stride,
- ccl_global int *Queue_index, /* Tracks the number of elements in queues */
- int queuesize, /* size (capacity) of the queue */
- ccl_global char *use_queues_flag, /* flag to decide if scene-intersect kernel should use queues to fetch ray index */
- ccl_global unsigned int *work_pools, /* Work pool for each work group */
- unsigned int num_samples,
- ccl_global float *buffer)
+ int start_sample,
+ int end_sample,
+ int sx,
+ int sy,
+ int sw,
+ int sh,
+ int offset,
+ int stride,
+ ccl_global int *Queue_index, /* Tracks the number of elements in queues */
+ int queuesize, /* size (capacity) of the queue */
+ ccl_global char *
+ use_queues_flag, /* flag to decide if scene-intersect kernel should use queues to fetch ray index */
+ ccl_global unsigned int *work_pools, /* Work pool for each work group */
+ unsigned int num_samples,
+ ccl_global float *buffer)
{
#ifdef KERNEL_STUB
- STUB_ASSERT(KERNEL_ARCH, data_init);
+ STUB_ASSERT(KERNEL_ARCH, data_init);
#else
-#ifdef __KERNEL_OPENCL__
- kg->data = data;
-#endif
+# ifdef __KERNEL_OPENCL__
+ kg->data = data;
+# endif
- kernel_split_params.tile.x = sx;
- kernel_split_params.tile.y = sy;
- kernel_split_params.tile.w = sw;
- kernel_split_params.tile.h = sh;
+ kernel_split_params.tile.x = sx;
+ kernel_split_params.tile.y = sy;
+ kernel_split_params.tile.w = sw;
+ kernel_split_params.tile.h = sh;
- kernel_split_params.tile.start_sample = start_sample;
- kernel_split_params.tile.num_samples = num_samples;
+ kernel_split_params.tile.start_sample = start_sample;
+ kernel_split_params.tile.num_samples = num_samples;
- kernel_split_params.tile.offset = offset;
- kernel_split_params.tile.stride = stride;
+ kernel_split_params.tile.offset = offset;
+ kernel_split_params.tile.stride = stride;
- kernel_split_params.tile.buffer = buffer;
+ kernel_split_params.tile.buffer = buffer;
- kernel_split_params.total_work_size = sw * sh * num_samples;
+ kernel_split_params.total_work_size = sw * sh * num_samples;
- kernel_split_params.work_pools = work_pools;
+ kernel_split_params.work_pools = work_pools;
- kernel_split_params.queue_index = Queue_index;
- kernel_split_params.queue_size = queuesize;
- kernel_split_params.use_queues_flag = use_queues_flag;
+ kernel_split_params.queue_index = Queue_index;
+ kernel_split_params.queue_size = queuesize;
+ kernel_split_params.use_queues_flag = use_queues_flag;
- split_data_init(kg, &kernel_split_state, num_elements, split_data_buffer, ray_state);
+ split_data_init(kg, &kernel_split_state, num_elements, split_data_buffer, ray_state);
-#ifdef __KERNEL_OPENCL__
- kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS);
- kernel_set_buffer_info(kg);
-#endif
+# ifdef __KERNEL_OPENCL__
+ kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS);
+ kernel_set_buffer_info(kg);
+# endif
+
+ int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+
+ /* Initialize queue data and queue index. */
+ if (thread_index < queuesize) {
+ for (int i = 0; i < NUM_QUEUES; i++) {
+ kernel_split_state.queue_data[i * queuesize + thread_index] = QUEUE_EMPTY_SLOT;
+ }
+ }
+
+ if (thread_index == 0) {
+ for (int i = 0; i < NUM_QUEUES; i++) {
+ Queue_index[i] = 0;
+ }
- int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
-
- /* Initialize queue data and queue index. */
- if(thread_index < queuesize) {
- for(int i = 0; i < NUM_QUEUES; i++) {
- kernel_split_state.queue_data[i * queuesize + thread_index] = QUEUE_EMPTY_SLOT;
- }
- }
-
- if(thread_index == 0) {
- for(int i = 0; i < NUM_QUEUES; i++) {
- Queue_index[i] = 0;
- }
-
- /* The scene-intersect kernel should not use the queues very first time.
- * since the queue would be empty.
- */
- *use_queues_flag = 0;
- }
-#endif /* KERENL_STUB */
+    /* The scene-intersect kernel should not use the queues the very first time,
+     * since the queues would be empty.
+ */
+ *use_queues_flag = 0;
+ }
+#endif /* KERNEL_STUB */
}
CCL_NAMESPACE_END
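
The initialization above relies on all NUM_QUEUES queues sharing one flat array, so slot thread_index of queue i lives at i * queuesize + thread_index; every thread clears its own slot in each queue while thread 0 zeroes the per-queue counters. A serialized CPU sketch of that layout (constants hypothetical, the GPU runs one call per thread in parallel):

#include <cstdio>

enum { NUM_QUEUES = 4, QUEUE_SIZE = 8, QUEUE_EMPTY_SLOT = -1 };

static int queue_data[NUM_QUEUES * QUEUE_SIZE];
static int queue_index[NUM_QUEUES]; /* element count per queue */

/* What each GPU thread does in kernel_data_init, serialized here. */
static void data_init_thread(int thread_index)
{
  if (thread_index < QUEUE_SIZE) {
    for (int i = 0; i < NUM_QUEUES; i++) {
      queue_data[i * QUEUE_SIZE + thread_index] = QUEUE_EMPTY_SLOT;
    }
  }
  if (thread_index == 0) {
    for (int i = 0; i < NUM_QUEUES; i++) {
      queue_index[i] = 0;
    }
  }
}

int main()
{
  for (int t = 0; t < QUEUE_SIZE; t++) {
    data_init_thread(t);
  }
  printf("queue 2, slot 5: %d (empty)\n", queue_data[2 * QUEUE_SIZE + 5]);
  return 0;
}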
diff --git a/intern/cycles/kernel/split/kernel_direct_lighting.h b/intern/cycles/kernel/split/kernel_direct_lighting.h
index ca79602c565..b2ca59d60cc 100644
--- a/intern/cycles/kernel/split/kernel_direct_lighting.h
+++ b/intern/cycles/kernel/split/kernel_direct_lighting.h
@@ -43,116 +43,111 @@ CCL_NAMESPACE_BEGIN
ccl_device void kernel_direct_lighting(KernelGlobals *kg,
ccl_local_param unsigned int *local_queue_atomics)
{
- if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
- *local_queue_atomics = 0;
- }
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
-
- char enqueue_flag = 0;
- int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
- ray_index = get_ray_index(kg, ray_index,
- QUEUE_ACTIVE_AND_REGENERATED_RAYS,
- kernel_split_state.queue_data,
- kernel_split_params.queue_size,
- 0);
-
- if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE)) {
- ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
- ShaderData *sd = kernel_split_sd(sd, ray_index);
-
- /* direct lighting */
+ if (ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
+ *local_queue_atomics = 0;
+ }
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
+
+ char enqueue_flag = 0;
+ int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ ray_index = get_ray_index(kg,
+ ray_index,
+ QUEUE_ACTIVE_AND_REGENERATED_RAYS,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
+ 0);
+
+ if (IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE)) {
+ ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+ ShaderData *sd = kernel_split_sd(sd, ray_index);
+
+ /* direct lighting */
#ifdef __EMISSION__
- bool flag = (kernel_data.integrator.use_direct_light &&
- (sd->flag & SD_BSDF_HAS_EVAL));
+ bool flag = (kernel_data.integrator.use_direct_light && (sd->flag & SD_BSDF_HAS_EVAL));
# ifdef __BRANCHED_PATH__
- if(flag && kernel_data.integrator.branched) {
- flag = false;
- enqueue_flag = 1;
- }
-# endif /* __BRANCHED_PATH__ */
+ if (flag && kernel_data.integrator.branched) {
+ flag = false;
+ enqueue_flag = 1;
+ }
+# endif /* __BRANCHED_PATH__ */
# ifdef __SHADOW_TRICKS__
- if(flag && state->flag & PATH_RAY_SHADOW_CATCHER) {
- flag = false;
- enqueue_flag = 1;
- }
-# endif /* __SHADOW_TRICKS__ */
-
- if(flag) {
- /* Sample illumination from lights to find path contribution. */
- float light_u, light_v;
- path_state_rng_2D(kg, state, PRNG_LIGHT_U, &light_u, &light_v);
- float terminate = path_state_rng_light_termination(kg, state);
-
- LightSample ls;
- if(light_sample(kg,
- light_u, light_v,
- sd->time,
- sd->P,
- state->bounce,
- &ls)) {
-
- Ray light_ray;
- light_ray.time = sd->time;
-
- BsdfEval L_light;
- bool is_lamp;
- if(direct_emission(kg,
- sd,
- AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]),
- &ls,
- state,
- &light_ray,
- &L_light,
- &is_lamp,
- terminate))
- {
- /* Write intermediate data to global memory to access from
- * the next kernel.
- */
- kernel_split_state.light_ray[ray_index] = light_ray;
- kernel_split_state.bsdf_eval[ray_index] = L_light;
- kernel_split_state.is_lamp[ray_index] = is_lamp;
- /* Mark ray state for next shadow kernel. */
- enqueue_flag = 1;
- }
- }
- }
-#endif /* __EMISSION__ */
- }
+ if (flag && state->flag & PATH_RAY_SHADOW_CATCHER) {
+ flag = false;
+ enqueue_flag = 1;
+ }
+# endif /* __SHADOW_TRICKS__ */
+
+ if (flag) {
+ /* Sample illumination from lights to find path contribution. */
+ float light_u, light_v;
+ path_state_rng_2D(kg, state, PRNG_LIGHT_U, &light_u, &light_v);
+ float terminate = path_state_rng_light_termination(kg, state);
+
+ LightSample ls;
+ if (light_sample(kg, light_u, light_v, sd->time, sd->P, state->bounce, &ls)) {
+
+ Ray light_ray;
+ light_ray.time = sd->time;
+
+ BsdfEval L_light;
+ bool is_lamp;
+ if (direct_emission(kg,
+ sd,
+ AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]),
+ &ls,
+ state,
+ &light_ray,
+ &L_light,
+ &is_lamp,
+ terminate)) {
+ /* Write intermediate data to global memory to access from
+ * the next kernel.
+ */
+ kernel_split_state.light_ray[ray_index] = light_ray;
+ kernel_split_state.bsdf_eval[ray_index] = L_light;
+ kernel_split_state.is_lamp[ray_index] = is_lamp;
+ /* Mark ray state for next shadow kernel. */
+ enqueue_flag = 1;
+ }
+ }
+ }
+#endif /* __EMISSION__ */
+ }
#ifdef __EMISSION__
- /* Enqueue RAY_SHADOW_RAY_CAST_DL rays. */
- enqueue_ray_index_local(ray_index,
- QUEUE_SHADOW_RAY_CAST_DL_RAYS,
- enqueue_flag,
- kernel_split_params.queue_size,
- local_queue_atomics,
- kernel_split_state.queue_data,
- kernel_split_params.queue_index);
+ /* Enqueue RAY_SHADOW_RAY_CAST_DL rays. */
+ enqueue_ray_index_local(ray_index,
+ QUEUE_SHADOW_RAY_CAST_DL_RAYS,
+ enqueue_flag,
+ kernel_split_params.queue_size,
+ local_queue_atomics,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_index);
#endif
#ifdef __BRANCHED_PATH__
- /* Enqueue RAY_LIGHT_INDIRECT_NEXT_ITER rays
- * this is the last kernel before next_iteration_setup that uses local atomics so we do this here
- */
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
- if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
- *local_queue_atomics = 0;
- }
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
-
- ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
- enqueue_ray_index_local(ray_index,
- QUEUE_LIGHT_INDIRECT_ITER,
- IS_STATE(kernel_split_state.ray_state, ray_index, RAY_LIGHT_INDIRECT_NEXT_ITER),
- kernel_split_params.queue_size,
- local_queue_atomics,
- kernel_split_state.queue_data,
- kernel_split_params.queue_index);
-
-#endif /* __BRANCHED_PATH__ */
+  /* Enqueue RAY_LIGHT_INDIRECT_NEXT_ITER rays.
+   * This is the last kernel before next_iteration_setup that uses local atomics, so we do it here.
+ */
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
+ if (ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
+ *local_queue_atomics = 0;
+ }
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
+
+ ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ enqueue_ray_index_local(
+ ray_index,
+ QUEUE_LIGHT_INDIRECT_ITER,
+ IS_STATE(kernel_split_state.ray_state, ray_index, RAY_LIGHT_INDIRECT_NEXT_ITER),
+ kernel_split_params.queue_size,
+ local_queue_atomics,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_index);
+
+#endif /* __BRANCHED_PATH__ */
}
CCL_NAMESPACE_END
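
Note how kernel_direct_lighting never traces the shadow ray itself: it stores light_ray, bsdf_eval and is_lamp in per-ray global arrays and only raises enqueue_flag, leaving the occlusion test to the later shadow-blocked kernel (kernel_shadow_blocked_dl.h in the diffstat). A minimal producer/consumer sketch of that handoff, with placeholder types and values rather than the Cycles API:

#include <cstdio>

struct Ray { float origin[3], dir[3], t; };
struct BsdfEval { float rgb[3]; };

enum { NUM_RAYS = 4 };

/* Per-ray "global memory" handoff, like kernel_split_state.light_ray etc. */
static Ray g_light_ray[NUM_RAYS];
static BsdfEval g_bsdf_eval[NUM_RAYS];
static bool g_is_lamp[NUM_RAYS];
static bool g_enqueued[NUM_RAYS];

/* Producer kernel: sample a light and stash the shadow ray for later. */
static void direct_lighting(int ray_index)
{
  Ray light_ray = {{0, 0, 0}, {0, 0, 1}, 10.0f}; /* stand-in light sample */
  BsdfEval eval = {{0.5f, 0.5f, 0.5f}};

  g_light_ray[ray_index] = light_ray;
  g_bsdf_eval[ray_index] = eval;
  g_is_lamp[ray_index] = true;
  g_enqueued[ray_index] = true; /* plays the role of enqueue_flag */
}

/* Consumer kernel: trace the stored shadow ray, accumulate if unblocked. */
static void shadow_blocked_dl(int ray_index, float *out_rgb)
{
  if (!g_enqueued[ray_index])
    return;
  bool blocked = false; /* a real kernel would intersect the scene here */
  if (!blocked) {
    for (int c = 0; c < 3; c++)
      out_rgb[c] += g_bsdf_eval[ray_index].rgb[c];
  }
}

int main()
{
  float rgb[3] = {0, 0, 0};
  direct_lighting(0);
  shadow_blocked_dl(0, rgb);
  printf("contribution: %.2f %.2f %.2f\n", rgb[0], rgb[1], rgb[2]);
  return 0;
}

Splitting the work this way lets all pending shadow rays be traced together in one coherent kernel launch instead of diverging inside the shading kernel.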
diff --git a/intern/cycles/kernel/split/kernel_do_volume.h b/intern/cycles/kernel/split/kernel_do_volume.h
index fb5bd3d48dd..45b839db05f 100644
--- a/intern/cycles/kernel/split/kernel_do_volume.h
+++ b/intern/cycles/kernel/split/kernel_do_volume.h
@@ -18,203 +18,210 @@ CCL_NAMESPACE_BEGIN
#if defined(__BRANCHED_PATH__) && defined(__VOLUME__)
-ccl_device_inline void kernel_split_branched_path_volume_indirect_light_init(KernelGlobals *kg, int ray_index)
+ccl_device_inline void kernel_split_branched_path_volume_indirect_light_init(KernelGlobals *kg,
+ int ray_index)
{
- kernel_split_branched_path_indirect_loop_init(kg, ray_index);
+ kernel_split_branched_path_indirect_loop_init(kg, ray_index);
- ADD_RAY_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_VOLUME_INDIRECT);
+ ADD_RAY_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_VOLUME_INDIRECT);
}
-ccl_device_noinline bool kernel_split_branched_path_volume_indirect_light_iter(KernelGlobals *kg, int ray_index)
+ccl_device_noinline bool kernel_split_branched_path_volume_indirect_light_iter(KernelGlobals *kg,
+ int ray_index)
{
- SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
+ SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
- ShaderData *sd = kernel_split_sd(sd, ray_index);
- PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
- ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
+ ShaderData *sd = kernel_split_sd(sd, ray_index);
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+ ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
- /* GPU: no decoupled ray marching, scatter probalistically */
- int num_samples = kernel_data.integrator.volume_samples;
- float num_samples_inv = 1.0f/num_samples;
+  /* GPU: no decoupled ray marching, scatter probabilistically */
+ int num_samples = kernel_data.integrator.volume_samples;
+ float num_samples_inv = 1.0f / num_samples;
- Ray volume_ray = branched_state->ray;
- volume_ray.t = (!IS_STATE(&branched_state->ray_state, 0, RAY_HIT_BACKGROUND)) ? branched_state->isect.t : FLT_MAX;
+ Ray volume_ray = branched_state->ray;
+ volume_ray.t = (!IS_STATE(&branched_state->ray_state, 0, RAY_HIT_BACKGROUND)) ?
+ branched_state->isect.t :
+ FLT_MAX;
- bool heterogeneous = volume_stack_is_heterogeneous(kg, branched_state->path_state.volume_stack);
+ bool heterogeneous = volume_stack_is_heterogeneous(kg, branched_state->path_state.volume_stack);
- for(int j = branched_state->next_sample; j < num_samples; j++) {
- ccl_global PathState *ps = &kernel_split_state.path_state[ray_index];
- *ps = branched_state->path_state;
+ for (int j = branched_state->next_sample; j < num_samples; j++) {
+ ccl_global PathState *ps = &kernel_split_state.path_state[ray_index];
+ *ps = branched_state->path_state;
- ccl_global Ray *pray = &kernel_split_state.ray[ray_index];
- *pray = branched_state->ray;
+ ccl_global Ray *pray = &kernel_split_state.ray[ray_index];
+ *pray = branched_state->ray;
- ccl_global float3 *tp = &kernel_split_state.throughput[ray_index];
- *tp = branched_state->throughput * num_samples_inv;
+ ccl_global float3 *tp = &kernel_split_state.throughput[ray_index];
+ *tp = branched_state->throughput * num_samples_inv;
- /* branch RNG state */
- path_state_branch(ps, j, num_samples);
+ /* branch RNG state */
+ path_state_branch(ps, j, num_samples);
- /* integrate along volume segment with distance sampling */
- VolumeIntegrateResult result = kernel_volume_integrate(
- kg, ps, sd, &volume_ray, L, tp, heterogeneous);
+ /* integrate along volume segment with distance sampling */
+ VolumeIntegrateResult result = kernel_volume_integrate(
+ kg, ps, sd, &volume_ray, L, tp, heterogeneous);
# ifdef __VOLUME_SCATTER__
- if(result == VOLUME_PATH_SCATTERED) {
- /* direct lighting */
- kernel_path_volume_connect_light(kg, sd, emission_sd, *tp, &branched_state->path_state, L);
-
- /* indirect light bounce */
- if(!kernel_path_volume_bounce(kg, sd, tp, ps, &L->state, pray)) {
- continue;
- }
-
- /* start the indirect path */
- branched_state->next_closure = 0;
- branched_state->next_sample = j+1;
-
- /* Attempting to share too many samples is slow for volumes as it causes us to
- * loop here more and have many calls to kernel_volume_integrate which evaluates
- * shaders. The many expensive shader evaluations cause the work load to become
- * unbalanced and many threads to become idle in this kernel. Limiting the
- * number of shared samples here helps quite a lot.
- */
- if(branched_state->shared_sample_count < 2) {
- if(kernel_split_branched_indirect_start_shared(kg, ray_index)) {
- continue;
- }
- }
-
- return true;
- }
+ if (result == VOLUME_PATH_SCATTERED) {
+ /* direct lighting */
+ kernel_path_volume_connect_light(kg, sd, emission_sd, *tp, &branched_state->path_state, L);
+
+ /* indirect light bounce */
+ if (!kernel_path_volume_bounce(kg, sd, tp, ps, &L->state, pray)) {
+ continue;
+ }
+
+ /* start the indirect path */
+ branched_state->next_closure = 0;
+ branched_state->next_sample = j + 1;
+
+ /* Attempting to share too many samples is slow for volumes as it causes us to
+       * loop here more and have many calls to kernel_volume_integrate, which evaluates
+       * shaders. The many expensive shader evaluations cause the workload to become
+ * unbalanced and many threads to become idle in this kernel. Limiting the
+ * number of shared samples here helps quite a lot.
+ */
+ if (branched_state->shared_sample_count < 2) {
+ if (kernel_split_branched_indirect_start_shared(kg, ray_index)) {
+ continue;
+ }
+ }
+
+ return true;
+ }
# endif
- }
+ }
- branched_state->next_sample = num_samples;
+ branched_state->next_sample = num_samples;
- branched_state->waiting_on_shared_samples = (branched_state->shared_sample_count > 0);
- if(branched_state->waiting_on_shared_samples) {
- return true;
- }
+ branched_state->waiting_on_shared_samples = (branched_state->shared_sample_count > 0);
+ if (branched_state->waiting_on_shared_samples) {
+ return true;
+ }
- kernel_split_branched_path_indirect_loop_end(kg, ray_index);
+ kernel_split_branched_path_indirect_loop_end(kg, ray_index);
- /* todo: avoid this calculation using decoupled ray marching */
- float3 throughput = kernel_split_state.throughput[ray_index];
- kernel_volume_shadow(kg, emission_sd, &kernel_split_state.path_state[ray_index], &volume_ray, &throughput);
- kernel_split_state.throughput[ray_index] = throughput;
+ /* todo: avoid this calculation using decoupled ray marching */
+ float3 throughput = kernel_split_state.throughput[ray_index];
+ kernel_volume_shadow(
+ kg, emission_sd, &kernel_split_state.path_state[ray_index], &volume_ray, &throughput);
+ kernel_split_state.throughput[ray_index] = throughput;
- return false;
+ return false;
}
-#endif /* __BRANCHED_PATH__ && __VOLUME__ */
+#endif /* __BRANCHED_PATH__ && __VOLUME__ */
ccl_device void kernel_do_volume(KernelGlobals *kg)
{
#ifdef __VOLUME__
- /* We will empty this queue in this kernel. */
- if(ccl_global_id(0) == 0 && ccl_global_id(1) == 0) {
- kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0;
+ /* We will empty this queue in this kernel. */
+ if (ccl_global_id(0) == 0 && ccl_global_id(1) == 0) {
+ kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0;
# ifdef __BRANCHED_PATH__
- kernel_split_params.queue_index[QUEUE_VOLUME_INDIRECT_ITER] = 0;
-# endif /* __BRANCHED_PATH__ */
- }
-
- int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
-
- if(*kernel_split_params.use_queues_flag) {
- ray_index = get_ray_index(kg, ray_index,
- QUEUE_ACTIVE_AND_REGENERATED_RAYS,
- kernel_split_state.queue_data,
- kernel_split_params.queue_size,
- 1);
- }
-
- ccl_global char *ray_state = kernel_split_state.ray_state;
-
- PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
- ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
-
- if(IS_STATE(ray_state, ray_index, RAY_ACTIVE) ||
- IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) {
- ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
- ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
- ccl_global Intersection *isect = &kernel_split_state.isect[ray_index];
- ShaderData *sd = kernel_split_sd(sd, ray_index);
- ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
-
- bool hit = ! IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND);
-
- /* Sanitize volume stack. */
- if(!hit) {
- kernel_volume_clean_stack(kg, state->volume_stack);
- }
- /* volume attenuation, emission, scatter */
- if(state->volume_stack[0].shader != SHADER_NONE) {
- Ray volume_ray = *ray;
- volume_ray.t = (hit)? isect->t: FLT_MAX;
+ kernel_split_params.queue_index[QUEUE_VOLUME_INDIRECT_ITER] = 0;
+# endif /* __BRANCHED_PATH__ */
+ }
+
+ int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+
+ if (*kernel_split_params.use_queues_flag) {
+ ray_index = get_ray_index(kg,
+ ray_index,
+ QUEUE_ACTIVE_AND_REGENERATED_RAYS,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
+ 1);
+ }
+
+ ccl_global char *ray_state = kernel_split_state.ray_state;
+
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+ ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+
+ if (IS_STATE(ray_state, ray_index, RAY_ACTIVE) ||
+ IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) {
+ ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
+ ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
+ ccl_global Intersection *isect = &kernel_split_state.isect[ray_index];
+ ShaderData *sd = kernel_split_sd(sd, ray_index);
+ ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
+
+ bool hit = !IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND);
+
+ /* Sanitize volume stack. */
+ if (!hit) {
+ kernel_volume_clean_stack(kg, state->volume_stack);
+ }
+ /* volume attenuation, emission, scatter */
+ if (state->volume_stack[0].shader != SHADER_NONE) {
+ Ray volume_ray = *ray;
+ volume_ray.t = (hit) ? isect->t : FLT_MAX;
# ifdef __BRANCHED_PATH__
- if(!kernel_data.integrator.branched || IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT)) {
-# endif /* __BRANCHED_PATH__ */
- bool heterogeneous = volume_stack_is_heterogeneous(kg, state->volume_stack);
+ if (!kernel_data.integrator.branched ||
+ IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT)) {
+# endif /* __BRANCHED_PATH__ */
+ bool heterogeneous = volume_stack_is_heterogeneous(kg, state->volume_stack);
- {
- /* integrate along volume segment with distance sampling */
- VolumeIntegrateResult result = kernel_volume_integrate(
- kg, state, sd, &volume_ray, L, throughput, heterogeneous);
+ {
+ /* integrate along volume segment with distance sampling */
+ VolumeIntegrateResult result = kernel_volume_integrate(
+ kg, state, sd, &volume_ray, L, throughput, heterogeneous);
# ifdef __VOLUME_SCATTER__
- if(result == VOLUME_PATH_SCATTERED) {
- /* direct lighting */
- kernel_path_volume_connect_light(kg, sd, emission_sd, *throughput, state, L);
-
- /* indirect light bounce */
- if(kernel_path_volume_bounce(kg, sd, throughput, state, &L->state, ray)) {
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
- }
- else {
- kernel_split_path_end(kg, ray_index);
- }
- }
-# endif /* __VOLUME_SCATTER__ */
- }
+ if (result == VOLUME_PATH_SCATTERED) {
+ /* direct lighting */
+ kernel_path_volume_connect_light(kg, sd, emission_sd, *throughput, state, L);
+
+ /* indirect light bounce */
+ if (kernel_path_volume_bounce(kg, sd, throughput, state, &L->state, ray)) {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
+ }
+ else {
+ kernel_split_path_end(kg, ray_index);
+ }
+ }
+# endif /* __VOLUME_SCATTER__ */
+ }
# ifdef __BRANCHED_PATH__
- }
- else {
- kernel_split_branched_path_volume_indirect_light_init(kg, ray_index);
-
- if(kernel_split_branched_path_volume_indirect_light_iter(kg, ray_index)) {
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
- }
- }
-# endif /* __BRANCHED_PATH__ */
- }
- }
+ }
+ else {
+ kernel_split_branched_path_volume_indirect_light_init(kg, ray_index);
+
+ if (kernel_split_branched_path_volume_indirect_light_iter(kg, ray_index)) {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
+ }
+ }
+# endif /* __BRANCHED_PATH__ */
+ }
+ }
# ifdef __BRANCHED_PATH__
- /* iter loop */
- ray_index = get_ray_index(kg, ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0),
- QUEUE_VOLUME_INDIRECT_ITER,
- kernel_split_state.queue_data,
- kernel_split_params.queue_size,
- 1);
-
- if(IS_STATE(ray_state, ray_index, RAY_VOLUME_INDIRECT_NEXT_ITER)) {
- /* for render passes, sum and reset indirect light pass variables
- * for the next samples */
- path_radiance_sum_indirect(&kernel_split_state.path_radiance[ray_index]);
- path_radiance_reset_indirect(&kernel_split_state.path_radiance[ray_index]);
-
- if(kernel_split_branched_path_volume_indirect_light_iter(kg, ray_index)) {
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
- }
- }
-# endif /* __BRANCHED_PATH__ */
-
-#endif /* __VOLUME__ */
+ /* iter loop */
+ ray_index = get_ray_index(kg,
+ ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0),
+ QUEUE_VOLUME_INDIRECT_ITER,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
+ 1);
+
+ if (IS_STATE(ray_state, ray_index, RAY_VOLUME_INDIRECT_NEXT_ITER)) {
+ /* for render passes, sum and reset indirect light pass variables
+ * for the next samples */
+ path_radiance_sum_indirect(&kernel_split_state.path_radiance[ray_index]);
+ path_radiance_reset_indirect(&kernel_split_state.path_radiance[ray_index]);
+
+ if (kernel_split_branched_path_volume_indirect_light_iter(kg, ray_index)) {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
+ }
+ }
+# endif /* __BRANCHED_PATH__ */
+
+#endif /* __VOLUME__ */
}
-
CCL_NAMESPACE_END
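
The branched volume loop splits one path into num_samples sub-paths and scales each stored throughput by num_samples_inv = 1/num_samples, so the branches average the estimator rather than sum it (path_state_branch additionally decorrelates the RNG per branch, which the sketch below omits). A toy illustration of that weighting, with a deterministic stand-in for the actual volume sampling:

#include <cstdio>

/* Stand-in for one indirect volume sample; deterministic for the demo. */
static float sample_indirect_light(int j)
{
  return 1.0f + 0.1f * (float)j; /* pretend radiance of branch j */
}

int main()
{
  const int num_samples = 4; /* like integrator.volume_samples */
  const float num_samples_inv = 1.0f / num_samples;
  const float throughput = 0.8f; /* path throughput before branching */

  /* Each branch j restarts from the saved state with scaled throughput,
   * mirroring: *tp = branched_state->throughput * num_samples_inv; */
  float result = 0.0f;
  for (int j = 0; j < num_samples; j++) {
    float tp = throughput * num_samples_inv;
    result += tp * sample_indirect_light(j);
  }
  /* The branches average the estimator instead of summing it. */
  printf("estimate: %f\n", result); /* 0.8 * mean(1.0..1.3) = 0.92 */
  return 0;
}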
diff --git a/intern/cycles/kernel/split/kernel_enqueue_inactive.h b/intern/cycles/kernel/split/kernel_enqueue_inactive.h
index 496355bbc3a..31d2daef616 100644
--- a/intern/cycles/kernel/split/kernel_enqueue_inactive.h
+++ b/intern/cycles/kernel/split/kernel_enqueue_inactive.h
@@ -20,27 +20,27 @@ ccl_device void kernel_enqueue_inactive(KernelGlobals *kg,
ccl_local_param unsigned int *local_queue_atomics)
{
#ifdef __BRANCHED_PATH__
- /* Enqeueue RAY_INACTIVE rays into QUEUE_INACTIVE_RAYS queue. */
- if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
- *local_queue_atomics = 0;
- }
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
+  /* Enqueue RAY_INACTIVE rays into the QUEUE_INACTIVE_RAYS queue. */
+ if (ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
+ *local_queue_atomics = 0;
+ }
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
- int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
- char enqueue_flag = 0;
- if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_INACTIVE)) {
- enqueue_flag = 1;
- }
+ char enqueue_flag = 0;
+ if (IS_STATE(kernel_split_state.ray_state, ray_index, RAY_INACTIVE)) {
+ enqueue_flag = 1;
+ }
- enqueue_ray_index_local(ray_index,
- QUEUE_INACTIVE_RAYS,
- enqueue_flag,
- kernel_split_params.queue_size,
- local_queue_atomics,
- kernel_split_state.queue_data,
- kernel_split_params.queue_index);
-#endif /* __BRANCHED_PATH__ */
+ enqueue_ray_index_local(ray_index,
+ QUEUE_INACTIVE_RAYS,
+ enqueue_flag,
+ kernel_split_params.queue_size,
+ local_queue_atomics,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_index);
+#endif /* __BRANCHED_PATH__ */
}
CCL_NAMESPACE_END
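
enqueue_ray_index_local, used here and throughout these kernels, compacts flagged rays with two levels of atomics: each flagged thread takes a slot from a workgroup-local counter, one thread then reserves a contiguous block from the global queue counter, and every flagged thread writes its ray index at global_base + local_slot. A serial sketch of the scheme (a GPU version interleaves these phases with barriers; names are hypothetical):

#include <cstdio>

enum { GROUP_SIZE = 8, QUEUE_CAP = 64 };

static int queue_data[QUEUE_CAP];
static int queue_index = 0; /* global element count (atomic on a GPU) */

/* Enqueue every flagged ray of one workgroup; serialized for clarity. */
static void enqueue_group(const int *ray_index, const char *enqueue_flag)
{
  int local_slot[GROUP_SIZE];
  int local_count = 0; /* the local_queue_atomics counter in the kernels above */

  /* Phase 1: each flagged thread takes a local slot (local atomic inc). */
  for (int t = 0; t < GROUP_SIZE; t++) {
    local_slot[t] = enqueue_flag[t] ? local_count++ : -1;
  }

  /* Phase 2: one thread reserves a block from the global counter. */
  int global_base = queue_index;
  queue_index += local_count; /* atomic_fetch_add on a GPU */

  /* Phase 3: flagged threads scatter into their reserved slots. */
  for (int t = 0; t < GROUP_SIZE; t++) {
    if (enqueue_flag[t])
      queue_data[global_base + local_slot[t]] = ray_index[t];
  }
}

int main()
{
  int rays[GROUP_SIZE] = {0, 1, 2, 3, 4, 5, 6, 7};
  char flags[GROUP_SIZE] = {0, 1, 0, 1, 1, 0, 0, 1};
  enqueue_group(rays, flags);
  printf("enqueued %d rays: ", queue_index);
  for (int i = 0; i < queue_index; i++)
    printf("%d ", queue_data[i]); /* 1 3 4 7 */
  printf("\n");
  return 0;
}

The point of the local counter is to pay for only one global atomic per workgroup instead of one per ray.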
diff --git a/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h b/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
index f14eecec2f2..63bc5a8e0ce 100644
--- a/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
+++ b/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
@@ -54,120 +54,112 @@ CCL_NAMESPACE_BEGIN
*/
ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(
- KernelGlobals *kg,
- ccl_local_param BackgroundAOLocals *locals)
+ KernelGlobals *kg, ccl_local_param BackgroundAOLocals *locals)
{
- if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
- locals->queue_atomics_bg = 0;
- locals->queue_atomics_ao = 0;
- }
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
+ if (ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
+ locals->queue_atomics_bg = 0;
+ locals->queue_atomics_ao = 0;
+ }
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
#ifdef __AO__
- char enqueue_flag = 0;
+ char enqueue_flag = 0;
#endif
- int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
- ray_index = get_ray_index(kg, ray_index,
- QUEUE_ACTIVE_AND_REGENERATED_RAYS,
- kernel_split_state.queue_data,
- kernel_split_params.queue_size,
- 0);
+ int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ ray_index = get_ray_index(kg,
+ ray_index,
+ QUEUE_ACTIVE_AND_REGENERATED_RAYS,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
+ 0);
#ifdef __COMPUTE_DEVICE_GPU__
- /* If we are executing on a GPU device, we exit all threads that are not
- * required.
- *
- * If we are executing on a CPU device, then we need to keep all threads
- * active since we have barrier() calls later in the kernel. CPU devices,
- * expect all threads to execute barrier statement.
- */
- if(ray_index == QUEUE_EMPTY_SLOT) {
- return;
- }
-#endif /* __COMPUTE_DEVICE_GPU__ */
+ /* If we are executing on a GPU device, we exit all threads that are not
+ * required.
+ *
+ * If we are executing on a CPU device, then we need to keep all threads
+   * active since we have barrier() calls later in the kernel. CPU devices
+   * expect all threads to execute the barrier statement.
+ */
+ if (ray_index == QUEUE_EMPTY_SLOT) {
+ return;
+ }
+#endif /* __COMPUTE_DEVICE_GPU__ */
#ifndef __COMPUTE_DEVICE_GPU__
- if(ray_index != QUEUE_EMPTY_SLOT) {
+ if (ray_index != QUEUE_EMPTY_SLOT) {
#endif
- ccl_global PathState *state = 0x0;
- float3 throughput;
-
- ccl_global char *ray_state = kernel_split_state.ray_state;
- ShaderData *sd = kernel_split_sd(sd, ray_index);
-
- if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
- uint buffer_offset = kernel_split_state.buffer_offset[ray_index];
- ccl_global float *buffer = kernel_split_params.tile.buffer + buffer_offset;
-
- ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
- ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
- PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
-
- throughput = kernel_split_state.throughput[ray_index];
- state = &kernel_split_state.path_state[ray_index];
-
- if(!kernel_path_shader_apply(kg,
- sd,
- state,
- ray,
- throughput,
- emission_sd,
- L,
- buffer))
- {
- kernel_split_path_end(kg, ray_index);
- }
- }
-
- if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
- /* Path termination. this is a strange place to put the termination, it's
- * mainly due to the mixed in MIS that we use. gives too many unneeded
- * shader evaluations, only need emission if we are going to terminate.
- */
- float probability = path_state_continuation_probability(kg, state, throughput);
-
- if(probability == 0.0f) {
- kernel_split_path_end(kg, ray_index);
- }
- else if(probability < 1.0f) {
- float terminate = path_state_rng_1D(kg, state, PRNG_TERMINATE);
- if(terminate >= probability) {
- kernel_split_path_end(kg, ray_index);
- }
- else {
- kernel_split_state.throughput[ray_index] = throughput/probability;
- }
- }
-
- if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
- PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
- kernel_update_denoising_features(kg, sd, state, L);
- }
- }
+ ccl_global PathState *state = 0x0;
+ float3 throughput;
+
+ ccl_global char *ray_state = kernel_split_state.ray_state;
+ ShaderData *sd = kernel_split_sd(sd, ray_index);
+
+ if (IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
+ uint buffer_offset = kernel_split_state.buffer_offset[ray_index];
+ ccl_global float *buffer = kernel_split_params.tile.buffer + buffer_offset;
+
+ ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
+ ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+
+ throughput = kernel_split_state.throughput[ray_index];
+ state = &kernel_split_state.path_state[ray_index];
+
+ if (!kernel_path_shader_apply(kg, sd, state, ray, throughput, emission_sd, L, buffer)) {
+ kernel_split_path_end(kg, ray_index);
+ }
+ }
+
+ if (IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
+    /* Path termination. This is a strange place to put the termination; it's
+     * mainly due to the mixed-in MIS that we use. It gives too many unneeded
+     * shader evaluations; we only need emission if we are going to terminate.
+ */
+ float probability = path_state_continuation_probability(kg, state, throughput);
+
+ if (probability == 0.0f) {
+ kernel_split_path_end(kg, ray_index);
+ }
+ else if (probability < 1.0f) {
+ float terminate = path_state_rng_1D(kg, state, PRNG_TERMINATE);
+ if (terminate >= probability) {
+ kernel_split_path_end(kg, ray_index);
+ }
+ else {
+ kernel_split_state.throughput[ray_index] = throughput / probability;
+ }
+ }
+
+ if (IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+ kernel_update_denoising_features(kg, sd, state, L);
+ }
+ }
#ifdef __AO__
- if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
- /* ambient occlusion */
- if(kernel_data.integrator.use_ambient_occlusion) {
- enqueue_flag = 1;
- }
- }
-#endif /* __AO__ */
+ if (IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
+ /* ambient occlusion */
+ if (kernel_data.integrator.use_ambient_occlusion) {
+ enqueue_flag = 1;
+ }
+ }
+#endif /* __AO__ */
#ifndef __COMPUTE_DEVICE_GPU__
- }
+ }
#endif
#ifdef __AO__
- /* Enqueue to-shadow-ray-cast rays. */
- enqueue_ray_index_local(ray_index,
- QUEUE_SHADOW_RAY_CAST_AO_RAYS,
- enqueue_flag,
- kernel_split_params.queue_size,
- &locals->queue_atomics_ao,
- kernel_split_state.queue_data,
- kernel_split_params.queue_index);
+ /* Enqueue to-shadow-ray-cast rays. */
+ enqueue_ray_index_local(ray_index,
+ QUEUE_SHADOW_RAY_CAST_AO_RAYS,
+ enqueue_flag,
+ kernel_split_params.queue_size,
+ &locals->queue_atomics_ao,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_index);
#endif
}
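The termination logic above is Russian roulette: a path with continuation probability p either dies or survives with its throughput boosted by 1/p, which keeps the estimator unbiased. A minimal host-side sketch in plain C; rand01() stands in for path_state_rng_1D, and all names are illustrative rather than the kernel API.

#include <stdlib.h>

typedef struct { float x, y, z; } float3_t;

static float rand01(void) { return (float)rand() / (float)RAND_MAX; }

/* Returns 0 when the path terminates, 1 when it survives. */
static int russian_roulette(float3_t *throughput, float probability)
{
  if (probability == 0.0f) {
    return 0; /* terminate unconditionally */
  }
  if (probability < 1.0f) {
    if (rand01() >= probability) {
      return 0; /* terminate stochastically */
    }
    /* Survivors are boosted by 1/p so the expected contribution is unchanged. */
    throughput->x /= probability;
    throughput->y /= probability;
    throughput->z /= probability;
  }
  return 1;
}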
diff --git a/intern/cycles/kernel/split/kernel_indirect_background.h b/intern/cycles/kernel/split/kernel_indirect_background.h
index 4cf88a02590..b1c65f61e2c 100644
--- a/intern/cycles/kernel/split/kernel_indirect_background.h
+++ b/intern/cycles/kernel/split/kernel_indirect_background.h
@@ -18,48 +18,50 @@ CCL_NAMESPACE_BEGIN
ccl_device void kernel_indirect_background(KernelGlobals *kg)
{
- ccl_global char *ray_state = kernel_split_state.ray_state;
+ ccl_global char *ray_state = kernel_split_state.ray_state;
- int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
- int ray_index;
+ int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ int ray_index;
- if(kernel_data.integrator.ao_bounces != INT_MAX) {
- ray_index = get_ray_index(kg, thread_index,
- QUEUE_ACTIVE_AND_REGENERATED_RAYS,
- kernel_split_state.queue_data,
- kernel_split_params.queue_size,
- 0);
+ if (kernel_data.integrator.ao_bounces != INT_MAX) {
+ ray_index = get_ray_index(kg,
+ thread_index,
+ QUEUE_ACTIVE_AND_REGENERATED_RAYS,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
+ 0);
- if(ray_index != QUEUE_EMPTY_SLOT) {
- if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
- ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
- if(path_state_ao_bounce(kg, state)) {
- kernel_split_path_end(kg, ray_index);
- }
- }
- }
- }
+ if (ray_index != QUEUE_EMPTY_SLOT) {
+ if (IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
+ ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+ if (path_state_ao_bounce(kg, state)) {
+ kernel_split_path_end(kg, ray_index);
+ }
+ }
+ }
+ }
- ray_index = get_ray_index(kg, thread_index,
- QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
- kernel_split_state.queue_data,
- kernel_split_params.queue_size,
- 0);
+ ray_index = get_ray_index(kg,
+ thread_index,
+ QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
+ 0);
- if(ray_index == QUEUE_EMPTY_SLOT) {
- return;
- }
+ if (ray_index == QUEUE_EMPTY_SLOT) {
+ return;
+ }
- if(IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) {
- ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
- PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
- ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
- float3 throughput = kernel_split_state.throughput[ray_index];
- ShaderData *sd = kernel_split_sd(sd, ray_index);
+ if (IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) {
+ ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+ ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
+ float3 throughput = kernel_split_state.throughput[ray_index];
+ ShaderData *sd = kernel_split_sd(sd, ray_index);
- kernel_path_background(kg, state, ray, throughput, sd, L);
- kernel_split_path_end(kg, ray_index);
- }
+ kernel_path_background(kg, state, ray, throughput, sd, L);
+ kernel_split_path_end(kg, ray_index);
+ }
}
CCL_NAMESPACE_END
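For context, path_state_ao_bounce consults the AO bounce limit that the first hunk checks against INT_MAX. A rough stand-in for the predicate, assuming the limit simply caps path depth; the real test lives in the path-state code and may differ in detail:

#include <limits.h>

typedef struct { int bounce; } path_state_lite;

/* Illustrative only: an AO-limited path past the configured depth is ended. */
static int exceeds_ao_bounces(const path_state_lite *state, int ao_bounces)
{
  return ao_bounces != INT_MAX && state->bounce > ao_bounces;
}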
diff --git a/intern/cycles/kernel/split/kernel_indirect_subsurface.h b/intern/cycles/kernel/split/kernel_indirect_subsurface.h
index 236c94e983c..3f48f8d6f56 100644
--- a/intern/cycles/kernel/split/kernel_indirect_subsurface.h
+++ b/intern/cycles/kernel/split/kernel_indirect_subsurface.h
@@ -18,53 +18,50 @@ CCL_NAMESPACE_BEGIN
ccl_device void kernel_indirect_subsurface(KernelGlobals *kg)
{
- int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
- if(thread_index == 0) {
- /* We will empty both queues in this kernel. */
- kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0;
- kernel_split_params.queue_index[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = 0;
- }
+ int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ if (thread_index == 0) {
+ /* We will empty both queues in this kernel. */
+ kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0;
+ kernel_split_params.queue_index[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = 0;
+ }
- int ray_index;
- get_ray_index(kg, thread_index,
- QUEUE_ACTIVE_AND_REGENERATED_RAYS,
- kernel_split_state.queue_data,
- kernel_split_params.queue_size,
- 1);
- ray_index = get_ray_index(kg, thread_index,
- QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
- kernel_split_state.queue_data,
- kernel_split_params.queue_size,
- 1);
+ int ray_index;
+ get_ray_index(kg,
+ thread_index,
+ QUEUE_ACTIVE_AND_REGENERATED_RAYS,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
+ 1);
+ ray_index = get_ray_index(kg,
+ thread_index,
+ QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
+ 1);
#ifdef __SUBSURFACE__
- if(ray_index == QUEUE_EMPTY_SLOT) {
- return;
- }
+ if (ray_index == QUEUE_EMPTY_SLOT) {
+ return;
+ }
- ccl_global char *ray_state = kernel_split_state.ray_state;
- ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
- PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
- ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
- ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
+ ccl_global char *ray_state = kernel_split_state.ray_state;
+ ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+ ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
+ ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
- if(IS_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER)) {
- ccl_addr_space SubsurfaceIndirectRays *ss_indirect = &kernel_split_state.ss_rays[ray_index];
+ if (IS_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER)) {
+ ccl_addr_space SubsurfaceIndirectRays *ss_indirect = &kernel_split_state.ss_rays[ray_index];
- /* Trace indirect subsurface rays by restarting the loop. this uses less
- * stack memory than invoking kernel_path_indirect.
- */
- if(ss_indirect->num_rays) {
- kernel_path_subsurface_setup_indirect(kg,
- ss_indirect,
- state,
- ray,
- L,
- throughput);
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
- }
- }
-#endif /* __SUBSURFACE__ */
+    /* Trace indirect subsurface rays by restarting the loop. This uses less
+     * stack memory than invoking kernel_path_indirect.
+     */
+ if (ss_indirect->num_rays) {
+ kernel_path_subsurface_setup_indirect(kg, ss_indirect, state, ray, L, throughput);
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
+ }
+ }
+#endif /* __SUBSURFACE__ */
}
CCL_NAMESPACE_END
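The comment above describes a restart rather than a recursion: pending subsurface rays sit in a small per-path array, and the main loop is simply re-entered for each one. A host-side sketch of that pop-and-continue shape; the types and the four-entry capacity are invented for the example:

#define MAX_SS_RAYS 4

typedef struct { float P[3], D[3]; } ray_lite;

typedef struct {
  int num_rays;
  ray_lite rays[MAX_SS_RAYS];
} ss_indirect_lite;

/* Pops the next pending indirect ray; returns 0 when none remain. */
static int pop_indirect_ray(ss_indirect_lite *ss, ray_lite *out)
{
  if (ss->num_rays == 0) {
    return 0;
  }
  *out = ss->rays[--ss->num_rays];
  return 1;
}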
diff --git a/intern/cycles/kernel/split/kernel_lamp_emission.h b/intern/cycles/kernel/split/kernel_lamp_emission.h
index 5b2c554b922..7ecb099208d 100644
--- a/intern/cycles/kernel/split/kernel_lamp_emission.h
+++ b/intern/cycles/kernel/split/kernel_lamp_emission.h
@@ -23,45 +23,45 @@ CCL_NAMESPACE_BEGIN
ccl_device void kernel_lamp_emission(KernelGlobals *kg)
{
#ifndef __VOLUME__
- /* We will empty this queue in this kernel. */
- if(ccl_global_id(0) == 0 && ccl_global_id(1) == 0) {
- kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0;
- }
+ /* We will empty this queue in this kernel. */
+ if (ccl_global_id(0) == 0 && ccl_global_id(1) == 0) {
+ kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0;
+ }
#endif
- /* Fetch use_queues_flag. */
- char local_use_queues_flag = *kernel_split_params.use_queues_flag;
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
+ /* Fetch use_queues_flag. */
+ char local_use_queues_flag = *kernel_split_params.use_queues_flag;
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
- int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
- if(local_use_queues_flag) {
- ray_index = get_ray_index(kg, ray_index,
- QUEUE_ACTIVE_AND_REGENERATED_RAYS,
- kernel_split_state.queue_data,
- kernel_split_params.queue_size,
+ int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ if (local_use_queues_flag) {
+ ray_index = get_ray_index(kg,
+ ray_index,
+ QUEUE_ACTIVE_AND_REGENERATED_RAYS,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
#ifndef __VOLUME__
- 1
+ 1
#else
- 0
+ 0
#endif
- );
- if(ray_index == QUEUE_EMPTY_SLOT) {
- return;
- }
- }
+ );
+ if (ray_index == QUEUE_EMPTY_SLOT) {
+ return;
+ }
+ }
- if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE) ||
- IS_STATE(kernel_split_state.ray_state, ray_index, RAY_HIT_BACKGROUND))
- {
- PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
- ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+ if (IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE) ||
+ IS_STATE(kernel_split_state.ray_state, ray_index, RAY_HIT_BACKGROUND)) {
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+ ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
- float3 throughput = kernel_split_state.throughput[ray_index];
- Ray ray = kernel_split_state.ray[ray_index];
- ccl_global Intersection *isect = &kernel_split_state.isect[ray_index];
- ShaderData *sd = kernel_split_sd(sd, ray_index);
+ float3 throughput = kernel_split_state.throughput[ray_index];
+ Ray ray = kernel_split_state.ray[ray_index];
+ ccl_global Intersection *isect = &kernel_split_state.isect[ray_index];
+ ShaderData *sd = kernel_split_sd(sd, ray_index);
- kernel_path_lamp_emission(kg, state, &ray, throughput, isect, sd, L);
- }
+ kernel_path_lamp_emission(kg, state, &ray, throughput, isect, sd, L);
+ }
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_next_iteration_setup.h b/intern/cycles/kernel/split/kernel_next_iteration_setup.h
index e388955f1af..781ce869374 100644
--- a/intern/cycles/kernel/split/kernel_next_iteration_setup.h
+++ b/intern/cycles/kernel/split/kernel_next_iteration_setup.h
@@ -48,217 +48,211 @@ CCL_NAMESPACE_BEGIN
#ifdef __BRANCHED_PATH__
ccl_device_inline void kernel_split_branched_indirect_light_init(KernelGlobals *kg, int ray_index)
{
- kernel_split_branched_path_indirect_loop_init(kg, ray_index);
+ kernel_split_branched_path_indirect_loop_init(kg, ray_index);
- ADD_RAY_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_LIGHT_INDIRECT);
+ ADD_RAY_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_LIGHT_INDIRECT);
}
ccl_device void kernel_split_branched_transparent_bounce(KernelGlobals *kg, int ray_index)
{
- ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
- ShaderData *sd = kernel_split_sd(sd, ray_index);
- ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
- ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
+ ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
+ ShaderData *sd = kernel_split_sd(sd, ray_index);
+ ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+ ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
# ifdef __VOLUME__
- if(!(sd->flag & SD_HAS_ONLY_VOLUME)) {
+ if (!(sd->flag & SD_HAS_ONLY_VOLUME)) {
# endif
- /* continue in case of transparency */
- *throughput *= shader_bsdf_transparency(kg, sd);
+ /* continue in case of transparency */
+ *throughput *= shader_bsdf_transparency(kg, sd);
- if(is_zero(*throughput)) {
- kernel_split_path_end(kg, ray_index);
- return;
- }
+ if (is_zero(*throughput)) {
+ kernel_split_path_end(kg, ray_index);
+ return;
+ }
- /* Update Path State */
- path_state_next(kg, state, LABEL_TRANSPARENT);
+ /* Update Path State */
+ path_state_next(kg, state, LABEL_TRANSPARENT);
# ifdef __VOLUME__
- }
- else {
- if(!path_state_volume_next(kg, state)) {
- kernel_split_path_end(kg, ray_index);
- return;
- }
- }
+ }
+ else {
+ if (!path_state_volume_next(kg, state)) {
+ kernel_split_path_end(kg, ray_index);
+ return;
+ }
+ }
# endif
- ray->P = ray_offset(sd->P, -sd->Ng);
- ray->t -= sd->ray_length; /* clipping works through transparent */
+ ray->P = ray_offset(sd->P, -sd->Ng);
+ ray->t -= sd->ray_length; /* clipping works through transparent */
# ifdef __RAY_DIFFERENTIALS__
- ray->dP = sd->dP;
- ray->dD.dx = -sd->dI.dx;
- ray->dD.dy = -sd->dI.dy;
-# endif /* __RAY_DIFFERENTIALS__ */
+ ray->dP = sd->dP;
+ ray->dD.dx = -sd->dI.dx;
+ ray->dD.dy = -sd->dI.dy;
+# endif /* __RAY_DIFFERENTIALS__ */
# ifdef __VOLUME__
- /* enter/exit volume */
- kernel_volume_stack_enter_exit(kg, sd, state->volume_stack);
-# endif /* __VOLUME__ */
+ /* enter/exit volume */
+ kernel_volume_stack_enter_exit(kg, sd, state->volume_stack);
+# endif /* __VOLUME__ */
}
-#endif /* __BRANCHED_PATH__ */
+#endif /* __BRANCHED_PATH__ */
ccl_device void kernel_next_iteration_setup(KernelGlobals *kg,
ccl_local_param unsigned int *local_queue_atomics)
{
- if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
- *local_queue_atomics = 0;
- }
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
-
- if(ccl_global_id(0) == 0 && ccl_global_id(1) == 0) {
- /* If we are here, then it means that scene-intersect kernel
- * has already been executed atleast once. From the next time,
- * scene-intersect kernel may operate on queues to fetch ray index
- */
- *kernel_split_params.use_queues_flag = 1;
-
- /* Mark queue indices of QUEUE_SHADOW_RAY_CAST_AO_RAYS and
- * QUEUE_SHADOW_RAY_CAST_DL_RAYS queues that were made empty during the
- * previous kernel.
- */
- kernel_split_params.queue_index[QUEUE_SHADOW_RAY_CAST_AO_RAYS] = 0;
- kernel_split_params.queue_index[QUEUE_SHADOW_RAY_CAST_DL_RAYS] = 0;
- }
-
- int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
- ray_index = get_ray_index(kg, ray_index,
- QUEUE_ACTIVE_AND_REGENERATED_RAYS,
- kernel_split_state.queue_data,
- kernel_split_params.queue_size,
- 0);
-
- ccl_global char *ray_state = kernel_split_state.ray_state;
-
-# ifdef __VOLUME__
- /* Reactivate only volume rays here, most surface work was skipped. */
- if(IS_STATE(ray_state, ray_index, RAY_HAS_ONLY_VOLUME)) {
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_ACTIVE);
- }
-# endif
+ if (ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
+ *local_queue_atomics = 0;
+ }
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
+
+ if (ccl_global_id(0) == 0 && ccl_global_id(1) == 0) {
+    /* If we are here, the scene-intersect kernel has already been executed
+     * at least once. From now on, the scene-intersect kernel may operate on
+     * queues to fetch the ray index.
+     */
+ *kernel_split_params.use_queues_flag = 1;
+
+ /* Mark queue indices of QUEUE_SHADOW_RAY_CAST_AO_RAYS and
+ * QUEUE_SHADOW_RAY_CAST_DL_RAYS queues that were made empty during the
+ * previous kernel.
+ */
+ kernel_split_params.queue_index[QUEUE_SHADOW_RAY_CAST_AO_RAYS] = 0;
+ kernel_split_params.queue_index[QUEUE_SHADOW_RAY_CAST_DL_RAYS] = 0;
+ }
+
+ int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ ray_index = get_ray_index(kg,
+ ray_index,
+ QUEUE_ACTIVE_AND_REGENERATED_RAYS,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
+ 0);
+
+ ccl_global char *ray_state = kernel_split_state.ray_state;
+
+#ifdef __VOLUME__
+ /* Reactivate only volume rays here, most surface work was skipped. */
+ if (IS_STATE(ray_state, ray_index, RAY_HAS_ONLY_VOLUME)) {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_ACTIVE);
+ }
+#endif
- bool active = IS_STATE(ray_state, ray_index, RAY_ACTIVE);
- if(active) {
- ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
- ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
- ShaderData *sd = kernel_split_sd(sd, ray_index);
- ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
- PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+ bool active = IS_STATE(ray_state, ray_index, RAY_ACTIVE);
+ if (active) {
+ ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
+ ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
+ ShaderData *sd = kernel_split_sd(sd, ray_index);
+ ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
#ifdef __BRANCHED_PATH__
- if(!kernel_data.integrator.branched || IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT)) {
+ if (!kernel_data.integrator.branched || IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT)) {
#endif
- /* Compute direct lighting and next bounce. */
- if(!kernel_path_surface_bounce(kg, sd, throughput, state, &L->state, ray)) {
- kernel_split_path_end(kg, ray_index);
- }
+ /* Compute direct lighting and next bounce. */
+ if (!kernel_path_surface_bounce(kg, sd, throughput, state, &L->state, ray)) {
+ kernel_split_path_end(kg, ray_index);
+ }
#ifdef __BRANCHED_PATH__
- }
- else if(sd->flag & SD_HAS_ONLY_VOLUME) {
- kernel_split_branched_transparent_bounce(kg, ray_index);
- }
- else {
- kernel_split_branched_indirect_light_init(kg, ray_index);
-
- if(kernel_split_branched_path_surface_indirect_light_iter(kg,
- ray_index,
- 1.0f,
- kernel_split_sd(branched_state_sd, ray_index),
- true,
- true))
- {
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
- }
- else {
- kernel_split_branched_path_indirect_loop_end(kg, ray_index);
- kernel_split_branched_transparent_bounce(kg, ray_index);
- }
- }
-#endif /* __BRANCHED_PATH__ */
- }
-
- /* Enqueue RAY_UPDATE_BUFFER rays. */
- enqueue_ray_index_local(ray_index,
- QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
- IS_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER) && active,
- kernel_split_params.queue_size,
- local_queue_atomics,
- kernel_split_state.queue_data,
- kernel_split_params.queue_index);
+ }
+ else if (sd->flag & SD_HAS_ONLY_VOLUME) {
+ kernel_split_branched_transparent_bounce(kg, ray_index);
+ }
+ else {
+ kernel_split_branched_indirect_light_init(kg, ray_index);
+
+ if (kernel_split_branched_path_surface_indirect_light_iter(
+ kg, ray_index, 1.0f, kernel_split_sd(branched_state_sd, ray_index), true, true)) {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
+ }
+ else {
+ kernel_split_branched_path_indirect_loop_end(kg, ray_index);
+ kernel_split_branched_transparent_bounce(kg, ray_index);
+ }
+ }
+#endif /* __BRANCHED_PATH__ */
+ }
+
+ /* Enqueue RAY_UPDATE_BUFFER rays. */
+ enqueue_ray_index_local(ray_index,
+ QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
+ IS_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER) && active,
+ kernel_split_params.queue_size,
+ local_queue_atomics,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_index);
#ifdef __BRANCHED_PATH__
- /* iter loop */
- if(ccl_global_id(0) == 0 && ccl_global_id(1) == 0) {
- kernel_split_params.queue_index[QUEUE_LIGHT_INDIRECT_ITER] = 0;
- }
-
- ray_index = get_ray_index(kg, ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0),
- QUEUE_LIGHT_INDIRECT_ITER,
- kernel_split_state.queue_data,
- kernel_split_params.queue_size,
- 1);
-
- if(IS_STATE(ray_state, ray_index, RAY_LIGHT_INDIRECT_NEXT_ITER)) {
- /* for render passes, sum and reset indirect light pass variables
- * for the next samples */
- PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
-
- path_radiance_sum_indirect(L);
- path_radiance_reset_indirect(L);
-
- if(kernel_split_branched_path_surface_indirect_light_iter(kg,
- ray_index,
- 1.0f,
- kernel_split_sd(branched_state_sd, ray_index),
- true,
- true))
- {
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
- }
- else {
- kernel_split_branched_path_indirect_loop_end(kg, ray_index);
- kernel_split_branched_transparent_bounce(kg, ray_index);
- }
- }
+ /* iter loop */
+ if (ccl_global_id(0) == 0 && ccl_global_id(1) == 0) {
+ kernel_split_params.queue_index[QUEUE_LIGHT_INDIRECT_ITER] = 0;
+ }
+
+ ray_index = get_ray_index(kg,
+ ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0),
+ QUEUE_LIGHT_INDIRECT_ITER,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
+ 1);
+
+ if (IS_STATE(ray_state, ray_index, RAY_LIGHT_INDIRECT_NEXT_ITER)) {
+    /* For render passes, sum and reset the indirect light pass variables
+     * for the next samples. */
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+
+ path_radiance_sum_indirect(L);
+ path_radiance_reset_indirect(L);
+
+ if (kernel_split_branched_path_surface_indirect_light_iter(
+ kg, ray_index, 1.0f, kernel_split_sd(branched_state_sd, ray_index), true, true)) {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
+ }
+ else {
+ kernel_split_branched_path_indirect_loop_end(kg, ray_index);
+ kernel_split_branched_transparent_bounce(kg, ray_index);
+ }
+ }
# ifdef __VOLUME__
- /* Enqueue RAY_VOLUME_INDIRECT_NEXT_ITER rays */
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
- if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
- *local_queue_atomics = 0;
- }
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
-
- ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
- enqueue_ray_index_local(ray_index,
- QUEUE_VOLUME_INDIRECT_ITER,
- IS_STATE(kernel_split_state.ray_state, ray_index, RAY_VOLUME_INDIRECT_NEXT_ITER),
- kernel_split_params.queue_size,
- local_queue_atomics,
- kernel_split_state.queue_data,
- kernel_split_params.queue_index);
-
-# endif /* __VOLUME__ */
+ /* Enqueue RAY_VOLUME_INDIRECT_NEXT_ITER rays */
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
+ if (ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
+ *local_queue_atomics = 0;
+ }
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
+
+ ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ enqueue_ray_index_local(
+ ray_index,
+ QUEUE_VOLUME_INDIRECT_ITER,
+ IS_STATE(kernel_split_state.ray_state, ray_index, RAY_VOLUME_INDIRECT_NEXT_ITER),
+ kernel_split_params.queue_size,
+ local_queue_atomics,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_index);
+
+# endif /* __VOLUME__ */
# ifdef __SUBSURFACE__
- /* Enqueue RAY_SUBSURFACE_INDIRECT_NEXT_ITER rays */
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
- if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
- *local_queue_atomics = 0;
- }
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
-
- ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
- enqueue_ray_index_local(ray_index,
- QUEUE_SUBSURFACE_INDIRECT_ITER,
- IS_STATE(kernel_split_state.ray_state, ray_index, RAY_SUBSURFACE_INDIRECT_NEXT_ITER),
- kernel_split_params.queue_size,
- local_queue_atomics,
- kernel_split_state.queue_data,
- kernel_split_params.queue_index);
-# endif /* __SUBSURFACE__ */
-#endif /* __BRANCHED_PATH__ */
+ /* Enqueue RAY_SUBSURFACE_INDIRECT_NEXT_ITER rays */
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
+ if (ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
+ *local_queue_atomics = 0;
+ }
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
+
+ ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ enqueue_ray_index_local(
+ ray_index,
+ QUEUE_SUBSURFACE_INDIRECT_ITER,
+ IS_STATE(kernel_split_state.ray_state, ray_index, RAY_SUBSURFACE_INDIRECT_NEXT_ITER),
+ kernel_split_params.queue_size,
+ local_queue_atomics,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_index);
+# endif /* __SUBSURFACE__ */
+#endif /* __BRANCHED_PATH__ */
}
CCL_NAMESPACE_END
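The branched code path above follows an init/iter/end protocol: loop-init snapshots the main path state, each iter traces one more indirect sample (returning true while samples remain), and loop-end restores the snapshot so the main path can take its own bounce. A bare-bones sketch of that control shape, with invented names and no actual tracing:

typedef struct {
  int sample, num_samples;
  /* a saved copy of the main path state would live here */
} branched_loop_lite;

static void loop_init(branched_loop_lite *loop, int num_samples)
{
  loop->sample = 0; /* init would also snapshot ray/throughput/state */
  loop->num_samples = num_samples;
}

/* Returns 1 while another indirect sample remains to be traced. */
static int loop_iter(branched_loop_lite *loop)
{
  return loop->sample++ < loop->num_samples;
}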
diff --git a/intern/cycles/kernel/split/kernel_path_init.h b/intern/cycles/kernel/split/kernel_path_init.h
index fdd54225b07..3faa3208341 100644
--- a/intern/cycles/kernel/split/kernel_path_init.h
+++ b/intern/cycles/kernel/split/kernel_path_init.h
@@ -21,61 +21,59 @@ CCL_NAMESPACE_BEGIN
*
* Ray state of rays outside the tile-boundary will be marked RAY_INACTIVE
*/
-ccl_device void kernel_path_init(KernelGlobals *kg) {
- int ray_index = ccl_global_id(0) + ccl_global_id(1) * ccl_global_size(0);
+ccl_device void kernel_path_init(KernelGlobals *kg)
+{
+ int ray_index = ccl_global_id(0) + ccl_global_id(1) * ccl_global_size(0);
- /* This is the first assignment to ray_state;
- * So we dont use ASSIGN_RAY_STATE macro.
- */
- kernel_split_state.ray_state[ray_index] = RAY_ACTIVE;
+  /* This is the first assignment to ray_state,
+   * so we don't use the ASSIGN_RAY_STATE macro.
+   */
+ kernel_split_state.ray_state[ray_index] = RAY_ACTIVE;
- /* Get work. */
- ccl_global uint *work_pools = kernel_split_params.work_pools;
- uint total_work_size = kernel_split_params.total_work_size;
- uint work_index;
+ /* Get work. */
+ ccl_global uint *work_pools = kernel_split_params.work_pools;
+ uint total_work_size = kernel_split_params.total_work_size;
+ uint work_index;
- if(!get_next_work(kg, work_pools, total_work_size, ray_index, &work_index)) {
- /* No more work, mark ray as inactive */
- kernel_split_state.ray_state[ray_index] = RAY_INACTIVE;
+ if (!get_next_work(kg, work_pools, total_work_size, ray_index, &work_index)) {
+ /* No more work, mark ray as inactive */
+ kernel_split_state.ray_state[ray_index] = RAY_INACTIVE;
- return;
- }
+ return;
+ }
- ccl_global WorkTile *tile = &kernel_split_params.tile;
- uint x, y, sample;
- get_work_pixel(tile, work_index, &x, &y, &sample);
+ ccl_global WorkTile *tile = &kernel_split_params.tile;
+ uint x, y, sample;
+ get_work_pixel(tile, work_index, &x, &y, &sample);
- /* Store buffer offset for writing to passes. */
- uint buffer_offset = (tile->offset + x + y*tile->stride) * kernel_data.film.pass_stride;
- kernel_split_state.buffer_offset[ray_index] = buffer_offset;
+ /* Store buffer offset for writing to passes. */
+ uint buffer_offset = (tile->offset + x + y * tile->stride) * kernel_data.film.pass_stride;
+ kernel_split_state.buffer_offset[ray_index] = buffer_offset;
- /* Initialize random numbers and ray. */
- uint rng_hash;
- kernel_path_trace_setup(kg,
- sample,
- x, y,
- &rng_hash,
- &kernel_split_state.ray[ray_index]);
+ /* Initialize random numbers and ray. */
+ uint rng_hash;
+ kernel_path_trace_setup(kg, sample, x, y, &rng_hash, &kernel_split_state.ray[ray_index]);
- if(kernel_split_state.ray[ray_index].t != 0.0f) {
- /* Initialize throughput, path radiance, Ray, PathState;
- * These rays proceed with path-iteration.
- */
- kernel_split_state.throughput[ray_index] = make_float3(1.0f, 1.0f, 1.0f);
- path_radiance_init(&kernel_split_state.path_radiance[ray_index], kernel_data.film.use_light_pass);
- path_state_init(kg,
- AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]),
- &kernel_split_state.path_state[ray_index],
- rng_hash,
- sample,
- &kernel_split_state.ray[ray_index]);
+ if (kernel_split_state.ray[ray_index].t != 0.0f) {
+ /* Initialize throughput, path radiance, Ray, PathState;
+ * These rays proceed with path-iteration.
+ */
+ kernel_split_state.throughput[ray_index] = make_float3(1.0f, 1.0f, 1.0f);
+ path_radiance_init(&kernel_split_state.path_radiance[ray_index],
+ kernel_data.film.use_light_pass);
+ path_state_init(kg,
+ AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]),
+ &kernel_split_state.path_state[ray_index],
+ rng_hash,
+ sample,
+ &kernel_split_state.ray[ray_index]);
#ifdef __SUBSURFACE__
- kernel_path_subsurface_init_indirect(&kernel_split_state.ss_rays[ray_index]);
+ kernel_path_subsurface_init_indirect(&kernel_split_state.ss_rays[ray_index]);
#endif
- }
- else {
- ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_TO_REGENERATE);
- }
+ }
+ else {
+ ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_TO_REGENERATE);
+ }
}
CCL_NAMESPACE_END
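The buffer offset computed above is plain row-major addressing scaled by the per-pixel pass stride. A tiny host-side check of the arithmetic; the values are made up:

#include <assert.h>

static unsigned buffer_offset(unsigned tile_offset, unsigned x, unsigned y,
                              unsigned stride, unsigned pass_stride)
{
  /* One pixel occupies pass_stride floats; rows are stride pixels apart. */
  return (tile_offset + x + y * stride) * pass_stride;
}

int main(void)
{
  /* A 4-pixel-wide tile with 8 float passes per pixel: pixel (1, 2) starts
   * (0 + 1 + 2 * 4) * 8 = 72 floats into the buffer. */
  assert(buffer_offset(0, 1, 2, 4, 8) == 72);
  return 0;
}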
diff --git a/intern/cycles/kernel/split/kernel_queue_enqueue.h b/intern/cycles/kernel/split/kernel_queue_enqueue.h
index df67fabab19..2db87f7a671 100644
--- a/intern/cycles/kernel/split/kernel_queue_enqueue.h
+++ b/intern/cycles/kernel/split/kernel_queue_enqueue.h
@@ -35,58 +35,53 @@ CCL_NAMESPACE_BEGIN
* - QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be filled with
* RAY_TO_REGENERATE, RAY_UPDATE_BUFFER, RAY_HIT_BACKGROUND rays.
*/
-ccl_device void kernel_queue_enqueue(KernelGlobals *kg,
- ccl_local_param QueueEnqueueLocals *locals)
+ccl_device void kernel_queue_enqueue(KernelGlobals *kg, ccl_local_param QueueEnqueueLocals *locals)
{
- /* We have only 2 cases (Hit/Not-Hit) */
- int lidx = ccl_local_id(1) * ccl_local_size(0) + ccl_local_id(0);
- int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ /* We have only 2 cases (Hit/Not-Hit) */
+ int lidx = ccl_local_id(1) * ccl_local_size(0) + ccl_local_id(0);
+ int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
- if(lidx == 0) {
- locals->queue_atomics[0] = 0;
- locals->queue_atomics[1] = 0;
- }
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
+ if (lidx == 0) {
+ locals->queue_atomics[0] = 0;
+ locals->queue_atomics[1] = 0;
+ }
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
- int queue_number = -1;
+ int queue_number = -1;
- if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_HIT_BACKGROUND) ||
- IS_STATE(kernel_split_state.ray_state, ray_index, RAY_UPDATE_BUFFER) ||
- IS_STATE(kernel_split_state.ray_state, ray_index, RAY_TO_REGENERATE)) {
- queue_number = QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS;
- }
- else if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE) ||
- IS_STATE(kernel_split_state.ray_state, ray_index, RAY_HAS_ONLY_VOLUME) ||
- IS_STATE(kernel_split_state.ray_state, ray_index, RAY_REGENERATED)) {
- queue_number = QUEUE_ACTIVE_AND_REGENERATED_RAYS;
- }
+ if (IS_STATE(kernel_split_state.ray_state, ray_index, RAY_HIT_BACKGROUND) ||
+ IS_STATE(kernel_split_state.ray_state, ray_index, RAY_UPDATE_BUFFER) ||
+ IS_STATE(kernel_split_state.ray_state, ray_index, RAY_TO_REGENERATE)) {
+ queue_number = QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS;
+ }
+ else if (IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE) ||
+ IS_STATE(kernel_split_state.ray_state, ray_index, RAY_HAS_ONLY_VOLUME) ||
+ IS_STATE(kernel_split_state.ray_state, ray_index, RAY_REGENERATED)) {
+ queue_number = QUEUE_ACTIVE_AND_REGENERATED_RAYS;
+ }
- unsigned int my_lqidx;
- if(queue_number != -1) {
- my_lqidx = get_local_queue_index(queue_number, locals->queue_atomics);
- }
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
+ unsigned int my_lqidx;
+ if (queue_number != -1) {
+ my_lqidx = get_local_queue_index(queue_number, locals->queue_atomics);
+ }
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
- if(lidx == 0) {
- locals->queue_atomics[QUEUE_ACTIVE_AND_REGENERATED_RAYS] =
- get_global_per_queue_offset(QUEUE_ACTIVE_AND_REGENERATED_RAYS,
- locals->queue_atomics,
- kernel_split_params.queue_index);
- locals->queue_atomics[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] =
- get_global_per_queue_offset(QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
- locals->queue_atomics,
- kernel_split_params.queue_index);
- }
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
+ if (lidx == 0) {
+ locals->queue_atomics[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = get_global_per_queue_offset(
+ QUEUE_ACTIVE_AND_REGENERATED_RAYS, locals->queue_atomics, kernel_split_params.queue_index);
+ locals->queue_atomics[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = get_global_per_queue_offset(
+ QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
+ locals->queue_atomics,
+ kernel_split_params.queue_index);
+ }
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
- unsigned int my_gqidx;
- if(queue_number != -1) {
- my_gqidx = get_global_queue_index(queue_number,
- kernel_split_params.queue_size,
- my_lqidx,
- locals->queue_atomics);
- kernel_split_state.queue_data[my_gqidx] = ray_index;
- }
+ unsigned int my_gqidx;
+ if (queue_number != -1) {
+ my_gqidx = get_global_queue_index(
+ queue_number, kernel_split_params.queue_size, my_lqidx, locals->queue_atomics);
+ kernel_split_state.queue_data[my_gqidx] = ray_index;
+ }
}
CCL_NAMESPACE_END
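The enqueue above is a two-level compaction: each thread reserves a slot in a workgroup-local counter, one thread then reserves a contiguous block in the global queue with a single atomic, and every entry lands at block_base + local_slot. A serial C11 stand-in for one workgroup; the capacity numbers and names are invented:

#include <stdatomic.h>

#define QUEUE_CAPACITY 4096

typedef struct {
  atomic_uint global_index;
  int data[QUEUE_CAPACITY];
} queue_lite;

/* Serially mimics one workgroup of at most 64 threads. */
static void enqueue_group(queue_lite *q, const int *ray_indices, int n)
{
  unsigned local_slot[64];
  unsigned local_count = 0;
  for (int i = 0; i < n; i++) {
    local_slot[i] = local_count++; /* a local atomic in the real kernel */
  }
  /* One global atomic per workgroup instead of one per thread. */
  unsigned base = atomic_fetch_add(&q->global_index, local_count);
  for (int i = 0; i < n; i++) {
    q->data[base + local_slot[i]] = ray_indices[i];
  }
}

Batching the global atomic this way is what keeps queue building cheap when thousands of threads enqueue at once.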
diff --git a/intern/cycles/kernel/split/kernel_scene_intersect.h b/intern/cycles/kernel/split/kernel_scene_intersect.h
index f5378bc172b..5fef3e045f8 100644
--- a/intern/cycles/kernel/split/kernel_scene_intersect.h
+++ b/intern/cycles/kernel/split/kernel_scene_intersect.h
@@ -25,55 +25,56 @@ CCL_NAMESPACE_BEGIN
*/
ccl_device void kernel_scene_intersect(KernelGlobals *kg)
{
- /* Fetch use_queues_flag */
- char local_use_queues_flag = *kernel_split_params.use_queues_flag;
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
+ /* Fetch use_queues_flag */
+ char local_use_queues_flag = *kernel_split_params.use_queues_flag;
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
- int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
- if(local_use_queues_flag) {
- ray_index = get_ray_index(kg, ray_index,
- QUEUE_ACTIVE_AND_REGENERATED_RAYS,
- kernel_split_state.queue_data,
- kernel_split_params.queue_size,
- 0);
+ int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ if (local_use_queues_flag) {
+ ray_index = get_ray_index(kg,
+ ray_index,
+ QUEUE_ACTIVE_AND_REGENERATED_RAYS,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
+ 0);
- if(ray_index == QUEUE_EMPTY_SLOT) {
- return;
- }
- }
+ if (ray_index == QUEUE_EMPTY_SLOT) {
+ return;
+ }
+ }
- /* All regenerated rays become active here */
- if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_REGENERATED)) {
+ /* All regenerated rays become active here */
+ if (IS_STATE(kernel_split_state.ray_state, ray_index, RAY_REGENERATED)) {
#ifdef __BRANCHED_PATH__
- if(kernel_split_state.branched_state[ray_index].waiting_on_shared_samples) {
- kernel_split_path_end(kg, ray_index);
- }
- else
-#endif /* __BRANCHED_PATH__ */
- {
- ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE);
- }
- }
+ if (kernel_split_state.branched_state[ray_index].waiting_on_shared_samples) {
+ kernel_split_path_end(kg, ray_index);
+ }
+ else
+#endif /* __BRANCHED_PATH__ */
+ {
+ ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE);
+ }
+ }
- if(!IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE)) {
- return;
- }
+ if (!IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE)) {
+ return;
+ }
- ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
- Ray ray = kernel_split_state.ray[ray_index];
- PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+ ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+ Ray ray = kernel_split_state.ray[ray_index];
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
- Intersection isect;
- bool hit = kernel_path_scene_intersect(kg, state, &ray, &isect, L);
- kernel_split_state.isect[ray_index] = isect;
+ Intersection isect;
+ bool hit = kernel_path_scene_intersect(kg, state, &ray, &isect, L);
+ kernel_split_state.isect[ray_index] = isect;
- if(!hit) {
- /* Change the state of rays that hit the background;
- * These rays undergo special processing in the
- * background_bufferUpdate kernel.
- */
- ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_HIT_BACKGROUND);
- }
+ if (!hit) {
+ /* Change the state of rays that hit the background;
+     * these rays undergo special processing in the
+ * background_bufferUpdate kernel.
+ */
+ ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_HIT_BACKGROUND);
+ }
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_shader_eval.h b/intern/cycles/kernel/split/kernel_shader_eval.h
index 2bc2d300699..8e39c9797e5 100644
--- a/intern/cycles/kernel/split/kernel_shader_eval.h
+++ b/intern/cycles/kernel/split/kernel_shader_eval.h
@@ -22,45 +22,46 @@ CCL_NAMESPACE_BEGIN
ccl_device void kernel_shader_eval(KernelGlobals *kg)
{
- int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
- /* Sorting on cuda split is not implemented */
+ int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+  /* Sorting on the CUDA split kernel is not implemented. */
#ifdef __KERNEL_CUDA__
- int queue_index = kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS];
+ int queue_index = kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS];
#else
- int queue_index = kernel_split_params.queue_index[QUEUE_SHADER_SORTED_RAYS];
+ int queue_index = kernel_split_params.queue_index[QUEUE_SHADER_SORTED_RAYS];
#endif
- if(ray_index >= queue_index) {
- return;
- }
- ray_index = get_ray_index(kg, ray_index,
+ if (ray_index >= queue_index) {
+ return;
+ }
+ ray_index = get_ray_index(kg,
+ ray_index,
#ifdef __KERNEL_CUDA__
- QUEUE_ACTIVE_AND_REGENERATED_RAYS,
+ QUEUE_ACTIVE_AND_REGENERATED_RAYS,
#else
- QUEUE_SHADER_SORTED_RAYS,
+ QUEUE_SHADER_SORTED_RAYS,
#endif
- kernel_split_state.queue_data,
- kernel_split_params.queue_size,
- 0);
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
+ 0);
- if(ray_index == QUEUE_EMPTY_SLOT) {
- return;
- }
+ if (ray_index == QUEUE_EMPTY_SLOT) {
+ return;
+ }
- ccl_global char *ray_state = kernel_split_state.ray_state;
- if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
- ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+ ccl_global char *ray_state = kernel_split_state.ray_state;
+ if (IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
+ ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
- shader_eval_surface(kg, kernel_split_sd(sd, ray_index), state, state->flag);
+ shader_eval_surface(kg, kernel_split_sd(sd, ray_index), state, state->flag);
#ifdef __BRANCHED_PATH__
- if(kernel_data.integrator.branched) {
- shader_merge_closures(kernel_split_sd(sd, ray_index));
- }
- else
+ if (kernel_data.integrator.branched) {
+ shader_merge_closures(kernel_split_sd(sd, ray_index));
+ }
+ else
#endif
- {
- shader_prepare_closures(kernel_split_sd(sd, ray_index), state);
- }
- }
+ {
+ shader_prepare_closures(kernel_split_sd(sd, ray_index), state);
+ }
+ }
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_shader_setup.h b/intern/cycles/kernel/split/kernel_shader_setup.h
index ea3ec2ec83f..da332db2c98 100644
--- a/intern/cycles/kernel/split/kernel_shader_setup.h
+++ b/intern/cycles/kernel/split/kernel_shader_setup.h
@@ -25,54 +25,52 @@ CCL_NAMESPACE_BEGIN
ccl_device void kernel_shader_setup(KernelGlobals *kg,
ccl_local_param unsigned int *local_queue_atomics)
{
- /* Enqeueue RAY_TO_REGENERATE rays into QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS queue. */
- if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
- *local_queue_atomics = 0;
- }
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
+  /* Enqueue RAY_TO_REGENERATE rays into the QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS queue. */
+ if (ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
+ *local_queue_atomics = 0;
+ }
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
- int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
- int queue_index = kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS];
- if(ray_index >= queue_index) {
- return;
- }
- ray_index = get_ray_index(kg, ray_index,
- QUEUE_ACTIVE_AND_REGENERATED_RAYS,
- kernel_split_state.queue_data,
- kernel_split_params.queue_size,
- 0);
+ int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ int queue_index = kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS];
+ if (ray_index >= queue_index) {
+ return;
+ }
+ ray_index = get_ray_index(kg,
+ ray_index,
+ QUEUE_ACTIVE_AND_REGENERATED_RAYS,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
+ 0);
- if(ray_index == QUEUE_EMPTY_SLOT) {
- return;
- }
+ if (ray_index == QUEUE_EMPTY_SLOT) {
+ return;
+ }
- char enqueue_flag = (IS_STATE(kernel_split_state.ray_state, ray_index, RAY_TO_REGENERATE)) ? 1 : 0;
- enqueue_ray_index_local(ray_index,
- QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
- enqueue_flag,
- kernel_split_params.queue_size,
- local_queue_atomics,
- kernel_split_state.queue_data,
- kernel_split_params.queue_index);
+ char enqueue_flag = (IS_STATE(kernel_split_state.ray_state, ray_index, RAY_TO_REGENERATE)) ? 1 :
+ 0;
+ enqueue_ray_index_local(ray_index,
+ QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
+ enqueue_flag,
+ kernel_split_params.queue_size,
+ local_queue_atomics,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_index);
- /* Continue on with shader evaluation. */
- if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE)) {
- Intersection isect = kernel_split_state.isect[ray_index];
- Ray ray = kernel_split_state.ray[ray_index];
- ShaderData *sd = kernel_split_sd(sd, ray_index);
+ /* Continue on with shader evaluation. */
+ if (IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE)) {
+ Intersection isect = kernel_split_state.isect[ray_index];
+ Ray ray = kernel_split_state.ray[ray_index];
+ ShaderData *sd = kernel_split_sd(sd, ray_index);
- shader_setup_from_ray(kg,
- sd,
- &isect,
- &ray);
+ shader_setup_from_ray(kg, sd, &isect, &ray);
#ifdef __VOLUME__
- if(sd->flag & SD_HAS_ONLY_VOLUME) {
- ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_HAS_ONLY_VOLUME);
- }
+ if (sd->flag & SD_HAS_ONLY_VOLUME) {
+ ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_HAS_ONLY_VOLUME);
+ }
#endif
- }
-
+ }
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_shader_sort.h b/intern/cycles/kernel/split/kernel_shader_sort.h
index 666355de334..95d33a42014 100644
--- a/intern/cycles/kernel/split/kernel_shader_sort.h
+++ b/intern/cycles/kernel/split/kernel_shader_sort.h
@@ -16,82 +16,82 @@
CCL_NAMESPACE_BEGIN
-
-ccl_device void kernel_shader_sort(KernelGlobals *kg,
- ccl_local_param ShaderSortLocals *locals)
+ccl_device void kernel_shader_sort(KernelGlobals *kg, ccl_local_param ShaderSortLocals *locals)
{
#ifndef __KERNEL_CUDA__
- int tid = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
- uint qsize = kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS];
- if(tid == 0) {
- kernel_split_params.queue_index[QUEUE_SHADER_SORTED_RAYS] = qsize;
- }
+ int tid = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ uint qsize = kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS];
+ if (tid == 0) {
+ kernel_split_params.queue_index[QUEUE_SHADER_SORTED_RAYS] = qsize;
+ }
- uint offset = (tid/SHADER_SORT_LOCAL_SIZE)*SHADER_SORT_BLOCK_SIZE;
- if(offset >= qsize) {
- return;
- }
+ uint offset = (tid / SHADER_SORT_LOCAL_SIZE) * SHADER_SORT_BLOCK_SIZE;
+ if (offset >= qsize) {
+ return;
+ }
- int lid = ccl_local_id(1) * ccl_local_size(0) + ccl_local_id(0);
- uint input = QUEUE_ACTIVE_AND_REGENERATED_RAYS * (kernel_split_params.queue_size);
- uint output = QUEUE_SHADER_SORTED_RAYS * (kernel_split_params.queue_size);
- ccl_local uint *local_value = &locals->local_value[0];
- ccl_local ushort *local_index = &locals->local_index[0];
+ int lid = ccl_local_id(1) * ccl_local_size(0) + ccl_local_id(0);
+ uint input = QUEUE_ACTIVE_AND_REGENERATED_RAYS * (kernel_split_params.queue_size);
+ uint output = QUEUE_SHADER_SORTED_RAYS * (kernel_split_params.queue_size);
+ ccl_local uint *local_value = &locals->local_value[0];
+ ccl_local ushort *local_index = &locals->local_index[0];
- /* copy to local memory */
- for(uint i = 0; i < SHADER_SORT_BLOCK_SIZE; i += SHADER_SORT_LOCAL_SIZE) {
- uint idx = offset + i + lid;
- uint add = input + idx;
- uint value = (~0);
- if(idx < qsize) {
- int ray_index = kernel_split_state.queue_data[add];
- bool valid = (ray_index != QUEUE_EMPTY_SLOT) && IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE);
- if(valid) {
- value = kernel_split_sd(sd, ray_index)->shader & SHADER_MASK;
- }
- }
- local_value[i + lid] = value;
- local_index[i + lid] = i + lid;
- }
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
+ /* copy to local memory */
+ for (uint i = 0; i < SHADER_SORT_BLOCK_SIZE; i += SHADER_SORT_LOCAL_SIZE) {
+ uint idx = offset + i + lid;
+ uint add = input + idx;
+ uint value = (~0);
+ if (idx < qsize) {
+ int ray_index = kernel_split_state.queue_data[add];
+ bool valid = (ray_index != QUEUE_EMPTY_SLOT) &&
+ IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE);
+ if (valid) {
+ value = kernel_split_sd(sd, ray_index)->shader & SHADER_MASK;
+ }
+ }
+ local_value[i + lid] = value;
+ local_index[i + lid] = i + lid;
+ }
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
- /* skip sorting for cpu split kernel */
+ /* skip sorting for cpu split kernel */
# ifdef __KERNEL_OPENCL__
- /* bitonic sort */
- for(uint length = 1; length < SHADER_SORT_BLOCK_SIZE; length <<= 1) {
- for(uint inc = length; inc > 0; inc >>= 1) {
- for(uint ii = 0; ii < SHADER_SORT_BLOCK_SIZE; ii += SHADER_SORT_LOCAL_SIZE) {
- uint i = lid + ii;
- bool direction = ((i & (length << 1)) != 0);
- uint j = i ^ inc;
- ushort ioff = local_index[i];
- ushort joff = local_index[j];
- uint iKey = local_value[ioff];
- uint jKey = local_value[joff];
- bool smaller = (jKey < iKey) || (jKey == iKey && j < i);
- bool swap = smaller ^ (j < i) ^ direction;
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
- local_index[i] = (swap) ? joff : ioff;
- local_index[j] = (swap) ? ioff : joff;
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
- }
- }
- }
-# endif /* __KERNEL_OPENCL__ */
+ /* bitonic sort */
+ for (uint length = 1; length < SHADER_SORT_BLOCK_SIZE; length <<= 1) {
+ for (uint inc = length; inc > 0; inc >>= 1) {
+ for (uint ii = 0; ii < SHADER_SORT_BLOCK_SIZE; ii += SHADER_SORT_LOCAL_SIZE) {
+ uint i = lid + ii;
+ bool direction = ((i & (length << 1)) != 0);
+ uint j = i ^ inc;
+ ushort ioff = local_index[i];
+ ushort joff = local_index[j];
+ uint iKey = local_value[ioff];
+ uint jKey = local_value[joff];
+ bool smaller = (jKey < iKey) || (jKey == iKey && j < i);
+ bool swap = smaller ^ (j < i) ^ direction;
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
+ local_index[i] = (swap) ? joff : ioff;
+ local_index[j] = (swap) ? ioff : joff;
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
+ }
+ }
+ }
+# endif /* __KERNEL_OPENCL__ */
- /* copy to destination */
- for(uint i = 0; i < SHADER_SORT_BLOCK_SIZE; i += SHADER_SORT_LOCAL_SIZE) {
- uint idx = offset + i + lid;
- uint lidx = local_index[i + lid];
- uint outi = output + idx;
- uint ini = input + offset + lidx;
- uint value = local_value[lidx];
- if(idx < qsize) {
- kernel_split_state.queue_data[outi] = (value == (~0)) ? QUEUE_EMPTY_SLOT : kernel_split_state.queue_data[ini];
- }
- }
-#endif /* __KERNEL_CUDA__ */
+ /* copy to destination */
+ for (uint i = 0; i < SHADER_SORT_BLOCK_SIZE; i += SHADER_SORT_LOCAL_SIZE) {
+ uint idx = offset + i + lid;
+ uint lidx = local_index[i + lid];
+ uint outi = output + idx;
+ uint ini = input + offset + lidx;
+ uint value = local_value[lidx];
+ if (idx < qsize) {
+ kernel_split_state.queue_data[outi] = (value == (~0)) ? QUEUE_EMPTY_SLOT :
+ kernel_split_state.queue_data[ini];
+ }
+ }
+#endif /* __KERNEL_CUDA__ */
}
CCL_NAMESPACE_END
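The sort above is a bitonic network over (key, index) pairs, permuting indices while the keys stay in place. A sequential C sketch of the same network for a power-of-two n; equal keys need no tie-break for the network to stay valid, though the kernel adds one for determinism:

/* Sorts index[] so that key[index[0]] <= ... <= key[index[n-1]]; n must be a
 * power of two, mirroring SHADER_SORT_BLOCK_SIZE. */
static void bitonic_sort_indices(const unsigned *key, unsigned short *index, unsigned n)
{
  for (unsigned length = 1; length < n; length <<= 1) {
    for (unsigned inc = length; inc > 0; inc >>= 1) {
      for (unsigned i = 0; i < n; i++) {
        unsigned j = i ^ inc;
        if (j <= i) {
          continue; /* handle each pair once */
        }
        int ascending = ((i & (length << 1)) == 0);
        unsigned ki = key[index[i]];
        unsigned kj = key[index[j]];
        int out_of_order = ascending ? (kj < ki) : (ki < kj);
        if (out_of_order) {
          unsigned short t = index[i];
          index[i] = index[j];
          index[j] = t;
        }
      }
    }
  }
}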
diff --git a/intern/cycles/kernel/split/kernel_shadow_blocked_ao.h b/intern/cycles/kernel/split/kernel_shadow_blocked_ao.h
index fb08112503a..5d772fc597b 100644
--- a/intern/cycles/kernel/split/kernel_shadow_blocked_ao.h
+++ b/intern/cycles/kernel/split/kernel_shadow_blocked_ao.h
@@ -19,35 +19,40 @@ CCL_NAMESPACE_BEGIN
/* Shadow ray cast for AO. */
ccl_device void kernel_shadow_blocked_ao(KernelGlobals *kg)
{
- unsigned int ao_queue_length = kernel_split_params.queue_index[QUEUE_SHADOW_RAY_CAST_AO_RAYS];
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
+ unsigned int ao_queue_length = kernel_split_params.queue_index[QUEUE_SHADOW_RAY_CAST_AO_RAYS];
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
- int ray_index = QUEUE_EMPTY_SLOT;
- int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
- if(thread_index < ao_queue_length) {
- ray_index = get_ray_index(kg, thread_index, QUEUE_SHADOW_RAY_CAST_AO_RAYS,
- kernel_split_state.queue_data, kernel_split_params.queue_size, 1);
- }
+ int ray_index = QUEUE_EMPTY_SLOT;
+ int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ if (thread_index < ao_queue_length) {
+ ray_index = get_ray_index(kg,
+ thread_index,
+ QUEUE_SHADOW_RAY_CAST_AO_RAYS,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
+ 1);
+ }
- if(ray_index == QUEUE_EMPTY_SLOT) {
- return;
- }
+ if (ray_index == QUEUE_EMPTY_SLOT) {
+ return;
+ }
- ShaderData *sd = kernel_split_sd(sd, ray_index);
- ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
- PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
- ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
- float3 throughput = kernel_split_state.throughput[ray_index];
+ ShaderData *sd = kernel_split_sd(sd, ray_index);
+ ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+ ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+ float3 throughput = kernel_split_state.throughput[ray_index];
#ifdef __BRANCHED_PATH__
- if(!kernel_data.integrator.branched || IS_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_INDIRECT)) {
+ if (!kernel_data.integrator.branched ||
+ IS_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_INDIRECT)) {
#endif
- kernel_path_ao(kg, sd, emission_sd, L, state, throughput, shader_bsdf_alpha(kg, sd));
+ kernel_path_ao(kg, sd, emission_sd, L, state, throughput, shader_bsdf_alpha(kg, sd));
#ifdef __BRANCHED_PATH__
- }
- else {
- kernel_branched_path_ao(kg, sd, emission_sd, L, state, throughput);
- }
+ }
+ else {
+ kernel_branched_path_ao(kg, sd, emission_sd, L, state, throughput);
+ }
#endif
}
diff --git a/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h b/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h
index da072fd5f1a..82990ce9fae 100644
--- a/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h
+++ b/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h
@@ -19,89 +19,80 @@ CCL_NAMESPACE_BEGIN
/* Shadow ray cast for direct visible light. */
ccl_device void kernel_shadow_blocked_dl(KernelGlobals *kg)
{
- unsigned int dl_queue_length = kernel_split_params.queue_index[QUEUE_SHADOW_RAY_CAST_DL_RAYS];
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
+ unsigned int dl_queue_length = kernel_split_params.queue_index[QUEUE_SHADOW_RAY_CAST_DL_RAYS];
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
- int ray_index = QUEUE_EMPTY_SLOT;
- int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
- if(thread_index < dl_queue_length) {
- ray_index = get_ray_index(kg, thread_index, QUEUE_SHADOW_RAY_CAST_DL_RAYS,
- kernel_split_state.queue_data, kernel_split_params.queue_size, 1);
- }
+ int ray_index = QUEUE_EMPTY_SLOT;
+ int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ if (thread_index < dl_queue_length) {
+ ray_index = get_ray_index(kg,
+ thread_index,
+ QUEUE_SHADOW_RAY_CAST_DL_RAYS,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
+ 1);
+ }
#ifdef __BRANCHED_PATH__
- /* TODO(mai): move this somewhere else? */
- if(thread_index == 0) {
- /* Clear QUEUE_INACTIVE_RAYS before next kernel. */
- kernel_split_params.queue_index[QUEUE_INACTIVE_RAYS] = 0;
- }
-#endif /* __BRANCHED_PATH__ */
+ /* TODO(mai): move this somewhere else? */
+ if (thread_index == 0) {
+ /* Clear QUEUE_INACTIVE_RAYS before next kernel. */
+ kernel_split_params.queue_index[QUEUE_INACTIVE_RAYS] = 0;
+ }
+#endif /* __BRANCHED_PATH__ */
- if(ray_index == QUEUE_EMPTY_SLOT)
- return;
+ if (ray_index == QUEUE_EMPTY_SLOT)
+ return;
- ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
- Ray ray = kernel_split_state.light_ray[ray_index];
- PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
- ShaderData *sd = kernel_split_sd(sd, ray_index);
- float3 throughput = kernel_split_state.throughput[ray_index];
+ ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+ Ray ray = kernel_split_state.light_ray[ray_index];
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+ ShaderData *sd = kernel_split_sd(sd, ray_index);
+ float3 throughput = kernel_split_state.throughput[ray_index];
- BsdfEval L_light = kernel_split_state.bsdf_eval[ray_index];
- ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
- bool is_lamp = kernel_split_state.is_lamp[ray_index];
+ BsdfEval L_light = kernel_split_state.bsdf_eval[ray_index];
+ ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
+ bool is_lamp = kernel_split_state.is_lamp[ray_index];
-# if defined(__BRANCHED_PATH__) || defined(__SHADOW_TRICKS__)
- bool use_branched = false;
- int all = 0;
+#if defined(__BRANCHED_PATH__) || defined(__SHADOW_TRICKS__)
+ bool use_branched = false;
+ int all = 0;
- if(state->flag & PATH_RAY_SHADOW_CATCHER) {
- use_branched = true;
- all = 1;
- }
-# if defined(__BRANCHED_PATH__)
- else if(kernel_data.integrator.branched) {
- use_branched = true;
+ if (state->flag & PATH_RAY_SHADOW_CATCHER) {
+ use_branched = true;
+ all = 1;
+ }
+# if defined(__BRANCHED_PATH__)
+ else if (kernel_data.integrator.branched) {
+ use_branched = true;
- if(IS_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_INDIRECT)) {
- all = (kernel_data.integrator.sample_all_lights_indirect);
- }
- else
- {
- all = (kernel_data.integrator.sample_all_lights_direct);
- }
- }
-# endif /* __BRANCHED_PATH__ */
+ if (IS_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_INDIRECT)) {
+ all = (kernel_data.integrator.sample_all_lights_indirect);
+ }
+ else {
+ all = (kernel_data.integrator.sample_all_lights_direct);
+ }
+ }
+# endif /* __BRANCHED_PATH__ */
- if(use_branched) {
- kernel_branched_path_surface_connect_light(kg,
- sd,
- emission_sd,
- state,
- throughput,
- 1.0f,
- L,
- all);
- }
- else
-# endif /* defined(__BRANCHED_PATH__) || defined(__SHADOW_TRICKS__)*/
- {
- /* trace shadow ray */
- float3 shadow;
+ if (use_branched) {
+ kernel_branched_path_surface_connect_light(
+ kg, sd, emission_sd, state, throughput, 1.0f, L, all);
+ }
+ else
+#endif /* defined(__BRANCHED_PATH__) || defined(__SHADOW_TRICKS__) */
+ {
+ /* trace shadow ray */
+ float3 shadow;
- if(!shadow_blocked(kg,
- sd,
- emission_sd,
- state,
- &ray,
- &shadow))
- {
- /* accumulate */
- path_radiance_accum_light(L, state, throughput, &L_light, shadow, 1.0f, is_lamp);
- }
- else {
- path_radiance_accum_total_light(L, state, throughput, &L_light);
- }
- }
+ if (!shadow_blocked(kg, sd, emission_sd, state, &ray, &shadow)) {
+ /* accumulate */
+ path_radiance_accum_light(L, state, throughput, &L_light, shadow, 1.0f, is_lamp);
+ }
+ else {
+ path_radiance_accum_total_light(L, state, throughput, &L_light);
+ }
+ }
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_split_common.h b/intern/cycles/kernel/split/kernel_split_common.h
index 4b86696691a..384bc952460 100644
--- a/intern/cycles/kernel/split/kernel_split_common.h
+++ b/intern/cycles/kernel/split/kernel_split_common.h
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#ifndef __KERNEL_SPLIT_H__
-#define __KERNEL_SPLIT_H__
+#ifndef __KERNEL_SPLIT_H__
+#define __KERNEL_SPLIT_H__
#include "kernel/kernel_math.h"
#include "kernel/kernel_types.h"
@@ -57,47 +57,48 @@ CCL_NAMESPACE_BEGIN
ccl_device_inline void kernel_split_path_end(KernelGlobals *kg, int ray_index)
{
- ccl_global char *ray_state = kernel_split_state.ray_state;
+ ccl_global char *ray_state = kernel_split_state.ray_state;
#ifdef __BRANCHED_PATH__
# ifdef __SUBSURFACE__
- ccl_addr_space SubsurfaceIndirectRays *ss_indirect = &kernel_split_state.ss_rays[ray_index];
-
- if(ss_indirect->num_rays) {
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
- }
- else
-# endif /* __SUBSURFACE__ */
- if(IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT_SHARED)) {
- int orig_ray = kernel_split_state.branched_state[ray_index].original_ray;
-
- PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
- PathRadiance *orig_ray_L = &kernel_split_state.path_radiance[orig_ray];
-
- path_radiance_sum_indirect(L);
- path_radiance_accum_sample(orig_ray_L, L);
-
- atomic_fetch_and_dec_uint32((ccl_global uint*)&kernel_split_state.branched_state[orig_ray].shared_sample_count);
-
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_INACTIVE);
- }
- else if(IS_FLAG(ray_state, ray_index, RAY_BRANCHED_LIGHT_INDIRECT)) {
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_LIGHT_INDIRECT_NEXT_ITER);
- }
- else if(IS_FLAG(ray_state, ray_index, RAY_BRANCHED_VOLUME_INDIRECT)) {
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_VOLUME_INDIRECT_NEXT_ITER);
- }
- else if(IS_FLAG(ray_state, ray_index, RAY_BRANCHED_SUBSURFACE_INDIRECT)) {
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_SUBSURFACE_INDIRECT_NEXT_ITER);
- }
- else {
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
- }
+ ccl_addr_space SubsurfaceIndirectRays *ss_indirect = &kernel_split_state.ss_rays[ray_index];
+
+ if (ss_indirect->num_rays) {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
+ }
+ else
+# endif /* __SUBSURFACE__ */
+ if (IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT_SHARED)) {
+ int orig_ray = kernel_split_state.branched_state[ray_index].original_ray;
+
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+ PathRadiance *orig_ray_L = &kernel_split_state.path_radiance[orig_ray];
+
+ path_radiance_sum_indirect(L);
+ path_radiance_accum_sample(orig_ray_L, L);
+
+ atomic_fetch_and_dec_uint32(
+ (ccl_global uint *)&kernel_split_state.branched_state[orig_ray].shared_sample_count);
+
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_INACTIVE);
+ }
+ else if (IS_FLAG(ray_state, ray_index, RAY_BRANCHED_LIGHT_INDIRECT)) {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_LIGHT_INDIRECT_NEXT_ITER);
+ }
+ else if (IS_FLAG(ray_state, ray_index, RAY_BRANCHED_VOLUME_INDIRECT)) {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_VOLUME_INDIRECT_NEXT_ITER);
+ }
+ else if (IS_FLAG(ray_state, ray_index, RAY_BRANCHED_SUBSURFACE_INDIRECT)) {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_SUBSURFACE_INDIRECT_NEXT_ITER);
+ }
+ else {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
+ }
#else
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
#endif
}
CCL_NAMESPACE_END
-#endif /* __KERNEL_SPLIT_H__ */
+#endif /* __KERNEL_SPLIT_H__ */
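
kernel_split_path_end() above is one big dispatch on the per-ray state byte: IS_FLAG tests sticky flags set earlier in the pipeline, ASSIGN_RAY_STATE moves the ray to its next stage. A plausible standalone re-creation of that bookkeeping, assuming (hypothetically) that the low bits hold an exclusive state and the high bits hold flags; the real macros may encode things differently:

#include <cassert>
#include <cstdint>

/* Hypothetical encoding: low nibble = exclusive state, high nibble = flags. */
enum : uint8_t {
  TOY_RAY_ACTIVE = 0,
  TOY_RAY_INACTIVE = 1,
  TOY_RAY_UPDATE_BUFFER = 2,
  TOY_RAY_BRANCHED_INDIRECT_SHARED = 1u << 4,
};

static inline void toy_assign_state(uint8_t *rs, int i, uint8_t state)
{
  rs[i] = (uint8_t)((rs[i] & 0xF0u) | state); /* keep flags, swap state */
}
static inline bool toy_is_state(const uint8_t *rs, int i, uint8_t state)
{
  return (rs[i] & 0x0Fu) == state;
}
static inline void toy_add_flag(uint8_t *rs, int i, uint8_t flag)
{
  rs[i] |= flag;
}
static inline bool toy_is_flag(const uint8_t *rs, int i, uint8_t flag)
{
  return (rs[i] & flag) != 0;
}

int main()
{
  uint8_t ray_state[4] = {0, 0, 0, 0};
  toy_add_flag(ray_state, 2, TOY_RAY_BRANCHED_INDIRECT_SHARED);
  toy_assign_state(ray_state, 2, TOY_RAY_INACTIVE);
  /* The flag survives the state transition, just like in the kernel. */
  assert(toy_is_state(ray_state, 2, TOY_RAY_INACTIVE));
  assert(toy_is_flag(ray_state, 2, TOY_RAY_BRANCHED_INDIRECT_SHARED));
  return 0;
}
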
diff --git a/intern/cycles/kernel/split/kernel_split_data.h b/intern/cycles/kernel/split/kernel_split_data.h
index 3f6b3977d79..433b1221a37 100644
--- a/intern/cycles/kernel/split/kernel_split_data.h
+++ b/intern/cycles/kernel/split/kernel_split_data.h
@@ -24,22 +24,22 @@ CCL_NAMESPACE_BEGIN
ccl_device_inline uint64_t split_data_buffer_size(KernelGlobals *kg, size_t num_elements)
{
- (void) kg; /* Unused on CPU. */
+ (void)kg; /* Unused on CPU. */
- uint64_t size = 0;
-#define SPLIT_DATA_ENTRY(type, name, num) + align_up(num_elements * num * sizeof(type), 16)
- size = size SPLIT_DATA_ENTRIES;
+ uint64_t size = 0;
+#define SPLIT_DATA_ENTRY(type, name, num) +align_up(num_elements * num * sizeof(type), 16)
+ size = size SPLIT_DATA_ENTRIES;
#undef SPLIT_DATA_ENTRY
- uint64_t closure_size = sizeof(ShaderClosure) * (kernel_data.integrator.max_closures-1);
+ uint64_t closure_size = sizeof(ShaderClosure) * (kernel_data.integrator.max_closures - 1);
#ifdef __BRANCHED_PATH__
- size += align_up(num_elements * (sizeof(ShaderData) + closure_size), 16);
+ size += align_up(num_elements * (sizeof(ShaderData) + closure_size), 16);
#endif
- size += align_up(num_elements * (sizeof(ShaderData) + closure_size), 16);
+ size += align_up(num_elements * (sizeof(ShaderData) + closure_size), 16);
- return size;
+ return size;
}
ccl_device_inline void split_data_init(KernelGlobals *kg,
@@ -48,28 +48,29 @@ ccl_device_inline void split_data_init(KernelGlobals *kg,
ccl_global void *data,
ccl_global char *ray_state)
{
- (void) kg; /* Unused on CPU. */
+ (void)kg; /* Unused on CPU. */
- ccl_global char *p = (ccl_global char*)data;
+ ccl_global char *p = (ccl_global char *)data;
#define SPLIT_DATA_ENTRY(type, name, num) \
- split_data->name = (type*)p; p += align_up(num_elements * num * sizeof(type), 16);
- SPLIT_DATA_ENTRIES;
+ split_data->name = (type *)p; \
+ p += align_up(num_elements * num * sizeof(type), 16);
+ SPLIT_DATA_ENTRIES;
#undef SPLIT_DATA_ENTRY
- uint64_t closure_size = sizeof(ShaderClosure) * (kernel_data.integrator.max_closures-1);
+ uint64_t closure_size = sizeof(ShaderClosure) * (kernel_data.integrator.max_closures - 1);
#ifdef __BRANCHED_PATH__
- split_data->_branched_state_sd = (ShaderData*)p;
- p += align_up(num_elements * (sizeof(ShaderData) + closure_size), 16);
+ split_data->_branched_state_sd = (ShaderData *)p;
+ p += align_up(num_elements * (sizeof(ShaderData) + closure_size), 16);
#endif
- split_data->_sd = (ShaderData*)p;
- p += align_up(num_elements * (sizeof(ShaderData) + closure_size), 16);
+ split_data->_sd = (ShaderData *)p;
+ p += align_up(num_elements * (sizeof(ShaderData) + closure_size), 16);
- split_data->ray_state = ray_state;
+ split_data->ray_state = ray_state;
}
CCL_NAMESPACE_END
-#endif /* __KERNEL_SPLIT_DATA_H__ */
+#endif /* __KERNEL_SPLIT_DATA_H__ */
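
SPLIT_DATA_ENTRY is an X-macro: the single SPLIT_DATA_ENTRIES list is expanded once to sum the buffer size (each entry expands to "+ align_up(...)", building one long expression) and once to carve typed pointers out of a single allocation. A self-contained sketch of the same technique with a toy entry list (names are illustrative, not the real split state; the sketch uses += where the kernel builds one expression):

#include <cstdint>
#include <cstdio>
#include <cstdlib>

static inline uint64_t toy_align_up(uint64_t n, uint64_t a)
{
  return (n + a - 1) & ~(a - 1);
}

/* One list, expanded several times with different TOY_ENTRY definitions. */
#define TOY_ENTRIES \
  TOY_ENTRY(float, throughput, 1) \
  TOY_ENTRY(int, is_lamp, 1) \
  TOY_ENTRY(int, queue_data, 2)

struct ToySplitData {
#define TOY_ENTRY(type, name, num) type *name;
  TOY_ENTRIES
#undef TOY_ENTRY
};

static uint64_t toy_buffer_size(uint64_t num_elements)
{
  uint64_t size = 0;
#define TOY_ENTRY(type, name, num) \
  size += toy_align_up(num_elements * (num) * sizeof(type), 16);
  TOY_ENTRIES
#undef TOY_ENTRY
  return size;
}

static void toy_init(ToySplitData *split, uint64_t num_elements, void *data)
{
  char *p = (char *)data;
#define TOY_ENTRY(type, name, num) \
  split->name = (type *)p; \
  p += toy_align_up(num_elements * (num) * sizeof(type), 16);
  TOY_ENTRIES
#undef TOY_ENTRY
}

int main()
{
  const uint64_t n = 1024;
  void *buffer = malloc(toy_buffer_size(n));
  ToySplitData split;
  toy_init(&split, n, buffer);
  split.throughput[0] = 1.0f; /* every entry now points into one buffer */
  printf("buffer size: %llu bytes\n", (unsigned long long)toy_buffer_size(n));
  free(buffer);
  return 0;
}

The payoff is that adding a per-ray field means touching only the entry list; sizing and initialization stay in sync automatically.
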
diff --git a/intern/cycles/kernel/split/kernel_split_data_types.h b/intern/cycles/kernel/split/kernel_split_data_types.h
index 83df1e2a0a6..6ff3f5bdb55 100644
--- a/intern/cycles/kernel/split/kernel_split_data_types.h
+++ b/intern/cycles/kernel/split/kernel_split_data_types.h
@@ -22,17 +22,17 @@ CCL_NAMESPACE_BEGIN
/* Parameters used by the split kernels; we use a single struct to avoid passing these to each kernel. */
typedef struct SplitParams {
- WorkTile tile;
- uint total_work_size;
+ WorkTile tile;
+ uint total_work_size;
- ccl_global unsigned int *work_pools;
+ ccl_global unsigned int *work_pools;
- ccl_global int *queue_index;
- int queue_size;
- ccl_global char *use_queues_flag;
+ ccl_global int *queue_index;
+ int queue_size;
+ ccl_global char *use_queues_flag;
- /* Place for storing sd->flag. AMD GPU OpenCL compiler workaround */
- int dummy_sd_flag;
+ /* Place for storing sd->flag. AMD GPU OpenCL compiler workaround */
+ int dummy_sd_flag;
} SplitParams;
/* Global memory variables [porting]; this memory is used for
@@ -46,98 +46,98 @@ typedef struct SplitParams {
#ifdef __BRANCHED_PATH__
typedef ccl_global struct SplitBranchedState {
- /* various state that must be kept and restored after an indirect loop */
- PathState path_state;
- float3 throughput;
- Ray ray;
+ /* various state that must be kept and restored after an indirect loop */
+ PathState path_state;
+ float3 throughput;
+ Ray ray;
- Intersection isect;
+ Intersection isect;
- char ray_state;
+ char ray_state;
- /* indirect loop state */
- int next_closure;
- int next_sample;
+ /* indirect loop state */
+ int next_closure;
+ int next_sample;
-#ifdef __SUBSURFACE__
- int ss_next_closure;
- int ss_next_sample;
- int next_hit;
- int num_hits;
-
- uint lcg_state;
- LocalIntersection ss_isect;
-#endif /*__SUBSURFACE__ */
-
- int shared_sample_count; /* number of branched samples shared with other threads */
- int original_ray; /* index of original ray when sharing branched samples */
- bool waiting_on_shared_samples;
+# ifdef __SUBSURFACE__
+ int ss_next_closure;
+ int ss_next_sample;
+ int next_hit;
+ int num_hits;
+
+ uint lcg_state;
+ LocalIntersection ss_isect;
+# endif /* __SUBSURFACE__ */
+
+ int shared_sample_count; /* number of branched samples shared with other threads */
+ int original_ray; /* index of original ray when sharing branched samples */
+ bool waiting_on_shared_samples;
} SplitBranchedState;
-#define SPLIT_DATA_BRANCHED_ENTRIES \
- SPLIT_DATA_ENTRY( SplitBranchedState, branched_state, 1) \
- SPLIT_DATA_ENTRY(ShaderData, _branched_state_sd, 0)
+# define SPLIT_DATA_BRANCHED_ENTRIES \
+ SPLIT_DATA_ENTRY(SplitBranchedState, branched_state, 1) \
+ SPLIT_DATA_ENTRY(ShaderData, _branched_state_sd, 0)
#else
-#define SPLIT_DATA_BRANCHED_ENTRIES
-#endif /* __BRANCHED_PATH__ */
+# define SPLIT_DATA_BRANCHED_ENTRIES
+#endif /* __BRANCHED_PATH__ */
#ifdef __SUBSURFACE__
# define SPLIT_DATA_SUBSURFACE_ENTRIES \
- SPLIT_DATA_ENTRY(ccl_global SubsurfaceIndirectRays, ss_rays, 1)
+ SPLIT_DATA_ENTRY(ccl_global SubsurfaceIndirectRays, ss_rays, 1)
#else
# define SPLIT_DATA_SUBSURFACE_ENTRIES
-#endif /* __SUBSURFACE__ */
+#endif /* __SUBSURFACE__ */
#ifdef __VOLUME__
-# define SPLIT_DATA_VOLUME_ENTRIES \
- SPLIT_DATA_ENTRY(ccl_global PathState, state_shadow, 1)
+# define SPLIT_DATA_VOLUME_ENTRIES SPLIT_DATA_ENTRY(ccl_global PathState, state_shadow, 1)
#else
# define SPLIT_DATA_VOLUME_ENTRIES
-#endif /* __VOLUME__ */
+#endif /* __VOLUME__ */
#define SPLIT_DATA_ENTRIES \
- SPLIT_DATA_ENTRY(ccl_global float3, throughput, 1) \
- SPLIT_DATA_ENTRY(PathRadiance, path_radiance, 1) \
- SPLIT_DATA_ENTRY(ccl_global Ray, ray, 1) \
- SPLIT_DATA_ENTRY(ccl_global PathState, path_state, 1) \
- SPLIT_DATA_ENTRY(ccl_global Intersection, isect, 1) \
- SPLIT_DATA_ENTRY(ccl_global BsdfEval, bsdf_eval, 1) \
- SPLIT_DATA_ENTRY(ccl_global int, is_lamp, 1) \
- SPLIT_DATA_ENTRY(ccl_global Ray, light_ray, 1) \
- SPLIT_DATA_ENTRY(ccl_global int, queue_data, (NUM_QUEUES*2)) /* TODO(mai): this is too large? */ \
- SPLIT_DATA_ENTRY(ccl_global uint, buffer_offset, 1) \
- SPLIT_DATA_ENTRY(ShaderDataTinyStorage, sd_DL_shadow, 1) \
- SPLIT_DATA_SUBSURFACE_ENTRIES \
- SPLIT_DATA_VOLUME_ENTRIES \
- SPLIT_DATA_BRANCHED_ENTRIES \
- SPLIT_DATA_ENTRY(ShaderData, _sd, 0)
+ SPLIT_DATA_ENTRY(ccl_global float3, throughput, 1) \
+ SPLIT_DATA_ENTRY(PathRadiance, path_radiance, 1) \
+ SPLIT_DATA_ENTRY(ccl_global Ray, ray, 1) \
+ SPLIT_DATA_ENTRY(ccl_global PathState, path_state, 1) \
+ SPLIT_DATA_ENTRY(ccl_global Intersection, isect, 1) \
+ SPLIT_DATA_ENTRY(ccl_global BsdfEval, bsdf_eval, 1) \
+ SPLIT_DATA_ENTRY(ccl_global int, is_lamp, 1) \
+ SPLIT_DATA_ENTRY(ccl_global Ray, light_ray, 1) \
+ SPLIT_DATA_ENTRY( \
+ ccl_global int, queue_data, (NUM_QUEUES * 2)) /* TODO(mai): this is too large? */ \
+ SPLIT_DATA_ENTRY(ccl_global uint, buffer_offset, 1) \
+ SPLIT_DATA_ENTRY(ShaderDataTinyStorage, sd_DL_shadow, 1) \
+ SPLIT_DATA_SUBSURFACE_ENTRIES \
+ SPLIT_DATA_VOLUME_ENTRIES \
+ SPLIT_DATA_BRANCHED_ENTRIES \
+ SPLIT_DATA_ENTRY(ShaderData, _sd, 0)
/* entries to be copied to inactive rays when sharing branched samples (TODO: which are actually needed?) */
#define SPLIT_DATA_ENTRIES_BRANCHED_SHARED \
- SPLIT_DATA_ENTRY(ccl_global float3, throughput, 1) \
- SPLIT_DATA_ENTRY(PathRadiance, path_radiance, 1) \
- SPLIT_DATA_ENTRY(ccl_global Ray, ray, 1) \
- SPLIT_DATA_ENTRY(ccl_global PathState, path_state, 1) \
- SPLIT_DATA_ENTRY(ccl_global Intersection, isect, 1) \
- SPLIT_DATA_ENTRY(ccl_global BsdfEval, bsdf_eval, 1) \
- SPLIT_DATA_ENTRY(ccl_global int, is_lamp, 1) \
- SPLIT_DATA_ENTRY(ccl_global Ray, light_ray, 1) \
- SPLIT_DATA_ENTRY(ShaderDataTinyStorage, sd_DL_shadow, 1) \
- SPLIT_DATA_SUBSURFACE_ENTRIES \
- SPLIT_DATA_VOLUME_ENTRIES \
- SPLIT_DATA_BRANCHED_ENTRIES \
- SPLIT_DATA_ENTRY(ShaderData, _sd, 0)
+ SPLIT_DATA_ENTRY(ccl_global float3, throughput, 1) \
+ SPLIT_DATA_ENTRY(PathRadiance, path_radiance, 1) \
+ SPLIT_DATA_ENTRY(ccl_global Ray, ray, 1) \
+ SPLIT_DATA_ENTRY(ccl_global PathState, path_state, 1) \
+ SPLIT_DATA_ENTRY(ccl_global Intersection, isect, 1) \
+ SPLIT_DATA_ENTRY(ccl_global BsdfEval, bsdf_eval, 1) \
+ SPLIT_DATA_ENTRY(ccl_global int, is_lamp, 1) \
+ SPLIT_DATA_ENTRY(ccl_global Ray, light_ray, 1) \
+ SPLIT_DATA_ENTRY(ShaderDataTinyStorage, sd_DL_shadow, 1) \
+ SPLIT_DATA_SUBSURFACE_ENTRIES \
+ SPLIT_DATA_VOLUME_ENTRIES \
+ SPLIT_DATA_BRANCHED_ENTRIES \
+ SPLIT_DATA_ENTRY(ShaderData, _sd, 0)
/* struct that holds pointers to data in the shared state buffer */
typedef struct SplitData {
#define SPLIT_DATA_ENTRY(type, name, num) type *name;
- SPLIT_DATA_ENTRIES
+ SPLIT_DATA_ENTRIES
#undef SPLIT_DATA_ENTRY
- /* this is actually in a separate buffer from the rest of the split state data (so it can be read back from
- * the host easily) but is still used the same as the other data so we have it here in this struct as well
- */
- ccl_global char *ray_state;
+ /* This is actually in a separate buffer from the rest of the split state
+ * data, so it can be read back by the host easily, but it is used the same
+ * way as the other data, so we keep it here in this struct as well. */
+ ccl_global char *ray_state;
} SplitData;
#ifndef __KERNEL_CUDA__
@@ -148,30 +148,30 @@ __device__ SplitData __split_data;
# define kernel_split_state (__split_data)
__device__ SplitParams __split_param_data;
# define kernel_split_params (__split_param_data)
-#endif /* __KERNEL_CUDA__ */
+#endif /* __KERNEL_CUDA__ */
-#define kernel_split_sd(sd, ray_index) ((ShaderData*) \
- ( \
- ((ccl_global char*)kernel_split_state._##sd) + \
- (sizeof(ShaderData) + sizeof(ShaderClosure)*(kernel_data.integrator.max_closures-1)) * (ray_index) \
- ))
+#define kernel_split_sd(sd, ray_index) \
+ ((ShaderData *)(((ccl_global char *)kernel_split_state._##sd) + \
+ (sizeof(ShaderData) + \
+ sizeof(ShaderClosure) * (kernel_data.integrator.max_closures - 1)) * \
+ (ray_index)))
/* Local storage for queue_enqueue kernel. */
typedef struct QueueEnqueueLocals {
- uint queue_atomics[2];
+ uint queue_atomics[2];
} QueueEnqueueLocals;
/* Local storage for holdout_emission_blurring_pathtermination_ao kernel. */
typedef struct BackgroundAOLocals {
- uint queue_atomics_bg;
- uint queue_atomics_ao;
+ uint queue_atomics_bg;
+ uint queue_atomics_ao;
} BackgroundAOLocals;
typedef struct ShaderSortLocals {
- uint local_value[SHADER_SORT_BLOCK_SIZE];
- ushort local_index[SHADER_SORT_BLOCK_SIZE];
+ uint local_value[SHADER_SORT_BLOCK_SIZE];
+ ushort local_index[SHADER_SORT_BLOCK_SIZE];
} ShaderSortLocals;
CCL_NAMESPACE_END
-#endif /* __KERNEL_SPLIT_DATA_TYPES_H__ */
+#endif /* __KERNEL_SPLIT_DATA_TYPES_H__ */
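
The reflowed kernel_split_sd() macro hides some pointer arithmetic worth spelling out: ShaderData is a variable-size element, holding one ShaderClosure inline plus max_closures - 1 more immediately behind it, so the per-ray stride must be computed at run time. A toy version of the same indexing (stand-in types, hypothetical names):

#include <cstdlib>

/* Stand-ins for ShaderData/ShaderClosure. */
struct ToyClosure {
  float weight[4];
};
struct ToyShaderData {
  int num_closure;
  ToyClosure closure[1]; /* first closure inline, the rest overflow behind */
};

static inline size_t toy_sd_stride(int max_closures)
{
  return sizeof(ToyShaderData) + sizeof(ToyClosure) * (size_t)(max_closures - 1);
}

/* Index ray_index into a packed array of variable-size elements. */
static inline ToyShaderData *toy_split_sd(char *base, int max_closures, int ray_index)
{
  return (ToyShaderData *)(base + toy_sd_stride(max_closures) * ray_index);
}

int main()
{
  const int max_closures = 8, num_rays = 64;
  char *base = (char *)calloc(num_rays, toy_sd_stride(max_closures));
  ToyShaderData *sd = toy_split_sd(base, max_closures, 13);
  sd->num_closure = 0; /* each ray gets its own variable-size slot */
  free(base);
  return 0;
}
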
diff --git a/intern/cycles/kernel/split/kernel_subsurface_scatter.h b/intern/cycles/kernel/split/kernel_subsurface_scatter.h
index 08769fe303b..ba06ae3bc53 100644
--- a/intern/cycles/kernel/split/kernel_subsurface_scatter.h
+++ b/intern/cycles/kernel/split/kernel_subsurface_scatter.h
@@ -18,276 +18,247 @@ CCL_NAMESPACE_BEGIN
#if defined(__BRANCHED_PATH__) && defined(__SUBSURFACE__)
-ccl_device_inline void kernel_split_branched_path_subsurface_indirect_light_init(KernelGlobals *kg, int ray_index)
+ccl_device_inline void kernel_split_branched_path_subsurface_indirect_light_init(KernelGlobals *kg,
+ int ray_index)
{
- kernel_split_branched_path_indirect_loop_init(kg, ray_index);
+ kernel_split_branched_path_indirect_loop_init(kg, ray_index);
- SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
+ SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
- branched_state->ss_next_closure = 0;
- branched_state->ss_next_sample = 0;
+ branched_state->ss_next_closure = 0;
+ branched_state->ss_next_sample = 0;
- branched_state->num_hits = 0;
- branched_state->next_hit = 0;
+ branched_state->num_hits = 0;
+ branched_state->next_hit = 0;
- ADD_RAY_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_SUBSURFACE_INDIRECT);
+ ADD_RAY_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_SUBSURFACE_INDIRECT);
}
-ccl_device_noinline bool kernel_split_branched_path_subsurface_indirect_light_iter(KernelGlobals *kg, int ray_index)
+ccl_device_noinline bool kernel_split_branched_path_subsurface_indirect_light_iter(
+ KernelGlobals *kg, int ray_index)
{
- SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
-
- ShaderData *sd = kernel_split_sd(branched_state_sd, ray_index);
- PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
- ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
-
- for(int i = branched_state->ss_next_closure; i < sd->num_closure; i++) {
- ShaderClosure *sc = &sd->closure[i];
-
- if(!CLOSURE_IS_BSSRDF(sc->type))
- continue;
-
- /* Closure memory will be overwritten, so read required variables now. */
- Bssrdf *bssrdf = (Bssrdf *)sc;
- ClosureType bssrdf_type = sc->type;
- float bssrdf_roughness = bssrdf->roughness;
-
- /* set up random number generator */
- if(branched_state->ss_next_sample == 0 && branched_state->next_hit == 0 &&
- branched_state->next_closure == 0 && branched_state->next_sample == 0)
- {
- branched_state->lcg_state = lcg_state_init_addrspace(&branched_state->path_state,
- 0x68bc21eb);
- }
- int num_samples = kernel_data.integrator.subsurface_samples * 3;
- float num_samples_inv = 1.0f/num_samples;
- uint bssrdf_rng_hash = cmj_hash(branched_state->path_state.rng_hash, i);
-
- /* do subsurface scatter step with copy of shader data, this will
- * replace the BSSRDF with a diffuse BSDF closure */
- for(int j = branched_state->ss_next_sample; j < num_samples; j++) {
- ccl_global PathState *hit_state = &kernel_split_state.path_state[ray_index];
- *hit_state = branched_state->path_state;
- hit_state->rng_hash = bssrdf_rng_hash;
- path_state_branch(hit_state, j, num_samples);
-
- ccl_global LocalIntersection *ss_isect = &branched_state->ss_isect;
- float bssrdf_u, bssrdf_v;
- path_branched_rng_2D(kg,
- bssrdf_rng_hash,
- hit_state,
- j,
- num_samples,
- PRNG_BSDF_U,
- &bssrdf_u,
- &bssrdf_v);
-
- /* intersection is expensive so avoid doing multiple times for the same input */
- if(branched_state->next_hit == 0 && branched_state->next_closure == 0 && branched_state->next_sample == 0) {
- uint lcg_state = branched_state->lcg_state;
- LocalIntersection ss_isect_private;
-
- branched_state->num_hits = subsurface_scatter_multi_intersect(kg,
- &ss_isect_private,
- sd,
- hit_state,
- sc,
- &lcg_state,
- bssrdf_u, bssrdf_v,
- true);
-
- branched_state->lcg_state = lcg_state;
- *ss_isect = ss_isect_private;
- }
-
- hit_state->rng_offset += PRNG_BOUNCE_NUM;
-
-#ifdef __VOLUME__
- Ray volume_ray = branched_state->ray;
- bool need_update_volume_stack =
- kernel_data.integrator.use_volumes &&
- sd->object_flag & SD_OBJECT_INTERSECTS_VOLUME;
-#endif /* __VOLUME__ */
-
- /* compute lighting with the BSDF closure */
- for(int hit = branched_state->next_hit; hit < branched_state->num_hits; hit++) {
- ShaderData *bssrdf_sd = kernel_split_sd(sd, ray_index);
- *bssrdf_sd = *sd; /* note: copy happens each iteration of inner loop, this is
- * important as the indirect path will write into bssrdf_sd */
-
- LocalIntersection ss_isect_private = *ss_isect;
- subsurface_scatter_multi_setup(kg,
- &ss_isect_private,
- hit,
- bssrdf_sd,
- hit_state,
- bssrdf_type,
- bssrdf_roughness);
- *ss_isect = ss_isect_private;
-
-#ifdef __VOLUME__
- if(need_update_volume_stack) {
- /* Setup ray from previous surface point to the new one. */
- float3 P = ray_offset(bssrdf_sd->P, -bssrdf_sd->Ng);
- volume_ray.D = normalize_len(P - volume_ray.P, &volume_ray.t);
-
- for(int k = 0; k < VOLUME_STACK_SIZE; k++) {
- hit_state->volume_stack[k] = branched_state->path_state.volume_stack[k];
- }
-
- kernel_volume_stack_update_for_subsurface(kg,
- emission_sd,
- &volume_ray,
- hit_state->volume_stack);
- }
-#endif /* __VOLUME__ */
-
-#ifdef __EMISSION__
- if(branched_state->next_closure == 0 && branched_state->next_sample == 0) {
- /* direct light */
- if(kernel_data.integrator.use_direct_light) {
- int all = (kernel_data.integrator.sample_all_lights_direct) ||
- (hit_state->flag & PATH_RAY_SHADOW_CATCHER);
- kernel_branched_path_surface_connect_light(kg,
- bssrdf_sd,
- emission_sd,
- hit_state,
- branched_state->throughput,
- num_samples_inv,
- L,
- all);
- }
- }
-#endif /* __EMISSION__ */
-
- /* indirect light */
- if(kernel_split_branched_path_surface_indirect_light_iter(kg,
- ray_index,
- num_samples_inv,
- bssrdf_sd,
- false,
- false))
- {
- branched_state->ss_next_closure = i;
- branched_state->ss_next_sample = j;
- branched_state->next_hit = hit;
-
- return true;
- }
-
- branched_state->next_closure = 0;
- }
-
- branched_state->next_hit = 0;
- }
-
- branched_state->ss_next_sample = 0;
- }
-
- branched_state->ss_next_closure = sd->num_closure;
-
- branched_state->waiting_on_shared_samples = (branched_state->shared_sample_count > 0);
- if(branched_state->waiting_on_shared_samples) {
- return true;
- }
-
- kernel_split_branched_path_indirect_loop_end(kg, ray_index);
-
- return false;
+ SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
+
+ ShaderData *sd = kernel_split_sd(branched_state_sd, ray_index);
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+ ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
+
+ for (int i = branched_state->ss_next_closure; i < sd->num_closure; i++) {
+ ShaderClosure *sc = &sd->closure[i];
+
+ if (!CLOSURE_IS_BSSRDF(sc->type))
+ continue;
+
+ /* Closure memory will be overwritten, so read required variables now. */
+ Bssrdf *bssrdf = (Bssrdf *)sc;
+ ClosureType bssrdf_type = sc->type;
+ float bssrdf_roughness = bssrdf->roughness;
+
+ /* set up random number generator */
+ if (branched_state->ss_next_sample == 0 && branched_state->next_hit == 0 &&
+ branched_state->next_closure == 0 && branched_state->next_sample == 0) {
+ branched_state->lcg_state = lcg_state_init_addrspace(&branched_state->path_state,
+ 0x68bc21eb);
+ }
+ int num_samples = kernel_data.integrator.subsurface_samples * 3;
+ float num_samples_inv = 1.0f / num_samples;
+ uint bssrdf_rng_hash = cmj_hash(branched_state->path_state.rng_hash, i);
+
+ /* Do the subsurface scatter step with a copy of the shader data; this
+ * will replace the BSSRDF with a diffuse BSDF closure. */
+ for (int j = branched_state->ss_next_sample; j < num_samples; j++) {
+ ccl_global PathState *hit_state = &kernel_split_state.path_state[ray_index];
+ *hit_state = branched_state->path_state;
+ hit_state->rng_hash = bssrdf_rng_hash;
+ path_state_branch(hit_state, j, num_samples);
+
+ ccl_global LocalIntersection *ss_isect = &branched_state->ss_isect;
+ float bssrdf_u, bssrdf_v;
+ path_branched_rng_2D(
+ kg, bssrdf_rng_hash, hit_state, j, num_samples, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v);
+
+ /* Intersection is expensive, so avoid doing it multiple times for the same input. */
+ if (branched_state->next_hit == 0 && branched_state->next_closure == 0 &&
+ branched_state->next_sample == 0) {
+ uint lcg_state = branched_state->lcg_state;
+ LocalIntersection ss_isect_private;
+
+ branched_state->num_hits = subsurface_scatter_multi_intersect(
+ kg, &ss_isect_private, sd, hit_state, sc, &lcg_state, bssrdf_u, bssrdf_v, true);
+
+ branched_state->lcg_state = lcg_state;
+ *ss_isect = ss_isect_private;
+ }
+
+ hit_state->rng_offset += PRNG_BOUNCE_NUM;
+
+# ifdef __VOLUME__
+ Ray volume_ray = branched_state->ray;
+ bool need_update_volume_stack = kernel_data.integrator.use_volumes &&
+ (sd->object_flag & SD_OBJECT_INTERSECTS_VOLUME);
+# endif /* __VOLUME__ */
+
+ /* compute lighting with the BSDF closure */
+ for (int hit = branched_state->next_hit; hit < branched_state->num_hits; hit++) {
+ ShaderData *bssrdf_sd = kernel_split_sd(sd, ray_index);
+ *bssrdf_sd = *sd; /* Note: the copy happens on each iteration of the inner
+ * loop; this matters because the indirect path will write into bssrdf_sd. */
+
+ LocalIntersection ss_isect_private = *ss_isect;
+ subsurface_scatter_multi_setup(
+ kg, &ss_isect_private, hit, bssrdf_sd, hit_state, bssrdf_type, bssrdf_roughness);
+ *ss_isect = ss_isect_private;
+
+# ifdef __VOLUME__
+ if (need_update_volume_stack) {
+ /* Set up the ray from the previous surface point to the new one. */
+ float3 P = ray_offset(bssrdf_sd->P, -bssrdf_sd->Ng);
+ volume_ray.D = normalize_len(P - volume_ray.P, &volume_ray.t);
+
+ for (int k = 0; k < VOLUME_STACK_SIZE; k++) {
+ hit_state->volume_stack[k] = branched_state->path_state.volume_stack[k];
+ }
+
+ kernel_volume_stack_update_for_subsurface(
+ kg, emission_sd, &volume_ray, hit_state->volume_stack);
+ }
+# endif /* __VOLUME__ */
+
+# ifdef __EMISSION__
+ if (branched_state->next_closure == 0 && branched_state->next_sample == 0) {
+ /* direct light */
+ if (kernel_data.integrator.use_direct_light) {
+ int all = (kernel_data.integrator.sample_all_lights_direct) ||
+ (hit_state->flag & PATH_RAY_SHADOW_CATCHER);
+ kernel_branched_path_surface_connect_light(kg,
+ bssrdf_sd,
+ emission_sd,
+ hit_state,
+ branched_state->throughput,
+ num_samples_inv,
+ L,
+ all);
+ }
+ }
+# endif /* __EMISSION__ */
+
+ /* indirect light */
+ if (kernel_split_branched_path_surface_indirect_light_iter(
+ kg, ray_index, num_samples_inv, bssrdf_sd, false, false)) {
+ branched_state->ss_next_closure = i;
+ branched_state->ss_next_sample = j;
+ branched_state->next_hit = hit;
+
+ return true;
+ }
+
+ branched_state->next_closure = 0;
+ }
+
+ branched_state->next_hit = 0;
+ }
+
+ branched_state->ss_next_sample = 0;
+ }
+
+ branched_state->ss_next_closure = sd->num_closure;
+
+ branched_state->waiting_on_shared_samples = (branched_state->shared_sample_count > 0);
+ if (branched_state->waiting_on_shared_samples) {
+ return true;
+ }
+
+ kernel_split_branched_path_indirect_loop_end(kg, ray_index);
+
+ return false;
}
-#endif /* __BRANCHED_PATH__ && __SUBSURFACE__ */
+#endif /* __BRANCHED_PATH__ && __SUBSURFACE__ */
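
The *_iter function above is worth a second look: it is a manually resumable triple loop. When an indirect bounce has to be handed back to the other split kernels mid-iteration, it records the current closure/sample/hit indices in SplitBranchedState and returns true; the next call re-enters the loops at those saved indices, and each index is reset to zero once its loop completes. A toy re-creation of that control flow (the suspend condition is made up):

#include <cstdio>

struct LoopState {
  int i = 0, j = 0, k = 0; /* saved loop indices, like SplitBranchedState */
};

/* Returns true when it "suspends" (work handed off), false when done. */
static bool toy_iterate(LoopState *st, int ni, int nj, int nk)
{
  for (int i = st->i; i < ni; i++) {
    for (int j = st->j; j < nj; j++) {
      for (int k = st->k; k < nk; k++) {
        bool needs_handoff = ((i + j + k) % 5 == 0); /* stand-in condition */
        if (needs_handoff) {
          /* Save where we are; resume *after* this item next call. */
          st->i = i;
          st->j = j;
          st->k = k + 1;
          return true;
        }
      }
      st->k = 0; /* inner index resets once its loop completes */
    }
    st->j = 0;
  }
  return false;
}

int main()
{
  LoopState st;
  int suspensions = 0;
  while (toy_iterate(&st, 3, 3, 3))
    suspensions++;
  printf("suspended %d times\n", suspensions);
  return 0;
}
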
ccl_device void kernel_subsurface_scatter(KernelGlobals *kg)
{
- int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
- if(thread_index == 0) {
- /* We will empty both queues in this kernel. */
- kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0;
- kernel_split_params.queue_index[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = 0;
- }
-
- int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
- ray_index = get_ray_index(kg, ray_index,
- QUEUE_ACTIVE_AND_REGENERATED_RAYS,
- kernel_split_state.queue_data,
- kernel_split_params.queue_size,
- 1);
- get_ray_index(kg, thread_index,
- QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
- kernel_split_state.queue_data,
- kernel_split_params.queue_size,
- 1);
+ int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ if (thread_index == 0) {
+ /* We will empty both queues in this kernel. */
+ kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0;
+ kernel_split_params.queue_index[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = 0;
+ }
+
+ int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ ray_index = get_ray_index(kg,
+ ray_index,
+ QUEUE_ACTIVE_AND_REGENERATED_RAYS,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
+ 1);
+ get_ray_index(kg,
+ thread_index,
+ QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
+ 1);
#ifdef __SUBSURFACE__
- ccl_global char *ray_state = kernel_split_state.ray_state;
-
- if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
- ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
- PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
- ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
- ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
- ccl_global SubsurfaceIndirectRays *ss_indirect = &kernel_split_state.ss_rays[ray_index];
- ShaderData *sd = kernel_split_sd(sd, ray_index);
- ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
-
- if(sd->flag & SD_BSSRDF) {
-
-#ifdef __BRANCHED_PATH__
- if(!kernel_data.integrator.branched ||
- IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT))
- {
-#endif
- if(kernel_path_subsurface_scatter(kg,
- sd,
- emission_sd,
- L,
- state,
- ray,
- throughput,
- ss_indirect))
- {
- kernel_split_path_end(kg, ray_index);
- }
-#ifdef __BRANCHED_PATH__
- }
- else {
- kernel_split_branched_path_subsurface_indirect_light_init(kg, ray_index);
-
- if(kernel_split_branched_path_subsurface_indirect_light_iter(kg, ray_index)) {
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
- }
- }
-#endif
- }
- }
+ ccl_global char *ray_state = kernel_split_state.ray_state;
+
+ if (IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
+ ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+ ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
+ ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
+ ccl_global SubsurfaceIndirectRays *ss_indirect = &kernel_split_state.ss_rays[ray_index];
+ ShaderData *sd = kernel_split_sd(sd, ray_index);
+ ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
+
+ if (sd->flag & SD_BSSRDF) {
# ifdef __BRANCHED_PATH__
- if(ccl_global_id(0) == 0 && ccl_global_id(1) == 0) {
- kernel_split_params.queue_index[QUEUE_SUBSURFACE_INDIRECT_ITER] = 0;
- }
-
- /* iter loop */
- ray_index = get_ray_index(kg, ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0),
- QUEUE_SUBSURFACE_INDIRECT_ITER,
- kernel_split_state.queue_data,
- kernel_split_params.queue_size,
- 1);
-
- if(IS_STATE(ray_state, ray_index, RAY_SUBSURFACE_INDIRECT_NEXT_ITER)) {
- /* for render passes, sum and reset indirect light pass variables
- * for the next samples */
- path_radiance_sum_indirect(&kernel_split_state.path_radiance[ray_index]);
- path_radiance_reset_indirect(&kernel_split_state.path_radiance[ray_index]);
-
- if(kernel_split_branched_path_subsurface_indirect_light_iter(kg, ray_index)) {
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
- }
- }
-# endif /* __BRANCHED_PATH__ */
-
-#endif /* __SUBSURFACE__ */
+ if (!kernel_data.integrator.branched ||
+ IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT)) {
+# endif
+ if (kernel_path_subsurface_scatter(
+ kg, sd, emission_sd, L, state, ray, throughput, ss_indirect)) {
+ kernel_split_path_end(kg, ray_index);
+ }
+# ifdef __BRANCHED_PATH__
+ }
+ else {
+ kernel_split_branched_path_subsurface_indirect_light_init(kg, ray_index);
+
+ if (kernel_split_branched_path_subsurface_indirect_light_iter(kg, ray_index)) {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
+ }
+ }
+# endif
+ }
+ }
+# ifdef __BRANCHED_PATH__
+ if (ccl_global_id(0) == 0 && ccl_global_id(1) == 0) {
+ kernel_split_params.queue_index[QUEUE_SUBSURFACE_INDIRECT_ITER] = 0;
+ }
+
+ /* iter loop */
+ ray_index = get_ray_index(kg,
+ ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0),
+ QUEUE_SUBSURFACE_INDIRECT_ITER,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
+ 1);
+
+ if (IS_STATE(ray_state, ray_index, RAY_SUBSURFACE_INDIRECT_NEXT_ITER)) {
+ /* for render passes, sum and reset indirect light pass variables
+ * for the next samples */
+ path_radiance_sum_indirect(&kernel_split_state.path_radiance[ray_index]);
+ path_radiance_reset_indirect(&kernel_split_state.path_radiance[ray_index]);
+
+ if (kernel_split_branched_path_subsurface_indirect_light_iter(kg, ray_index)) {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
+ }
+ }
+# endif /* __BRANCHED_PATH__ */
+
+#endif /* __SUBSURFACE__ */
}
CCL_NAMESPACE_END
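
Finally, a note on how kernel_subsurface_scatter() (and every other split kernel) finds its work: threads never scan all rays; each one maps its flat global id through a queue of ray indices that the enqueue kernel compacted earlier, so inactive rays cost nothing. A toy CPU analogue of that lookup; the real get_ray_index also takes the queue number, and the details below (names, the slot-clearing flag) are assumptions for illustration:

#include <cstdio>
#include <vector>

static const int TOY_QUEUE_EMPTY_SLOT = -1;

/* Map a thread id to a ray index via a compacted work queue; threads
 * past the end of the queue get an invalid index and simply return. */
static int toy_get_ray_index(std::vector<int> &queue, int thread_index, bool empty_slot)
{
  if (thread_index >= (int)queue.size())
    return TOY_QUEUE_EMPTY_SLOT;
  int ray_index = queue[thread_index];
  if (empty_slot)
    queue[thread_index] = TOY_QUEUE_EMPTY_SLOT; /* consume the entry */
  return ray_index;
}

int main()
{
  std::vector<int> queue = {7, 3, 42}; /* ray indices with pending work */
  for (int tid = 0; tid < 5; tid++) {
    int ray = toy_get_ray_index(queue, tid, true);
    if (ray == TOY_QUEUE_EMPTY_SLOT)
      continue; /* nothing to do for this thread */
    printf("thread %d works on ray %d\n", tid, ray);
  }
  return 0;
}
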