ClangFormat: apply to source, most of intern

Apply clang-format as proposed in T53211. For details on usage and instructions for migrating branches without conflicts, see: https://wiki.blender.org/wiki/Tools/ClangFormat
author: Campbell Barton <ideasman42@gmail.com> 2019-04-17 07:17:24 +0300
committer: Campbell Barton <ideasman42@gmail.com> 2019-04-17 07:21:24 +0300
commit: e12c08e8d170b7ca40f204a5b0423c23a9fbc2c1 (patch)
tree: 8cf3453d12edb177a218ef8009357518ec6cab6a /intern/cycles/kernel
parent: b3dabc200a4b0399ec6b81f2ff2730d07b44fcaa (diff)
310 files changed, 40081 insertions, 38747 deletions
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index 01552dff9bb..8a8fee108ae 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -1,7 +1,7 @@
 remove_extra_strict_flags()
 
 set(INC
-	..
+  ..
 )
 
 set(INC_SYS
@@ -9,328 +9,328 @@ set(INC_SYS
 )
 
 set(SRC_CPU_KERNELS
-	kernels/cpu/kernel.cpp
-	kernels/cpu/kernel_sse2.cpp
-	kernels/cpu/kernel_sse3.cpp
-	kernels/cpu/kernel_sse41.cpp
-	kernels/cpu/kernel_avx.cpp
-	kernels/cpu/kernel_avx2.cpp
-	kernels/cpu/kernel_split.cpp
-	kernels/cpu/kernel_split_sse2.cpp
-	kernels/cpu/kernel_split_sse3.cpp
-	kernels/cpu/kernel_split_sse41.cpp
-	kernels/cpu/kernel_split_avx.cpp
-	kernels/cpu/kernel_split_avx2.cpp
-	kernels/cpu/filter.cpp
-	kernels/cpu/filter_sse2.cpp
-	kernels/cpu/filter_sse3.cpp
-	kernels/cpu/filter_sse41.cpp
-	kernels/cpu/filter_avx.cpp
-	kernels/cpu/filter_avx2.cpp
+  kernels/cpu/kernel.cpp
+  kernels/cpu/kernel_sse2.cpp
+  kernels/cpu/kernel_sse3.cpp
+  kernels/cpu/kernel_sse41.cpp
+  kernels/cpu/kernel_avx.cpp
+  kernels/cpu/kernel_avx2.cpp
+  kernels/cpu/kernel_split.cpp
+  kernels/cpu/kernel_split_sse2.cpp
+  kernels/cpu/kernel_split_sse3.cpp
+  kernels/cpu/kernel_split_sse41.cpp
+  kernels/cpu/kernel_split_avx.cpp
+  kernels/cpu/kernel_split_avx2.cpp
+  kernels/cpu/filter.cpp
+  kernels/cpu/filter_sse2.cpp
+  kernels/cpu/filter_sse3.cpp
+  kernels/cpu/filter_sse41.cpp
+  kernels/cpu/filter_avx.cpp
+  kernels/cpu/filter_avx2.cpp
 )
 
 set(SRC_CUDA_KERNELS
-	kernels/cuda/kernel.cu
-	kernels/cuda/kernel_split.cu
-	kernels/cuda/filter.cu
+  kernels/cuda/kernel.cu
+  kernels/cuda/kernel_split.cu
+  kernels/cuda/filter.cu
 )
 
 set(SRC_OPENCL_KERNELS
-	kernels/opencl/kernel_bake.cl
-	kernels/opencl/kernel_base.cl
-	kernels/opencl/kernel_displace.cl
-	kernels/opencl/kernel_background.cl
-	kernels/opencl/kernel_state_buffer_size.cl
-	kernels/opencl/kernel_split_bundle.cl
-	kernels/opencl/kernel_data_init.cl
-	kernels/opencl/kernel_path_init.cl
-	kernels/opencl/kernel_queue_enqueue.cl
-	kernels/opencl/kernel_scene_intersect.cl
-	kernels/opencl/kernel_lamp_emission.cl
-	kernels/opencl/kernel_do_volume.cl
-	kernels/opencl/kernel_indirect_background.cl
-	kernels/opencl/kernel_shader_setup.cl
-	kernels/opencl/kernel_shader_sort.cl
-	kernels/opencl/kernel_shader_eval.cl
-	kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl
-	kernels/opencl/kernel_subsurface_scatter.cl
-	kernels/opencl/kernel_direct_lighting.cl
-	kernels/opencl/kernel_shadow_blocked_ao.cl
-	kernels/opencl/kernel_shadow_blocked_dl.cl
-	kernels/opencl/kernel_enqueue_inactive.cl
-	kernels/opencl/kernel_next_iteration_setup.cl
-	kernels/opencl/kernel_indirect_subsurface.cl
-	kernels/opencl/kernel_buffer_update.cl
-	kernels/opencl/filter.cl
+  kernels/opencl/kernel_bake.cl
+  kernels/opencl/kernel_base.cl
+  kernels/opencl/kernel_displace.cl
+  kernels/opencl/kernel_background.cl
+  kernels/opencl/kernel_state_buffer_size.cl
+  kernels/opencl/kernel_split_bundle.cl
+  kernels/opencl/kernel_data_init.cl
+  kernels/opencl/kernel_path_init.cl
+  kernels/opencl/kernel_queue_enqueue.cl
+  kernels/opencl/kernel_scene_intersect.cl
+  kernels/opencl/kernel_lamp_emission.cl
+  kernels/opencl/kernel_do_volume.cl
+  kernels/opencl/kernel_indirect_background.cl
+  kernels/opencl/kernel_shader_setup.cl
+  kernels/opencl/kernel_shader_sort.cl
+  kernels/opencl/kernel_shader_eval.cl
+  kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl
+  kernels/opencl/kernel_subsurface_scatter.cl
+  kernels/opencl/kernel_direct_lighting.cl
+  kernels/opencl/kernel_shadow_blocked_ao.cl
+  kernels/opencl/kernel_shadow_blocked_dl.cl
+  kernels/opencl/kernel_enqueue_inactive.cl
+  kernels/opencl/kernel_next_iteration_setup.cl
+  kernels/opencl/kernel_indirect_subsurface.cl
+  kernels/opencl/kernel_buffer_update.cl
+  kernels/opencl/filter.cl
 )
 
 set(SRC_BVH_HEADERS
-	bvh/bvh.h
-	bvh/bvh_nodes.h
-	bvh/bvh_shadow_all.h
-	bvh/bvh_local.h
-	bvh/bvh_traversal.h
-	bvh/bvh_types.h
-	bvh/bvh_volume.h
-	bvh/bvh_volume_all.h
-	bvh/qbvh_nodes.h
-	bvh/qbvh_shadow_all.h
-	bvh/qbvh_local.h
-	bvh/qbvh_traversal.h
-	bvh/qbvh_volume.h
-	bvh/qbvh_volume_all.h
-	bvh/obvh_nodes.h
-	bvh/obvh_shadow_all.h
-	bvh/obvh_local.h
-	bvh/obvh_traversal.h
-	bvh/obvh_volume.h
-	bvh/obvh_volume_all.h
-	bvh/bvh_embree.h
+  bvh/bvh.h
+  bvh/bvh_nodes.h
+  bvh/bvh_shadow_all.h
+  bvh/bvh_local.h
+  bvh/bvh_traversal.h
+  bvh/bvh_types.h
+  bvh/bvh_volume.h
+  bvh/bvh_volume_all.h
+  bvh/qbvh_nodes.h
+  bvh/qbvh_shadow_all.h
+  bvh/qbvh_local.h
+  bvh/qbvh_traversal.h
+  bvh/qbvh_volume.h
+  bvh/qbvh_volume_all.h
+  bvh/obvh_nodes.h
+  bvh/obvh_shadow_all.h
+  bvh/obvh_local.h
+  bvh/obvh_traversal.h
+  bvh/obvh_volume.h
+  bvh/obvh_volume_all.h
+  bvh/bvh_embree.h
 )
 
 set(SRC_HEADERS
-	kernel_accumulate.h
-	kernel_bake.h
-	kernel_camera.h
-	kernel_color.h
-	kernel_compat_cpu.h
-	kernel_compat_cuda.h
-	kernel_compat_opencl.h
-	kernel_differential.h
-	kernel_emission.h
-	kernel_film.h
-	kernel_globals.h
-	kernel_id_passes.h
-	kernel_jitter.h
-	kernel_light.h
-	kernel_math.h
-	kernel_montecarlo.h
-	kernel_passes.h
-	kernel_path.h
-	kernel_path_branched.h
-	kernel_path_common.h
-	kernel_path_state.h
-	kernel_path_surface.h
-	kernel_path_subsurface.h
-	kernel_path_volume.h
-	kernel_profiling.h
-	kernel_projection.h
-	kernel_queues.h
-	kernel_random.h
-	kernel_shader.h
-	kernel_shadow.h
-	kernel_subsurface.h
-	kernel_textures.h
-	kernel_types.h
-	kernel_volume.h
-	kernel_work_stealing.h
+  kernel_accumulate.h
+  kernel_bake.h
+  kernel_camera.h
+  kernel_color.h
+  kernel_compat_cpu.h
+  kernel_compat_cuda.h
+  kernel_compat_opencl.h
+  kernel_differential.h
+  kernel_emission.h
+  kernel_film.h
+  kernel_globals.h
+  kernel_id_passes.h
+  kernel_jitter.h
+  kernel_light.h
+  kernel_math.h
+  kernel_montecarlo.h
+  kernel_passes.h
+  kernel_path.h
+  kernel_path_branched.h
+  kernel_path_common.h
+  kernel_path_state.h
+  kernel_path_surface.h
+  kernel_path_subsurface.h
+  kernel_path_volume.h
+  kernel_profiling.h
+  kernel_projection.h
+  kernel_queues.h
+  kernel_random.h
+  kernel_shader.h
+  kernel_shadow.h
+  kernel_subsurface.h
+  kernel_textures.h
+  kernel_types.h
+  kernel_volume.h
+  kernel_work_stealing.h
 )
 
 set(SRC_KERNELS_CPU_HEADERS
-	kernel.h
-	kernels/cpu/kernel_cpu.h
-	kernels/cpu/kernel_cpu_impl.h
-	kernels/cpu/kernel_cpu_image.h
-	kernels/cpu/filter_cpu.h
-	kernels/cpu/filter_cpu_impl.h
+  kernel.h
+  kernels/cpu/kernel_cpu.h
+  kernels/cpu/kernel_cpu_impl.h
+  kernels/cpu/kernel_cpu_image.h
+  kernels/cpu/filter_cpu.h
+  kernels/cpu/filter_cpu_impl.h
 )
 
 set(SRC_KERNELS_CUDA_HEADERS
-	kernels/cuda/kernel_config.h
-	kernels/cuda/kernel_cuda_image.h
+  kernels/cuda/kernel_config.h
+  kernels/cuda/kernel_cuda_image.h
 )
 
 set(SRC_KERNELS_OPENCL_HEADERS
-	kernels/opencl/kernel_split_function.h
-	kernels/opencl/kernel_opencl_image.h
+  kernels/opencl/kernel_split_function.h
+  kernels/opencl/kernel_opencl_image.h
 )
 
 set(SRC_CLOSURE_HEADERS
-	closure/alloc.h
-	closure/bsdf.h
-	closure/bsdf_ashikhmin_velvet.h
-	closure/bsdf_diffuse.h
-	closure/bsdf_diffuse_ramp.h
-	closure/bsdf_microfacet.h
-	closure/bsdf_microfacet_multi.h
-	closure/bsdf_microfacet_multi_impl.h
-	closure/bsdf_oren_nayar.h
-	closure/bsdf_phong_ramp.h
-	closure/bsdf_reflection.h
-	closure/bsdf_refraction.h
-	closure/bsdf_toon.h
-	closure/bsdf_transparent.h
-	closure/bsdf_util.h
-	closure/bsdf_ashikhmin_shirley.h
-	closure/bsdf_hair.h
-	closure/bssrdf.h
-	closure/emissive.h
-	closure/volume.h
-	closure/bsdf_principled_diffuse.h
-	closure/bsdf_principled_sheen.h
+  closure/alloc.h
+  closure/bsdf.h
+  closure/bsdf_ashikhmin_velvet.h
+  closure/bsdf_diffuse.h
+  closure/bsdf_diffuse_ramp.h
+  closure/bsdf_microfacet.h
+  closure/bsdf_microfacet_multi.h
+  closure/bsdf_microfacet_multi_impl.h
+  closure/bsdf_oren_nayar.h
+  closure/bsdf_phong_ramp.h
+  closure/bsdf_reflection.h
+  closure/bsdf_refraction.h
+  closure/bsdf_toon.h
+  closure/bsdf_transparent.h
+  closure/bsdf_util.h
+  closure/bsdf_ashikhmin_shirley.h
+  closure/bsdf_hair.h
+  closure/bssrdf.h
+  closure/emissive.h
+  closure/volume.h
+  closure/bsdf_principled_diffuse.h
+  closure/bsdf_principled_sheen.h
     closure/bsdf_hair_principled.h
 )
 
 set(SRC_SVM_HEADERS
-	svm/svm.h
-	svm/svm_ao.h
-	svm/svm_attribute.h
-	svm/svm_bevel.h
-	svm/svm_blackbody.h
-	svm/svm_bump.h
-	svm/svm_camera.h
-	svm/svm_closure.h
-	svm/svm_convert.h
-	svm/svm_checker.h
-	svm/svm_color_util.h
-	svm/svm_brick.h
-	svm/svm_displace.h
-	svm/svm_fresnel.h
-	svm/svm_wireframe.h
-	svm/svm_wavelength.h
-	svm/svm_gamma.h
-	svm/svm_brightness.h
-	svm/svm_geometry.h
-	svm/svm_gradient.h
-	svm/svm_hsv.h
-	svm/svm_ies.h
-	svm/svm_image.h
-	svm/svm_invert.h
-	svm/svm_light_path.h
-	svm/svm_magic.h
-	svm/svm_mapping.h
-	svm/svm_math.h
-	svm/svm_math_util.h
-	svm/svm_mix.h
-	svm/svm_musgrave.h
-	svm/svm_noise.h
-	svm/svm_noisetex.h
-	svm/svm_normal.h
-	svm/svm_ramp.h
-	svm/svm_ramp_util.h
-	svm/svm_sepcomb_hsv.h
-	svm/svm_sepcomb_vector.h
-	svm/svm_sky.h
-	svm/svm_tex_coord.h
-	svm/svm_texture.h
-	svm/svm_types.h
-	svm/svm_value.h
-	svm/svm_vector_transform.h
-	svm/svm_voronoi.h
-	svm/svm_voxel.h
-	svm/svm_wave.h
+  svm/svm.h
+  svm/svm_ao.h
+  svm/svm_attribute.h
+  svm/svm_bevel.h
+  svm/svm_blackbody.h
+  svm/svm_bump.h
+  svm/svm_camera.h
+  svm/svm_closure.h
+  svm/svm_convert.h
+  svm/svm_checker.h
+  svm/svm_color_util.h
+  svm/svm_brick.h
+  svm/svm_displace.h
+  svm/svm_fresnel.h
+  svm/svm_wireframe.h
+  svm/svm_wavelength.h
+  svm/svm_gamma.h
+  svm/svm_brightness.h
+  svm/svm_geometry.h
+  svm/svm_gradient.h
+  svm/svm_hsv.h
+  svm/svm_ies.h
+  svm/svm_image.h
+  svm/svm_invert.h
+  svm/svm_light_path.h
+  svm/svm_magic.h
+  svm/svm_mapping.h
+  svm/svm_math.h
+  svm/svm_math_util.h
+  svm/svm_mix.h
+  svm/svm_musgrave.h
+  svm/svm_noise.h
+  svm/svm_noisetex.h
+  svm/svm_normal.h
+  svm/svm_ramp.h
+  svm/svm_ramp_util.h
+  svm/svm_sepcomb_hsv.h
+  svm/svm_sepcomb_vector.h
+  svm/svm_sky.h
+  svm/svm_tex_coord.h
+  svm/svm_texture.h
+  svm/svm_types.h
+  svm/svm_value.h
+  svm/svm_vector_transform.h
+  svm/svm_voronoi.h
+  svm/svm_voxel.h
+  svm/svm_wave.h
 )
 
 set(SRC_GEOM_HEADERS
-	geom/geom.h
-	geom/geom_attribute.h
-	geom/geom_curve.h
-	geom/geom_curve_intersect.h
-	geom/geom_motion_curve.h
-	geom/geom_motion_triangle.h
-	geom/geom_motion_triangle_intersect.h
-	geom/geom_motion_triangle_shader.h
-	geom/geom_object.h
-	geom/geom_patch.h
-	geom/geom_primitive.h
-	geom/geom_subd_triangle.h
-	geom/geom_triangle.h
-	geom/geom_triangle_intersect.h
-	geom/geom_volume.h
+  geom/geom.h
+  geom/geom_attribute.h
+  geom/geom_curve.h
+  geom/geom_curve_intersect.h
+  geom/geom_motion_curve.h
+  geom/geom_motion_triangle.h
+  geom/geom_motion_triangle_intersect.h
+  geom/geom_motion_triangle_shader.h
+  geom/geom_object.h
+  geom/geom_patch.h
+  geom/geom_primitive.h
+  geom/geom_subd_triangle.h
+  geom/geom_triangle.h
+  geom/geom_triangle_intersect.h
+  geom/geom_volume.h
 )
 
 set(SRC_FILTER_HEADERS
-	filter/filter.h
-	filter/filter_defines.h
-	filter/filter_features.h
-	filter/filter_features_sse.h
-	filter/filter_kernel.h
-	filter/filter_nlm_cpu.h
-	filter/filter_nlm_gpu.h
-	filter/filter_prefilter.h
-	filter/filter_reconstruction.h
-	filter/filter_transform.h
-	filter/filter_transform_gpu.h
-	filter/filter_transform_sse.h
+  filter/filter.h
+  filter/filter_defines.h
+  filter/filter_features.h
+  filter/filter_features_sse.h
+  filter/filter_kernel.h
+  filter/filter_nlm_cpu.h
+  filter/filter_nlm_gpu.h
+  filter/filter_prefilter.h
+  filter/filter_reconstruction.h
+  filter/filter_transform.h
+  filter/filter_transform_gpu.h
+  filter/filter_transform_sse.h
 )
 
 set(SRC_UTIL_HEADERS
-	../util/util_atomic.h
-	../util/util_color.h
-	../util/util_defines.h
-	../util/util_half.h
-	../util/util_hash.h
-	../util/util_math.h
-	../util/util_math_fast.h
-	../util/util_math_intersect.h
-	../util/util_math_float2.h
-	../util/util_math_float3.h
-	../util/util_math_float4.h
-	../util/util_math_int2.h
-	../util/util_math_int3.h
-	../util/util_math_int4.h
-	../util/util_math_matrix.h
-	../util/util_projection.h
-	../util/util_rect.h
-	../util/util_static_assert.h
-	../util/util_transform.h
-	../util/util_texture.h
-	../util/util_types.h
-	../util/util_types_float2.h
-	../util/util_types_float2_impl.h
-	../util/util_types_float3.h
-	../util/util_types_float3_impl.h
-	../util/util_types_float4.h
-	../util/util_types_float4_impl.h
-	../util/util_types_float8.h
-	../util/util_types_float8_impl.h
-	../util/util_types_int2.h
-	../util/util_types_int2_impl.h
-	../util/util_types_int3.h
-	../util/util_types_int3_impl.h
-	../util/util_types_int4.h
-	../util/util_types_int4_impl.h
-	../util/util_types_uchar2.h
-	../util/util_types_uchar2_impl.h
-	../util/util_types_uchar3.h
-	../util/util_types_uchar3_impl.h
-	../util/util_types_uchar4.h
-	../util/util_types_uchar4_impl.h
-	../util/util_types_uint2.h
-	../util/util_types_uint2_impl.h
-	../util/util_types_uint3.h
-	../util/util_types_uint3_impl.h
-	../util/util_types_uint4.h
-	../util/util_types_uint4_impl.h
-	../util/util_types_ushort4.h
-	../util/util_types_vector3.h
-	../util/util_types_vector3_impl.h
+  ../util/util_atomic.h
+  ../util/util_color.h
+  ../util/util_defines.h
+  ../util/util_half.h
+  ../util/util_hash.h
+  ../util/util_math.h
+  ../util/util_math_fast.h
+  ../util/util_math_intersect.h
+  ../util/util_math_float2.h
+  ../util/util_math_float3.h
+  ../util/util_math_float4.h
+  ../util/util_math_int2.h
+  ../util/util_math_int3.h
+  ../util/util_math_int4.h
+  ../util/util_math_matrix.h
+  ../util/util_projection.h
+  ../util/util_rect.h
+  ../util/util_static_assert.h
+  ../util/util_transform.h
+  ../util/util_texture.h
+  ../util/util_types.h
+  ../util/util_types_float2.h
+  ../util/util_types_float2_impl.h
+  ../util/util_types_float3.h
+  ../util/util_types_float3_impl.h
+  ../util/util_types_float4.h
+  ../util/util_types_float4_impl.h
+  ../util/util_types_float8.h
+  ../util/util_types_float8_impl.h
+  ../util/util_types_int2.h
+  ../util/util_types_int2_impl.h
+  ../util/util_types_int3.h
+  ../util/util_types_int3_impl.h
+  ../util/util_types_int4.h
+  ../util/util_types_int4_impl.h
+  ../util/util_types_uchar2.h
+  ../util/util_types_uchar2_impl.h
+  ../util/util_types_uchar3.h
+  ../util/util_types_uchar3_impl.h
+  ../util/util_types_uchar4.h
+  ../util/util_types_uchar4_impl.h
+  ../util/util_types_uint2.h
+  ../util/util_types_uint2_impl.h
+  ../util/util_types_uint3.h
+  ../util/util_types_uint3_impl.h
+  ../util/util_types_uint4.h
+  ../util/util_types_uint4_impl.h
+  ../util/util_types_ushort4.h
+  ../util/util_types_vector3.h
+  ../util/util_types_vector3_impl.h
 )
 
 set(SRC_SPLIT_HEADERS
-	split/kernel_branched.h
-	split/kernel_buffer_update.h
-	split/kernel_data_init.h
-	split/kernel_direct_lighting.h
-	split/kernel_do_volume.h
-	split/kernel_enqueue_inactive.h
-	split/kernel_holdout_emission_blurring_pathtermination_ao.h
-	split/kernel_indirect_background.h
-	split/kernel_indirect_subsurface.h
-	split/kernel_lamp_emission.h
-	split/kernel_next_iteration_setup.h
-	split/kernel_path_init.h
-	split/kernel_queue_enqueue.h
-	split/kernel_scene_intersect.h
-	split/kernel_shader_setup.h
-	split/kernel_shader_sort.h
-	split/kernel_shader_eval.h
-	split/kernel_shadow_blocked_ao.h
-	split/kernel_shadow_blocked_dl.h
-	split/kernel_split_common.h
-	split/kernel_split_data.h
-	split/kernel_split_data_types.h
-	split/kernel_subsurface_scatter.h
+  split/kernel_branched.h
+  split/kernel_buffer_update.h
+  split/kernel_data_init.h
+  split/kernel_direct_lighting.h
+  split/kernel_do_volume.h
+  split/kernel_enqueue_inactive.h
+  split/kernel_holdout_emission_blurring_pathtermination_ao.h
+  split/kernel_indirect_background.h
+  split/kernel_indirect_subsurface.h
+  split/kernel_lamp_emission.h
+  split/kernel_next_iteration_setup.h
+  split/kernel_path_init.h
+  split/kernel_queue_enqueue.h
+  split/kernel_scene_intersect.h
+  split/kernel_shader_setup.h
+  split/kernel_shader_sort.h
+  split/kernel_shader_eval.h
+  split/kernel_shadow_blocked_ao.h
+  split/kernel_shadow_blocked_dl.h
+  split/kernel_split_common.h
+  split/kernel_split_data.h
+  split/kernel_split_data_types.h
+  split/kernel_subsurface_scatter.h
 )
 
 set(LIB
@@ -340,145 +340,145 @@ set(LIB
 # CUDA module
 
 if(WITH_CYCLES_CUDA_BINARIES)
-	# 64 bit only
-	set(CUDA_BITS 64)
-
-	# CUDA version
-	execute_process(COMMAND ${CUDA_NVCC_EXECUTABLE} "--version" OUTPUT_VARIABLE NVCC_OUT)
-	string(REGEX REPLACE ".*release ([0-9]+)\\.([0-9]+).*" "\\1" CUDA_VERSION_MAJOR "${NVCC_OUT}")
-	string(REGEX REPLACE ".*release ([0-9]+)\\.([0-9]+).*" "\\2" CUDA_VERSION_MINOR "${NVCC_OUT}")
-	set(CUDA_VERSION "${CUDA_VERSION_MAJOR}${CUDA_VERSION_MINOR}")
-
-	# warn for other versions
-	if(CUDA_VERSION MATCHES "101")
-	else()
-		message(WARNING
-			"CUDA version ${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR} detected, "
-			"build may succeed but only CUDA 10.1 is officially supported")
-	endif()
-
-	# build for each arch
-	set(cuda_sources kernels/cuda/kernel.cu kernels/cuda/kernel_split.cu
-		${SRC_HEADERS}
-		${SRC_KERNELS_CUDA_HEADERS}
-		${SRC_BVH_HEADERS}
-		${SRC_SVM_HEADERS}
-		${SRC_GEOM_HEADERS}
-		${SRC_CLOSURE_HEADERS}
-		${SRC_UTIL_HEADERS}
-	)
-	set(cuda_filter_sources kernels/cuda/filter.cu
-		${SRC_HEADERS}
-		${SRC_KERNELS_CUDA_HEADERS}
-		${SRC_FILTER_HEADERS}
-		${SRC_UTIL_HEADERS}
-	)
-	set(cuda_cubins)
-
-	macro(CYCLES_CUDA_KERNEL_ADD arch prev_arch name flags sources experimental)
-		set(cuda_cubin ${name}_${arch}.cubin)
-
-		set(kernel_sources ${sources})
-		if(NOT ${prev_arch} STREQUAL "none")
-			set(kernel_sources ${kernel_sources} ${name}_${prev_arch}.cubin)
-		endif()
-
-		set(cuda_kernel_src "/kernels/cuda/${name}.cu")
-
-		set(cuda_flags
-			-D CCL_NAMESPACE_BEGIN=
-			-D CCL_NAMESPACE_END=
-			-D NVCC
-			-m ${CUDA_BITS}
-			-I ${CMAKE_CURRENT_SOURCE_DIR}/..
-			-I ${CMAKE_CURRENT_SOURCE_DIR}/kernels/cuda
-			--use_fast_math
-			-o ${CMAKE_CURRENT_BINARY_DIR}/${cuda_cubin})
-
-		if(${experimental})
-			set(cuda_flags ${cuda_flags} -D __KERNEL_EXPERIMENTAL__)
-			set(name ${name}_experimental)
-		endif()
-
-		if(WITH_CYCLES_DEBUG)
-			set(cuda_flags ${cuda_flags} -D __KERNEL_DEBUG__)
-		endif()
-
-		if(WITH_CYCLES_CUBIN_COMPILER)
-			string(SUBSTRING ${arch} 3 -1 CUDA_ARCH)
-
-			# Needed to find libnvrtc-builtins.so. Can't do it from inside
-			# cycles_cubin_cc since the env variable is read before main()
-			if(APPLE)
-				set(CUBIN_CC_ENV ${CMAKE_COMMAND}
-					-E env DYLD_LIBRARY_PATH="${CUDA_TOOLKIT_ROOT_DIR}/lib")
-			elseif(UNIX)
-				set(CUBIN_CC_ENV ${CMAKE_COMMAND}
-					-E env LD_LIBRARY_PATH="${CUDA_TOOLKIT_ROOT_DIR}/lib64")
-			endif()
-
-			add_custom_command(
-				OUTPUT ${cuda_cubin}
-				COMMAND ${CUBIN_CC_ENV}
-						"$<TARGET_FILE:cycles_cubin_cc>"
-						-target ${CUDA_ARCH}
-						-i ${CMAKE_CURRENT_SOURCE_DIR}${cuda_kernel_src}
-						${cuda_flags}
-						-v
-						-cuda-toolkit-dir "${CUDA_TOOLKIT_ROOT_DIR}"
-				DEPENDS ${kernel_sources} cycles_cubin_cc)
-		else()
-			add_custom_command(
-				OUTPUT ${cuda_cubin}
-				COMMAND ${CUDA_NVCC_EXECUTABLE}
-						-arch=${arch}
-						${CUDA_NVCC_FLAGS}
-						--cubin
-						${CMAKE_CURRENT_SOURCE_DIR}${cuda_kernel_src}
-						--ptxas-options="-v"
-						${cuda_flags}
-				DEPENDS ${kernel_sources})
-		endif()
-		delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${cuda_cubin}" ${CYCLES_INSTALL_PATH}/lib)
-		list(APPEND cuda_cubins ${cuda_cubin})
-
-		unset(cuda_debug_flags)
-	endmacro()
-
-	set(prev_arch "none")
-	foreach(arch ${CYCLES_CUDA_BINARIES_ARCH})
-		if(${arch} MATCHES "sm_2.")
-			message(STATUS "CUDA binaries for ${arch} are no longer supported, skipped.")
-		elseif(${arch} MATCHES "sm_7." AND ${CUDA_VERSION} LESS 100)
-			message(STATUS "CUDA binaries for ${arch} require CUDA 10.0+, skipped.")
-		else()
-			# Compile regular kernel
-			CYCLES_CUDA_KERNEL_ADD(${arch} ${prev_arch} filter "" "${cuda_filter_sources}" FALSE)
-			CYCLES_CUDA_KERNEL_ADD(${arch} ${prev_arch} kernel "" "${cuda_sources}" FALSE)
-
-			if(WITH_CYCLES_CUDA_SPLIT_KERNEL_BINARIES)
-				# Compile split kernel
-				CYCLES_CUDA_KERNEL_ADD(${arch} ${prev_arch} kernel_split "-D __SPLIT__" "${cuda_sources}" FALSE)
-			endif()
-
-			if(WITH_CYCLES_CUDA_BUILD_SERIAL)
-				set(prev_arch ${arch})
-			endif()
-		endif()
-	endforeach()
-
-	add_custom_target(cycles_kernel_cuda ALL DEPENDS ${cuda_cubins})
-	cycles_set_solution_folder(cycles_kernel_cuda)
+  # 64 bit only
+  set(CUDA_BITS 64)
+
+  # CUDA version
+  execute_process(COMMAND ${CUDA_NVCC_EXECUTABLE} "--version" OUTPUT_VARIABLE NVCC_OUT)
+  string(REGEX REPLACE ".*release ([0-9]+)\\.([0-9]+).*" "\\1" CUDA_VERSION_MAJOR "${NVCC_OUT}")
+  string(REGEX REPLACE ".*release ([0-9]+)\\.([0-9]+).*" "\\2" CUDA_VERSION_MINOR "${NVCC_OUT}")
+  set(CUDA_VERSION "${CUDA_VERSION_MAJOR}${CUDA_VERSION_MINOR}")
+
+  # warn for other versions
+  if(CUDA_VERSION MATCHES "101")
+  else()
+    message(WARNING
+      "CUDA version ${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR} detected, "
+      "build may succeed but only CUDA 10.1 is officially supported")
+  endif()
+
+  # build for each arch
+  set(cuda_sources kernels/cuda/kernel.cu kernels/cuda/kernel_split.cu
+    ${SRC_HEADERS}
+    ${SRC_KERNELS_CUDA_HEADERS}
+    ${SRC_BVH_HEADERS}
+    ${SRC_SVM_HEADERS}
+    ${SRC_GEOM_HEADERS}
+    ${SRC_CLOSURE_HEADERS}
+    ${SRC_UTIL_HEADERS}
+  )
+  set(cuda_filter_sources kernels/cuda/filter.cu
+    ${SRC_HEADERS}
+    ${SRC_KERNELS_CUDA_HEADERS}
+    ${SRC_FILTER_HEADERS}
+    ${SRC_UTIL_HEADERS}
+  )
+  set(cuda_cubins)
+
+  macro(CYCLES_CUDA_KERNEL_ADD arch prev_arch name flags sources experimental)
+    set(cuda_cubin ${name}_${arch}.cubin)
+
+    set(kernel_sources ${sources})
+    if(NOT ${prev_arch} STREQUAL "none")
+      set(kernel_sources ${kernel_sources} ${name}_${prev_arch}.cubin)
+    endif()
+
+    set(cuda_kernel_src "/kernels/cuda/${name}.cu")
+
+    set(cuda_flags
+      -D CCL_NAMESPACE_BEGIN=
+      -D CCL_NAMESPACE_END=
+      -D NVCC
+      -m ${CUDA_BITS}
+      -I ${CMAKE_CURRENT_SOURCE_DIR}/..
+      -I ${CMAKE_CURRENT_SOURCE_DIR}/kernels/cuda
+      --use_fast_math
+      -o ${CMAKE_CURRENT_BINARY_DIR}/${cuda_cubin})
+
+    if(${experimental})
+      set(cuda_flags ${cuda_flags} -D __KERNEL_EXPERIMENTAL__)
+      set(name ${name}_experimental)
+    endif()
+
+    if(WITH_CYCLES_DEBUG)
+      set(cuda_flags ${cuda_flags} -D __KERNEL_DEBUG__)
+    endif()
+
+    if(WITH_CYCLES_CUBIN_COMPILER)
+      string(SUBSTRING ${arch} 3 -1 CUDA_ARCH)
+
+      # Needed to find libnvrtc-builtins.so. Can't do it from inside
+      # cycles_cubin_cc since the env variable is read before main()
+      if(APPLE)
+        set(CUBIN_CC_ENV ${CMAKE_COMMAND}
+          -E env DYLD_LIBRARY_PATH="${CUDA_TOOLKIT_ROOT_DIR}/lib")
+      elseif(UNIX)
+        set(CUBIN_CC_ENV ${CMAKE_COMMAND}
+          -E env LD_LIBRARY_PATH="${CUDA_TOOLKIT_ROOT_DIR}/lib64")
+      endif()
+
+      add_custom_command(
+        OUTPUT ${cuda_cubin}
+        COMMAND ${CUBIN_CC_ENV}
+            "$<TARGET_FILE:cycles_cubin_cc>"
+            -target ${CUDA_ARCH}
+            -i ${CMAKE_CURRENT_SOURCE_DIR}${cuda_kernel_src}
+            ${cuda_flags}
+            -v
+            -cuda-toolkit-dir "${CUDA_TOOLKIT_ROOT_DIR}"
+        DEPENDS ${kernel_sources} cycles_cubin_cc)
+    else()
+      add_custom_command(
+        OUTPUT ${cuda_cubin}
+        COMMAND ${CUDA_NVCC_EXECUTABLE}
+            -arch=${arch}
+            ${CUDA_NVCC_FLAGS}
+            --cubin
+            ${CMAKE_CURRENT_SOURCE_DIR}${cuda_kernel_src}
+            --ptxas-options="-v"
+            ${cuda_flags}
+        DEPENDS ${kernel_sources})
+    endif()
+    delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${cuda_cubin}" ${CYCLES_INSTALL_PATH}/lib)
+    list(APPEND cuda_cubins ${cuda_cubin})
+
+    unset(cuda_debug_flags)
+  endmacro()
+
+  set(prev_arch "none")
+  foreach(arch ${CYCLES_CUDA_BINARIES_ARCH})
+    if(${arch} MATCHES "sm_2.")
+      message(STATUS "CUDA binaries for ${arch} are no longer supported, skipped.")
+    elseif(${arch} MATCHES "sm_7." AND ${CUDA_VERSION} LESS 100)
+      message(STATUS "CUDA binaries for ${arch} require CUDA 10.0+, skipped.")
+    else()
+      # Compile regular kernel
+      CYCLES_CUDA_KERNEL_ADD(${arch} ${prev_arch} filter "" "${cuda_filter_sources}" FALSE)
+      CYCLES_CUDA_KERNEL_ADD(${arch} ${prev_arch} kernel "" "${cuda_sources}" FALSE)
+
+      if(WITH_CYCLES_CUDA_SPLIT_KERNEL_BINARIES)
+        # Compile split kernel
+        CYCLES_CUDA_KERNEL_ADD(${arch} ${prev_arch} kernel_split "-D __SPLIT__" "${cuda_sources}" FALSE)
+      endif()
+
+      if(WITH_CYCLES_CUDA_BUILD_SERIAL)
+        set(prev_arch ${arch})
+      endif()
+    endif()
+  endforeach()
+
+  add_custom_target(cycles_kernel_cuda ALL DEPENDS ${cuda_cubins})
+  cycles_set_solution_folder(cycles_kernel_cuda)
 endif()
 
 # OSL module
 
 if(WITH_CYCLES_OSL)
-	list(APPEND LIB
-		cycles_kernel_osl
-	)
-	add_subdirectory(osl)
-	add_subdirectory(shaders)
+  list(APPEND LIB
+    cycles_kernel_osl
+  )
+  add_subdirectory(osl)
+  add_subdirectory(shaders)
 endif()
 
 # CPU module
@@ -491,56 +491,56 @@ set_source_files_properties(kernels/cpu/kernel_split.cpp PROPERTIES COMPILE_FLAG
 set_source_files_properties(kernels/cpu/filter.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_KERNEL_FLAGS}")
 
 if(CXX_HAS_SSE)
-	set_source_files_properties(kernels/cpu/kernel_sse2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE2_KERNEL_FLAGS}")
-	set_source_files_properties(kernels/cpu/kernel_sse3.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE3_KERNEL_FLAGS}")
-	set_source_files_properties(kernels/cpu/kernel_sse41.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE41_KERNEL_FLAGS}")
-	set_source_files_properties(kernels/cpu/kernel_split_sse2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE2_KERNEL_FLAGS}")
-	set_source_files_properties(kernels/cpu/kernel_split_sse3.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE3_KERNEL_FLAGS}")
-	set_source_files_properties(kernels/cpu/kernel_split_sse41.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE41_KERNEL_FLAGS}")
-	set_source_files_properties(kernels/cpu/filter_sse2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE2_KERNEL_FLAGS}")
-	set_source_files_properties(kernels/cpu/filter_sse3.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE3_KERNEL_FLAGS}")
-	set_source_files_properties(kernels/cpu/filter_sse41.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE41_KERNEL_FLAGS}")
+  set_source_files_properties(kernels/cpu/kernel_sse2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE2_KERNEL_FLAGS}")
+  set_source_files_properties(kernels/cpu/kernel_sse3.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE3_KERNEL_FLAGS}")
+  set_source_files_properties(kernels/cpu/kernel_sse41.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE41_KERNEL_FLAGS}")
+  set_source_files_properties(kernels/cpu/kernel_split_sse2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE2_KERNEL_FLAGS}")
+  set_source_files_properties(kernels/cpu/kernel_split_sse3.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE3_KERNEL_FLAGS}")
+  set_source_files_properties(kernels/cpu/kernel_split_sse41.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE41_KERNEL_FLAGS}")
+  set_source_files_properties(kernels/cpu/filter_sse2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE2_KERNEL_FLAGS}")
+  set_source_files_properties(kernels/cpu/filter_sse3.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE3_KERNEL_FLAGS}")
+  set_source_files_properties(kernels/cpu/filter_sse41.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE41_KERNEL_FLAGS}")
 endif()
 
 if(CXX_HAS_AVX)
-	set_source_files_properties(kernels/cpu/kernel_avx.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX_KERNEL_FLAGS}")
-	set_source_files_properties(kernels/cpu/kernel_split_avx.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX_KERNEL_FLAGS}")
-	set_source_files_properties(kernels/cpu/filter_avx.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX_KERNEL_FLAGS}")
+  set_source_files_properties(kernels/cpu/kernel_avx.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX_KERNEL_FLAGS}")
+  set_source_files_properties(kernels/cpu/kernel_split_avx.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX_KERNEL_FLAGS}")
+  set_source_files_properties(kernels/cpu/filter_avx.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX_KERNEL_FLAGS}")
 endif()
 
 if(CXX_HAS_AVX2)
-	set_source_files_properties(kernels/cpu/kernel_avx2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX2_KERNEL_FLAGS}")
-	set_source_files_properties(kernels/cpu/kernel_split_avx2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX2_KERNEL_FLAGS}")
-	set_source_files_properties(kernels/cpu/filter_avx2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX2_KERNEL_FLAGS}")
+  set_source_files_properties(kernels/cpu/kernel_avx2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX2_KERNEL_FLAGS}")
+  set_source_files_properties(kernels/cpu/kernel_split_avx2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX2_KERNEL_FLAGS}")
+  set_source_files_properties(kernels/cpu/filter_avx2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX2_KERNEL_FLAGS}")
 endif()
 
 cycles_add_library(cycles_kernel "${LIB}"
-	${SRC_CPU_KERNELS}
-	${SRC_CUDA_KERNELS}
-	${SRC_OPENCL_KERNELS}
-	${SRC_HEADERS}
-	${SRC_KERNELS_CPU_HEADERS}
-	${SRC_KERNELS_CUDA_HEADERS}
-	${SRC_KERNELS_OPENCL_HEADERS}
-	${SRC_BVH_HEADERS}
-	${SRC_CLOSURE_HEADERS}
-	${SRC_FILTER_HEADERS}
-	${SRC_SVM_HEADERS}
-	${SRC_GEOM_HEADERS}
-	${SRC_SPLIT_HEADERS}
+  ${SRC_CPU_KERNELS}
+  ${SRC_CUDA_KERNELS}
+  ${SRC_OPENCL_KERNELS}
+  ${SRC_HEADERS}
+  ${SRC_KERNELS_CPU_HEADERS}
+  ${SRC_KERNELS_CUDA_HEADERS}
+  ${SRC_KERNELS_OPENCL_HEADERS}
+  ${SRC_BVH_HEADERS}
+  ${SRC_CLOSURE_HEADERS}
+  ${SRC_FILTER_HEADERS}
+  ${SRC_SVM_HEADERS}
+  ${SRC_GEOM_HEADERS}
+  ${SRC_SPLIT_HEADERS}
 )
 
 if(WITH_CYCLES_CUDA)
-	add_dependencies(cycles_kernel cycles_kernel_cuda)
+  add_dependencies(cycles_kernel cycles_kernel_cuda)
 endif()
 
 # OpenCL kernel
 
 #set(KERNEL_PREPROCESSED ${CMAKE_CURRENT_BINARY_DIR}/kernel_preprocessed.cl)
 #add_custom_command(
-#	OUTPUT ${KERNEL_PREPROCESSED}
-#	COMMAND gcc -x c++ -E ${CMAKE_CURRENT_SOURCE_DIR}/kernel.cl -I ${CMAKE_CURRENT_SOURCE_DIR}/../util/ -DCCL_NAMESPACE_BEGIN= -DCCL_NAMESPACE_END= -o ${KERNEL_PREPROCESSED}
-#	DEPENDS ${SRC_KERNEL} ${SRC_UTIL_HEADERS})
+#   OUTPUT ${KERNEL_PREPROCESSED}
+#   COMMAND gcc -x c++ -E ${CMAKE_CURRENT_SOURCE_DIR}/kernel.cl -I ${CMAKE_CURRENT_SOURCE_DIR}/../util/ -DCCL_NAMESPACE_BEGIN= -DCCL_NAMESPACE_END= -o ${KERNEL_PREPROCESSED}
+#   DEPENDS ${SRC_KERNEL} ${SRC_UTIL_HEADERS})
 #add_custom_target(cycles_kernel_preprocess ALL DEPENDS ${KERNEL_PREPROCESSED})
 #delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${KERNEL_PREPROCESSED}" ${CYCLES_INSTALL_PATH}/kernel)
 
diff --git a/intern/cycles/kernel/bvh/bvh.h b/intern/cycles/kernel/bvh/bvh.h
index e5f807833f3..13e72ed299f 100644
--- a/intern/cycles/kernel/bvh/bvh.h
+++ b/intern/cycles/kernel/bvh/bvh.h
@@ -57,19 +57,19 @@ CCL_NAMESPACE_BEGIN
 
 #if defined(__HAIR__)
 #  define BVH_FUNCTION_NAME bvh_intersect_hair
-#  define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_HAIR_MINIMUM_WIDTH
+#  define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_HAIR | BVH_HAIR_MINIMUM_WIDTH
 #  include "kernel/bvh/bvh_traversal.h"
 #endif
 
 #if defined(__OBJECT_MOTION__)
 #  define BVH_FUNCTION_NAME bvh_intersect_motion
-#  define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION
+#  define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_MOTION
 #  include "kernel/bvh/bvh_traversal.h"
 #endif
 
 #if defined(__HAIR__) && defined(__OBJECT_MOTION__)
 #  define BVH_FUNCTION_NAME bvh_intersect_hair_motion
-#  define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_HAIR_MINIMUM_WIDTH|BVH_MOTION
+#  define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_HAIR | BVH_HAIR_MINIMUM_WIDTH | BVH_MOTION
 #  include "kernel/bvh/bvh_traversal.h"
 #endif
 
@@ -82,10 +82,10 @@ CCL_NAMESPACE_BEGIN
 
 #  if defined(__OBJECT_MOTION__)
 #    define BVH_FUNCTION_NAME bvh_intersect_local_motion
-#    define BVH_FUNCTION_FEATURES BVH_MOTION|BVH_HAIR
+#    define BVH_FUNCTION_FEATURES BVH_MOTION | BVH_HAIR
 #    include "kernel/bvh/bvh_local.h"
 #  endif
-#endif  /* __BVH_LOCAL__ */
+#endif /* __BVH_LOCAL__ */
 
 /* Volume BVH traversal */
 
@@ -96,16 +96,16 @@ CCL_NAMESPACE_BEGIN
 
 #  if defined(__INSTANCING__)
 #    define BVH_FUNCTION_NAME bvh_intersect_volume_instancing
-#    define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR
+#    define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_HAIR
 #    include "kernel/bvh/bvh_volume.h"
 #  endif
 
 #  if defined(__OBJECT_MOTION__)
 #    define BVH_FUNCTION_NAME bvh_intersect_volume_motion
-#    define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION|BVH_HAIR
+#    define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_MOTION | BVH_HAIR
 #    include "kernel/bvh/bvh_volume.h"
 #  endif
-#endif  /* __VOLUME__ */
+#endif /* __VOLUME__ */
 
 /* Record all intersections - Shadow BVH traversal */
 
@@ -122,22 +122,22 @@ CCL_NAMESPACE_BEGIN
 
 #  if defined(__HAIR__)
 #    define BVH_FUNCTION_NAME bvh_intersect_shadow_all_hair
-#    define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR
+#    define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_HAIR
 #    include "kernel/bvh/bvh_shadow_all.h"
 #  endif
 
 #  if defined(__OBJECT_MOTION__)
 #    define BVH_FUNCTION_NAME bvh_intersect_shadow_all_motion
-#    define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION
+#    define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_MOTION
 #    include "kernel/bvh/bvh_shadow_all.h"
 #  endif
 
 #  if defined(__HAIR__) && defined(__OBJECT_MOTION__)
 #    define BVH_FUNCTION_NAME bvh_intersect_shadow_all_hair_motion
-#    define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_MOTION
+#    define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_HAIR | BVH_MOTION
 #    include "kernel/bvh/bvh_shadow_all.h"
 #  endif
-#endif  /* __SHADOW_RECORD_ALL__ */
+#endif /* __SHADOW_RECORD_ALL__ */
 
 /* Record all intersections - Volume BVH traversal  */
 
@@ -148,16 +148,16 @@ CCL_NAMESPACE_BEGIN
 
 #  if defined(__INSTANCING__)
 #    define BVH_FUNCTION_NAME bvh_intersect_volume_all_instancing
-#    define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR
+#    define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_HAIR
 #    include "kernel/bvh/bvh_volume_all.h"
 #  endif
 
 #  if defined(__OBJECT_MOTION__)
 #    define BVH_FUNCTION_NAME bvh_intersect_volume_all_motion
-#    define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION|BVH_HAIR
+#    define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_MOTION | BVH_HAIR
 #    include "kernel/bvh/bvh_volume_all.h"
 #  endif
-#endif  /* __VOLUME_RECORD_ALL__ */
+#endif /* __VOLUME_RECORD_ALL__ */
 
 #undef BVH_FEATURE
 #undef BVH_NAME_JOIN
@@ -166,15 +166,15 @@ CCL_NAMESPACE_BEGIN
 
 ccl_device_inline bool scene_intersect_valid(const Ray *ray)
 {
-	/* NOTE: Due to some vectorization code  non-finite origin point might
-	 * cause lots of false-positive intersections which will overflow traversal
-	 * stack.
-	 * This code is a quick way to perform early output, to avoid crashes in
-	 * such cases.
-	 * From production scenes so far it seems it's enough to test first element
-	 * only.
-	 */
-	return isfinite(ray->P.x);
+  /* NOTE: Due to some vectorization code  non-finite origin point might
+   * cause lots of false-positive intersections which will overflow traversal
+   * stack.
+   * This code is a quick way to perform early output, to avoid crashes in
+   * such cases.
+   * From production scenes so far it seems it's enough to test first element
+   * only.
+   */
+  return isfinite(ray->P.x);
 }
 
 /* Note: ray is passed by value to work around a possible CUDA compiler bug. */
@@ -186,59 +186,60 @@ ccl_device_intersect bool scene_intersect(KernelGlobals *kg,
                                           float difl,
                                           float extmax)
 {
-	PROFILING_INIT(kg, PROFILING_INTERSECT);
+  PROFILING_INIT(kg, PROFILING_INTERSECT);
 
-	if(!scene_intersect_valid(&ray)) {
-		return false;
-	}
+  if (!scene_intersect_valid(&ray)) {
+    return false;
+  }
 #ifdef __EMBREE__
-	if(kernel_data.bvh.scene) {
-		isect->t = ray.t;
-		CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_REGULAR);
-		IntersectContext rtc_ctx(&ctx);
-		RTCRayHit ray_hit;
-		kernel_embree_setup_rayhit(ray, ray_hit, visibility);
-		rtcIntersect1(kernel_data.bvh.scene, &rtc_ctx.context, &ray_hit);
-		if(ray_hit.hit.geomID != RTC_INVALID_GEOMETRY_ID && ray_hit.hit.primID != RTC_INVALID_GEOMETRY_ID) {
-			kernel_embree_convert_hit(kg, &ray_hit.ray, &ray_hit.hit, isect);
-			return true;
-		}
-		return false;
-	}
-#endif  /* __EMBREE__ */
+  if (kernel_data.bvh.scene) {
+    isect->t = ray.t;
+    CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_REGULAR);
+    IntersectContext rtc_ctx(&ctx);
+    RTCRayHit ray_hit;
+    kernel_embree_setup_rayhit(ray, ray_hit, visibility);
+    rtcIntersect1(kernel_data.bvh.scene, &rtc_ctx.context, &ray_hit);
+    if (ray_hit.hit.geomID != RTC_INVALID_GEOMETRY_ID &&
+        ray_hit.hit.primID != RTC_INVALID_GEOMETRY_ID) {
+      kernel_embree_convert_hit(kg, &ray_hit.ray, &ray_hit.hit, isect);
+      return true;
+    }
+    return false;
+  }
+#endif /* __EMBREE__ */
 #ifdef __OBJECT_MOTION__
-	if(kernel_data.bvh.have_motion) {
+  if (kernel_data.bvh.have_motion) {
 #  ifdef __HAIR__
-		if(kernel_data.bvh.have_curves)
-			return bvh_intersect_hair_motion(kg, &ray, isect, visibility, lcg_state, difl, extmax);
-#  endif  /* __HAIR__ */
+    if (kernel_data.bvh.have_curves)
+      return bvh_intersect_hair_motion(kg, &ray, isect, visibility, lcg_state, difl, extmax);
+#  endif /* __HAIR__ */
 
-		return bvh_intersect_motion(kg, &ray, isect, visibility);
-	}
-#endif  /* __OBJECT_MOTION__ */
+    return bvh_intersect_motion(kg, &ray, isect, visibility);
+  }
+#endif /* __OBJECT_MOTION__ */
 
 #ifdef __HAIR__
-	if(kernel_data.bvh.have_curves)
-		return bvh_intersect_hair(kg, &ray, isect, visibility, lcg_state, difl, extmax);
-#endif  /* __HAIR__ */
+  if (kernel_data.bvh.have_curves)
+    return bvh_intersect_hair(kg, &ray, isect, visibility, lcg_state, difl, extmax);
+#endif /* __HAIR__ */
 
 #ifdef __KERNEL_CPU__
 
 #  ifdef __INSTANCING__
-	if(kernel_data.bvh.have_instancing)
-		return bvh_intersect_instancing(kg, &ray, isect, visibility);
-#  endif  /* __INSTANCING__ */
+  if (kernel_data.bvh.have_instancing)
+    return bvh_intersect_instancing(kg, &ray, isect, visibility);
+#  endif /* __INSTANCING__ */
 
-	return bvh_intersect(kg, &ray, isect, visibility);
-#else  /* __KERNEL_CPU__ */
+  return bvh_intersect(kg, &ray, isect, visibility);
+#else /* __KERNEL_CPU__ */
 
 #  ifdef __INSTANCING__
-	return bvh_intersect_instancing(kg, &ray, isect, visibility);
+  return bvh_intersect_instancing(kg, &ray, isect, visibility);
 #  else
-	return bvh_intersect(kg, &ray, isect, visibility);
-#  endif  /* __INSTANCING__ */
+  return bvh_intersect(kg, &ray, isect, visibility);
+#  endif /* __INSTANCING__ */
 
-#endif  /* __KERNEL_CPU__ */
+#endif /* __KERNEL_CPU__ */
 }
 
 #ifdef __BVH_LOCAL__
@@ -250,77 +251,61 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals *kg,
                                                 uint *lcg_state,
                                                 int max_hits)
 {
-	PROFILING_INIT(kg, PROFILING_INTERSECT_LOCAL);
+  PROFILING_INIT(kg, PROFILING_INTERSECT_LOCAL);
 
-	if(!scene_intersect_valid(&ray)) {
-		local_isect->num_hits = 0;
-		return false;
-	}
-#ifdef __EMBREE__
-	if(kernel_data.bvh.scene) {
-		CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_SSS);
-		ctx.lcg_state = lcg_state;
-		ctx.max_hits = max_hits;
-		ctx.ss_isect = local_isect;
-		local_isect->num_hits = 0;
-		ctx.sss_object_id = local_object;
-		IntersectContext rtc_ctx(&ctx);
-		RTCRay rtc_ray;
-		kernel_embree_setup_ray(ray, rtc_ray, PATH_RAY_ALL_VISIBILITY);
-
-		/* Get the Embree scene for this intersection. */
-		RTCGeometry geom = rtcGetGeometry(kernel_data.bvh.scene, local_object * 2);
-		if(geom) {
-			float3 P = ray.P;
-			float3 dir = ray.D;
-			float3 idir = ray.D;
-			const int object_flag = kernel_tex_fetch(__object_flag, local_object);
-			if(!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
-				Transform ob_itfm;
-				rtc_ray.tfar = bvh_instance_motion_push(kg,
-				                                        local_object,
-				                                        &ray,
-				                                        &P,
-				                                        &dir,
-				                                        &idir,
-				                                        ray.t,
-				                                        &ob_itfm);
-				/* bvh_instance_motion_push() returns the inverse transform but
-				 * it's not needed here. */
-				(void) ob_itfm;
-
-				rtc_ray.org_x = P.x;
-				rtc_ray.org_y = P.y;
-				rtc_ray.org_z = P.z;
-				rtc_ray.dir_x = dir.x;
-				rtc_ray.dir_y = dir.y;
-				rtc_ray.dir_z = dir.z;
-			}
-			RTCScene scene = (RTCScene)rtcGetGeometryUserData(geom);
-			if(scene) {
-				rtcOccluded1(scene, &rtc_ctx.context, &rtc_ray);
-			}
-		}
-
-		return local_isect->num_hits > 0;
-	}
-#endif  /* __EMBREE__ */
-#ifdef __OBJECT_MOTION__
-	if(kernel_data.bvh.have_motion) {
-		return bvh_intersect_local_motion(kg,
-		                                  &ray,
-		                                  local_isect,
-		                                  local_object,
-		                                  lcg_state,
-		                                  max_hits);
-	}
-#endif  /* __OBJECT_MOTION__ */
-	return bvh_intersect_local(kg,
-	                            &ray,
-	                            local_isect,
-	                            local_object,
-	                            lcg_state,
-	                            max_hits);
+  if (!scene_intersect_valid(&ray)) {
+    local_isect->num_hits = 0;
+    return false;
+  }
+#  ifdef __EMBREE__
+  if (kernel_data.bvh.scene) {
+    CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_SSS);
+    ctx.lcg_state = lcg_state;
+    ctx.max_hits = max_hits;
+    ctx.ss_isect = local_isect;
+    local_isect->num_hits = 0;
+    ctx.sss_object_id = local_object;
+    IntersectContext rtc_ctx(&ctx);
+    RTCRay rtc_ray;
+    kernel_embree_setup_ray(ray, rtc_ray, PATH_RAY_ALL_VISIBILITY);
+
+    /* Get the Embree scene for this intersection. */
+    RTCGeometry geom = rtcGetGeometry(kernel_data.bvh.scene, local_object * 2);
+    if (geom) {
+      float3 P = ray.P;
+      float3 dir = ray.D;
+      float3 idir = ray.D;
+      const int object_flag = kernel_tex_fetch(__object_flag, local_object);
+      if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
+        Transform ob_itfm;
+        rtc_ray.tfar = bvh_instance_motion_push(
+            kg, local_object, &ray, &P, &dir, &idir, ray.t, &ob_itfm);
+        /* bvh_instance_motion_push() returns the inverse transform but
+         * it's not needed here. */
+        (void)ob_itfm;
+
+        rtc_ray.org_x = P.x;
+        rtc_ray.org_y = P.y;
+        rtc_ray.org_z = P.z;
+        rtc_ray.dir_x = dir.x;
+        rtc_ray.dir_y = dir.y;
+        rtc_ray.dir_z = dir.z;
+      }
+      RTCScene scene = (RTCScene)rtcGetGeometryUserData(geom);
+      if (scene) {
+        rtcOccluded1(scene, &rtc_ctx.context, &rtc_ray);
+      }
+    }
+
+    return local_isect->num_hits > 0;
+  }
+#  endif /* __EMBREE__ */
+#  ifdef __OBJECT_MOTION__
+  if (kernel_data.bvh.have_motion) {
+    return bvh_intersect_local_motion(kg, &ray, local_isect, local_object, lcg_state, max_hits);
+  }
+#  endif /* __OBJECT_MOTION__ */
+  return bvh_intersect_local(kg, &ray, local_isect, local_object, lcg_state, max_hits);
 }
 #endif
 
@@ -332,82 +317,57 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals *kg,
                                                      uint max_hits,
                                                      uint *num_hits)
 {
-	PROFILING_INIT(kg, PROFILING_INTERSECT_SHADOW_ALL);
+  PROFILING_INIT(kg, PROFILING_INTERSECT_SHADOW_ALL);
 
-	if(!scene_intersect_valid(ray)) {
-		*num_hits = 0;
-		return false;
-	}
+  if (!scene_intersect_valid(ray)) {
+    *num_hits = 0;
+    return false;
+  }
 #  ifdef __EMBREE__
-	if(kernel_data.bvh.scene) {
-		CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_SHADOW_ALL);
-		ctx.isect_s = isect;
-		ctx.max_hits = max_hits;
-		ctx.num_hits = 0;
-		IntersectContext rtc_ctx(&ctx);
-		RTCRay rtc_ray;
-		kernel_embree_setup_ray(*ray, rtc_ray, PATH_RAY_SHADOW);
-		rtcOccluded1(kernel_data.bvh.scene, &rtc_ctx.context, &rtc_ray);
-
-		if(ctx.num_hits > max_hits) {
-			return true;
-		}
-		*num_hits = ctx.num_hits;
-		return rtc_ray.tfar == -INFINITY;
-	}
+  if (kernel_data.bvh.scene) {
+    CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_SHADOW_ALL);
+    ctx.isect_s = isect;
+    ctx.max_hits = max_hits;
+    ctx.num_hits = 0;
+    IntersectContext rtc_ctx(&ctx);
+    RTCRay rtc_ray;
+    kernel_embree_setup_ray(*ray, rtc_ray, PATH_RAY_SHADOW);
+    rtcOccluded1(kernel_data.bvh.scene, &rtc_ctx.context, &rtc_ray);
+
+    if (ctx.num_hits > max_hits) {
+      return true;
+    }
+    *num_hits = ctx.num_hits;
+    return rtc_ray.tfar == -INFINITY;
+  }
 #  endif
 #  ifdef __OBJECT_MOTION__
-	if(kernel_data.bvh.have_motion) {
+  if (kernel_data.bvh.have_motion) {
 #    ifdef __HAIR__
-		if(kernel_data.bvh.have_curves) {
-			return bvh_intersect_shadow_all_hair_motion(kg,
-			                                            ray,
-			                                            isect,
-			                                            visibility,
-			                                            max_hits,
-			                                            num_hits);
-		}
-#    endif  /* __HAIR__ */
-
-		return bvh_intersect_shadow_all_motion(kg,
-		                                       ray,
-		                                       isect,
-		                                       visibility,
-		                                       max_hits,
-		                                       num_hits);
-	}
-#  endif  /* __OBJECT_MOTION__ */
+    if (kernel_data.bvh.have_curves) {
+      return bvh_intersect_shadow_all_hair_motion(kg, ray, isect, visibility, max_hits, num_hits);
+    }
+#    endif /* __HAIR__ */
+
+    return bvh_intersect_shadow_all_motion(kg, ray, isect, visibility, max_hits, num_hits);
+  }
+#  endif /* __OBJECT_MOTION__ */
 
 #  ifdef __HAIR__
-	if(kernel_data.bvh.have_curves) {
-		return bvh_intersect_shadow_all_hair(kg,
-		                                     ray,
-		                                     isect,
-		                                     visibility,
-		                                     max_hits,
-		                                     num_hits);
-	}
-#  endif  /* __HAIR__ */
+  if (kernel_data.bvh.have_curves) {
+    return bvh_intersect_shadow_all_hair(kg, ray, isect, visibility, max_hits, num_hits);
+  }
+#  endif /* __HAIR__ */
 
 #  ifdef __INSTANCING__
-	if(kernel_data.bvh.have_instancing) {
-		return bvh_intersect_shadow_all_instancing(kg,
-		                                           ray,
-		                                           isect,
-		                                           visibility,
-		                                           max_hits,
-		                                           num_hits);
-	}
-#  endif  /* __INSTANCING__ */
-
-	return bvh_intersect_shadow_all(kg,
-	                                ray,
-	                                isect,
-	                                visibility,
-	                                max_hits,
-	                                num_hits);
+  if (kernel_data.bvh.have_instancing) {
+    return bvh_intersect_shadow_all_instancing(kg, ray, isect, visibility, max_hits, num_hits);
+  }
+#  endif /* __INSTANCING__ */
+
+  return bvh_intersect_shadow_all(kg, ray, isect, visibility, max_hits, num_hits);
 }
-#endif  /* __SHADOW_RECORD_ALL__ */
+#endif /* __SHADOW_RECORD_ALL__ */
 
 #ifdef __VOLUME__
 ccl_device_intersect bool scene_intersect_volume(KernelGlobals *kg,
@@ -415,31 +375,31 @@ ccl_device_intersect bool scene_intersect_volume(KernelGlobals *kg,
                                                  Intersection *isect,
                                                  const uint visibility)
 {
-	PROFILING_INIT(kg, PROFILING_INTERSECT_VOLUME);
+  PROFILING_INIT(kg, PROFILING_INTERSECT_VOLUME);
 
-	if(!scene_intersect_valid(ray)) {
-		return false;
-	}
+  if (!scene_intersect_valid(ray)) {
+    return false;
+  }
 #  ifdef __OBJECT_MOTION__
-	if(kernel_data.bvh.have_motion) {
-		return bvh_intersect_volume_motion(kg, ray, isect, visibility);
-	}
-#  endif  /* __OBJECT_MOTION__ */
+  if (kernel_data.bvh.have_motion) {
+    return bvh_intersect_volume_motion(kg, ray, isect, visibility);
+  }
+#  endif /* __OBJECT_MOTION__ */
 #  ifdef __KERNEL_CPU__
 #    ifdef __INSTANCING__
-	if(kernel_data.bvh.have_instancing)
-		return bvh_intersect_volume_instancing(kg, ray, isect, visibility);
-#    endif  /* __INSTANCING__ */
-	return bvh_intersect_volume(kg, ray, isect, visibility);
-#  else  /* __KERNEL_CPU__ */
+  if (kernel_data.bvh.have_instancing)
+    return bvh_intersect_volume_instancing(kg, ray, isect, visibility);
+#    endif /* __INSTANCING__ */
+  return bvh_intersect_volume(kg, ray, isect, visibility);
+#  else /* __KERNEL_CPU__ */
 #    ifdef __INSTANCING__
-	return bvh_intersect_volume_instancing(kg, ray, isect, visibility);
+  return bvh_intersect_volume_instancing(kg, ray, isect, visibility);
 #    else
-	return bvh_intersect_volume(kg, ray, isect, visibility);
-#    endif  /* __INSTANCING__ */
-#  endif  /* __KERNEL_CPU__ */
+  return bvh_intersect_volume(kg, ray, isect, visibility);
+#    endif /* __INSTANCING__ */
+#  endif   /* __KERNEL_CPU__ */
 }
-#endif  /* __VOLUME__ */
+#endif /* __VOLUME__ */
 
 #ifdef __VOLUME_RECORD_ALL__
 ccl_device_intersect uint scene_intersect_volume_all(KernelGlobals *kg,
@@ -448,37 +408,36 @@ ccl_device_intersect uint scene_intersect_volume_all(KernelGlobals *kg,
                                                      const uint max_hits,
                                                      const uint visibility)
 {
-	PROFILING_INIT(kg, PROFILING_INTERSECT_VOLUME_ALL);
+  PROFILING_INIT(kg, PROFILING_INTERSECT_VOLUME_ALL);
 
-	if(!scene_intersect_valid(ray)) {
-		return false;
-	}
+  if (!scene_intersect_valid(ray)) {
+    return false;
+  }
 #  ifdef __EMBREE__
-	if(kernel_data.bvh.scene) {
-		CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_VOLUME_ALL);
-		ctx.isect_s = isect;
-		ctx.max_hits = max_hits;
-		ctx.num_hits = 0;
-		IntersectContext rtc_ctx(&ctx);
-		RTCRay rtc_ray;
-		kernel_embree_setup_ray(*ray, rtc_ray, visibility);
-		rtcOccluded1(kernel_data.bvh.scene, &rtc_ctx.context, &rtc_ray);
-		return rtc_ray.tfar == -INFINITY;
-	}
+  if (kernel_data.bvh.scene) {
+    CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_VOLUME_ALL);
+    ctx.isect_s = isect;
+    ctx.max_hits = max_hits;
+    ctx.num_hits = 0;
+    IntersectContext rtc_ctx(&ctx);
+    RTCRay rtc_ray;
+    kernel_embree_setup_ray(*ray, rtc_ray, visibility);
+    rtcOccluded1(kernel_data.bvh.scene, &rtc_ctx.context, &rtc_ray);
+    return rtc_ray.tfar == -INFINITY;
+  }
 #  endif
 #  ifdef __OBJECT_MOTION__
-	if(kernel_data.bvh.have_motion) {
-		return bvh_intersect_volume_all_motion(kg, ray, isect, max_hits, visibility);
-	}
-#  endif  /* __OBJECT_MOTION__ */
+  if (kernel_data.bvh.have_motion) {
+    return bvh_intersect_volume_all_motion(kg, ray, isect, max_hits, visibility);
+  }
+#  endif /* __OBJECT_MOTION__ */
 #  ifdef __INSTANCING__
-	if(kernel_data.bvh.have_instancing)
-		return bvh_intersect_volume_all_instancing(kg, ray, isect, max_hits, visibility);
-#  endif  /* __INSTANCING__ */
-	return bvh_intersect_volume_all(kg, ray, isect, max_hits, visibility);
+  if (kernel_data.bvh.have_instancing)
+    return bvh_intersect_volume_all_instancing(kg, ray, isect, max_hits, visibility);
+#  endif /* __INSTANCING__ */
+  return bvh_intersect_volume_all(kg, ray, isect, max_hits, visibility);
 }
-#endif  /* __VOLUME_RECORD_ALL__ */
-
+#endif /* __VOLUME_RECORD_ALL__ */
 
 /* Ray offset to avoid self intersection.
  *
@@ -488,48 +447,48 @@ ccl_device_intersect uint scene_intersect_volume_all(KernelGlobals *kg,
 ccl_device_inline float3 ray_offset(float3 P, float3 Ng)
 {
 #ifdef __INTERSECTION_REFINE__
-	const float epsilon_f = 1e-5f;
-	/* ideally this should match epsilon_f, but instancing and motion blur
-	 * precision makes it problematic */
-	const float epsilon_test = 1.0f;
-	const int epsilon_i = 32;
-
-	float3 res;
-
-	/* x component */
-	if(fabsf(P.x) < epsilon_test) {
-		res.x = P.x + Ng.x*epsilon_f;
-	}
-	else {
-		uint ix = __float_as_uint(P.x);
-		ix += ((ix ^ __float_as_uint(Ng.x)) >> 31)? -epsilon_i: epsilon_i;
-		res.x = __uint_as_float(ix);
-	}
-
-	/* y component */
-	if(fabsf(P.y) < epsilon_test) {
-		res.y = P.y + Ng.y*epsilon_f;
-	}
-	else {
-		uint iy = __float_as_uint(P.y);
-		iy += ((iy ^ __float_as_uint(Ng.y)) >> 31)? -epsilon_i: epsilon_i;
-		res.y = __uint_as_float(iy);
-	}
-
-	/* z component */
-	if(fabsf(P.z) < epsilon_test) {
-		res.z = P.z + Ng.z*epsilon_f;
-	}
-	else {
-		uint iz = __float_as_uint(P.z);
-		iz += ((iz ^ __float_as_uint(Ng.z)) >> 31)? -epsilon_i: epsilon_i;
-		res.z = __uint_as_float(iz);
-	}
-
-	return res;
+  const float epsilon_f = 1e-5f;
+  /* ideally this should match epsilon_f, but instancing and motion blur
+   * precision makes it problematic */
+  const float epsilon_test = 1.0f;
+  const int epsilon_i = 32;
+
+  float3 res;
+
+  /* x component */
+  if (fabsf(P.x) < epsilon_test) {
+    res.x = P.x + Ng.x * epsilon_f;
+  }
+  else {
+    uint ix = __float_as_uint(P.x);
+    ix += ((ix ^ __float_as_uint(Ng.x)) >> 31) ? -epsilon_i : epsilon_i;
+    res.x = __uint_as_float(ix);
+  }
+
+  /* y component */
+  if (fabsf(P.y) < epsilon_test) {
+    res.y = P.y + Ng.y * epsilon_f;
+  }
+  else {
+    uint iy = __float_as_uint(P.y);
+    iy += ((iy ^ __float_as_uint(Ng.y)) >> 31) ? -epsilon_i : epsilon_i;
+    res.y = __uint_as_float(iy);
+  }
+
+  /* z component */
+  if (fabsf(P.z) < epsilon_test) {
+    res.z = P.z + Ng.z * epsilon_f;
+  }
+  else {
+    uint iz = __float_as_uint(P.z);
+    iz += ((iz ^ __float_as_uint(Ng.z)) >> 31) ? -epsilon_i : epsilon_i;
+    res.z = __uint_as_float(iz);
+  }
+
+  return res;
 #else
-	const float epsilon_f = 1e-4f;
-	return P + epsilon_f*Ng;
+  const float epsilon_f = 1e-4f;
+  return P + epsilon_f * Ng;
 #endif
 }
 
@@ -537,40 +496,40 @@ ccl_device_inline float3 ray_offset(float3 P, float3 Ng)
 /* ToDo: Move to another file? */
 ccl_device int intersections_compare(const void *a, const void *b)
 {
-	const Intersection *isect_a = (const Intersection*)a;
-	const Intersection *isect_b = (const Intersection*)b;
-
-	if(isect_a->t < isect_b->t)
-		return -1;
-	else if(isect_a->t > isect_b->t)
-		return 1;
-	else
-		return 0;
+  const Intersection *isect_a = (const Intersection *)a;
+  const Intersection *isect_b = (const Intersection *)b;
+
+  if (isect_a->t < isect_b->t)
+    return -1;
+  else if (isect_a->t > isect_b->t)
+    return 1;
+  else
+    return 0;
 }
 #endif
 
 #if defined(__SHADOW_RECORD_ALL__)
 ccl_device_inline void sort_intersections(Intersection *hits, uint num_hits)
 {
-#ifdef __KERNEL_GPU__
-	/* Use bubble sort which has more friendly memory pattern on GPU. */
-	bool swapped;
-	do {
-		swapped = false;
-		for(int j = 0; j < num_hits - 1; ++j) {
-			if(hits[j].t > hits[j + 1].t) {
-				struct Intersection tmp = hits[j];
-				hits[j] = hits[j + 1];
-				hits[j + 1] = tmp;
-				swapped = true;
-			}
-		}
-		--num_hits;
-	} while(swapped);
-#else
-	qsort(hits, num_hits, sizeof(Intersection), intersections_compare);
-#endif
+#  ifdef __KERNEL_GPU__
+  /* Use bubble sort which has more friendly memory pattern on GPU. */
+  bool swapped;
+  do {
+    swapped = false;
+    for (int j = 0; j < num_hits - 1; ++j) {
+      if (hits[j].t > hits[j + 1].t) {
+        struct Intersection tmp = hits[j];
+        hits[j] = hits[j + 1];
+        hits[j + 1] = tmp;
+        swapped = true;
+      }
+    }
+    --num_hits;
+  } while (swapped);
+#  else
+  qsort(hits, num_hits, sizeof(Intersection), intersections_compare);
+#  endif
 }
-#endif  /* __SHADOW_RECORD_ALL__ | __VOLUME_RECORD_ALL__ */
+#endif /* __SHADOW_RECORD_ALL__ | __VOLUME_RECORD_ALL__ */
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/bvh/bvh_embree.h b/intern/cycles/kernel/bvh/bvh_embree.h
index bfc911a1e76..661bba54fd4 100644
--- a/intern/cycles/kernel/bvh/bvh_embree.h
+++ b/intern/cycles/kernel/bvh/bvh_embree.h
@@ -24,103 +24,120 @@
 
 CCL_NAMESPACE_BEGIN
 
-struct CCLIntersectContext  {
-	typedef enum {
-		RAY_REGULAR = 0,
-		RAY_SHADOW_ALL = 1,
-		RAY_SSS = 2,
-		RAY_VOLUME_ALL = 3,
+struct CCLIntersectContext {
+  typedef enum {
+    RAY_REGULAR = 0,
+    RAY_SHADOW_ALL = 1,
+    RAY_SSS = 2,
+    RAY_VOLUME_ALL = 3,
 
-	} RayType;
+  } RayType;
 
-	KernelGlobals *kg;
-	RayType type;
+  KernelGlobals *kg;
+  RayType type;
 
-	/* for shadow rays */
-	Intersection *isect_s;
-	int max_hits;
-	int num_hits;
+  /* for shadow rays */
+  Intersection *isect_s;
+  int max_hits;
+  int num_hits;
 
-	/* for SSS Rays: */
-	LocalIntersection *ss_isect;
-	int sss_object_id;
-	uint *lcg_state;
+  /* for SSS Rays: */
+  LocalIntersection *ss_isect;
+  int sss_object_id;
+  uint *lcg_state;
 
-	CCLIntersectContext(KernelGlobals *kg_,  RayType type_)
-	{
-		kg = kg_;
-		type = type_;
-		max_hits = 1;
-		num_hits = 0;
-		isect_s = NULL;
-		ss_isect = NULL;
-		sss_object_id = -1;
-		lcg_state = NULL;
-	}
+  CCLIntersectContext(KernelGlobals *kg_, RayType type_)
+  {
+    kg = kg_;
+    type = type_;
+    max_hits = 1;
+    num_hits = 0;
+    isect_s = NULL;
+    ss_isect = NULL;
+    sss_object_id = -1;
+    lcg_state = NULL;
+  }
 };
 
-class IntersectContext
-{
-public:
-	IntersectContext(CCLIntersectContext* ctx)
-	{
-		rtcInitIntersectContext(&context);
-		userRayExt = ctx;
-	}
-	RTCIntersectContext context;
-	CCLIntersectContext* userRayExt;
+class IntersectContext {
+ public:
+  IntersectContext(CCLIntersectContext *ctx)
+  {
+    rtcInitIntersectContext(&context);
+    userRayExt = ctx;
+  }
+  RTCIntersectContext context;
+  CCLIntersectContext *userRayExt;
 };
 
-ccl_device_inline void kernel_embree_setup_ray(const Ray& ray, RTCRay& rtc_ray, const uint visibility)
+ccl_device_inline void kernel_embree_setup_ray(const Ray &ray,
+                                               RTCRay &rtc_ray,
+                                               const uint visibility)
 {
-	rtc_ray.org_x = ray.P.x;
-	rtc_ray.org_y = ray.P.y;
-	rtc_ray.org_z = ray.P.z;
-	rtc_ray.dir_x = ray.D.x;
-	rtc_ray.dir_y = ray.D.y;
-	rtc_ray.dir_z = ray.D.z;
-	rtc_ray.tnear = 0.0f;
-	rtc_ray.tfar = ray.t;
-	rtc_ray.time = ray.time;
-	rtc_ray.mask = visibility;
+  rtc_ray.org_x = ray.P.x;
+  rtc_ray.org_y = ray.P.y;
+  rtc_ray.org_z = ray.P.z;
+  rtc_ray.dir_x = ray.D.x;
+  rtc_ray.dir_y = ray.D.y;
+  rtc_ray.dir_z = ray.D.z;
+  rtc_ray.tnear = 0.0f;
+  rtc_ray.tfar = ray.t;
+  rtc_ray.time = ray.time;
+  rtc_ray.mask = visibility;
 }
 
-ccl_device_inline void kernel_embree_setup_rayhit(const Ray& ray, RTCRayHit& rayhit, const uint visibility)
+ccl_device_inline void kernel_embree_setup_rayhit(const Ray &ray,
+                                                  RTCRayHit &rayhit,
+                                                  const uint visibility)
 {
-	kernel_embree_setup_ray(ray, rayhit.ray, visibility);
-	rayhit.hit.geomID = RTC_INVALID_GEOMETRY_ID;
-	rayhit.hit.primID = RTC_INVALID_GEOMETRY_ID;
+  kernel_embree_setup_ray(ray, rayhit.ray, visibility);
+  rayhit.hit.geomID = RTC_INVALID_GEOMETRY_ID;
+  rayhit.hit.primID = RTC_INVALID_GEOMETRY_ID;
 }
 
-ccl_device_inline void kernel_embree_convert_hit(KernelGlobals *kg, const RTCRay *ray, const RTCHit *hit, Intersection *isect)
+ccl_device_inline void kernel_embree_convert_hit(KernelGlobals *kg,
+                                                 const RTCRay *ray,
+                                                 const RTCHit *hit,
+                                                 Intersection *isect)
 {
-	bool is_hair = hit->geomID & 1;
-	isect->u = is_hair ? hit->u : 1.0f - hit->v - hit->u;
-	isect->v = is_hair ? hit->v : hit->u;
-	isect->t = ray->tfar;
-	isect->Ng = make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z);
-	if(hit->instID[0] != RTC_INVALID_GEOMETRY_ID) {
-		RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData(rtcGetGeometry(kernel_data.bvh.scene, hit->instID[0]));
-		isect->prim = hit->primID + (intptr_t)rtcGetGeometryUserData(rtcGetGeometry(inst_scene, hit->geomID)) + kernel_tex_fetch(__object_node, hit->instID[0]/2);
-		isect->object = hit->instID[0]/2;
-	}
-	else {
-		isect->prim = hit->primID + (intptr_t)rtcGetGeometryUserData(rtcGetGeometry(kernel_data.bvh.scene, hit->geomID));
-		isect->object = OBJECT_NONE;
-	}
-	isect->type = kernel_tex_fetch(__prim_type, isect->prim);
+  bool is_hair = hit->geomID & 1;
+  isect->u = is_hair ? hit->u : 1.0f - hit->v - hit->u;
+  isect->v = is_hair ? hit->v : hit->u;
+  isect->t = ray->tfar;
+  isect->Ng = make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z);
+  if (hit->instID[0] != RTC_INVALID_GEOMETRY_ID) {
+    RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData(
+        rtcGetGeometry(kernel_data.bvh.scene, hit->instID[0]));
+    isect->prim = hit->primID +
+                  (intptr_t)rtcGetGeometryUserData(rtcGetGeometry(inst_scene, hit->geomID)) +
+                  kernel_tex_fetch(__object_node, hit->instID[0] / 2);
+    isect->object = hit->instID[0] / 2;
+  }
+  else {
+    isect->prim = hit->primID + (intptr_t)rtcGetGeometryUserData(
+                                    rtcGetGeometry(kernel_data.bvh.scene, hit->geomID));
+    isect->object = OBJECT_NONE;
+  }
+  isect->type = kernel_tex_fetch(__prim_type, isect->prim);
 }
 
-ccl_device_inline void kernel_embree_convert_local_hit(KernelGlobals *kg, const RTCRay *ray, const RTCHit *hit, Intersection *isect, int local_object_id)
+ccl_device_inline void kernel_embree_convert_local_hit(KernelGlobals *kg,
+                                                       const RTCRay *ray,
+                                                       const RTCHit *hit,
+                                                       Intersection *isect,
+                                                       int local_object_id)
 {
-	isect->u = 1.0f - hit->v - hit->u;
-	isect->v = hit->u;
-	isect->t = ray->tfar;
-	isect->Ng = make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z);
-	RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData(rtcGetGeometry(kernel_data.bvh.scene, local_object_id * 2));
-	isect->prim = hit->primID + (intptr_t)rtcGetGeometryUserData(rtcGetGeometry(inst_scene, hit->geomID)) + kernel_tex_fetch(__object_node, local_object_id);
-	isect->object = local_object_id;
-	isect->type = kernel_tex_fetch(__prim_type, isect->prim);
+  isect->u = 1.0f - hit->v - hit->u;
+  isect->v = hit->u;
+  isect->t = ray->tfar;
+  isect->Ng = make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z);
+  RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData(
+      rtcGetGeometry(kernel_data.bvh.scene, local_object_id * 2));
+  isect->prim = hit->primID +
+                (intptr_t)rtcGetGeometryUserData(rtcGetGeometry(inst_scene, hit->geomID)) +
+                kernel_tex_fetch(__object_node, local_object_id);
+  isect->object = local_object_id;
+  isect->type = kernel_tex_fetch(__prim_type, isect->prim);
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/bvh/bvh_local.h b/intern/cycles/kernel/bvh/bvh_local.h
index 3bdc9293a6c..7a069ef1108 100644
--- a/intern/cycles/kernel/bvh/bvh_local.h
+++ b/intern/cycles/kernel/bvh/bvh_local.h
@@ -43,208 +43,201 @@ ccl_device
 #else
 ccl_device_inline
 #endif
-bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
-                                 const Ray *ray,
-                                 LocalIntersection *local_isect,
-                                 int local_object,
-                                 uint *lcg_state,
-                                 int max_hits)
+    bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
+                                     const Ray *ray,
+                                     LocalIntersection *local_isect,
+                                     int local_object,
+                                     uint *lcg_state,
+                                     int max_hits)
 {
-	/* todo:
-	 * - test if pushing distance on the stack helps (for non shadow rays)
-	 * - separate version for shadow rays
-	 * - likely and unlikely for if() statements
-	 * - test restrict attribute for pointers
-	 */
+  /* todo:
+   * - test if pushing distance on the stack helps (for non shadow rays)
+   * - separate version for shadow rays
+   * - likely and unlikely for if() statements
+   * - test restrict attribute for pointers
+   */
 
-	/* traversal stack in CUDA thread-local memory */
-	int traversal_stack[BVH_STACK_SIZE];
-	traversal_stack[0] = ENTRYPOINT_SENTINEL;
+  /* traversal stack in CUDA thread-local memory */
+  int traversal_stack[BVH_STACK_SIZE];
+  traversal_stack[0] = ENTRYPOINT_SENTINEL;
 
-	/* traversal variables in registers */
-	int stack_ptr = 0;
-	int node_addr = kernel_tex_fetch(__object_node, local_object);
+  /* traversal variables in registers */
+  int stack_ptr = 0;
+  int node_addr = kernel_tex_fetch(__object_node, local_object);
 
-	/* ray parameters in registers */
-	float3 P = ray->P;
-	float3 dir = bvh_clamp_direction(ray->D);
-	float3 idir = bvh_inverse_direction(dir);
-	int object = OBJECT_NONE;
-	float isect_t = ray->t;
+  /* ray parameters in registers */
+  float3 P = ray->P;
+  float3 dir = bvh_clamp_direction(ray->D);
+  float3 idir = bvh_inverse_direction(dir);
+  int object = OBJECT_NONE;
+  float isect_t = ray->t;
 
-	if(local_isect != NULL) {
-		local_isect->num_hits = 0;
-	}
-	kernel_assert((local_isect == NULL) == (max_hits == 0));
+  if (local_isect != NULL) {
+    local_isect->num_hits = 0;
+  }
+  kernel_assert((local_isect == NULL) == (max_hits == 0));
 
-	const int object_flag = kernel_tex_fetch(__object_flag, local_object);
-	if(!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
+  const int object_flag = kernel_tex_fetch(__object_flag, local_object);
+  if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
 #if BVH_FEATURE(BVH_MOTION)
-		Transform ob_itfm;
-		isect_t = bvh_instance_motion_push(kg,
-		                                   local_object,
-		                                   ray,
-		                                   &P,
-		                                   &dir,
-		                                   &idir,
-		                                   isect_t,
-		                                   &ob_itfm);
+    Transform ob_itfm;
+    isect_t = bvh_instance_motion_push(kg, local_object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
 #else
-		isect_t = bvh_instance_push(kg, local_object, ray, &P, &dir, &idir, isect_t);
+    isect_t = bvh_instance_push(kg, local_object, ray, &P, &dir, &idir, isect_t);
 #endif
-		object = local_object;
-	}
+    object = local_object;
+  }
 
 #if defined(__KERNEL_SSE2__)
-	const shuffle_swap_t shuf_identity = shuffle_swap_identity();
-	const shuffle_swap_t shuf_swap = shuffle_swap_swap();
+  const shuffle_swap_t shuf_identity = shuffle_swap_identity();
+  const shuffle_swap_t shuf_swap = shuffle_swap_swap();
 
-	const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
-	ssef Psplat[3], idirsplat[3];
+  const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
+  ssef Psplat[3], idirsplat[3];
 #  if BVH_FEATURE(BVH_HAIR)
-	ssef tnear(0.0f), tfar(isect_t);
+  ssef tnear(0.0f), tfar(isect_t);
 #  endif
-	shuffle_swap_t shufflexyz[3];
+  shuffle_swap_t shufflexyz[3];
 
-	Psplat[0] = ssef(P.x);
-	Psplat[1] = ssef(P.y);
-	Psplat[2] = ssef(P.z);
+  Psplat[0] = ssef(P.x);
+  Psplat[1] = ssef(P.y);
+  Psplat[2] = ssef(P.z);
 
-	ssef tsplat(0.0f, 0.0f, -isect_t, -isect_t);
+  ssef tsplat(0.0f, 0.0f, -isect_t, -isect_t);
 
-	gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
+  gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
 #endif
 
-	/* traversal loop */
-	do {
-		do {
-			/* traverse internal nodes */
-			while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
-				int node_addr_child1, traverse_mask;
-				float dist[2];
-				float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
+  /* traversal loop */
+  do {
+    do {
+      /* traverse internal nodes */
+      while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
+        int node_addr_child1, traverse_mask;
+        float dist[2];
+        float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
 
 #if !defined(__KERNEL_SSE2__)
-				traverse_mask = NODE_INTERSECT(kg,
-				                               P,
+        traverse_mask = NODE_INTERSECT(kg,
+                                       P,
 #  if BVH_FEATURE(BVH_HAIR)
-				                               dir,
+                                       dir,
 #  endif
-				                               idir,
-				                               isect_t,
-				                               node_addr,
-				                               PATH_RAY_ALL_VISIBILITY,
-				                               dist);
+                                       idir,
+                                       isect_t,
+                                       node_addr,
+                                       PATH_RAY_ALL_VISIBILITY,
+                                       dist);
 #else  // __KERNEL_SSE2__
-				traverse_mask = NODE_INTERSECT(kg,
-				                               P,
-				                               dir,
+        traverse_mask = NODE_INTERSECT(kg,
+                                       P,
+                                       dir,
 #  if BVH_FEATURE(BVH_HAIR)
-				                               tnear,
-				                               tfar,
+                                       tnear,
+                                       tfar,
 #  endif
-				                               tsplat,
-				                               Psplat,
-				                               idirsplat,
-				                               shufflexyz,
-				                               node_addr,
-				                               PATH_RAY_ALL_VISIBILITY,
-				                               dist);
+                                       tsplat,
+                                       Psplat,
+                                       idirsplat,
+                                       shufflexyz,
+                                       node_addr,
+                                       PATH_RAY_ALL_VISIBILITY,
+                                       dist);
 #endif  // __KERNEL_SSE2__
 
-				node_addr = __float_as_int(cnodes.z);
-				node_addr_child1 = __float_as_int(cnodes.w);
+        node_addr = __float_as_int(cnodes.z);
+        node_addr_child1 = __float_as_int(cnodes.w);
 
-				if(traverse_mask == 3) {
-					/* Both children were intersected, push the farther one. */
-					bool is_closest_child1 = (dist[1] < dist[0]);
-					if(is_closest_child1) {
-						int tmp = node_addr;
-						node_addr = node_addr_child1;
-						node_addr_child1 = tmp;
-					}
+        if (traverse_mask == 3) {
+          /* Both children were intersected, push the farther one. */
+          bool is_closest_child1 = (dist[1] < dist[0]);
+          if (is_closest_child1) {
+            int tmp = node_addr;
+            node_addr = node_addr_child1;
+            node_addr_child1 = tmp;
+          }
 
-					++stack_ptr;
-					kernel_assert(stack_ptr < BVH_STACK_SIZE);
-					traversal_stack[stack_ptr] = node_addr_child1;
-				}
-				else {
-					/* One child was intersected. */
-					if(traverse_mask == 2) {
-						node_addr = node_addr_child1;
-					}
-					else if(traverse_mask == 0) {
-						/* Neither child was intersected. */
-						node_addr = traversal_stack[stack_ptr];
-						--stack_ptr;
-					}
-				}
-			}
+          ++stack_ptr;
+          kernel_assert(stack_ptr < BVH_STACK_SIZE);
+          traversal_stack[stack_ptr] = node_addr_child1;
+        }
+        else {
+          /* One child was intersected. */
+          if (traverse_mask == 2) {
+            node_addr = node_addr_child1;
+          }
+          else if (traverse_mask == 0) {
+            /* Neither child was intersected. */
+            node_addr = traversal_stack[stack_ptr];
+            --stack_ptr;
+          }
+        }
+      }
 
-			/* if node is leaf, fetch triangle list */
-			if(node_addr < 0) {
-				float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1));
-				int prim_addr = __float_as_int(leaf.x);
+      /* if node is leaf, fetch triangle list */
+      if (node_addr < 0) {
+        float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
+        int prim_addr = __float_as_int(leaf.x);
 
-				const int prim_addr2 = __float_as_int(leaf.y);
-				const uint type = __float_as_int(leaf.w);
+        const int prim_addr2 = __float_as_int(leaf.y);
+        const uint type = __float_as_int(leaf.w);
 
-				/* pop */
-				node_addr = traversal_stack[stack_ptr];
-				--stack_ptr;
+        /* pop */
+        node_addr = traversal_stack[stack_ptr];
+        --stack_ptr;
 
-				/* primitive intersection */
-				switch(type & PRIMITIVE_ALL) {
-					case PRIMITIVE_TRIANGLE: {
-						/* intersect ray against primitive */
-						for(; prim_addr < prim_addr2; prim_addr++) {
-							kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
-							if(triangle_intersect_local(kg,
-							                            local_isect,
-							                            P,
-							                            dir,
-							                            object,
-							                            local_object,
-							                            prim_addr,
-							                            isect_t,
-							                            lcg_state,
-							                            max_hits)) {
-								return true;
-							}
-						}
-						break;
-					}
+        /* primitive intersection */
+        switch (type & PRIMITIVE_ALL) {
+          case PRIMITIVE_TRIANGLE: {
+            /* intersect ray against primitive */
+            for (; prim_addr < prim_addr2; prim_addr++) {
+              kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
+              if (triangle_intersect_local(kg,
+                                           local_isect,
+                                           P,
+                                           dir,
+                                           object,
+                                           local_object,
+                                           prim_addr,
+                                           isect_t,
+                                           lcg_state,
+                                           max_hits)) {
+                return true;
+              }
+            }
+            break;
+          }
 #if BVH_FEATURE(BVH_MOTION)
-					case PRIMITIVE_MOTION_TRIANGLE: {
-						/* intersect ray against primitive */
-						for(; prim_addr < prim_addr2; prim_addr++) {
-							kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
-							if(motion_triangle_intersect_local(kg,
-							                                   local_isect,
-							                                   P,
-							                                   dir,
-							                                   ray->time,
-							                                   object,
-							                                   local_object,
-							                                   prim_addr,
-							                                   isect_t,
-							                                   lcg_state,
-							                                   max_hits)) {
-								return true;
-							}
-						}
-						break;
-					}
+          case PRIMITIVE_MOTION_TRIANGLE: {
+            /* intersect ray against primitive */
+            for (; prim_addr < prim_addr2; prim_addr++) {
+              kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
+              if (motion_triangle_intersect_local(kg,
+                                                  local_isect,
+                                                  P,
+                                                  dir,
+                                                  ray->time,
+                                                  object,
+                                                  local_object,
+                                                  prim_addr,
+                                                  isect_t,
+                                                  lcg_state,
+                                                  max_hits)) {
+                return true;
+              }
+            }
+            break;
+          }
 #endif
-					default: {
-						break;
-					}
-				}
-			}
-		} while(node_addr != ENTRYPOINT_SENTINEL);
-	} while(node_addr != ENTRYPOINT_SENTINEL);
+          default: {
+            break;
+          }
+        }
+      }
+    } while (node_addr != ENTRYPOINT_SENTINEL);
+  } while (node_addr != ENTRYPOINT_SENTINEL);
 
-	return false;
+  return false;
 }
 
 ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg,
@@ -254,35 +247,20 @@ ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg,
                                          uint *lcg_state,
                                          int max_hits)
 {
-	switch(kernel_data.bvh.bvh_layout) {
+  switch (kernel_data.bvh.bvh_layout) {
 #ifdef __KERNEL_AVX2__
-		case BVH_LAYOUT_BVH8:
-			return BVH_FUNCTION_FULL_NAME(OBVH)(kg,
-			                                    ray,
-			                                    local_isect,
-			                                    local_object,
-			                                    lcg_state,
-			                                    max_hits);
+    case BVH_LAYOUT_BVH8:
+      return BVH_FUNCTION_FULL_NAME(OBVH)(kg, ray, local_isect, local_object, lcg_state, max_hits);
 #endif
 #ifdef __QBVH__
-		case BVH_LAYOUT_BVH4:
-			return BVH_FUNCTION_FULL_NAME(QBVH)(kg,
-			                                    ray,
-			                                    local_isect,
-			                                    local_object,
-			                                    lcg_state,
-			                                    max_hits);
+    case BVH_LAYOUT_BVH4:
+      return BVH_FUNCTION_FULL_NAME(QBVH)(kg, ray, local_isect, local_object, lcg_state, max_hits);
 #endif
-		case BVH_LAYOUT_BVH2:
-			return BVH_FUNCTION_FULL_NAME(BVH)(kg,
-			                                   ray,
-			                                   local_isect,
-			                                   local_object,
-			                                   lcg_state,
-			                                   max_hits);
-	}
-	kernel_assert(!"Should not happen");
-	return false;
+    case BVH_LAYOUT_BVH2:
+      return BVH_FUNCTION_FULL_NAME(BVH)(kg, ray, local_isect, local_object, lcg_state, max_hits);
+  }
+  kernel_assert(!"Should not happen");
+  return false;
 }
 
 #undef BVH_FUNCTION_NAME
diff --git a/intern/cycles/kernel/bvh/bvh_nodes.h b/intern/cycles/kernel/bvh/bvh_nodes.h
index 060b3934a41..042630121c8 100644
--- a/intern/cycles/kernel/bvh/bvh_nodes.h
+++ b/intern/cycles/kernel/bvh/bvh_nodes.h
@@ -20,12 +20,12 @@ ccl_device_forceinline Transform bvh_unaligned_node_fetch_space(KernelGlobals *k
                                                                 int node_addr,
                                                                 int child)
 {
-	Transform space;
-	const int child_addr = node_addr + child * 3;
-	space.x = kernel_tex_fetch(__bvh_nodes, child_addr+1);
-	space.y = kernel_tex_fetch(__bvh_nodes, child_addr+2);
-	space.z = kernel_tex_fetch(__bvh_nodes, child_addr+3);
-	return space;
+  Transform space;
+  const int child_addr = node_addr + child * 3;
+  space.x = kernel_tex_fetch(__bvh_nodes, child_addr + 1);
+  space.y = kernel_tex_fetch(__bvh_nodes, child_addr + 2);
+  space.z = kernel_tex_fetch(__bvh_nodes, child_addr + 3);
+  return space;
 }
 
 #if !defined(__KERNEL_SSE2__)
@@ -38,42 +38,41 @@ ccl_device_forceinline int bvh_aligned_node_intersect(KernelGlobals *kg,
                                                       float dist[2])
 {
 
-	/* fetch node data */
-	float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
-	float4 node0 = kernel_tex_fetch(__bvh_nodes, node_addr+1);
-	float4 node1 = kernel_tex_fetch(__bvh_nodes, node_addr+2);
-	float4 node2 = kernel_tex_fetch(__bvh_nodes, node_addr+3);
-
-	/* intersect ray against child nodes */
-	float c0lox = (node0.x - P.x) * idir.x;
-	float c0hix = (node0.z - P.x) * idir.x;
-	float c0loy = (node1.x - P.y) * idir.y;
-	float c0hiy = (node1.z - P.y) * idir.y;
-	float c0loz = (node2.x - P.z) * idir.z;
-	float c0hiz = (node2.z - P.z) * idir.z;
-	float c0min = max4(0.0f, min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz));
-	float c0max = min4(t, max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz));
-
-	float c1lox = (node0.y - P.x) * idir.x;
-	float c1hix = (node0.w - P.x) * idir.x;
-	float c1loy = (node1.y - P.y) * idir.y;
-	float c1hiy = (node1.w - P.y) * idir.y;
-	float c1loz = (node2.y - P.z) * idir.z;
-	float c1hiz = (node2.w - P.z) * idir.z;
-	float c1min = max4(0.0f, min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz));
-	float c1max = min4(t, max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz));
-
-	dist[0] = c0min;
-	dist[1] = c1min;
-
-#ifdef __VISIBILITY_FLAG__
-	/* this visibility test gives a 5% performance hit, how to solve? */
-	return (((c0max >= c0min) && (__float_as_uint(cnodes.x) & visibility))? 1: 0) |
-	       (((c1max >= c1min) && (__float_as_uint(cnodes.y) & visibility))? 2: 0);
-#else
-	return ((c0max >= c0min)? 1: 0) |
-	       ((c1max >= c1min)? 2: 0);
-#endif
+  /* fetch node data */
+  float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
+  float4 node0 = kernel_tex_fetch(__bvh_nodes, node_addr + 1);
+  float4 node1 = kernel_tex_fetch(__bvh_nodes, node_addr + 2);
+  float4 node2 = kernel_tex_fetch(__bvh_nodes, node_addr + 3);
+
+  /* intersect ray against child nodes */
+  float c0lox = (node0.x - P.x) * idir.x;
+  float c0hix = (node0.z - P.x) * idir.x;
+  float c0loy = (node1.x - P.y) * idir.y;
+  float c0hiy = (node1.z - P.y) * idir.y;
+  float c0loz = (node2.x - P.z) * idir.z;
+  float c0hiz = (node2.z - P.z) * idir.z;
+  float c0min = max4(0.0f, min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz));
+  float c0max = min4(t, max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz));
+
+  float c1lox = (node0.y - P.x) * idir.x;
+  float c1hix = (node0.w - P.x) * idir.x;
+  float c1loy = (node1.y - P.y) * idir.y;
+  float c1hiy = (node1.w - P.y) * idir.y;
+  float c1loz = (node2.y - P.z) * idir.z;
+  float c1hiz = (node2.w - P.z) * idir.z;
+  float c1min = max4(0.0f, min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz));
+  float c1max = min4(t, max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz));
+
+  dist[0] = c0min;
+  dist[1] = c1min;
+
+#  ifdef __VISIBILITY_FLAG__
+  /* this visibility test gives a 5% performance hit, how to solve? */
+  return (((c0max >= c0min) && (__float_as_uint(cnodes.x) & visibility)) ? 1 : 0) |
+         (((c1max >= c1min) && (__float_as_uint(cnodes.y) & visibility)) ? 2 : 0);
+#  else
+  return ((c0max >= c0min) ? 1 : 0) | ((c1max >= c1min) ? 2 : 0);
+#  endif
 }
 
 ccl_device_forceinline int bvh_aligned_node_intersect_robust(KernelGlobals *kg,
@@ -87,118 +86,115 @@ ccl_device_forceinline int bvh_aligned_node_intersect_robust(KernelGlobals *kg,
                                                              float dist[2])
 {
 
-	/* fetch node data */
-	float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
-	float4 node0 = kernel_tex_fetch(__bvh_nodes, node_addr+1);
-	float4 node1 = kernel_tex_fetch(__bvh_nodes, node_addr+2);
-	float4 node2 = kernel_tex_fetch(__bvh_nodes, node_addr+3);
-
-	/* intersect ray against child nodes */
-	float c0lox = (node0.x - P.x) * idir.x;
-	float c0hix = (node0.z - P.x) * idir.x;
-	float c0loy = (node1.x - P.y) * idir.y;
-	float c0hiy = (node1.z - P.y) * idir.y;
-	float c0loz = (node2.x - P.z) * idir.z;
-	float c0hiz = (node2.z - P.z) * idir.z;
-	float c0min = max4(0.0f, min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz));
-	float c0max = min4(t, max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz));
-
-	float c1lox = (node0.y - P.x) * idir.x;
-	float c1hix = (node0.w - P.x) * idir.x;
-	float c1loy = (node1.y - P.y) * idir.y;
-	float c1hiy = (node1.w - P.y) * idir.y;
-	float c1loz = (node2.y - P.z) * idir.z;
-	float c1hiz = (node2.w - P.z) * idir.z;
-	float c1min = max4(0.0f, min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz));
-	float c1max = min4(t, max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz));
-
-	if(difl != 0.0f) {
-		float hdiff = 1.0f + difl;
-		float ldiff = 1.0f - difl;
-		if(__float_as_int(cnodes.z) & PATH_RAY_CURVE) {
-			c0min = max(ldiff * c0min, c0min - extmax);
-			c0max = min(hdiff * c0max, c0max + extmax);
-		}
-		if(__float_as_int(cnodes.w) & PATH_RAY_CURVE) {
-			c1min = max(ldiff * c1min, c1min - extmax);
-			c1max = min(hdiff * c1max, c1max + extmax);
-		}
-	}
-
-	dist[0] = c0min;
-	dist[1] = c1min;
-
-#ifdef __VISIBILITY_FLAG__
-	/* this visibility test gives a 5% performance hit, how to solve? */
-	return (((c0max >= c0min) && (__float_as_uint(cnodes.x) & visibility))? 1: 0) |
-	       (((c1max >= c1min) && (__float_as_uint(cnodes.y) & visibility))? 2: 0);
-#else
-	return ((c0max >= c0min)? 1: 0) |
-	       ((c1max >= c1min)? 2: 0);
-#endif
+  /* fetch node data */
+  float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
+  float4 node0 = kernel_tex_fetch(__bvh_nodes, node_addr + 1);
+  float4 node1 = kernel_tex_fetch(__bvh_nodes, node_addr + 2);
+  float4 node2 = kernel_tex_fetch(__bvh_nodes, node_addr + 3);
+
+  /* intersect ray against child nodes */
+  float c0lox = (node0.x - P.x) * idir.x;
+  float c0hix = (node0.z - P.x) * idir.x;
+  float c0loy = (node1.x - P.y) * idir.y;
+  float c0hiy = (node1.z - P.y) * idir.y;
+  float c0loz = (node2.x - P.z) * idir.z;
+  float c0hiz = (node2.z - P.z) * idir.z;
+  float c0min = max4(0.0f, min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz));
+  float c0max = min4(t, max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz));
+
+  float c1lox = (node0.y - P.x) * idir.x;
+  float c1hix = (node0.w - P.x) * idir.x;
+  float c1loy = (node1.y - P.y) * idir.y;
+  float c1hiy = (node1.w - P.y) * idir.y;
+  float c1loz = (node2.y - P.z) * idir.z;
+  float c1hiz = (node2.w - P.z) * idir.z;
+  float c1min = max4(0.0f, min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz));
+  float c1max = min4(t, max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz));
+
+  if (difl != 0.0f) {
+    float hdiff = 1.0f + difl;
+    float ldiff = 1.0f - difl;
+    if (__float_as_int(cnodes.z) & PATH_RAY_CURVE) {
+      c0min = max(ldiff * c0min, c0min - extmax);
+      c0max = min(hdiff * c0max, c0max + extmax);
+    }
+    if (__float_as_int(cnodes.w) & PATH_RAY_CURVE) {
+      c1min = max(ldiff * c1min, c1min - extmax);
+      c1max = min(hdiff * c1max, c1max + extmax);
+    }
+  }
+
+  dist[0] = c0min;
+  dist[1] = c1min;
+
+#  ifdef __VISIBILITY_FLAG__
+  /* this visibility test gives a 5% performance hit, how to solve? */
+  return (((c0max >= c0min) && (__float_as_uint(cnodes.x) & visibility)) ? 1 : 0) |
+         (((c1max >= c1min) && (__float_as_uint(cnodes.y) & visibility)) ? 2 : 0);
+#  else
+  return ((c0max >= c0min) ? 1 : 0) | ((c1max >= c1min) ? 2 : 0);
+#  endif
 }
 
-ccl_device_forceinline bool bvh_unaligned_node_intersect_child(
-        KernelGlobals *kg,
-        const float3 P,
-        const float3 dir,
-        const float t,
-        int node_addr,
-        int child,
-        float dist[2])
+ccl_device_forceinline bool bvh_unaligned_node_intersect_child(KernelGlobals *kg,
+                                                               const float3 P,
+                                                               const float3 dir,
+                                                               const float t,
+                                                               int node_addr,
+                                                               int child,
+                                                               float dist[2])
 {
-	Transform space  = bvh_unaligned_node_fetch_space(kg, node_addr, child);
-	float3 aligned_dir = transform_direction(&space, dir);
-	float3 aligned_P = transform_point(&space, P);
-	float3 nrdir = -bvh_inverse_direction(aligned_dir);
-	float3 lower_xyz = aligned_P * nrdir;
-	float3 upper_xyz = lower_xyz - nrdir;
-	const float near_x = min(lower_xyz.x, upper_xyz.x);
-	const float near_y = min(lower_xyz.y, upper_xyz.y);
-	const float near_z = min(lower_xyz.z, upper_xyz.z);
-	const float far_x  = max(lower_xyz.x, upper_xyz.x);
-	const float far_y  = max(lower_xyz.y, upper_xyz.y);
-	const float far_z  = max(lower_xyz.z, upper_xyz.z);
-	const float tnear   = max4(0.0f, near_x, near_y, near_z);
-	const float tfar    = min4(t, far_x, far_y, far_z);
-	*dist = tnear;
-	return tnear <= tfar;
+  Transform space = bvh_unaligned_node_fetch_space(kg, node_addr, child);
+  float3 aligned_dir = transform_direction(&space, dir);
+  float3 aligned_P = transform_point(&space, P);
+  float3 nrdir = -bvh_inverse_direction(aligned_dir);
+  float3 lower_xyz = aligned_P * nrdir;
+  float3 upper_xyz = lower_xyz - nrdir;
+  const float near_x = min(lower_xyz.x, upper_xyz.x);
+  const float near_y = min(lower_xyz.y, upper_xyz.y);
+  const float near_z = min(lower_xyz.z, upper_xyz.z);
+  const float far_x = max(lower_xyz.x, upper_xyz.x);
+  const float far_y = max(lower_xyz.y, upper_xyz.y);
+  const float far_z = max(lower_xyz.z, upper_xyz.z);
+  const float tnear = max4(0.0f, near_x, near_y, near_z);
+  const float tfar = min4(t, far_x, far_y, far_z);
+  *dist = tnear;
+  return tnear <= tfar;
 }
 
-ccl_device_forceinline bool bvh_unaligned_node_intersect_child_robust(
-        KernelGlobals *kg,
-        const float3 P,
-        const float3 dir,
-        const float t,
-        const float difl,
-        int node_addr,
-        int child,
-        float dist[2])
+ccl_device_forceinline bool bvh_unaligned_node_intersect_child_robust(KernelGlobals *kg,
+                                                                      const float3 P,
+                                                                      const float3 dir,
+                                                                      const float t,
+                                                                      const float difl,
+                                                                      int node_addr,
+                                                                      int child,
+                                                                      float dist[2])
 {
-	Transform space  = bvh_unaligned_node_fetch_space(kg, node_addr, child);
-	float3 aligned_dir = transform_direction(&space, dir);
-	float3 aligned_P = transform_point(&space, P);
-	float3 nrdir = -bvh_inverse_direction(aligned_dir);
-	float3 tLowerXYZ = aligned_P * nrdir;
-	float3 tUpperXYZ = tLowerXYZ - nrdir;
-	const float near_x = min(tLowerXYZ.x, tUpperXYZ.x);
-	const float near_y = min(tLowerXYZ.y, tUpperXYZ.y);
-	const float near_z = min(tLowerXYZ.z, tUpperXYZ.z);
-	const float far_x  = max(tLowerXYZ.x, tUpperXYZ.x);
-	const float far_y  = max(tLowerXYZ.y, tUpperXYZ.y);
-	const float far_z  = max(tLowerXYZ.z, tUpperXYZ.z);
-	const float tnear   = max4(0.0f, near_x, near_y, near_z);
-	const float tfar    = min4(t, far_x, far_y, far_z);
-	*dist = tnear;
-	if(difl != 0.0f) {
-		/* TODO(sergey): Same as for QBVH, needs a proper use. */
-		const float round_down = 1.0f - difl;
-		const float round_up = 1.0f + difl;
-		return round_down*tnear <= round_up*tfar;
-	}
-	else {
-		return tnear <= tfar;
-	}
+  Transform space = bvh_unaligned_node_fetch_space(kg, node_addr, child);
+  float3 aligned_dir = transform_direction(&space, dir);
+  float3 aligned_P = transform_point(&space, P);
+  float3 nrdir = -bvh_inverse_direction(aligned_dir);
+  float3 tLowerXYZ = aligned_P * nrdir;
+  float3 tUpperXYZ = tLowerXYZ - nrdir;
+  const float near_x = min(tLowerXYZ.x, tUpperXYZ.x);
+  const float near_y = min(tLowerXYZ.y, tUpperXYZ.y);
+  const float near_z = min(tLowerXYZ.z, tUpperXYZ.z);
+  const float far_x = max(tLowerXYZ.x, tUpperXYZ.x);
+  const float far_y = max(tLowerXYZ.y, tUpperXYZ.y);
+  const float far_z = max(tLowerXYZ.z, tUpperXYZ.z);
+  const float tnear = max4(0.0f, near_x, near_y, near_z);
+  const float tfar = min4(t, far_x, far_y, far_z);
+  *dist = tnear;
+  if (difl != 0.0f) {
+    /* TODO(sergey): Same as for QBVH, needs a proper use. */
+    const float round_down = 1.0f - difl;
+    const float round_up = 1.0f + difl;
+    return round_down * tnear <= round_up * tfar;
+  }
+  else {
+    return tnear <= tfar;
+  }
 }
 
 ccl_device_forceinline int bvh_unaligned_node_intersect(KernelGlobals *kg,
@@ -210,25 +206,25 @@ ccl_device_forceinline int bvh_unaligned_node_intersect(KernelGlobals *kg,
                                                         const uint visibility,
                                                         float dist[2])
 {
-	int mask = 0;
-	float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
-	if(bvh_unaligned_node_intersect_child(kg, P, dir, t, node_addr, 0, &dist[0])) {
-#ifdef __VISIBILITY_FLAG__
-		if((__float_as_uint(cnodes.x) & visibility))
-#endif
-		{
-			mask |= 1;
-		}
-	}
-	if(bvh_unaligned_node_intersect_child(kg, P, dir, t, node_addr, 1, &dist[1])) {
-#ifdef __VISIBILITY_FLAG__
-		if((__float_as_uint(cnodes.y) & visibility))
-#endif
-		{
-			mask |= 2;
-		}
-	}
-	return mask;
+  int mask = 0;
+  float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
+  if (bvh_unaligned_node_intersect_child(kg, P, dir, t, node_addr, 0, &dist[0])) {
+#  ifdef __VISIBILITY_FLAG__
+    if ((__float_as_uint(cnodes.x) & visibility))
+#  endif
+    {
+      mask |= 1;
+    }
+  }
+  if (bvh_unaligned_node_intersect_child(kg, P, dir, t, node_addr, 1, &dist[1])) {
+#  ifdef __VISIBILITY_FLAG__
+    if ((__float_as_uint(cnodes.y) & visibility))
+#  endif
+    {
+      mask |= 2;
+    }
+  }
+  return mask;
 }
 
 ccl_device_forceinline int bvh_unaligned_node_intersect_robust(KernelGlobals *kg,
@@ -242,25 +238,25 @@ ccl_device_forceinline int bvh_unaligned_node_intersect_robust(KernelGlobals *kg
                                                                const uint visibility,
                                                                float dist[2])
 {
-	int mask = 0;
-	float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
-	if(bvh_unaligned_node_intersect_child_robust(kg, P, dir, t, difl, node_addr, 0, &dist[0])) {
-#ifdef __VISIBILITY_FLAG__
-		if((__float_as_uint(cnodes.x) & visibility))
-#endif
-		{
-			mask |= 1;
-		}
-	}
-	if(bvh_unaligned_node_intersect_child_robust(kg, P, dir, t, difl, node_addr, 1, &dist[1])) {
-#ifdef __VISIBILITY_FLAG__
-		if((__float_as_uint(cnodes.y) & visibility))
-#endif
-		{
-			mask |= 2;
-		}
-	}
-	return mask;
+  int mask = 0;
+  float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
+  if (bvh_unaligned_node_intersect_child_robust(kg, P, dir, t, difl, node_addr, 0, &dist[0])) {
+#  ifdef __VISIBILITY_FLAG__
+    if ((__float_as_uint(cnodes.x) & visibility))
+#  endif
+    {
+      mask |= 1;
+    }
+  }
+  if (bvh_unaligned_node_intersect_child_robust(kg, P, dir, t, difl, node_addr, 1, &dist[1])) {
+#  ifdef __VISIBILITY_FLAG__
+    if ((__float_as_uint(cnodes.y) & visibility))
+#  endif
+    {
+      mask |= 2;
+    }
+  }
+  return mask;
 }
 
 ccl_device_forceinline int bvh_node_intersect(KernelGlobals *kg,
@@ -272,26 +268,13 @@ ccl_device_forceinline int bvh_node_intersect(KernelGlobals *kg,
                                               const uint visibility,
                                               float dist[2])
 {
-	float4 node = kernel_tex_fetch(__bvh_nodes, node_addr);
-	if(__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
-		return bvh_unaligned_node_intersect(kg,
-		                                    P,
-		                                    dir,
-		                                    idir,
-		                                    t,
-		                                    node_addr,
-		                                    visibility,
-		                                    dist);
-	}
-	else {
-		return bvh_aligned_node_intersect(kg,
-		                                  P,
-		                                  idir,
-		                                  t,
-		                                  node_addr,
-		                                  visibility,
-		                                  dist);
-	}
+  float4 node = kernel_tex_fetch(__bvh_nodes, node_addr);
+  if (__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
+    return bvh_unaligned_node_intersect(kg, P, dir, idir, t, node_addr, visibility, dist);
+  }
+  else {
+    return bvh_aligned_node_intersect(kg, P, idir, t, node_addr, visibility, dist);
+  }
 }
 
 ccl_device_forceinline int bvh_node_intersect_robust(KernelGlobals *kg,
@@ -305,279 +288,244 @@ ccl_device_forceinline int bvh_node_intersect_robust(KernelGlobals *kg,
                                                      const uint visibility,
                                                      float dist[2])
 {
-	float4 node = kernel_tex_fetch(__bvh_nodes, node_addr);
-	if(__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
-		return bvh_unaligned_node_intersect_robust(kg,
-		                                           P,
-		                                           dir,
-		                                           idir,
-		                                           t,
-		                                           difl,
-		                                           extmax,
-		                                           node_addr,
-		                                           visibility,
-		                                           dist);
-	}
-	else {
-		return bvh_aligned_node_intersect_robust(kg,
-		                                         P,
-		                                         idir,
-		                                         t,
-		                                         difl,
-		                                         extmax,
-		                                         node_addr,
-		                                         visibility,
-		                                         dist);
-	}
+  float4 node = kernel_tex_fetch(__bvh_nodes, node_addr);
+  if (__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
+    return bvh_unaligned_node_intersect_robust(
+        kg, P, dir, idir, t, difl, extmax, node_addr, visibility, dist);
+  }
+  else {
+    return bvh_aligned_node_intersect_robust(
+        kg, P, idir, t, difl, extmax, node_addr, visibility, dist);
+  }
 }
-#else  /* !defined(__KERNEL_SSE2__) */
-
-int ccl_device_forceinline bvh_aligned_node_intersect(
-        KernelGlobals *kg,
-        const float3& P,
-        const float3& dir,
-        const ssef& tsplat,
-        const ssef Psplat[3],
-        const ssef idirsplat[3],
-        const shuffle_swap_t shufflexyz[3],
-        const int node_addr,
-        const uint visibility,
-        float dist[2])
+#else /* !defined(__KERNEL_SSE2__) */
+
+int ccl_device_forceinline bvh_aligned_node_intersect(KernelGlobals *kg,
+                                                      const float3 &P,
+                                                      const float3 &dir,
+                                                      const ssef &tsplat,
+                                                      const ssef Psplat[3],
+                                                      const ssef idirsplat[3],
+                                                      const shuffle_swap_t shufflexyz[3],
+                                                      const int node_addr,
+                                                      const uint visibility,
+                                                      float dist[2])
 {
-	/* Intersect two child bounding boxes, SSE3 version adapted from Embree */
-	const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
+  /* Intersect two child bounding boxes, SSE3 version adapted from Embree */
+  const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
 
-	/* fetch node data */
-	const ssef *bvh_nodes = (ssef*)kg->__bvh_nodes.data + node_addr;
+  /* fetch node data */
+  const ssef *bvh_nodes = (ssef *)kg->__bvh_nodes.data + node_addr;
 
-	/* intersect ray against child nodes */
-	const ssef tminmaxx = (shuffle_swap(bvh_nodes[1], shufflexyz[0]) - Psplat[0]) * idirsplat[0];
-	const ssef tminmaxy = (shuffle_swap(bvh_nodes[2], shufflexyz[1]) - Psplat[1]) * idirsplat[1];
-	const ssef tminmaxz = (shuffle_swap(bvh_nodes[3], shufflexyz[2]) - Psplat[2]) * idirsplat[2];
+  /* intersect ray against child nodes */
+  const ssef tminmaxx = (shuffle_swap(bvh_nodes[1], shufflexyz[0]) - Psplat[0]) * idirsplat[0];
+  const ssef tminmaxy = (shuffle_swap(bvh_nodes[2], shufflexyz[1]) - Psplat[1]) * idirsplat[1];
+  const ssef tminmaxz = (shuffle_swap(bvh_nodes[3], shufflexyz[2]) - Psplat[2]) * idirsplat[2];
 
-	/* calculate { c0min, c1min, -c0max, -c1max} */
-	ssef minmax = max(max(tminmaxx, tminmaxy), max(tminmaxz, tsplat));
-	const ssef tminmax = minmax ^ pn;
-	const sseb lrhit = tminmax <= shuffle<2, 3, 0, 1>(tminmax);
+  /* calculate { c0min, c1min, -c0max, -c1max} */
+  ssef minmax = max(max(tminmaxx, tminmaxy), max(tminmaxz, tsplat));
+  const ssef tminmax = minmax ^ pn;
+  const sseb lrhit = tminmax <= shuffle<2, 3, 0, 1>(tminmax);
 
-	dist[0] = tminmax[0];
-	dist[1] = tminmax[1];
+  dist[0] = tminmax[0];
+  dist[1] = tminmax[1];
 
-	int mask = movemask(lrhit);
+  int mask = movemask(lrhit);
 
 #  ifdef __VISIBILITY_FLAG__
-	/* this visibility test gives a 5% performance hit, how to solve? */
-	float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
-	int cmask = (((mask & 1) && (__float_as_uint(cnodes.x) & visibility))? 1: 0) |
-	            (((mask & 2) && (__float_as_uint(cnodes.y) & visibility))? 2: 0);
-	return cmask;
+  /* this visibility test gives a 5% performance hit, how to solve? */
+  float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
+  int cmask = (((mask & 1) && (__float_as_uint(cnodes.x) & visibility)) ? 1 : 0) |
+              (((mask & 2) && (__float_as_uint(cnodes.y) & visibility)) ? 2 : 0);
+  return cmask;
 #  else
-	return mask & 3;
+  return mask & 3;
 #  endif
 }
 
-ccl_device_forceinline int bvh_aligned_node_intersect_robust(
-        KernelGlobals *kg,
-        const float3& P,
-        const float3& dir,
-        const ssef& tsplat,
-        const ssef Psplat[3],
-        const ssef idirsplat[3],
-        const shuffle_swap_t shufflexyz[3],
-        const float difl,
-        const float extmax,
-        const int nodeAddr,
-        const uint visibility,
-        float dist[2])
+ccl_device_forceinline int bvh_aligned_node_intersect_robust(KernelGlobals *kg,
+                                                             const float3 &P,
+                                                             const float3 &dir,
+                                                             const ssef &tsplat,
+                                                             const ssef Psplat[3],
+                                                             const ssef idirsplat[3],
+                                                             const shuffle_swap_t shufflexyz[3],
+                                                             const float difl,
+                                                             const float extmax,
+                                                             const int nodeAddr,
+                                                             const uint visibility,
+                                                             float dist[2])
 {
-	/* Intersect two child bounding boxes, SSE3 version adapted from Embree */
-	const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
-
-	/* fetch node data */
-	const ssef *bvh_nodes = (ssef*)kg->__bvh_nodes.data + nodeAddr;
-
-	/* intersect ray against child nodes */
-	const ssef tminmaxx = (shuffle_swap(bvh_nodes[1], shufflexyz[0]) - Psplat[0]) * idirsplat[0];
-	const ssef tminmaxy = (shuffle_swap(bvh_nodes[2], shufflexyz[1]) - Psplat[1]) * idirsplat[1];
-	const ssef tminmaxz = (shuffle_swap(bvh_nodes[3], shufflexyz[2]) - Psplat[2]) * idirsplat[2];
-
-	/* calculate { c0min, c1min, -c0max, -c1max} */
-	ssef minmax = max(max(tminmaxx, tminmaxy), max(tminmaxz, tsplat));
-	const ssef tminmax = minmax ^ pn;
-
-	if(difl != 0.0f) {
-		float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0);
-		float4 *tminmaxview = (float4*)&tminmax;
-		float& c0min = tminmaxview->x, &c1min = tminmaxview->y;
-		float& c0max = tminmaxview->z, &c1max = tminmaxview->w;
-		float hdiff = 1.0f + difl;
-		float ldiff = 1.0f - difl;
-		if(__float_as_int(cnodes.x) & PATH_RAY_CURVE) {
-			c0min = max(ldiff * c0min, c0min - extmax);
-			c0max = min(hdiff * c0max, c0max + extmax);
-		}
-		if(__float_as_int(cnodes.y) & PATH_RAY_CURVE) {
-			c1min = max(ldiff * c1min, c1min - extmax);
-			c1max = min(hdiff * c1max, c1max + extmax);
-		}
-	}
-
-	const sseb lrhit = tminmax <= shuffle<2, 3, 0, 1>(tminmax);
-
-	dist[0] = tminmax[0];
-	dist[1] = tminmax[1];
-
-	int mask = movemask(lrhit);
+  /* Intersect two child bounding boxes, SSE3 version adapted from Embree */
+  const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
+
+  /* fetch node data */
+  const ssef *bvh_nodes = (ssef *)kg->__bvh_nodes.data + nodeAddr;
+
+  /* intersect ray against child nodes */
+  const ssef tminmaxx = (shuffle_swap(bvh_nodes[1], shufflexyz[0]) - Psplat[0]) * idirsplat[0];
+  const ssef tminmaxy = (shuffle_swap(bvh_nodes[2], shufflexyz[1]) - Psplat[1]) * idirsplat[1];
+  const ssef tminmaxz = (shuffle_swap(bvh_nodes[3], shufflexyz[2]) - Psplat[2]) * idirsplat[2];
+
+  /* calculate { c0min, c1min, -c0max, -c1max} */
+  ssef minmax = max(max(tminmaxx, tminmaxy), max(tminmaxz, tsplat));
+  const ssef tminmax = minmax ^ pn;
+
+  if (difl != 0.0f) {
+    float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr + 0);
+    float4 *tminmaxview = (float4 *)&tminmax;
+    float &c0min = tminmaxview->x, &c1min = tminmaxview->y;
+    float &c0max = tminmaxview->z, &c1max = tminmaxview->w;
+    float hdiff = 1.0f + difl;
+    float ldiff = 1.0f - difl;
+    if (__float_as_int(cnodes.x) & PATH_RAY_CURVE) {
+      c0min = max(ldiff * c0min, c0min - extmax);
+      c0max = min(hdiff * c0max, c0max + extmax);
+    }
+    if (__float_as_int(cnodes.y) & PATH_RAY_CURVE) {
+      c1min = max(ldiff * c1min, c1min - extmax);
+      c1max = min(hdiff * c1max, c1max + extmax);
+    }
+  }
+
+  const sseb lrhit = tminmax <= shuffle<2, 3, 0, 1>(tminmax);
+
+  dist[0] = tminmax[0];
+  dist[1] = tminmax[1];
+
+  int mask = movemask(lrhit);
 
 #  ifdef __VISIBILITY_FLAG__
-	/* this visibility test gives a 5% performance hit, how to solve? */
-	float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0);
-	int cmask = (((mask & 1) && (__float_as_uint(cnodes.x) & visibility))? 1: 0) |
-	            (((mask & 2) && (__float_as_uint(cnodes.y) & visibility))? 2: 0);
-	return cmask;
+  /* this visibility test gives a 5% performance hit, how to solve? */
+  float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr + 0);
+  int cmask = (((mask & 1) && (__float_as_uint(cnodes.x) & visibility)) ? 1 : 0) |
+              (((mask & 2) && (__float_as_uint(cnodes.y) & visibility)) ? 2 : 0);
+  return cmask;
 #  else
-	return mask & 3;
+  return mask & 3;
 #  endif
 }
 
 ccl_device_forceinline int bvh_unaligned_node_intersect(KernelGlobals *kg,
                                                         const float3 P,
                                                         const float3 dir,
-                                                        const ssef& isect_near,
-                                                        const ssef& isect_far,
+                                                        const ssef &isect_near,
+                                                        const ssef &isect_far,
                                                         const int node_addr,
                                                         const uint visibility,
                                                         float dist[2])
 {
-	Transform space0 = bvh_unaligned_node_fetch_space(kg, node_addr, 0);
-	Transform space1 = bvh_unaligned_node_fetch_space(kg, node_addr, 1);
-
-	float3 aligned_dir0 = transform_direction(&space0, dir),
-	       aligned_dir1 = transform_direction(&space1, dir);
-	float3 aligned_P0 = transform_point(&space0, P),
-	       aligned_P1 = transform_point(&space1, P);
-	float3 nrdir0 = -bvh_inverse_direction(aligned_dir0),
-	       nrdir1 = -bvh_inverse_direction(aligned_dir1);
-
-	ssef lower_x = ssef(aligned_P0.x * nrdir0.x,
-	                    aligned_P1.x * nrdir1.x,
-	                    0.0f, 0.0f),
-	     lower_y = ssef(aligned_P0.y * nrdir0.y,
-	                    aligned_P1.y * nrdir1.y,
-	                    0.0f,
-	                    0.0f),
-	     lower_z = ssef(aligned_P0.z * nrdir0.z,
-	                    aligned_P1.z * nrdir1.z,
-	                    0.0f,
-	                    0.0f);
-
-	ssef upper_x = lower_x - ssef(nrdir0.x, nrdir1.x, 0.0f, 0.0f),
-	     upper_y = lower_y - ssef(nrdir0.y, nrdir1.y, 0.0f, 0.0f),
-	     upper_z = lower_z - ssef(nrdir0.z, nrdir1.z, 0.0f, 0.0f);
-
-	ssef tnear_x = min(lower_x, upper_x);
-	ssef tnear_y = min(lower_y, upper_y);
-	ssef tnear_z = min(lower_z, upper_z);
-	ssef tfar_x = max(lower_x, upper_x);
-	ssef tfar_y = max(lower_y, upper_y);
-	ssef tfar_z = max(lower_z, upper_z);
-
-	const ssef tnear = max4(isect_near, tnear_x, tnear_y, tnear_z);
-	const ssef tfar = min4(isect_far, tfar_x, tfar_y, tfar_z);
-	sseb vmask = tnear <= tfar;
-	dist[0] = tnear.f[0];
-	dist[1] = tnear.f[1];
-
-	int mask = (int)movemask(vmask);
+  Transform space0 = bvh_unaligned_node_fetch_space(kg, node_addr, 0);
+  Transform space1 = bvh_unaligned_node_fetch_space(kg, node_addr, 1);
+
+  float3 aligned_dir0 = transform_direction(&space0, dir),
+         aligned_dir1 = transform_direction(&space1, dir);
+  float3 aligned_P0 = transform_point(&space0, P), aligned_P1 = transform_point(&space1, P);
+  float3 nrdir0 = -bvh_inverse_direction(aligned_dir0),
+         nrdir1 = -bvh_inverse_direction(aligned_dir1);
+
+  ssef lower_x = ssef(aligned_P0.x * nrdir0.x, aligned_P1.x * nrdir1.x, 0.0f, 0.0f),
+       lower_y = ssef(aligned_P0.y * nrdir0.y, aligned_P1.y * nrdir1.y, 0.0f, 0.0f),
+       lower_z = ssef(aligned_P0.z * nrdir0.z, aligned_P1.z * nrdir1.z, 0.0f, 0.0f);
+
+  ssef upper_x = lower_x - ssef(nrdir0.x, nrdir1.x, 0.0f, 0.0f),
+       upper_y = lower_y - ssef(nrdir0.y, nrdir1.y, 0.0f, 0.0f),
+       upper_z = lower_z - ssef(nrdir0.z, nrdir1.z, 0.0f, 0.0f);
+
+  ssef tnear_x = min(lower_x, upper_x);
+  ssef tnear_y = min(lower_y, upper_y);
+  ssef tnear_z = min(lower_z, upper_z);
+  ssef tfar_x = max(lower_x, upper_x);
+  ssef tfar_y = max(lower_y, upper_y);
+  ssef tfar_z = max(lower_z, upper_z);
+
+  const ssef tnear = max4(isect_near, tnear_x, tnear_y, tnear_z);
+  const ssef tfar = min4(isect_far, tfar_x, tfar_y, tfar_z);
+  sseb vmask = tnear <= tfar;
+  dist[0] = tnear.f[0];
+  dist[1] = tnear.f[1];
+
+  int mask = (int)movemask(vmask);
 
 #  ifdef __VISIBILITY_FLAG__
-	/* this visibility test gives a 5% performance hit, how to solve? */
-	float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
-	int cmask = (((mask & 1) && (__float_as_uint(cnodes.x) & visibility))? 1: 0) |
-	            (((mask & 2) && (__float_as_uint(cnodes.y) & visibility))? 2: 0);
-	return cmask;
+  /* this visibility test gives a 5% performance hit, how to solve? */
+  float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
+  int cmask = (((mask & 1) && (__float_as_uint(cnodes.x) & visibility)) ? 1 : 0) |
+              (((mask & 2) && (__float_as_uint(cnodes.y) & visibility)) ? 2 : 0);
+  return cmask;
 #  else
-	return mask & 3;
+  return mask & 3;
 #  endif
 }
 
 ccl_device_forceinline int bvh_unaligned_node_intersect_robust(KernelGlobals *kg,
                                                                const float3 P,
                                                                const float3 dir,
-                                                               const ssef& isect_near,
-                                                               const ssef& isect_far,
+                                                               const ssef &isect_near,
+                                                               const ssef &isect_far,
                                                                const float difl,
                                                                const int node_addr,
                                                                const uint visibility,
                                                                float dist[2])
 {
-	Transform space0 = bvh_unaligned_node_fetch_space(kg, node_addr, 0);
-	Transform space1 = bvh_unaligned_node_fetch_space(kg, node_addr, 1);
-
-	float3 aligned_dir0 = transform_direction(&space0, dir),
-	       aligned_dir1 = transform_direction(&space1, dir);
-	float3 aligned_P0 = transform_point(&space0, P),
-	       aligned_P1 = transform_point(&space1, P);
-	float3 nrdir0 = -bvh_inverse_direction(aligned_dir0),
-	       nrdir1 = -bvh_inverse_direction(aligned_dir1);
-
-	ssef lower_x = ssef(aligned_P0.x * nrdir0.x,
-	                    aligned_P1.x * nrdir1.x,
-	                    0.0f, 0.0f),
-	     lower_y = ssef(aligned_P0.y * nrdir0.y,
-	                    aligned_P1.y * nrdir1.y,
-	                    0.0f,
-	                    0.0f),
-	     lower_z = ssef(aligned_P0.z * nrdir0.z,
-	                    aligned_P1.z * nrdir1.z,
-	                    0.0f,
-	                    0.0f);
-
-	ssef upper_x = lower_x - ssef(nrdir0.x, nrdir1.x, 0.0f, 0.0f),
-	     upper_y = lower_y - ssef(nrdir0.y, nrdir1.y, 0.0f, 0.0f),
-	     upper_z = lower_z - ssef(nrdir0.z, nrdir1.z, 0.0f, 0.0f);
-
-	ssef tnear_x = min(lower_x, upper_x);
-	ssef tnear_y = min(lower_y, upper_y);
-	ssef tnear_z = min(lower_z, upper_z);
-	ssef tfar_x = max(lower_x, upper_x);
-	ssef tfar_y = max(lower_y, upper_y);
-	ssef tfar_z = max(lower_z, upper_z);
-
-	const ssef tnear = max4(isect_near, tnear_x, tnear_y, tnear_z);
-	const ssef tfar = min4(isect_far, tfar_x, tfar_y, tfar_z);
-	sseb vmask;
-	if(difl != 0.0f) {
-		const float round_down = 1.0f - difl;
-		const float round_up = 1.0f + difl;
-		vmask = round_down*tnear <= round_up*tfar;
-	}
-	else {
-		vmask = tnear <= tfar;
-	}
-
-	dist[0] = tnear.f[0];
-	dist[1] = tnear.f[1];
-
-	int mask = (int)movemask(vmask);
+  Transform space0 = bvh_unaligned_node_fetch_space(kg, node_addr, 0);
+  Transform space1 = bvh_unaligned_node_fetch_space(kg, node_addr, 1);
+
+  float3 aligned_dir0 = transform_direction(&space0, dir),
+         aligned_dir1 = transform_direction(&space1, dir);
+  float3 aligned_P0 = transform_point(&space0, P), aligned_P1 = transform_point(&space1, P);
+  float3 nrdir0 = -bvh_inverse_direction(aligned_dir0),
+         nrdir1 = -bvh_inverse_direction(aligned_dir1);
+
+  ssef lower_x = ssef(aligned_P0.x * nrdir0.x, aligned_P1.x * nrdir1.x, 0.0f, 0.0f),
+       lower_y = ssef(aligned_P0.y * nrdir0.y, aligned_P1.y * nrdir1.y, 0.0f, 0.0f),
+       lower_z = ssef(aligned_P0.z * nrdir0.z, aligned_P1.z * nrdir1.z, 0.0f, 0.0f);
+
+  ssef upper_x = lower_x - ssef(nrdir0.x, nrdir1.x, 0.0f, 0.0f),
+       upper_y = lower_y - ssef(nrdir0.y, nrdir1.y, 0.0f, 0.0f),
+       upper_z = lower_z - ssef(nrdir0.z, nrdir1.z, 0.0f, 0.0f);
+
+  ssef tnear_x = min(lower_x, upper_x);
+  ssef tnear_y = min(lower_y, upper_y);
+  ssef tnear_z = min(lower_z, upper_z);
+  ssef tfar_x = max(lower_x, upper_x);
+  ssef tfar_y = max(lower_y, upper_y);
+  ssef tfar_z = max(lower_z, upper_z);
+
+  const ssef tnear = max4(isect_near, tnear_x, tnear_y, tnear_z);
+  const ssef tfar = min4(isect_far, tfar_x, tfar_y, tfar_z);
+  sseb vmask;
+  if (difl != 0.0f) {
+    const float round_down = 1.0f - difl;
+    const float round_up = 1.0f + difl;
+    vmask = round_down * tnear <= round_up * tfar;
+  }
+  else {
+    vmask = tnear <= tfar;
+  }
+
+  dist[0] = tnear.f[0];
+  dist[1] = tnear.f[1];
+
+  int mask = (int)movemask(vmask);
 
 #  ifdef __VISIBILITY_FLAG__
-	/* this visibility test gives a 5% performance hit, how to solve? */
-	float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
-	int cmask = (((mask & 1) && (__float_as_uint(cnodes.x) & visibility))? 1: 0) |
-	            (((mask & 2) && (__float_as_uint(cnodes.y) & visibility))? 2: 0);
-	return cmask;
+  /* this visibility test gives a 5% performance hit, how to solve? */
+  float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
+  int cmask = (((mask & 1) && (__float_as_uint(cnodes.x) & visibility)) ? 1 : 0) |
+              (((mask & 2) && (__float_as_uint(cnodes.y) & visibility)) ? 2 : 0);
+  return cmask;
 #  else
-	return mask & 3;
+  return mask & 3;
 #  endif
 }
 
 ccl_device_forceinline int bvh_node_intersect(KernelGlobals *kg,
-                                              const float3& P,
-                                              const float3& dir,
-                                              const ssef& isect_near,
-                                              const ssef& isect_far,
-                                              const ssef& tsplat,
+                                              const float3 &P,
+                                              const float3 &dir,
+                                              const ssef &isect_near,
+                                              const ssef &isect_far,
+                                              const ssef &tsplat,
                                               const ssef Psplat[3],
                                               const ssef idirsplat[3],
                                               const shuffle_swap_t shufflexyz[3],
@@ -585,37 +533,23 @@ ccl_device_forceinline int bvh_node_intersect(KernelGlobals *kg,
                                               const uint visibility,
                                               float dist[2])
 {
-	float4 node = kernel_tex_fetch(__bvh_nodes, node_addr);
-	if(__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
-		return bvh_unaligned_node_intersect(kg,
-		                                    P,
-		                                    dir,
-		                                    isect_near,
-		                                    isect_far,
-		                                    node_addr,
-		                                    visibility,
-		                                    dist);
-	}
-	else {
-		return bvh_aligned_node_intersect(kg,
-		                                  P,
-		                                  dir,
-		                                  tsplat,
-		                                  Psplat,
-		                                  idirsplat,
-		                                  shufflexyz,
-		                                  node_addr,
-		                                  visibility,
-		                                  dist);
-	}
+  float4 node = kernel_tex_fetch(__bvh_nodes, node_addr);
+  if (__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
+    return bvh_unaligned_node_intersect(
+        kg, P, dir, isect_near, isect_far, node_addr, visibility, dist);
+  }
+  else {
+    return bvh_aligned_node_intersect(
+        kg, P, dir, tsplat, Psplat, idirsplat, shufflexyz, node_addr, visibility, dist);
+  }
 }
 
 ccl_device_forceinline int bvh_node_intersect_robust(KernelGlobals *kg,
-                                                     const float3& P,
-                                                     const float3& dir,
-                                                     const ssef& isect_near,
-                                                     const ssef& isect_far,
-                                                     const ssef& tsplat,
+                                                     const float3 &P,
+                                                     const float3 &dir,
+                                                     const ssef &isect_near,
+                                                     const ssef &isect_far,
+                                                     const ssef &tsplat,
                                                      const ssef Psplat[3],
                                                      const ssef idirsplat[3],
                                                      const shuffle_swap_t shufflexyz[3],
@@ -625,31 +559,24 @@ ccl_device_forceinline int bvh_node_intersect_robust(KernelGlobals *kg,
                                                      const uint visibility,
                                                      float dist[2])
 {
-	float4 node = kernel_tex_fetch(__bvh_nodes, node_addr);
-	if(__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
-		return bvh_unaligned_node_intersect_robust(kg,
-		                                           P,
-		                                           dir,
-		                                           isect_near,
-		                                           isect_far,
-		                                           difl,
-		                                           node_addr,
-		                                           visibility,
-		                                           dist);
-	}
-	else {
-		return bvh_aligned_node_intersect_robust(kg,
-		                                         P,
-		                                         dir,
-		                                         tsplat,
-		                                         Psplat,
-		                                         idirsplat,
-		                                         shufflexyz,
-		                                         difl,
-		                                         extmax,
-		                                         node_addr,
-		                                         visibility,
-		                                         dist);
-	}
+  float4 node = kernel_tex_fetch(__bvh_nodes, node_addr);
+  if (__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
+    return bvh_unaligned_node_intersect_robust(
+        kg, P, dir, isect_near, isect_far, difl, node_addr, visibility, dist);
+  }
+  else {
+    return bvh_aligned_node_intersect_robust(kg,
+                                             P,
+                                             dir,
+                                             tsplat,
+                                             Psplat,
+                                             idirsplat,
+                                             shufflexyz,
+                                             difl,
+                                             extmax,
+                                             node_addr,
+                                             visibility,
+                                             dist);
+  }
 }
-#endif  /* !defined(__KERNEL_SSE2__) */
+#endif /* !defined(__KERNEL_SSE2__) */
diff --git a/intern/cycles/kernel/bvh/bvh_shadow_all.h b/intern/cycles/kernel/bvh/bvh_shadow_all.h
index d8e089711ee..b362779549c 100644
--- a/intern/cycles/kernel/bvh/bvh_shadow_all.h
+++ b/intern/cycles/kernel/bvh/bvh_shadow_all.h
@@ -19,9 +19,9 @@
 
 #ifdef __QBVH__
 #  include "kernel/bvh/qbvh_shadow_all.h"
-#ifdef __KERNEL_AVX2__
-#  include "kernel/bvh/obvh_shadow_all.h"
-#endif
+#  ifdef __KERNEL_AVX2__
+#    include "kernel/bvh/obvh_shadow_all.h"
+#  endif
 #endif
 
 #if BVH_FEATURE(BVH_HAIR)
@@ -44,350 +44,340 @@ ccl_device
 #else
 ccl_device_inline
 #endif
-bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
-                                 const Ray *ray,
-                                 Intersection *isect_array,
-                                 const uint visibility,
-                                 const uint max_hits,
-                                 uint *num_hits)
+    bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
+                                     const Ray *ray,
+                                     Intersection *isect_array,
+                                     const uint visibility,
+                                     const uint max_hits,
+                                     uint *num_hits)
 {
-	/* todo:
-	 * - likely and unlikely for if() statements
-	 * - test restrict attribute for pointers
-	 */
-
-	/* traversal stack in CUDA thread-local memory */
-	int traversal_stack[BVH_STACK_SIZE];
-	traversal_stack[0] = ENTRYPOINT_SENTINEL;
-
-	/* traversal variables in registers */
-	int stack_ptr = 0;
-	int node_addr = kernel_data.bvh.root;
-
-	/* ray parameters in registers */
-	const float tmax = ray->t;
-	float3 P = ray->P;
-	float3 dir = bvh_clamp_direction(ray->D);
-	float3 idir = bvh_inverse_direction(dir);
-	int object = OBJECT_NONE;
-	float isect_t = tmax;
+  /* todo:
+   * - likely and unlikely for if() statements
+   * - test restrict attribute for pointers
+   */
+
+  /* traversal stack in CUDA thread-local memory */
+  int traversal_stack[BVH_STACK_SIZE];
+  traversal_stack[0] = ENTRYPOINT_SENTINEL;
+
+  /* traversal variables in registers */
+  int stack_ptr = 0;
+  int node_addr = kernel_data.bvh.root;
+
+  /* ray parameters in registers */
+  const float tmax = ray->t;
+  float3 P = ray->P;
+  float3 dir = bvh_clamp_direction(ray->D);
+  float3 idir = bvh_inverse_direction(dir);
+  int object = OBJECT_NONE;
+  float isect_t = tmax;
 
 #if BVH_FEATURE(BVH_MOTION)
-	Transform ob_itfm;
+  Transform ob_itfm;
 #endif
 
 #if BVH_FEATURE(BVH_INSTANCING)
-	int num_hits_in_instance = 0;
+  int num_hits_in_instance = 0;
 #endif
 
-	*num_hits = 0;
-	isect_array->t = tmax;
+  *num_hits = 0;
+  isect_array->t = tmax;
 
 #if defined(__KERNEL_SSE2__)
-	const shuffle_swap_t shuf_identity = shuffle_swap_identity();
-	const shuffle_swap_t shuf_swap = shuffle_swap_swap();
+  const shuffle_swap_t shuf_identity = shuffle_swap_identity();
+  const shuffle_swap_t shuf_swap = shuffle_swap_swap();
 
-	const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
-	ssef Psplat[3], idirsplat[3];
+  const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
+  ssef Psplat[3], idirsplat[3];
 #  if BVH_FEATURE(BVH_HAIR)
-	ssef tnear(0.0f), tfar(isect_t);
+  ssef tnear(0.0f), tfar(isect_t);
 #  endif
-	shuffle_swap_t shufflexyz[3];
+  shuffle_swap_t shufflexyz[3];
 
-	Psplat[0] = ssef(P.x);
-	Psplat[1] = ssef(P.y);
-	Psplat[2] = ssef(P.z);
+  Psplat[0] = ssef(P.x);
+  Psplat[1] = ssef(P.y);
+  Psplat[2] = ssef(P.z);
 
-	ssef tsplat(0.0f, 0.0f, -isect_t, -isect_t);
+  ssef tsplat(0.0f, 0.0f, -isect_t, -isect_t);
 
-	gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
-#endif  /* __KERNEL_SSE2__ */
+  gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
+#endif /* __KERNEL_SSE2__ */
 
-	/* traversal loop */
-	do {
-		do {
-			/* traverse internal nodes */
-			while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
-				int node_addr_child1, traverse_mask;
-				float dist[2];
-				float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
+  /* traversal loop */
+  do {
+    do {
+      /* traverse internal nodes */
+      while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
+        int node_addr_child1, traverse_mask;
+        float dist[2];
+        float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
 
 #if !defined(__KERNEL_SSE2__)
-				traverse_mask = NODE_INTERSECT(kg,
-				                               P,
+        traverse_mask = NODE_INTERSECT(kg,
+                                       P,
 #  if BVH_FEATURE(BVH_HAIR)
-				                               dir,
+                                       dir,
 #  endif
-				                               idir,
-				                               isect_t,
-				                               node_addr,
-				                               visibility,
-				                               dist);
+                                       idir,
+                                       isect_t,
+                                       node_addr,
+                                       visibility,
+                                       dist);
 #else  // __KERNEL_SSE2__
-				traverse_mask = NODE_INTERSECT(kg,
-				                               P,
-				                               dir,
+        traverse_mask = NODE_INTERSECT(kg,
+                                       P,
+                                       dir,
 #  if BVH_FEATURE(BVH_HAIR)
-				                               tnear,
-				                               tfar,
+                                       tnear,
+                                       tfar,
 #  endif
-				                               tsplat,
-				                               Psplat,
-				                               idirsplat,
-				                               shufflexyz,
-				                               node_addr,
-				                               visibility,
-				                               dist);
+                                       tsplat,
+                                       Psplat,
+                                       idirsplat,
+                                       shufflexyz,
+                                       node_addr,
+                                       visibility,
+                                       dist);
 #endif  // __KERNEL_SSE2__
 
-				node_addr = __float_as_int(cnodes.z);
-				node_addr_child1 = __float_as_int(cnodes.w);
-
-				if(traverse_mask == 3) {
-					/* Both children were intersected, push the farther one. */
-					bool is_closest_child1 = (dist[1] < dist[0]);
-					if(is_closest_child1) {
-						int tmp = node_addr;
-						node_addr = node_addr_child1;
-						node_addr_child1 = tmp;
-					}
-
-					++stack_ptr;
-					kernel_assert(stack_ptr < BVH_STACK_SIZE);
-					traversal_stack[stack_ptr] = node_addr_child1;
-				}
-				else {
-					/* One child was intersected. */
-					if(traverse_mask == 2) {
-						node_addr = node_addr_child1;
-					}
-					else if(traverse_mask == 0) {
-						/* Neither child was intersected. */
-						node_addr = traversal_stack[stack_ptr];
-						--stack_ptr;
-					}
-				}
-			}
-
-			/* if node is leaf, fetch triangle list */
-			if(node_addr < 0) {
-				float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1));
-				int prim_addr = __float_as_int(leaf.x);
+        node_addr = __float_as_int(cnodes.z);
+        node_addr_child1 = __float_as_int(cnodes.w);
+
+        if (traverse_mask == 3) {
+          /* Both children were intersected, push the farther one. */
+          bool is_closest_child1 = (dist[1] < dist[0]);
+          if (is_closest_child1) {
+            int tmp = node_addr;
+            node_addr = node_addr_child1;
+            node_addr_child1 = tmp;
+          }
+
+          ++stack_ptr;
+          kernel_assert(stack_ptr < BVH_STACK_SIZE);
+          traversal_stack[stack_ptr] = node_addr_child1;
+        }
+        else {
+          /* One child was intersected. */
+          if (traverse_mask == 2) {
+            node_addr = node_addr_child1;
+          }
+          else if (traverse_mask == 0) {
+            /* Neither child was intersected. */
+            node_addr = traversal_stack[stack_ptr];
+            --stack_ptr;
+          }
+        }
+      }
+
+      /* if node is leaf, fetch triangle list */
+      if (node_addr < 0) {
+        float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
+        int prim_addr = __float_as_int(leaf.x);
 
 #if BVH_FEATURE(BVH_INSTANCING)
-				if(prim_addr >= 0) {
+        if (prim_addr >= 0) {
 #endif
-					const int prim_addr2 = __float_as_int(leaf.y);
-					const uint type = __float_as_int(leaf.w);
-					const uint p_type = type & PRIMITIVE_ALL;
-
-					/* pop */
-					node_addr = traversal_stack[stack_ptr];
-					--stack_ptr;
-
-					/* primitive intersection */
-					while(prim_addr < prim_addr2) {
-						kernel_assert((kernel_tex_fetch(__prim_type, prim_addr) & PRIMITIVE_ALL) == p_type);
-						bool hit;
-
-						/* todo: specialized intersect functions which don't fill in
-						 * isect unless needed and check SD_HAS_TRANSPARENT_SHADOW?
-						 * might give a few % performance improvement */
-
-						switch(p_type) {
-							case PRIMITIVE_TRIANGLE: {
-								hit = triangle_intersect(kg,
-								                         isect_array,
-								                         P,
-								                         dir,
-								                         visibility,
-								                         object,
-								                         prim_addr);
-								break;
-							}
+          const int prim_addr2 = __float_as_int(leaf.y);
+          const uint type = __float_as_int(leaf.w);
+          const uint p_type = type & PRIMITIVE_ALL;
+
+          /* pop */
+          node_addr = traversal_stack[stack_ptr];
+          --stack_ptr;
+
+          /* primitive intersection */
+          while (prim_addr < prim_addr2) {
+            kernel_assert((kernel_tex_fetch(__prim_type, prim_addr) & PRIMITIVE_ALL) == p_type);
+            bool hit;
+
+            /* todo: specialized intersect functions which don't fill in
+             * isect unless needed and check SD_HAS_TRANSPARENT_SHADOW?
+             * might give a few % performance improvement */
+
+            switch (p_type) {
+              case PRIMITIVE_TRIANGLE: {
+                hit = triangle_intersect(kg, isect_array, P, dir, visibility, object, prim_addr);
+                break;
+              }
 #if BVH_FEATURE(BVH_MOTION)
-							case PRIMITIVE_MOTION_TRIANGLE: {
-								hit = motion_triangle_intersect(kg,
-								                                isect_array,
-								                                P,
-								                                dir,
-								                                ray->time,
-								                                visibility,
-								                                object,
-								                                prim_addr);
-								break;
-							}
+              case PRIMITIVE_MOTION_TRIANGLE: {
+                hit = motion_triangle_intersect(
+                    kg, isect_array, P, dir, ray->time, visibility, object, prim_addr);
+                break;
+              }
 #endif
 #if BVH_FEATURE(BVH_HAIR)
-							case PRIMITIVE_CURVE:
-							case PRIMITIVE_MOTION_CURVE: {
-								const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr);
-								if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
-									hit = cardinal_curve_intersect(kg,
-									                               isect_array,
-									                               P,
-									                               dir,
-									                               visibility,
-									                               object,
-									                               prim_addr,
-									                               ray->time,
-									                               curve_type,
-									                               NULL,
-									                               0, 0);
-								}
-								else {
-									hit = curve_intersect(kg,
-									                      isect_array,
-									                      P,
-									                      dir,
-									                      visibility,
-									                      object,
-									                      prim_addr,
-									                      ray->time,
-									                      curve_type,
-									                      NULL,
-									                      0, 0);
-								}
-								break;
-							}
+              case PRIMITIVE_CURVE:
+              case PRIMITIVE_MOTION_CURVE: {
+                const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr);
+                if (kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
+                  hit = cardinal_curve_intersect(kg,
+                                                 isect_array,
+                                                 P,
+                                                 dir,
+                                                 visibility,
+                                                 object,
+                                                 prim_addr,
+                                                 ray->time,
+                                                 curve_type,
+                                                 NULL,
+                                                 0,
+                                                 0);
+                }
+                else {
+                  hit = curve_intersect(kg,
+                                        isect_array,
+                                        P,
+                                        dir,
+                                        visibility,
+                                        object,
+                                        prim_addr,
+                                        ray->time,
+                                        curve_type,
+                                        NULL,
+                                        0,
+                                        0);
+                }
+                break;
+              }
 #endif
-							default: {
-								hit = false;
-								break;
-							}
-						}
+              default: {
+                hit = false;
+                break;
+              }
+            }
 
-						/* shadow ray early termination */
-						if(hit) {
-							/* detect if this surface has a shader with transparent shadows */
+            /* shadow ray early termination */
+            if (hit) {
+              /* detect if this surface has a shader with transparent shadows */
 
-							/* todo: optimize so primitive visibility flag indicates if
-							 * the primitive has a transparent shadow shader? */
-							int prim = kernel_tex_fetch(__prim_index, isect_array->prim);
-							int shader = 0;
+              /* todo: optimize so primitive visibility flag indicates if
+               * the primitive has a transparent shadow shader? */
+              int prim = kernel_tex_fetch(__prim_index, isect_array->prim);
+              int shader = 0;
 
 #ifdef __HAIR__
-							if(kernel_tex_fetch(__prim_type, isect_array->prim) & PRIMITIVE_ALL_TRIANGLE)
+              if (kernel_tex_fetch(__prim_type, isect_array->prim) & PRIMITIVE_ALL_TRIANGLE)
 #endif
-							{
-								shader = kernel_tex_fetch(__tri_shader, prim);
-							}
+              {
+                shader = kernel_tex_fetch(__tri_shader, prim);
+              }
 #ifdef __HAIR__
-							else {
-								float4 str = kernel_tex_fetch(__curves, prim);
-								shader = __float_as_int(str.z);
-							}
+              else {
+                float4 str = kernel_tex_fetch(__curves, prim);
+                shader = __float_as_int(str.z);
+              }
 #endif
-							int flag = kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags;
-
-							/* if no transparent shadows, all light is blocked */
-							if(!(flag & SD_HAS_TRANSPARENT_SHADOW)) {
-								return true;
-							}
-							/* if maximum number of hits reached, block all light */
-							else if(*num_hits == max_hits) {
-								return true;
-							}
-
-							/* move on to next entry in intersections array */
-							isect_array++;
-							(*num_hits)++;
+              int flag = kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags;
+
+              /* if no transparent shadows, all light is blocked */
+              if (!(flag & SD_HAS_TRANSPARENT_SHADOW)) {
+                return true;
+              }
+              /* if maximum number of hits reached, block all light */
+              else if (*num_hits == max_hits) {
+                return true;
+              }
+
+              /* move on to next entry in intersections array */
+              isect_array++;
+              (*num_hits)++;
 #if BVH_FEATURE(BVH_INSTANCING)
-							num_hits_in_instance++;
+              num_hits_in_instance++;
 #endif
 
-							isect_array->t = isect_t;
-						}
+              isect_array->t = isect_t;
+            }
 
-						prim_addr++;
-					}
-				}
+            prim_addr++;
+          }
+        }
 #if BVH_FEATURE(BVH_INSTANCING)
-				else {
-					/* instance push */
-					object = kernel_tex_fetch(__prim_object, -prim_addr-1);
+        else {
+          /* instance push */
+          object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
 
 #  if BVH_FEATURE(BVH_MOTION)
-					isect_t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
+          isect_t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
 #  else
-					isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
+          isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
 #  endif
 
-					num_hits_in_instance = 0;
-					isect_array->t = isect_t;
+          num_hits_in_instance = 0;
+          isect_array->t = isect_t;
 
 #  if defined(__KERNEL_SSE2__)
-					Psplat[0] = ssef(P.x);
-					Psplat[1] = ssef(P.y);
-					Psplat[2] = ssef(P.z);
+          Psplat[0] = ssef(P.x);
+          Psplat[1] = ssef(P.y);
+          Psplat[2] = ssef(P.z);
 
-					tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t);
+          tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t);
 #    if BVH_FEATURE(BVH_HAIR)
-					tfar = ssef(isect_t);
+          tfar = ssef(isect_t);
 #    endif
-					gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
+          gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
 #  endif
 
-					++stack_ptr;
-					kernel_assert(stack_ptr < BVH_STACK_SIZE);
-					traversal_stack[stack_ptr] = ENTRYPOINT_SENTINEL;
+          ++stack_ptr;
+          kernel_assert(stack_ptr < BVH_STACK_SIZE);
+          traversal_stack[stack_ptr] = ENTRYPOINT_SENTINEL;
 
-					node_addr = kernel_tex_fetch(__object_node, object);
-				}
-			}
-#endif  /* FEATURE(BVH_INSTANCING) */
-		} while(node_addr != ENTRYPOINT_SENTINEL);
+          node_addr = kernel_tex_fetch(__object_node, object);
+        }
+      }
+#endif /* FEATURE(BVH_INSTANCING) */
+    } while (node_addr != ENTRYPOINT_SENTINEL);
 
 #if BVH_FEATURE(BVH_INSTANCING)
-		if(stack_ptr >= 0) {
-			kernel_assert(object != OBJECT_NONE);
+    if (stack_ptr >= 0) {
+      kernel_assert(object != OBJECT_NONE);
 
-			/* Instance pop. */
-			if(num_hits_in_instance) {
-				float t_fac;
+      /* Instance pop. */
+      if (num_hits_in_instance) {
+        float t_fac;
 
 #  if BVH_FEATURE(BVH_MOTION)
-				bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm);
+        bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm);
 #  else
-				bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
+        bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
 #  endif
 
-				/* scale isect->t to adjust for instancing */
-				for(int i = 0; i < num_hits_in_instance; i++) {
-					(isect_array-i-1)->t *= t_fac;
-				}
-			}
-			else {
+        /* scale isect->t to adjust for instancing */
+        for (int i = 0; i < num_hits_in_instance; i++) {
+          (isect_array - i - 1)->t *= t_fac;
+        }
+      }
+      else {
 #  if BVH_FEATURE(BVH_MOTION)
-				bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm);
+        bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm);
 #  else
-				bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
+        bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
 #  endif
-			}
+      }
 
-			isect_t = tmax;
-			isect_array->t = isect_t;
+      isect_t = tmax;
+      isect_array->t = isect_t;
 
 #  if defined(__KERNEL_SSE2__)
-			Psplat[0] = ssef(P.x);
-			Psplat[1] = ssef(P.y);
-			Psplat[2] = ssef(P.z);
+      Psplat[0] = ssef(P.x);
+      Psplat[1] = ssef(P.y);
+      Psplat[2] = ssef(P.z);
 
-			tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t);
+      tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t);
 #    if BVH_FEATURE(BVH_HAIR)
-			tfar = ssef(isect_t);
+      tfar = ssef(isect_t);
 #    endif
-			gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
+      gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
 #  endif
 
-			object = OBJECT_NONE;
-			node_addr = traversal_stack[stack_ptr];
-			--stack_ptr;
-		}
-#endif  /* FEATURE(BVH_INSTANCING) */
-	} while(node_addr != ENTRYPOINT_SENTINEL);
+      object = OBJECT_NONE;
+      node_addr = traversal_stack[stack_ptr];
+      --stack_ptr;
+    }
+#endif /* FEATURE(BVH_INSTANCING) */
+  } while (node_addr != ENTRYPOINT_SENTINEL);
 
-	return false;
+  return false;
 }
 
 ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg,
@@ -397,35 +387,20 @@ ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg,
                                          const uint max_hits,
                                          uint *num_hits)
 {
-	switch(kernel_data.bvh.bvh_layout) {
+  switch (kernel_data.bvh.bvh_layout) {
 #ifdef __KERNEL_AVX2__
-		case BVH_LAYOUT_BVH8:
-			return BVH_FUNCTION_FULL_NAME(OBVH)(kg,
-			                                    ray,
-			                                    isect_array,
-			                                    visibility,
-			                                    max_hits,
-			                                    num_hits);
+    case BVH_LAYOUT_BVH8:
+      return BVH_FUNCTION_FULL_NAME(OBVH)(kg, ray, isect_array, visibility, max_hits, num_hits);
 #endif
 #ifdef __QBVH__
-		case BVH_LAYOUT_BVH4:
-			return BVH_FUNCTION_FULL_NAME(QBVH)(kg,
-			                                    ray,
-			                                    isect_array,
-			                                    visibility,
-			                                    max_hits,
-			                                    num_hits);
+    case BVH_LAYOUT_BVH4:
+      return BVH_FUNCTION_FULL_NAME(QBVH)(kg, ray, isect_array, visibility, max_hits, num_hits);
 #endif
-		case BVH_LAYOUT_BVH2:
-			return BVH_FUNCTION_FULL_NAME(BVH)(kg,
-			                                   ray,
-			                                   isect_array,
-			                                   visibility,
-			                                   max_hits,
-			                                   num_hits);
-	}
-	kernel_assert(!"Should not happen");
-	return false;
+    case BVH_LAYOUT_BVH2:
+      return BVH_FUNCTION_FULL_NAME(BVH)(kg, ray, isect_array, visibility, max_hits, num_hits);
+  }
+  kernel_assert(!"Should not happen");
+  return false;
 }
 
 #undef BVH_FUNCTION_NAME
diff --git a/intern/cycles/kernel/bvh/bvh_traversal.h b/intern/cycles/kernel/bvh/bvh_traversal.h
index 76d4cab663d..34a06d003bb 100644
--- a/intern/cycles/kernel/bvh/bvh_traversal.h
+++ b/intern/cycles/kernel/bvh/bvh_traversal.h
@@ -47,374 +47,362 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
                                                      Intersection *isect,
                                                      const uint visibility
 #if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
-                                                     , uint *lcg_state,
+                                                     ,
+                                                     uint *lcg_state,
                                                      float difl,
                                                      float extmax
 #endif
-                                                     )
+)
 {
-	/* todo:
-	 * - test if pushing distance on the stack helps (for non shadow rays)
-	 * - separate version for shadow rays
-	 * - likely and unlikely for if() statements
-	 * - test restrict attribute for pointers
-	 */
-
-	/* traversal stack in CUDA thread-local memory */
-	int traversal_stack[BVH_STACK_SIZE];
-	traversal_stack[0] = ENTRYPOINT_SENTINEL;
-
-	/* traversal variables in registers */
-	int stack_ptr = 0;
-	int node_addr = kernel_data.bvh.root;
-
-	/* ray parameters in registers */
-	float3 P = ray->P;
-	float3 dir = bvh_clamp_direction(ray->D);
-	float3 idir = bvh_inverse_direction(dir);
-	int object = OBJECT_NONE;
+  /* todo:
+   * - test if pushing distance on the stack helps (for non shadow rays)
+   * - separate version for shadow rays
+   * - likely and unlikely for if() statements
+   * - test restrict attribute for pointers
+   */
+
+  /* traversal stack in CUDA thread-local memory */
+  int traversal_stack[BVH_STACK_SIZE];
+  traversal_stack[0] = ENTRYPOINT_SENTINEL;
+
+  /* traversal variables in registers */
+  int stack_ptr = 0;
+  int node_addr = kernel_data.bvh.root;
+
+  /* ray parameters in registers */
+  float3 P = ray->P;
+  float3 dir = bvh_clamp_direction(ray->D);
+  float3 idir = bvh_inverse_direction(dir);
+  int object = OBJECT_NONE;
 
 #if BVH_FEATURE(BVH_MOTION)
-	Transform ob_itfm;
+  Transform ob_itfm;
 #endif
 
-	isect->t = ray->t;
-	isect->u = 0.0f;
-	isect->v = 0.0f;
-	isect->prim = PRIM_NONE;
-	isect->object = OBJECT_NONE;
+  isect->t = ray->t;
+  isect->u = 0.0f;
+  isect->v = 0.0f;
+  isect->prim = PRIM_NONE;
+  isect->object = OBJECT_NONE;
 
-	BVH_DEBUG_INIT();
+  BVH_DEBUG_INIT();
 
 #if defined(__KERNEL_SSE2__)
-	const shuffle_swap_t shuf_identity = shuffle_swap_identity();
-	const shuffle_swap_t shuf_swap = shuffle_swap_swap();
+  const shuffle_swap_t shuf_identity = shuffle_swap_identity();
+  const shuffle_swap_t shuf_swap = shuffle_swap_swap();
 
-	const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
-	ssef Psplat[3], idirsplat[3];
+  const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
+  ssef Psplat[3], idirsplat[3];
 #  if BVH_FEATURE(BVH_HAIR)
-	ssef tnear(0.0f), tfar(isect->t);
+  ssef tnear(0.0f), tfar(isect->t);
 #  endif
-	shuffle_swap_t shufflexyz[3];
+  shuffle_swap_t shufflexyz[3];
 
-	Psplat[0] = ssef(P.x);
-	Psplat[1] = ssef(P.y);
-	Psplat[2] = ssef(P.z);
+  Psplat[0] = ssef(P.x);
+  Psplat[1] = ssef(P.y);
+  Psplat[2] = ssef(P.z);
 
-	ssef tsplat(0.0f, 0.0f, -isect->t, -isect->t);
+  ssef tsplat(0.0f, 0.0f, -isect->t, -isect->t);
 
-	gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
+  gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
 #endif
 
-	/* traversal loop */
-	do {
-		do {
-			/* traverse internal nodes */
-			while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
-				int node_addr_child1, traverse_mask;
-				float dist[2];
-				float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
+  /* traversal loop */
+  do {
+    do {
+      /* traverse internal nodes */
+      while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
+        int node_addr_child1, traverse_mask;
+        float dist[2];
+        float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
 
 #if !defined(__KERNEL_SSE2__)
 #  if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
-				if(difl != 0.0f) {
-					traverse_mask = NODE_INTERSECT_ROBUST(kg,
-					                                      P,
+        if (difl != 0.0f) {
+          traverse_mask = NODE_INTERSECT_ROBUST(kg,
+                                                P,
 #    if BVH_FEATURE(BVH_HAIR)
-					                                      dir,
+                                                dir,
 #    endif
-					                                      idir,
-					                                      isect->t,
-					                                      difl,
-					                                      extmax,
-					                                      node_addr,
-					                                      visibility,
-					                                      dist);
-				}
-				else
+                                                idir,
+                                                isect->t,
+                                                difl,
+                                                extmax,
+                                                node_addr,
+                                                visibility,
+                                                dist);
+        }
+        else
 #  endif
-				{
-					traverse_mask = NODE_INTERSECT(kg,
-					                               P,
-#    if BVH_FEATURE(BVH_HAIR)
-					                               dir,
-#    endif
-					                               idir,
-					                               isect->t,
-					                               node_addr,
-					                               visibility,
-					                               dist);
-				}
+        {
+          traverse_mask = NODE_INTERSECT(kg,
+                                         P,
+#  if BVH_FEATURE(BVH_HAIR)
+                                         dir,
+#  endif
+                                         idir,
+                                         isect->t,
+                                         node_addr,
+                                         visibility,
+                                         dist);
+        }
 #else  // __KERNEL_SSE2__
 #  if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
-				if(difl != 0.0f) {
-					traverse_mask = NODE_INTERSECT_ROBUST(kg,
-					                                      P,
-					                                      dir,
+        if (difl != 0.0f) {
+          traverse_mask = NODE_INTERSECT_ROBUST(kg,
+                                                P,
+                                                dir,
 #    if BVH_FEATURE(BVH_HAIR)
-					                                      tnear,
-					                                      tfar,
+                                                tnear,
+                                                tfar,
 #    endif
-					                                      tsplat,
-					                                      Psplat,
-					                                      idirsplat,
-					                                      shufflexyz,
-					                                      difl,
-					                                      extmax,
-					                                      node_addr,
-					                                      visibility,
-					                                      dist);
-				}
-				else
+                                                tsplat,
+                                                Psplat,
+                                                idirsplat,
+                                                shufflexyz,
+                                                difl,
+                                                extmax,
+                                                node_addr,
+                                                visibility,
+                                                dist);
+        }
+        else
 #  endif
-				{
-					traverse_mask = NODE_INTERSECT(kg,
-					                               P,
-					                               dir,
-#    if BVH_FEATURE(BVH_HAIR)
-					                               tnear,
-					                               tfar,
-#    endif
-					                               tsplat,
-					                               Psplat,
-					                               idirsplat,
-					                               shufflexyz,
-					                               node_addr,
-					                               visibility,
-					                               dist);
-				}
+        {
+          traverse_mask = NODE_INTERSECT(kg,
+                                         P,
+                                         dir,
+#  if BVH_FEATURE(BVH_HAIR)
+                                         tnear,
+                                         tfar,
+#  endif
+                                         tsplat,
+                                         Psplat,
+                                         idirsplat,
+                                         shufflexyz,
+                                         node_addr,
+                                         visibility,
+                                         dist);
+        }
 #endif  // __KERNEL_SSE2__
 
-				node_addr = __float_as_int(cnodes.z);
-				node_addr_child1 = __float_as_int(cnodes.w);
-
-				if(traverse_mask == 3) {
-					/* Both children were intersected, push the farther one. */
-					bool is_closest_child1 = (dist[1] < dist[0]);
-					if(is_closest_child1) {
-						int tmp = node_addr;
-						node_addr = node_addr_child1;
-						node_addr_child1 = tmp;
-					}
-
-					++stack_ptr;
-					kernel_assert(stack_ptr < BVH_STACK_SIZE);
-					traversal_stack[stack_ptr] = node_addr_child1;
-				}
-				else {
-					/* One child was intersected. */
-					if(traverse_mask == 2) {
-						node_addr = node_addr_child1;
-					}
-					else if(traverse_mask == 0) {
-						/* Neither child was intersected. */
-						node_addr = traversal_stack[stack_ptr];
-						--stack_ptr;
-					}
-				}
-				BVH_DEBUG_NEXT_NODE();
-			}
-
-			/* if node is leaf, fetch triangle list */
-			if(node_addr < 0) {
-				float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1));
-				int prim_addr = __float_as_int(leaf.x);
+        node_addr = __float_as_int(cnodes.z);
+        node_addr_child1 = __float_as_int(cnodes.w);
+
+        if (traverse_mask == 3) {
+          /* Both children were intersected, push the farther one. */
+          bool is_closest_child1 = (dist[1] < dist[0]);
+          if (is_closest_child1) {
+            int tmp = node_addr;
+            node_addr = node_addr_child1;
+            node_addr_child1 = tmp;
+          }
+
+          ++stack_ptr;
+          kernel_assert(stack_ptr < BVH_STACK_SIZE);
+          traversal_stack[stack_ptr] = node_addr_child1;
+        }
+        else {
+          /* One child was intersected. */
+          if (traverse_mask == 2) {
+            node_addr = node_addr_child1;
+          }
+          else if (traverse_mask == 0) {
+            /* Neither child was intersected. */
+            node_addr = traversal_stack[stack_ptr];
+            --stack_ptr;
+          }
+        }
+        BVH_DEBUG_NEXT_NODE();
+      }
+
+      /* if node is leaf, fetch triangle list */
+      if (node_addr < 0) {
+        float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
+        int prim_addr = __float_as_int(leaf.x);
 
 #if BVH_FEATURE(BVH_INSTANCING)
-				if(prim_addr >= 0) {
+        if (prim_addr >= 0) {
 #endif
-					const int prim_addr2 = __float_as_int(leaf.y);
-					const uint type = __float_as_int(leaf.w);
-
-					/* pop */
-					node_addr = traversal_stack[stack_ptr];
-					--stack_ptr;
-
-					/* primitive intersection */
-					switch(type & PRIMITIVE_ALL) {
-						case PRIMITIVE_TRIANGLE: {
-							for(; prim_addr < prim_addr2; prim_addr++) {
-								BVH_DEBUG_NEXT_INTERSECTION();
-								kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
-								if(triangle_intersect(kg,
-								                      isect,
-								                      P,
-								                      dir,
-								                      visibility,
-								                      object,
-								                      prim_addr))
-								{
-									/* shadow ray early termination */
+          const int prim_addr2 = __float_as_int(leaf.y);
+          const uint type = __float_as_int(leaf.w);
+
+          /* pop */
+          node_addr = traversal_stack[stack_ptr];
+          --stack_ptr;
+
+          /* primitive intersection */
+          switch (type & PRIMITIVE_ALL) {
+            case PRIMITIVE_TRIANGLE: {
+              for (; prim_addr < prim_addr2; prim_addr++) {
+                BVH_DEBUG_NEXT_INTERSECTION();
+                kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
+                if (triangle_intersect(kg, isect, P, dir, visibility, object, prim_addr)) {
+                  /* shadow ray early termination */
 #if defined(__KERNEL_SSE2__)
-									if(visibility & PATH_RAY_SHADOW_OPAQUE)
-										return true;
-									tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
+                  if (visibility & PATH_RAY_SHADOW_OPAQUE)
+                    return true;
+                  tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
 #  if BVH_FEATURE(BVH_HAIR)
-									tfar = ssef(isect->t);
+                  tfar = ssef(isect->t);
 #  endif
 #else
-									if(visibility & PATH_RAY_SHADOW_OPAQUE)
-										return true;
+                if (visibility & PATH_RAY_SHADOW_OPAQUE)
+                  return true;
 #endif
-								}
-							}
-							break;
-						}
+                }
+              }
+              break;
+            }
 #if BVH_FEATURE(BVH_MOTION)
-						case PRIMITIVE_MOTION_TRIANGLE: {
-							for(; prim_addr < prim_addr2; prim_addr++) {
-								BVH_DEBUG_NEXT_INTERSECTION();
-								kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
-								if(motion_triangle_intersect(kg,
-								                             isect,
-								                             P,
-								                             dir,
-								                             ray->time,
-								                             visibility,
-								                             object,
-								                             prim_addr))
-								{
-									/* shadow ray early termination */
+            case PRIMITIVE_MOTION_TRIANGLE: {
+              for (; prim_addr < prim_addr2; prim_addr++) {
+                BVH_DEBUG_NEXT_INTERSECTION();
+                kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
+                if (motion_triangle_intersect(
+                        kg, isect, P, dir, ray->time, visibility, object, prim_addr)) {
+                  /* shadow ray early termination */
 #  if defined(__KERNEL_SSE2__)
-									if(visibility & PATH_RAY_SHADOW_OPAQUE)
-										return true;
-									tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
+                  if (visibility & PATH_RAY_SHADOW_OPAQUE)
+                    return true;
+                  tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
 #    if BVH_FEATURE(BVH_HAIR)
-									tfar = ssef(isect->t);
+                  tfar = ssef(isect->t);
 #    endif
 #  else
-									if(visibility & PATH_RAY_SHADOW_OPAQUE)
-										return true;
+                  if (visibility & PATH_RAY_SHADOW_OPAQUE)
+                    return true;
 #  endif
-								}
-							}
-							break;
-						}
-#endif  /* BVH_FEATURE(BVH_MOTION) */
+                }
+              }
+              break;
+            }
+#endif /* BVH_FEATURE(BVH_MOTION) */
 #if BVH_FEATURE(BVH_HAIR)
-						case PRIMITIVE_CURVE:
-						case PRIMITIVE_MOTION_CURVE: {
-							for(; prim_addr < prim_addr2; prim_addr++) {
-								BVH_DEBUG_NEXT_INTERSECTION();
-								const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr);
-								kernel_assert((curve_type & PRIMITIVE_ALL) == (type & PRIMITIVE_ALL));
-								bool hit;
-								if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
-									hit = cardinal_curve_intersect(kg,
-									                               isect,
-									                               P,
-									                               dir,
-									                               visibility,
-									                               object,
-									                               prim_addr,
-									                               ray->time,
-									                               curve_type,
-									                               lcg_state,
-									                               difl,
-									                               extmax);
-								}
-								else {
-									hit = curve_intersect(kg,
-									                      isect,
-									                      P,
-									                      dir,
-									                      visibility,
-									                      object,
-									                      prim_addr,
-									                      ray->time,
-									                      curve_type,
-									                      lcg_state,
-									                      difl,
-									                      extmax);
-								}
-								if(hit) {
-									/* shadow ray early termination */
+            case PRIMITIVE_CURVE:
+            case PRIMITIVE_MOTION_CURVE: {
+              for (; prim_addr < prim_addr2; prim_addr++) {
+                BVH_DEBUG_NEXT_INTERSECTION();
+                const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr);
+                kernel_assert((curve_type & PRIMITIVE_ALL) == (type & PRIMITIVE_ALL));
+                bool hit;
+                if (kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
+                  hit = cardinal_curve_intersect(kg,
+                                                 isect,
+                                                 P,
+                                                 dir,
+                                                 visibility,
+                                                 object,
+                                                 prim_addr,
+                                                 ray->time,
+                                                 curve_type,
+                                                 lcg_state,
+                                                 difl,
+                                                 extmax);
+                }
+                else {
+                  hit = curve_intersect(kg,
+                                        isect,
+                                        P,
+                                        dir,
+                                        visibility,
+                                        object,
+                                        prim_addr,
+                                        ray->time,
+                                        curve_type,
+                                        lcg_state,
+                                        difl,
+                                        extmax);
+                }
+                if (hit) {
+                  /* shadow ray early termination */
 #  if defined(__KERNEL_SSE2__)
-									if(visibility & PATH_RAY_SHADOW_OPAQUE)
-										return true;
-									tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
+                  if (visibility & PATH_RAY_SHADOW_OPAQUE)
+                    return true;
+                  tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
 #    if BVH_FEATURE(BVH_HAIR)
-									tfar = ssef(isect->t);
+                  tfar = ssef(isect->t);
 #    endif
 #  else
-									if(visibility & PATH_RAY_SHADOW_OPAQUE)
-										return true;
+                  if (visibility & PATH_RAY_SHADOW_OPAQUE)
+                    return true;
 #  endif
-								}
-							}
-							break;
-						}
-#endif  /* BVH_FEATURE(BVH_HAIR) */
-					}
-				}
+                }
+              }
+              break;
+            }
+#endif /* BVH_FEATURE(BVH_HAIR) */
+          }
+        }
 #if BVH_FEATURE(BVH_INSTANCING)
-				else {
-					/* instance push */
-					object = kernel_tex_fetch(__prim_object, -prim_addr-1);
+        else {
+          /* instance push */
+          object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
 
 #  if BVH_FEATURE(BVH_MOTION)
-					isect->t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
+          isect->t = bvh_instance_motion_push(
+              kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
 #  else
-					isect->t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect->t);
+          isect->t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect->t);
 #  endif
 
 #  if defined(__KERNEL_SSE2__)
-					Psplat[0] = ssef(P.x);
-					Psplat[1] = ssef(P.y);
-					Psplat[2] = ssef(P.z);
+          Psplat[0] = ssef(P.x);
+          Psplat[1] = ssef(P.y);
+          Psplat[2] = ssef(P.z);
 
-					tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
+          tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
 #    if BVH_FEATURE(BVH_HAIR)
-					tfar = ssef(isect->t);
+          tfar = ssef(isect->t);
 #    endif
 
-					gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
+          gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
 #  endif
 
-					++stack_ptr;
-					kernel_assert(stack_ptr < BVH_STACK_SIZE);
-					traversal_stack[stack_ptr] = ENTRYPOINT_SENTINEL;
+          ++stack_ptr;
+          kernel_assert(stack_ptr < BVH_STACK_SIZE);
+          traversal_stack[stack_ptr] = ENTRYPOINT_SENTINEL;
 
-					node_addr = kernel_tex_fetch(__object_node, object);
+          node_addr = kernel_tex_fetch(__object_node, object);
 
-					BVH_DEBUG_NEXT_INSTANCE();
-				}
-			}
-#endif  /* FEATURE(BVH_INSTANCING) */
-		} while(node_addr != ENTRYPOINT_SENTINEL);
+          BVH_DEBUG_NEXT_INSTANCE();
+        }
+      }
+#endif /* FEATURE(BVH_INSTANCING) */
+    } while (node_addr != ENTRYPOINT_SENTINEL);
 
 #if BVH_FEATURE(BVH_INSTANCING)
-		if(stack_ptr >= 0) {
-			kernel_assert(object != OBJECT_NONE);
+    if (stack_ptr >= 0) {
+      kernel_assert(object != OBJECT_NONE);
 
-			/* instance pop */
+      /* instance pop */
 #  if BVH_FEATURE(BVH_MOTION)
-			isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
+      isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
 #  else
-			isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
+      isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
 #  endif
 
 #  if defined(__KERNEL_SSE2__)
-			Psplat[0] = ssef(P.x);
-			Psplat[1] = ssef(P.y);
-			Psplat[2] = ssef(P.z);
+      Psplat[0] = ssef(P.x);
+      Psplat[1] = ssef(P.y);
+      Psplat[2] = ssef(P.z);
 
-			tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
+      tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
 #    if BVH_FEATURE(BVH_HAIR)
-			tfar = ssef(isect->t);
+      tfar = ssef(isect->t);
 #    endif
 
-			gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
+      gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
 #  endif
 
-			object = OBJECT_NONE;
-			node_addr = traversal_stack[stack_ptr];
-			--stack_ptr;
-		}
-#endif  /* FEATURE(BVH_INSTANCING) */
-	} while(node_addr != ENTRYPOINT_SENTINEL);
+      object = OBJECT_NONE;
+      node_addr = traversal_stack[stack_ptr];
+      --stack_ptr;
+    }
+#endif /* FEATURE(BVH_INSTANCING) */
+  } while (node_addr != ENTRYPOINT_SENTINEL);
 
-	return (isect->prim != PRIM_NONE);
+  return (isect->prim != PRIM_NONE);
 }
 
 ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg,
@@ -422,53 +410,57 @@ ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg,
                                          Intersection *isect,
                                          const uint visibility
 #if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
-                                         , uint *lcg_state,
+                                         ,
+                                         uint *lcg_state,
                                          float difl,
                                          float extmax
 #endif
-                                         )
+)
 {
-	switch(kernel_data.bvh.bvh_layout) {
+  switch (kernel_data.bvh.bvh_layout) {
 #ifdef __KERNEL_AVX2__
-		case BVH_LAYOUT_BVH8:
-			return BVH_FUNCTION_FULL_NAME(OBVH)(kg,
-			                                    ray,
-			                                    isect,
-			                                    visibility
+    case BVH_LAYOUT_BVH8:
+      return BVH_FUNCTION_FULL_NAME(OBVH)(kg,
+                                          ray,
+                                          isect,
+                                          visibility
 #  if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
-			                                    , lcg_state,
-			                                    difl,
-			                                    extmax
+                                          ,
+                                          lcg_state,
+                                          difl,
+                                          extmax
 #  endif
-			                                    );
+      );
 #endif
 #ifdef __QBVH__
-		case BVH_LAYOUT_BVH4:
-			return BVH_FUNCTION_FULL_NAME(QBVH)(kg,
-			                                    ray,
-			                                    isect,
-			                                    visibility
+    case BVH_LAYOUT_BVH4:
+      return BVH_FUNCTION_FULL_NAME(QBVH)(kg,
+                                          ray,
+                                          isect,
+                                          visibility
 #  if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
-			                                    , lcg_state,
-			                                    difl,
-			                                    extmax
+                                          ,
+                                          lcg_state,
+                                          difl,
+                                          extmax
 #  endif
-			                                    );
-#endif  /* __QBVH__ */
-		case BVH_LAYOUT_BVH2:
-			return BVH_FUNCTION_FULL_NAME(BVH)(kg,
-			                                   ray,
-			                                   isect,
-			                                   visibility
+      );
+#endif /* __QBVH__ */
+    case BVH_LAYOUT_BVH2:
+      return BVH_FUNCTION_FULL_NAME(BVH)(kg,
+                                         ray,
+                                         isect,
+                                         visibility
 #if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
-			                                   , lcg_state,
-			                                   difl,
-			                                   extmax
+                                         ,
+                                         lcg_state,
+                                         difl,
+                                         extmax
 #endif
-			                                   );
-	}
-	kernel_assert(!"Should not happen");
-	return false;
+      );
+  }
+  kernel_assert(!"Should not happen");
+  return false;
 }
 
 #undef BVH_FUNCTION_NAME
diff --git a/intern/cycles/kernel/bvh/bvh_types.h b/intern/cycles/kernel/bvh/bvh_types.h
index 4ca0dc2225e..16f3b03f842 100644
--- a/intern/cycles/kernel/bvh/bvh_types.h
+++ b/intern/cycles/kernel/bvh/bvh_types.h
@@ -35,13 +35,13 @@ CCL_NAMESPACE_BEGIN
 #define BVH_OSTACK_SIZE 768
 /* BVH intersection function variations */
 
-#define BVH_INSTANCING			1
-#define BVH_MOTION				2
-#define BVH_HAIR				4
-#define BVH_HAIR_MINIMUM_WIDTH	8
+#define BVH_INSTANCING 1
+#define BVH_MOTION 2
+#define BVH_HAIR 4
+#define BVH_HAIR_MINIMUM_WIDTH 8
 
-#define BVH_NAME_JOIN(x,y) x ## _ ## y
-#define BVH_NAME_EVAL(x,y) BVH_NAME_JOIN(x,y)
+#define BVH_NAME_JOIN(x, y) x##_##y
+#define BVH_NAME_EVAL(x, y) BVH_NAME_JOIN(x, y)
 #define BVH_FUNCTION_FULL_NAME(prefix) BVH_NAME_EVAL(prefix, BVH_FUNCTION_NAME)
 
 #define BVH_FEATURE(f) (((BVH_FUNCTION_FEATURES) & (f)) != 0)
@@ -49,30 +49,30 @@ CCL_NAMESPACE_BEGIN
 /* Debugging heleprs */
 #ifdef __KERNEL_DEBUG__
 #  define BVH_DEBUG_INIT() \
-	do { \
-		isect->num_traversed_nodes = 0; \
-		isect->num_traversed_instances = 0; \
-		isect->num_intersections = 0; \
-	} while(0)
+    do { \
+      isect->num_traversed_nodes = 0; \
+      isect->num_traversed_instances = 0; \
+      isect->num_intersections = 0; \
+    } while (0)
 #  define BVH_DEBUG_NEXT_NODE() \
-	do { \
-		++isect->num_traversed_nodes; \
-	} while(0)
+    do { \
+      ++isect->num_traversed_nodes; \
+    } while (0)
 #  define BVH_DEBUG_NEXT_INTERSECTION() \
-	do { \
-		++isect->num_intersections; \
-	} while(0)
+    do { \
+      ++isect->num_intersections; \
+    } while (0)
 #  define BVH_DEBUG_NEXT_INSTANCE() \
-	do { \
-		++isect->num_traversed_instances; \
-	} while(0)
-#else  /* __KERNEL_DEBUG__ */
+    do { \
+      ++isect->num_traversed_instances; \
+    } while (0)
+#else /* __KERNEL_DEBUG__ */
 #  define BVH_DEBUG_INIT()
 #  define BVH_DEBUG_NEXT_NODE()
 #  define BVH_DEBUG_NEXT_INTERSECTION()
 #  define BVH_DEBUG_NEXT_INSTANCE()
-#endif  /* __KERNEL_DEBUG__ */
+#endif /* __KERNEL_DEBUG__ */
 
 CCL_NAMESPACE_END
 
-#endif  /* __BVH_TYPES__ */
+#endif /* __BVH_TYPES__ */
diff --git a/intern/cycles/kernel/bvh/bvh_volume.h b/intern/cycles/kernel/bvh/bvh_volume.h
index b8257e3493e..c83b0d783f4 100644
--- a/intern/cycles/kernel/bvh/bvh_volume.h
+++ b/intern/cycles/kernel/bvh/bvh_volume.h
@@ -19,9 +19,9 @@
 
 #ifdef __QBVH__
 #  include "kernel/bvh/qbvh_volume.h"
-#ifdef __KERNEL_AVX2__
-#  include "kernel/bvh/obvh_volume.h"
-#endif
+#  ifdef __KERNEL_AVX2__
+#    include "kernel/bvh/obvh_volume.h"
+#  endif
 #endif
 
 #if BVH_FEATURE(BVH_HAIR)
@@ -43,267 +43,260 @@ ccl_device
 #else
 ccl_device_inline
 #endif
-bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
-                                 const Ray *ray,
-                                 Intersection *isect,
-                                 const uint visibility)
+    bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
+                                     const Ray *ray,
+                                     Intersection *isect,
+                                     const uint visibility)
 {
-	/* todo:
-	 * - test if pushing distance on the stack helps (for non shadow rays)
-	 * - separate version for shadow rays
-	 * - likely and unlikely for if() statements
-	 * - test restrict attribute for pointers
-	 */
-
-	/* traversal stack in CUDA thread-local memory */
-	int traversal_stack[BVH_STACK_SIZE];
-	traversal_stack[0] = ENTRYPOINT_SENTINEL;
-
-	/* traversal variables in registers */
-	int stack_ptr = 0;
-	int node_addr = kernel_data.bvh.root;
-
-	/* ray parameters in registers */
-	float3 P = ray->P;
-	float3 dir = bvh_clamp_direction(ray->D);
-	float3 idir = bvh_inverse_direction(dir);
-	int object = OBJECT_NONE;
+  /* todo:
+   * - test if pushing distance on the stack helps (for non shadow rays)
+   * - separate version for shadow rays
+   * - likely and unlikely for if() statements
+   * - test restrict attribute for pointers
+   */
+
+  /* traversal stack in CUDA thread-local memory */
+  int traversal_stack[BVH_STACK_SIZE];
+  traversal_stack[0] = ENTRYPOINT_SENTINEL;
+
+  /* traversal variables in registers */
+  int stack_ptr = 0;
+  int node_addr = kernel_data.bvh.root;
+
+  /* ray parameters in registers */
+  float3 P = ray->P;
+  float3 dir = bvh_clamp_direction(ray->D);
+  float3 idir = bvh_inverse_direction(dir);
+  int object = OBJECT_NONE;
 
 #if BVH_FEATURE(BVH_MOTION)
-	Transform ob_itfm;
+  Transform ob_itfm;
 #endif
 
-	isect->t = ray->t;
-	isect->u = 0.0f;
-	isect->v = 0.0f;
-	isect->prim = PRIM_NONE;
-	isect->object = OBJECT_NONE;
+  isect->t = ray->t;
+  isect->u = 0.0f;
+  isect->v = 0.0f;
+  isect->prim = PRIM_NONE;
+  isect->object = OBJECT_NONE;
 
 #if defined(__KERNEL_SSE2__)
-	const shuffle_swap_t shuf_identity = shuffle_swap_identity();
-	const shuffle_swap_t shuf_swap = shuffle_swap_swap();
+  const shuffle_swap_t shuf_identity = shuffle_swap_identity();
+  const shuffle_swap_t shuf_swap = shuffle_swap_swap();
 
-	const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
-	ssef Psplat[3], idirsplat[3];
+  const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
+  ssef Psplat[3], idirsplat[3];
 #  if BVH_FEATURE(BVH_HAIR)
-	ssef tnear(0.0f), tfar(isect->t);
+  ssef tnear(0.0f), tfar(isect->t);
 #  endif
-	shuffle_swap_t shufflexyz[3];
+  shuffle_swap_t shufflexyz[3];
 
-	Psplat[0] = ssef(P.x);
-	Psplat[1] = ssef(P.y);
-	Psplat[2] = ssef(P.z);
+  Psplat[0] = ssef(P.x);
+  Psplat[1] = ssef(P.y);
+  Psplat[2] = ssef(P.z);
 
-	ssef tsplat(0.0f, 0.0f, -isect->t, -isect->t);
+  ssef tsplat(0.0f, 0.0f, -isect->t, -isect->t);
 
-	gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
+  gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
 #endif
 
-	/* traversal loop */
-	do {
-		do {
-			/* traverse internal nodes */
-			while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
-				int node_addr_child1, traverse_mask;
-				float dist[2];
-				float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
+  /* traversal loop */
+  do {
+    do {
+      /* traverse internal nodes */
+      while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
+        int node_addr_child1, traverse_mask;
+        float dist[2];
+        float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
 
 #if !defined(__KERNEL_SSE2__)
-				traverse_mask = NODE_INTERSECT(kg,
-				                               P,
+        traverse_mask = NODE_INTERSECT(kg,
+                                       P,
 #  if BVH_FEATURE(BVH_HAIR)
-				                               dir,
+                                       dir,
 #  endif
-				                               idir,
-				                               isect->t,
-				                               node_addr,
-				                               visibility,
-				                               dist);
+                                       idir,
+                                       isect->t,
+                                       node_addr,
+                                       visibility,
+                                       dist);
 #else  // __KERNEL_SSE2__
-				traverse_mask = NODE_INTERSECT(kg,
-				                               P,
-				                               dir,
+        traverse_mask = NODE_INTERSECT(kg,
+                                       P,
+                                       dir,
 #  if BVH_FEATURE(BVH_HAIR)
-				                               tnear,
-				                               tfar,
+                                       tnear,
+                                       tfar,
 #  endif
-				                               tsplat,
-				                               Psplat,
-				                               idirsplat,
-				                               shufflexyz,
-				                               node_addr,
-				                               visibility,
-				                               dist);
+                                       tsplat,
+                                       Psplat,
+                                       idirsplat,
+                                       shufflexyz,
+                                       node_addr,
+                                       visibility,
+                                       dist);
 #endif  // __KERNEL_SSE2__
 
-				node_addr = __float_as_int(cnodes.z);
-				node_addr_child1 = __float_as_int(cnodes.w);
-
-				if(traverse_mask == 3) {
-					/* Both children were intersected, push the farther one. */
-					bool is_closest_child1 = (dist[1] < dist[0]);
-					if(is_closest_child1) {
-						int tmp = node_addr;
-						node_addr = node_addr_child1;
-						node_addr_child1 = tmp;
-					}
-
-					++stack_ptr;
-					kernel_assert(stack_ptr < BVH_STACK_SIZE);
-					traversal_stack[stack_ptr] = node_addr_child1;
-				}
-				else {
-					/* One child was intersected. */
-					if(traverse_mask == 2) {
-						node_addr = node_addr_child1;
-					}
-					else if(traverse_mask == 0) {
-						/* Neither child was intersected. */
-						node_addr = traversal_stack[stack_ptr];
-						--stack_ptr;
-					}
-				}
-			}
-
-			/* if node is leaf, fetch triangle list */
-			if(node_addr < 0) {
-				float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1));
-				int prim_addr = __float_as_int(leaf.x);
+        node_addr = __float_as_int(cnodes.z);
+        node_addr_child1 = __float_as_int(cnodes.w);
+
+        if (traverse_mask == 3) {
+          /* Both children were intersected, push the farther one. */
+          bool is_closest_child1 = (dist[1] < dist[0]);
+          if (is_closest_child1) {
+            int tmp = node_addr;
+            node_addr = node_addr_child1;
+            node_addr_child1 = tmp;
+          }
+
+          ++stack_ptr;
+          kernel_assert(stack_ptr < BVH_STACK_SIZE);
+          traversal_stack[stack_ptr] = node_addr_child1;
+        }
+        else {
+          /* One child was intersected. */
+          if (traverse_mask == 2) {
+            node_addr = node_addr_child1;
+          }
+          else if (traverse_mask == 0) {
+            /* Neither child was intersected. */
+            node_addr = traversal_stack[stack_ptr];
+            --stack_ptr;
+          }
+        }
+      }
+
+      /* if node is leaf, fetch triangle list */
+      if (node_addr < 0) {
+        float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
+        int prim_addr = __float_as_int(leaf.x);
 
 #if BVH_FEATURE(BVH_INSTANCING)
-				if(prim_addr >= 0) {
+        if (prim_addr >= 0) {
 #endif
-					const int prim_addr2 = __float_as_int(leaf.y);
-					const uint type = __float_as_int(leaf.w);
-
-					/* pop */
-					node_addr = traversal_stack[stack_ptr];
-					--stack_ptr;
-
-					/* primitive intersection */
-					switch(type & PRIMITIVE_ALL) {
-						case PRIMITIVE_TRIANGLE: {
-							/* intersect ray against primitive */
-							for(; prim_addr < prim_addr2; prim_addr++) {
-								kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
-								/* only primitives from volume object */
-								uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, prim_addr): object;
-								int object_flag = kernel_tex_fetch(__object_flag, tri_object);
-								if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
-									continue;
-								}
-								triangle_intersect(kg,
-								                   isect,
-								                   P,
-								                   dir,
-								                   visibility,
-								                   object,
-								                   prim_addr);
-							}
-							break;
-						}
+          const int prim_addr2 = __float_as_int(leaf.y);
+          const uint type = __float_as_int(leaf.w);
+
+          /* pop */
+          node_addr = traversal_stack[stack_ptr];
+          --stack_ptr;
+
+          /* primitive intersection */
+          switch (type & PRIMITIVE_ALL) {
+            case PRIMITIVE_TRIANGLE: {
+              /* intersect ray against primitive */
+              for (; prim_addr < prim_addr2; prim_addr++) {
+                kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
+                /* only primitives from volume object */
+                uint tri_object = (object == OBJECT_NONE) ?
+                                      kernel_tex_fetch(__prim_object, prim_addr) :
+                                      object;
+                int object_flag = kernel_tex_fetch(__object_flag, tri_object);
+                if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
+                  continue;
+                }
+                triangle_intersect(kg, isect, P, dir, visibility, object, prim_addr);
+              }
+              break;
+            }
 #if BVH_FEATURE(BVH_MOTION)
-						case PRIMITIVE_MOTION_TRIANGLE: {
-							/* intersect ray against primitive */
-							for(; prim_addr < prim_addr2; prim_addr++) {
-								kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
-								/* only primitives from volume object */
-								uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, prim_addr): object;
-								int object_flag = kernel_tex_fetch(__object_flag, tri_object);
-								if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
-									continue;
-								}
-								motion_triangle_intersect(kg,
-								                          isect,
-								                          P,
-								                          dir,
-								                          ray->time,
-								                          visibility,
-								                          object,
-								                          prim_addr);
-							}
-							break;
-						}
+            case PRIMITIVE_MOTION_TRIANGLE: {
+              /* intersect ray against primitive */
+              for (; prim_addr < prim_addr2; prim_addr++) {
+                kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
+                /* only primitives from volume object */
+                uint tri_object = (object == OBJECT_NONE) ?
+                                      kernel_tex_fetch(__prim_object, prim_addr) :
+                                      object;
+                int object_flag = kernel_tex_fetch(__object_flag, tri_object);
+                if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
+                  continue;
+                }
+                motion_triangle_intersect(
+                    kg, isect, P, dir, ray->time, visibility, object, prim_addr);
+              }
+              break;
+            }
 #endif
-						default: {
-							break;
-						}
-					}
-				}
+            default: {
+              break;
+            }
+          }
+        }
 #if BVH_FEATURE(BVH_INSTANCING)
-				else {
-					/* instance push */
-					object = kernel_tex_fetch(__prim_object, -prim_addr-1);
-					int object_flag = kernel_tex_fetch(__object_flag, object);
-					if(object_flag & SD_OBJECT_HAS_VOLUME) {
+        else {
+          /* instance push */
+          object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
+          int object_flag = kernel_tex_fetch(__object_flag, object);
+          if (object_flag & SD_OBJECT_HAS_VOLUME) {
 #  if BVH_FEATURE(BVH_MOTION)
-						isect->t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
+            isect->t = bvh_instance_motion_push(
+                kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
 #  else
-						isect->t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect->t);
+            isect->t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect->t);
 #  endif
 
 #  if defined(__KERNEL_SSE2__)
-						Psplat[0] = ssef(P.x);
-						Psplat[1] = ssef(P.y);
-						Psplat[2] = ssef(P.z);
+            Psplat[0] = ssef(P.x);
+            Psplat[1] = ssef(P.y);
+            Psplat[2] = ssef(P.z);
 
-						tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
+            tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
 #    if BVH_FEATURE(BVH_HAIR)
-						tfar = ssef(isect->t);
+            tfar = ssef(isect->t);
 #    endif
 
-						gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
+            gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
 #  endif
 
-						++stack_ptr;
-						kernel_assert(stack_ptr < BVH_STACK_SIZE);
-						traversal_stack[stack_ptr] = ENTRYPOINT_SENTINEL;
-
-						node_addr = kernel_tex_fetch(__object_node, object);
-					}
-					else {
-						/* pop */
-						object = OBJECT_NONE;
-						node_addr = traversal_stack[stack_ptr];
-						--stack_ptr;
-					}
-				}
-			}
-#endif  /* FEATURE(BVH_INSTANCING) */
-		} while(node_addr != ENTRYPOINT_SENTINEL);
+            ++stack_ptr;
+            kernel_assert(stack_ptr < BVH_STACK_SIZE);
+            traversal_stack[stack_ptr] = ENTRYPOINT_SENTINEL;
+
+            node_addr = kernel_tex_fetch(__object_node, object);
+          }
+          else {
+            /* pop */
+            object = OBJECT_NONE;
+            node_addr = traversal_stack[stack_ptr];
+            --stack_ptr;
+          }
+        }
+      }
+#endif /* FEATURE(BVH_INSTANCING) */
+    } while (node_addr != ENTRYPOINT_SENTINEL);
 
 #if BVH_FEATURE(BVH_INSTANCING)
-		if(stack_ptr >= 0) {
-			kernel_assert(object != OBJECT_NONE);
+    if (stack_ptr >= 0) {
+      kernel_assert(object != OBJECT_NONE);
 
-			/* instance pop */
+      /* instance pop */
 #  if BVH_FEATURE(BVH_MOTION)
-			isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
+      isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
 #  else
-			isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
+      isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
 #  endif
 
 #  if defined(__KERNEL_SSE2__)
-			Psplat[0] = ssef(P.x);
-			Psplat[1] = ssef(P.y);
-			Psplat[2] = ssef(P.z);
+      Psplat[0] = ssef(P.x);
+      Psplat[1] = ssef(P.y);
+      Psplat[2] = ssef(P.z);
 
-			tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
+      tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
 #    if BVH_FEATURE(BVH_HAIR)
-			tfar = ssef(isect->t);
+      tfar = ssef(isect->t);
 #    endif
 
-			gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
+      gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
 #  endif
 
-			object = OBJECT_NONE;
-			node_addr = traversal_stack[stack_ptr];
-			--stack_ptr;
-		}
-#endif  /* FEATURE(BVH_MOTION) */
-	} while(node_addr != ENTRYPOINT_SENTINEL);
+      object = OBJECT_NONE;
+      node_addr = traversal_stack[stack_ptr];
+      --stack_ptr;
+    }
+#endif /* FEATURE(BVH_MOTION) */
+  } while (node_addr != ENTRYPOINT_SENTINEL);
 
-	return (isect->prim != PRIM_NONE);
+  return (isect->prim != PRIM_NONE);
 }
 
 ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg,
@@ -311,29 +304,20 @@ ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg,
                                          Intersection *isect,
                                          const uint visibility)
 {
-	switch(kernel_data.bvh.bvh_layout) {
+  switch (kernel_data.bvh.bvh_layout) {
 #ifdef __KERNEL_AVX2__
-		case BVH_LAYOUT_BVH8:
-			return BVH_FUNCTION_FULL_NAME(OBVH)(kg,
-			                                    ray,
-			                                    isect,
-			                                    visibility);
+    case BVH_LAYOUT_BVH8:
+      return BVH_FUNCTION_FULL_NAME(OBVH)(kg, ray, isect, visibility);
 #endif
 #ifdef __QBVH__
-		case BVH_LAYOUT_BVH4:
-			return BVH_FUNCTION_FULL_NAME(QBVH)(kg,
-			                                    ray,
-			                                    isect,
-			                                    visibility);
+    case BVH_LAYOUT_BVH4:
+      return BVH_FUNCTION_FULL_NAME(QBVH)(kg, ray, isect, visibility);
 #endif
-		case BVH_LAYOUT_BVH2:
-			return BVH_FUNCTION_FULL_NAME(BVH)(kg,
-			                                   ray,
-			                                   isect,
-			                                   visibility);
-	}
-	kernel_assert(!"Should not happen");
-	return false;
+    case BVH_LAYOUT_BVH2:
+      return BVH_FUNCTION_FULL_NAME(BVH)(kg, ray, isect, visibility);
+  }
+  kernel_assert(!"Should not happen");
+  return false;
 }
 
 #undef BVH_FUNCTION_NAME
diff --git a/intern/cycles/kernel/bvh/bvh_volume_all.h b/intern/cycles/kernel/bvh/bvh_volume_all.h
index f3ca4058460..ae8c4d12e8a 100644
--- a/intern/cycles/kernel/bvh/bvh_volume_all.h
+++ b/intern/cycles/kernel/bvh/bvh_volume_all.h
@@ -19,9 +19,9 @@
 
 #ifdef __QBVH__
 #  include "kernel/bvh/qbvh_volume_all.h"
-#ifdef __KERNEL_AVX2__
-#  include "kernel/bvh/obvh_volume_all.h"
-#endif
+#  ifdef __KERNEL_AVX2__
+#    include "kernel/bvh/obvh_volume_all.h"
+#  endif
 #endif
 
 #if BVH_FEATURE(BVH_HAIR)
@@ -43,342 +43,337 @@ ccl_device
 #else
 ccl_device_inline
 #endif
-uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
-                                 const Ray *ray,
-                                 Intersection *isect_array,
-                                 const uint max_hits,
-                                 const uint visibility)
+    uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
+                                     const Ray *ray,
+                                     Intersection *isect_array,
+                                     const uint max_hits,
+                                     const uint visibility)
 {
-	/* todo:
-	 * - test if pushing distance on the stack helps (for non shadow rays)
-	 * - separate version for shadow rays
-	 * - likely and unlikely for if() statements
-	 * - test restrict attribute for pointers
-	 */
-
-	/* traversal stack in CUDA thread-local memory */
-	int traversal_stack[BVH_STACK_SIZE];
-	traversal_stack[0] = ENTRYPOINT_SENTINEL;
-
-	/* traversal variables in registers */
-	int stack_ptr = 0;
-	int node_addr = kernel_data.bvh.root;
-
-	/* ray parameters in registers */
-	const float tmax = ray->t;
-	float3 P = ray->P;
-	float3 dir = bvh_clamp_direction(ray->D);
-	float3 idir = bvh_inverse_direction(dir);
-	int object = OBJECT_NONE;
-	float isect_t = tmax;
+  /* todo:
+   * - test if pushing distance on the stack helps (for non shadow rays)
+   * - separate version for shadow rays
+   * - likely and unlikely for if() statements
+   * - test restrict attribute for pointers
+   */
+
+  /* traversal stack in CUDA thread-local memory */
+  int traversal_stack[BVH_STACK_SIZE];
+  traversal_stack[0] = ENTRYPOINT_SENTINEL;
+
+  /* traversal variables in registers */
+  int stack_ptr = 0;
+  int node_addr = kernel_data.bvh.root;
+
+  /* ray parameters in registers */
+  const float tmax = ray->t;
+  float3 P = ray->P;
+  float3 dir = bvh_clamp_direction(ray->D);
+  float3 idir = bvh_inverse_direction(dir);
+  int object = OBJECT_NONE;
+  float isect_t = tmax;
 
 #if BVH_FEATURE(BVH_MOTION)
-	Transform ob_itfm;
+  Transform ob_itfm;
 #endif
 
 #if BVH_FEATURE(BVH_INSTANCING)
-	int num_hits_in_instance = 0;
+  int num_hits_in_instance = 0;
 #endif
 
-	uint num_hits = 0;
-	isect_array->t = tmax;
+  uint num_hits = 0;
+  isect_array->t = tmax;
 
 #if defined(__KERNEL_SSE2__)
-	const shuffle_swap_t shuf_identity = shuffle_swap_identity();
-	const shuffle_swap_t shuf_swap = shuffle_swap_swap();
+  const shuffle_swap_t shuf_identity = shuffle_swap_identity();
+  const shuffle_swap_t shuf_swap = shuffle_swap_swap();
 
-	const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
-	ssef Psplat[3], idirsplat[3];
+  const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
+  ssef Psplat[3], idirsplat[3];
 #  if BVH_FEATURE(BVH_HAIR)
-	ssef tnear(0.0f), tfar(isect_t);
+  ssef tnear(0.0f), tfar(isect_t);
 #  endif
-	shuffle_swap_t shufflexyz[3];
+  shuffle_swap_t shufflexyz[3];
 
-	Psplat[0] = ssef(P.x);
-	Psplat[1] = ssef(P.y);
-	Psplat[2] = ssef(P.z);
+  Psplat[0] = ssef(P.x);
+  Psplat[1] = ssef(P.y);
+  Psplat[2] = ssef(P.z);
 
-	ssef tsplat(0.0f, 0.0f, -isect_t, -isect_t);
+  ssef tsplat(0.0f, 0.0f, -isect_t, -isect_t);
 
-	gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
-#endif  /* __KERNEL_SSE2__ */
+  gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
+#endif /* __KERNEL_SSE2__ */
 
-	/* traversal loop */
-	do {
-		do {
-			/* traverse internal nodes */
-			while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
-				int node_addr_child1, traverse_mask;
-				float dist[2];
-				float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
+  /* traversal loop */
+  do {
+    do {
+      /* traverse internal nodes */
+      while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
+        int node_addr_child1, traverse_mask;
+        float dist[2];
+        float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
 
 #if !defined(__KERNEL_SSE2__)
-				traverse_mask = NODE_INTERSECT(kg,
-				                               P,
+        traverse_mask = NODE_INTERSECT(kg,
+                                       P,
 #  if BVH_FEATURE(BVH_HAIR)
-				                               dir,
+                                       dir,
 #  endif
-				                               idir,
-				                               isect_t,
-				                               node_addr,
-				                               visibility,
-				                               dist);
+                                       idir,
+                                       isect_t,
+                                       node_addr,
+                                       visibility,
+                                       dist);
 #else  // __KERNEL_SSE2__
-				traverse_mask = NODE_INTERSECT(kg,
-				                               P,
-				                               dir,
+        traverse_mask = NODE_INTERSECT(kg,
+                                       P,
+                                       dir,
 #  if BVH_FEATURE(BVH_HAIR)
-				                               tnear,
-				                               tfar,
+                                       tnear,
+                                       tfar,
 #  endif
-				                               tsplat,
-				                               Psplat,
-				                               idirsplat,
-				                               shufflexyz,
-				                               node_addr,
-				                               visibility,
-				                               dist);
+                                       tsplat,
+                                       Psplat,
+                                       idirsplat,
+                                       shufflexyz,
+                                       node_addr,
+                                       visibility,
+                                       dist);
 #endif  // __KERNEL_SSE2__
 
-				node_addr = __float_as_int(cnodes.z);
-				node_addr_child1 = __float_as_int(cnodes.w);
-
-				if(traverse_mask == 3) {
-					/* Both children were intersected, push the farther one. */
-					bool is_closest_child1 = (dist[1] < dist[0]);
-					if(is_closest_child1) {
-						int tmp = node_addr;
-						node_addr = node_addr_child1;
-						node_addr_child1 = tmp;
-					}
-
-					++stack_ptr;
-					kernel_assert(stack_ptr < BVH_STACK_SIZE);
-					traversal_stack[stack_ptr] = node_addr_child1;
-				}
-				else {
-					/* One child was intersected. */
-					if(traverse_mask == 2) {
-						node_addr = node_addr_child1;
-					}
-					else if(traverse_mask == 0) {
-						/* Neither child was intersected. */
-						node_addr = traversal_stack[stack_ptr];
-						--stack_ptr;
-					}
-				}
-			}
-
-			/* if node is leaf, fetch triangle list */
-			if(node_addr < 0) {
-				float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1));
-				int prim_addr = __float_as_int(leaf.x);
+        node_addr = __float_as_int(cnodes.z);
+        node_addr_child1 = __float_as_int(cnodes.w);
+
+        if (traverse_mask == 3) {
+          /* Both children were intersected, push the farther one. */
+          bool is_closest_child1 = (dist[1] < dist[0]);
+          if (is_closest_child1) {
+            int tmp = node_addr;
+            node_addr = node_addr_child1;
+            node_addr_child1 = tmp;
+          }
+
+          ++stack_ptr;
+          kernel_assert(stack_ptr < BVH_STACK_SIZE);
+          traversal_stack[stack_ptr] = node_addr_child1;
+        }
+        else {
+          /* One child was intersected. */
+          if (traverse_mask == 2) {
+            node_addr = node_addr_child1;
+          }
+          else if (traverse_mask == 0) {
+            /* Neither child was intersected. */
+            node_addr = traversal_stack[stack_ptr];
+            --stack_ptr;
+          }
+        }
+      }
+
+      /* if node is leaf, fetch triangle list */
+      if (node_addr < 0) {
+        float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
+        int prim_addr = __float_as_int(leaf.x);
 
 #if BVH_FEATURE(BVH_INSTANCING)
-				if(prim_addr >= 0) {
+        if (prim_addr >= 0) {
 #endif
-					const int prim_addr2 = __float_as_int(leaf.y);
-					const uint type = __float_as_int(leaf.w);
-					bool hit;
-
-					/* pop */
-					node_addr = traversal_stack[stack_ptr];
-					--stack_ptr;
-
-					/* primitive intersection */
-					switch(type & PRIMITIVE_ALL) {
-						case PRIMITIVE_TRIANGLE: {
-							/* intersect ray against primitive */
-							for(; prim_addr < prim_addr2; prim_addr++) {
-								kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
-								/* only primitives from volume object */
-								uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, prim_addr): object;
-								int object_flag = kernel_tex_fetch(__object_flag, tri_object);
-								if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
-									continue;
-								}
-								hit = triangle_intersect(kg,
-								                         isect_array,
-								                         P,
-								                         dir,
-								                         visibility,
-								                         object,
-								                         prim_addr);
-								if(hit) {
-									/* Move on to next entry in intersections array. */
-									isect_array++;
-									num_hits++;
+          const int prim_addr2 = __float_as_int(leaf.y);
+          const uint type = __float_as_int(leaf.w);
+          bool hit;
+
+          /* pop */
+          node_addr = traversal_stack[stack_ptr];
+          --stack_ptr;
+
+          /* primitive intersection */
+          switch (type & PRIMITIVE_ALL) {
+            case PRIMITIVE_TRIANGLE: {
+              /* intersect ray against primitive */
+              for (; prim_addr < prim_addr2; prim_addr++) {
+                kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
+                /* only primitives from volume object */
+                uint tri_object = (object == OBJECT_NONE) ?
+                                      kernel_tex_fetch(__prim_object, prim_addr) :
+                                      object;
+                int object_flag = kernel_tex_fetch(__object_flag, tri_object);
+                if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
+                  continue;
+                }
+                hit = triangle_intersect(kg, isect_array, P, dir, visibility, object, prim_addr);
+                if (hit) {
+                  /* Move on to next entry in intersections array. */
+                  isect_array++;
+                  num_hits++;
 #if BVH_FEATURE(BVH_INSTANCING)
-									num_hits_in_instance++;
+                  num_hits_in_instance++;
 #endif
-									isect_array->t = isect_t;
-									if(num_hits == max_hits) {
+                  isect_array->t = isect_t;
+                  if (num_hits == max_hits) {
 #if BVH_FEATURE(BVH_INSTANCING)
-										if(object != OBJECT_NONE) {
+                    if (object != OBJECT_NONE) {
 #  if BVH_FEATURE(BVH_MOTION)
-											float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir));
+                      float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir));
 #  else
-											Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
-											float t_fac = 1.0f / len(transform_direction(&itfm, dir));
+                      Transform itfm = object_fetch_transform(
+                          kg, object, OBJECT_INVERSE_TRANSFORM);
+                      float t_fac = 1.0f / len(transform_direction(&itfm, dir));
 #  endif
-											for(int i = 0; i < num_hits_in_instance; i++) {
-												(isect_array-i-1)->t *= t_fac;
-											}
-										}
-#endif  /* BVH_FEATURE(BVH_INSTANCING) */
-										return num_hits;
-									}
-								}
-							}
-							break;
-						}
+                      for (int i = 0; i < num_hits_in_instance; i++) {
+                        (isect_array - i - 1)->t *= t_fac;
+                      }
+                    }
+#endif /* BVH_FEATURE(BVH_INSTANCING) */
+                    return num_hits;
+                  }
+                }
+              }
+              break;
+            }
 #if BVH_FEATURE(BVH_MOTION)
-						case PRIMITIVE_MOTION_TRIANGLE: {
-							/* intersect ray against primitive */
-							for(; prim_addr < prim_addr2; prim_addr++) {
-								kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
-								/* only primitives from volume object */
-								uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, prim_addr): object;
-								int object_flag = kernel_tex_fetch(__object_flag, tri_object);
-								if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
-									continue;
-								}
-								hit = motion_triangle_intersect(kg,
-								                                isect_array,
-								                                P,
-								                                dir,
-								                                ray->time,
-								                                visibility,
-								                                object,
-								                                prim_addr);
-								if(hit) {
-									/* Move on to next entry in intersections array. */
-									isect_array++;
-									num_hits++;
+            case PRIMITIVE_MOTION_TRIANGLE: {
+              /* intersect ray against primitive */
+              for (; prim_addr < prim_addr2; prim_addr++) {
+                kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
+                /* only primitives from volume object */
+                uint tri_object = (object == OBJECT_NONE) ?
+                                      kernel_tex_fetch(__prim_object, prim_addr) :
+                                      object;
+                int object_flag = kernel_tex_fetch(__object_flag, tri_object);
+                if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
+                  continue;
+                }
+                hit = motion_triangle_intersect(
+                    kg, isect_array, P, dir, ray->time, visibility, object, prim_addr);
+                if (hit) {
+                  /* Move on to next entry in intersections array. */
+                  isect_array++;
+                  num_hits++;
 #  if BVH_FEATURE(BVH_INSTANCING)
-									num_hits_in_instance++;
+                  num_hits_in_instance++;
 #  endif
-									isect_array->t = isect_t;
-									if(num_hits == max_hits) {
+                  isect_array->t = isect_t;
+                  if (num_hits == max_hits) {
 #  if BVH_FEATURE(BVH_INSTANCING)
-										if(object != OBJECT_NONE) {
+                    if (object != OBJECT_NONE) {
 #    if BVH_FEATURE(BVH_MOTION)
-											float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir));
+                      float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir));
 #    else
-											Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
-											float t_fac = 1.0f / len(transform_direction(&itfm, dir));
+                      Transform itfm = object_fetch_transform(
+                          kg, object, OBJECT_INVERSE_TRANSFORM);
+                      float t_fac = 1.0f / len(transform_direction(&itfm, dir));
 #    endif
-											for(int i = 0; i < num_hits_in_instance; i++) {
-												(isect_array-i-1)->t *= t_fac;
-											}
-										}
-#  endif  /* BVH_FEATURE(BVH_INSTANCING) */
-										return num_hits;
-									}
-								}
-							}
-							break;
-						}
-#endif  /* BVH_MOTION */
-						default: {
-							break;
-						}
-					}
-				}
+                      for (int i = 0; i < num_hits_in_instance; i++) {
+                        (isect_array - i - 1)->t *= t_fac;
+                      }
+                    }
+#  endif /* BVH_FEATURE(BVH_INSTANCING) */
+                    return num_hits;
+                  }
+                }
+              }
+              break;
+            }
+#endif /* BVH_MOTION */
+            default: {
+              break;
+            }
+          }
+        }
 #if BVH_FEATURE(BVH_INSTANCING)
-				else {
-					/* instance push */
-					object = kernel_tex_fetch(__prim_object, -prim_addr-1);
-					int object_flag = kernel_tex_fetch(__object_flag, object);
-					if(object_flag & SD_OBJECT_HAS_VOLUME) {
+        else {
+          /* instance push */
+          object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
+          int object_flag = kernel_tex_fetch(__object_flag, object);
+          if (object_flag & SD_OBJECT_HAS_VOLUME) {
 #  if BVH_FEATURE(BVH_MOTION)
-						isect_t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
+            isect_t = bvh_instance_motion_push(
+                kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
 #  else
-						isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
+            isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
 #  endif
 
-						num_hits_in_instance = 0;
-						isect_array->t = isect_t;
+            num_hits_in_instance = 0;
+            isect_array->t = isect_t;
 
 #  if defined(__KERNEL_SSE2__)
-						Psplat[0] = ssef(P.x);
-						Psplat[1] = ssef(P.y);
-						Psplat[2] = ssef(P.z);
+            Psplat[0] = ssef(P.x);
+            Psplat[1] = ssef(P.y);
+            Psplat[2] = ssef(P.z);
 
-						tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t);
+            tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t);
 #    if BVH_FEATURE(BVH_HAIR)
-						tfar = ssef(isect_t);
+            tfar = ssef(isect_t);
 #    endif
 
-						gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
+            gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
 #  endif
 
-						++stack_ptr;
-						kernel_assert(stack_ptr < BVH_STACK_SIZE);
-						traversal_stack[stack_ptr] = ENTRYPOINT_SENTINEL;
-
-						node_addr = kernel_tex_fetch(__object_node, object);
-					}
-					else {
-						/* pop */
-						object = OBJECT_NONE;
-						node_addr = traversal_stack[stack_ptr];
-						--stack_ptr;
-					}
-				}
-			}
-#endif  /* FEATURE(BVH_INSTANCING) */
-		} while(node_addr != ENTRYPOINT_SENTINEL);
+            ++stack_ptr;
+            kernel_assert(stack_ptr < BVH_STACK_SIZE);
+            traversal_stack[stack_ptr] = ENTRYPOINT_SENTINEL;
+
+            node_addr = kernel_tex_fetch(__object_node, object);
+          }
+          else {
+            /* pop */
+            object = OBJECT_NONE;
+            node_addr = traversal_stack[stack_ptr];
+            --stack_ptr;
+          }
+        }
+      }
+#endif /* FEATURE(BVH_INSTANCING) */
+    } while (node_addr != ENTRYPOINT_SENTINEL);
 
 #if BVH_FEATURE(BVH_INSTANCING)
-		if(stack_ptr >= 0) {
-			kernel_assert(object != OBJECT_NONE);
+    if (stack_ptr >= 0) {
+      kernel_assert(object != OBJECT_NONE);
 
-			/* Instance pop. */
-			if(num_hits_in_instance) {
-				float t_fac;
+      /* Instance pop. */
+      if (num_hits_in_instance) {
+        float t_fac;
 #  if BVH_FEATURE(BVH_MOTION)
-				bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm);
+        bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm);
 #  else
-				bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
+        bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
 #  endif
-				/* Scale isect->t to adjust for instancing. */
-				for(int i = 0; i < num_hits_in_instance; i++) {
-					(isect_array-i-1)->t *= t_fac;
-				}
-			}
-			else {
+        /* Scale isect->t to adjust for instancing. */
+        for (int i = 0; i < num_hits_in_instance; i++) {
+          (isect_array - i - 1)->t *= t_fac;
+        }
+      }
+      else {
 #  if BVH_FEATURE(BVH_MOTION)
-				bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm);
+        bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm);
 #  else
-				bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
+        bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
 #  endif
-			}
+      }
 
-			isect_t = tmax;
-			isect_array->t = isect_t;
+      isect_t = tmax;
+      isect_array->t = isect_t;
 
 #  if defined(__KERNEL_SSE2__)
-			Psplat[0] = ssef(P.x);
-			Psplat[1] = ssef(P.y);
-			Psplat[2] = ssef(P.z);
+      Psplat[0] = ssef(P.x);
+      Psplat[1] = ssef(P.y);
+      Psplat[2] = ssef(P.z);
 
-			tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t);
+      tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t);
 #    if BVH_FEATURE(BVH_HAIR)
-			tfar = ssef(isect_t);
+      tfar = ssef(isect_t);
 #    endif
 
-			gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
+      gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
 #  endif
 
-			object = OBJECT_NONE;
-			node_addr = traversal_stack[stack_ptr];
-			--stack_ptr;
-		}
-#endif  /* FEATURE(BVH_INSTANCING) */
-	} while(node_addr != ENTRYPOINT_SENTINEL);
+      object = OBJECT_NONE;
+      node_addr = traversal_stack[stack_ptr];
+      --stack_ptr;
+    }
+#endif /* FEATURE(BVH_INSTANCING) */
+  } while (node_addr != ENTRYPOINT_SENTINEL);
 
-	return num_hits;
+  return num_hits;
 }
 
 ccl_device_inline uint BVH_FUNCTION_NAME(KernelGlobals *kg,
@@ -387,32 +382,20 @@ ccl_device_inline uint BVH_FUNCTION_NAME(KernelGlobals *kg,
                                          const uint max_hits,
                                          const uint visibility)
 {
-	switch(kernel_data.bvh.bvh_layout) {
+  switch (kernel_data.bvh.bvh_layout) {
 #ifdef __KERNEL_AVX2__
-		case BVH_LAYOUT_BVH8:
-			return BVH_FUNCTION_FULL_NAME(OBVH)(kg,
-			                                    ray,
-			                                    isect_array,
-			                                    max_hits,
-			                                    visibility);
+    case BVH_LAYOUT_BVH8:
+      return BVH_FUNCTION_FULL_NAME(OBVH)(kg, ray, isect_array, max_hits, visibility);
 #endif
 #ifdef __QBVH__
-		case BVH_LAYOUT_BVH4:
-			return BVH_FUNCTION_FULL_NAME(QBVH)(kg,
-			                                    ray,
-			                                    isect_array,
-			                                    max_hits,
-			                                    visibility);
+    case BVH_LAYOUT_BVH4:
+      return BVH_FUNCTION_FULL_NAME(QBVH)(kg, ray, isect_array, max_hits, visibility);
 #endif
-		case BVH_LAYOUT_BVH2:
-			return BVH_FUNCTION_FULL_NAME(BVH)(kg,
-			                                   ray,
-			                                   isect_array,
-			                                   max_hits,
-			                                   visibility);
-	}
-	kernel_assert(!"Should not happen");
-	return 0;
+    case BVH_LAYOUT_BVH2:
+      return BVH_FUNCTION_FULL_NAME(BVH)(kg, ray, isect_array, max_hits, visibility);
+  }
+  kernel_assert(!"Should not happen");
+  return 0;
 }
 
 #undef BVH_FUNCTION_NAME
diff --git a/intern/cycles/kernel/bvh/obvh_local.h b/intern/cycles/kernel/bvh/obvh_local.h
index f449cefb335..e6bb548bc5b 100644
--- a/intern/cycles/kernel/bvh/obvh_local.h
+++ b/intern/cycles/kernel/bvh/obvh_local.h
@@ -34,372 +34,365 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
                                              uint *lcg_state,
                                              int max_hits)
 {
-	/* Traversal stack in CUDA thread-local memory. */
-	OBVHStackItem traversal_stack[BVH_OSTACK_SIZE];
-	traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
+  /* Traversal stack in CUDA thread-local memory. */
+  OBVHStackItem traversal_stack[BVH_OSTACK_SIZE];
+  traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
 
-	/* Traversal variables in registers. */
-	int stack_ptr = 0;
-	int node_addr = kernel_tex_fetch(__object_node, local_object);
+  /* Traversal variables in registers. */
+  int stack_ptr = 0;
+  int node_addr = kernel_tex_fetch(__object_node, local_object);
 
-	/* Ray parameters in registers. */
-	float3 P = ray->P;
-	float3 dir = bvh_clamp_direction(ray->D);
-	float3 idir = bvh_inverse_direction(dir);
-	int object = OBJECT_NONE;
-	float isect_t = ray->t;
+  /* Ray parameters in registers. */
+  float3 P = ray->P;
+  float3 dir = bvh_clamp_direction(ray->D);
+  float3 idir = bvh_inverse_direction(dir);
+  int object = OBJECT_NONE;
+  float isect_t = ray->t;
 
-	if(local_isect != NULL) {
-		local_isect->num_hits = 0;
-	}
-	kernel_assert((local_isect == NULL) == (max_hits == 0));
+  if (local_isect != NULL) {
+    local_isect->num_hits = 0;
+  }
+  kernel_assert((local_isect == NULL) == (max_hits == 0));
 
-	const int object_flag = kernel_tex_fetch(__object_flag, local_object);
-	if(!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
+  const int object_flag = kernel_tex_fetch(__object_flag, local_object);
+  if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
 #if BVH_FEATURE(BVH_MOTION)
-		Transform ob_itfm;
-		isect_t = bvh_instance_motion_push(kg,
-		                                   local_object,
-		                                   ray,
-		                                   &P,
-		                                   &dir,
-		                                   &idir,
-		                                   isect_t,
-		                                   &ob_itfm);
+    Transform ob_itfm;
+    isect_t = bvh_instance_motion_push(kg, local_object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
 #else
-		isect_t = bvh_instance_push(kg, local_object, ray, &P, &dir, &idir, isect_t);
+    isect_t = bvh_instance_push(kg, local_object, ray, &P, &dir, &idir, isect_t);
 #endif
-		object = local_object;
-	}
+    object = local_object;
+  }
 
-	avxf tnear(0.0f), tfar(isect_t);
+  avxf tnear(0.0f), tfar(isect_t);
 #if BVH_FEATURE(BVH_HAIR)
-	avx3f dir4(avxf(dir.x), avxf(dir.y), avxf(dir.z));
+  avx3f dir4(avxf(dir.x), avxf(dir.y), avxf(dir.z));
 #endif
-	avx3f idir4(avxf(idir.x), avxf(idir.y), avxf(idir.z));
+  avx3f idir4(avxf(idir.x), avxf(idir.y), avxf(idir.z));
 
 #ifdef __KERNEL_AVX2__
-	float3 P_idir = P*idir;
-	avx3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
+  float3 P_idir = P * idir;
+  avx3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
 #endif
 #if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
-	avx3f org4(avxf(P.x), avxf(P.y), avxf(P.z));
+  avx3f org4(avxf(P.x), avxf(P.y), avxf(P.z));
 #endif
 
-	/* Offsets to select the side that becomes the lower or upper bound. */
-	int near_x, near_y, near_z;
-	int far_x, far_y, far_z;
-	obvh_near_far_idx_calc(idir,
-	                       &near_x, &near_y, &near_z,
-	                       &far_x, &far_y, &far_z);
+  /* Offsets to select the side that becomes the lower or upper bound. */
+  int near_x, near_y, near_z;
+  int far_x, far_y, far_z;
+  obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
 
-	/* Traversal loop. */
-	do {
-		do {
-			/* Traverse internal nodes. */
-			while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
-				avxf dist;
-				int child_mask = NODE_INTERSECT(kg,
-				                                tnear,
-				                                tfar,
+  /* Traversal loop. */
+  do {
+    do {
+      /* Traverse internal nodes. */
+      while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
+        avxf dist;
+        int child_mask = NODE_INTERSECT(kg,
+                                        tnear,
+                                        tfar,
 #ifdef __KERNEL_AVX2__
-				                                P_idir4,
+                                        P_idir4,
 #endif
 #if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
-				                                org4,
+                                        org4,
 #endif
 #if BVH_FEATURE(BVH_HAIR)
-				                                dir4,
+                                        dir4,
 #endif
-				                                idir4,
-				                                near_x, near_y, near_z,
-				                                far_x, far_y, far_z,
-				                                node_addr,
-				                                &dist);
+                                        idir4,
+                                        near_x,
+                                        near_y,
+                                        near_z,
+                                        far_x,
+                                        far_y,
+                                        far_z,
+                                        node_addr,
+                                        &dist);
 
-				if(child_mask != 0) {
-					float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
-					avxf cnodes;
+        if (child_mask != 0) {
+          float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
+          avxf cnodes;
 #if BVH_FEATURE(BVH_HAIR)
-					if(__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
-						cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr+26);
-					}
-					else
+          if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
+            cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 26);
+          }
+          else
 #endif
-					{
-						cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr+14);
-					}
+          {
+            cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 14);
+          }
 
-					/* One child is hit, continue with that child. */
-					int r = __bscf(child_mask);
-					if(child_mask == 0) {
-						node_addr = __float_as_int(cnodes[r]);
-						continue;
-					}
+          /* One child is hit, continue with that child. */
+          int r = __bscf(child_mask);
+          if (child_mask == 0) {
+            node_addr = __float_as_int(cnodes[r]);
+            continue;
+          }
 
-					/* Two children are hit, push far child, and continue with
-					 * closer child.
-					 */
-					int c0 = __float_as_int(cnodes[r]);
-					float d0 = ((float*)&dist)[r];
-					r = __bscf(child_mask);
-					int c1 = __float_as_int(cnodes[r]);
-					float d1 = ((float*)&dist)[r];
-					if(child_mask == 0) {
-						if(d1 < d0) {
-							node_addr = c1;
-							++stack_ptr;
-							kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-							traversal_stack[stack_ptr].addr = c0;
-							traversal_stack[stack_ptr].dist = d0;
-							continue;
-						}
-						else {
-							node_addr = c0;
-							++stack_ptr;
-							kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-							traversal_stack[stack_ptr].addr = c1;
-							traversal_stack[stack_ptr].dist = d1;
-							continue;
-						}
-					}
+          /* Two children are hit, push far child, and continue with
+           * closer child.
+           */
+          int c0 = __float_as_int(cnodes[r]);
+          float d0 = ((float *)&dist)[r];
+          r = __bscf(child_mask);
+          int c1 = __float_as_int(cnodes[r]);
+          float d1 = ((float *)&dist)[r];
+          if (child_mask == 0) {
+            if (d1 < d0) {
+              node_addr = c1;
+              ++stack_ptr;
+              kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+              traversal_stack[stack_ptr].addr = c0;
+              traversal_stack[stack_ptr].dist = d0;
+              continue;
+            }
+            else {
+              node_addr = c0;
+              ++stack_ptr;
+              kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+              traversal_stack[stack_ptr].addr = c1;
+              traversal_stack[stack_ptr].dist = d1;
+              continue;
+            }
+          }
 
-					/* Here starts the slow path for 3 or 4 hit children. We push
-					 * all nodes onto the stack to sort them there.
-					 */
-					++stack_ptr;
-					kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-					traversal_stack[stack_ptr].addr = c1;
-					traversal_stack[stack_ptr].dist = d1;
-					++stack_ptr;
-					kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-					traversal_stack[stack_ptr].addr = c0;
-					traversal_stack[stack_ptr].dist = d0;
+          /* Here starts the slow path for 3 or 4 hit children. We push
+           * all nodes onto the stack to sort them there.
+           */
+          ++stack_ptr;
+          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+          traversal_stack[stack_ptr].addr = c1;
+          traversal_stack[stack_ptr].dist = d1;
+          ++stack_ptr;
+          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+          traversal_stack[stack_ptr].addr = c0;
+          traversal_stack[stack_ptr].dist = d0;
 
-					/* Three children are hit, push all onto stack and sort 3
-					 * stack items, continue with closest child.
-					 */
-					r = __bscf(child_mask);
-					int c2 = __float_as_int(cnodes[r]);
-					float d2 = ((float*)&dist)[r];
-					if(child_mask == 0) {
-						++stack_ptr;
-						kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-						traversal_stack[stack_ptr].addr = c2;
-						traversal_stack[stack_ptr].dist = d2;
-						obvh_stack_sort(&traversal_stack[stack_ptr],
-						                &traversal_stack[stack_ptr - 1],
-						                &traversal_stack[stack_ptr - 2]);
-						node_addr = traversal_stack[stack_ptr].addr;
-						--stack_ptr;
-						continue;
-					}
+          /* Three children are hit, push all onto stack and sort 3
+           * stack items, continue with closest child.
+           */
+          r = __bscf(child_mask);
+          int c2 = __float_as_int(cnodes[r]);
+          float d2 = ((float *)&dist)[r];
+          if (child_mask == 0) {
+            ++stack_ptr;
+            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+            traversal_stack[stack_ptr].addr = c2;
+            traversal_stack[stack_ptr].dist = d2;
+            obvh_stack_sort(&traversal_stack[stack_ptr],
+                            &traversal_stack[stack_ptr - 1],
+                            &traversal_stack[stack_ptr - 2]);
+            node_addr = traversal_stack[stack_ptr].addr;
+            --stack_ptr;
+            continue;
+          }
 
-					/* Four children are hit, push all onto stack and sort 4
-					 * stack items, continue with closest child.
-					 */
-					r = __bscf(child_mask);
-					int c3 = __float_as_int(cnodes[r]);
-					float d3 = ((float*)&dist)[r];
-					if(child_mask == 0) {
-						++stack_ptr;
-						kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-						traversal_stack[stack_ptr].addr = c3;
-						traversal_stack[stack_ptr].dist = d3;
-						++stack_ptr;
-						kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-						traversal_stack[stack_ptr].addr = c2;
-						traversal_stack[stack_ptr].dist = d2;
-						obvh_stack_sort(&traversal_stack[stack_ptr],
-						                &traversal_stack[stack_ptr - 1],
-						                &traversal_stack[stack_ptr - 2],
-						                &traversal_stack[stack_ptr - 3]);
-						node_addr = traversal_stack[stack_ptr].addr;
-						--stack_ptr;
-						continue;
-					}
+          /* Four children are hit, push all onto stack and sort 4
+           * stack items, continue with closest child.
+           */
+          r = __bscf(child_mask);
+          int c3 = __float_as_int(cnodes[r]);
+          float d3 = ((float *)&dist)[r];
+          if (child_mask == 0) {
+            ++stack_ptr;
+            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+            traversal_stack[stack_ptr].addr = c3;
+            traversal_stack[stack_ptr].dist = d3;
+            ++stack_ptr;
+            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+            traversal_stack[stack_ptr].addr = c2;
+            traversal_stack[stack_ptr].dist = d2;
+            obvh_stack_sort(&traversal_stack[stack_ptr],
+                            &traversal_stack[stack_ptr - 1],
+                            &traversal_stack[stack_ptr - 2],
+                            &traversal_stack[stack_ptr - 3]);
+            node_addr = traversal_stack[stack_ptr].addr;
+            --stack_ptr;
+            continue;
+          }
 
-					++stack_ptr;
-					kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-					traversal_stack[stack_ptr].addr = c3;
-					traversal_stack[stack_ptr].dist = d3;
-					++stack_ptr;
-					kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-					traversal_stack[stack_ptr].addr = c2;
-					traversal_stack[stack_ptr].dist = d2;
+          ++stack_ptr;
+          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+          traversal_stack[stack_ptr].addr = c3;
+          traversal_stack[stack_ptr].dist = d3;
+          ++stack_ptr;
+          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+          traversal_stack[stack_ptr].addr = c2;
+          traversal_stack[stack_ptr].dist = d2;
 
-					/* Five children are hit, push all onto stack and sort 5
-					 * stack items, continue with closest child
-					 */
-					r = __bscf(child_mask);
-					int c4 = __float_as_int(cnodes[r]);
-					float d4 = ((float*)&dist)[r];
-					if(child_mask == 0) {
-						++stack_ptr;
-						kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-						traversal_stack[stack_ptr].addr = c4;
-						traversal_stack[stack_ptr].dist = d4;
-						obvh_stack_sort(&traversal_stack[stack_ptr],
-						                &traversal_stack[stack_ptr - 1],
-						                &traversal_stack[stack_ptr - 2],
-						                &traversal_stack[stack_ptr - 3],
-						                &traversal_stack[stack_ptr - 4]);
-						node_addr = traversal_stack[stack_ptr].addr;
-						--stack_ptr;
-						continue;
-					}
-					/* Six children are hit, push all onto stack and sort 6
-					 * stack items, continue with closest child.
-					 */
-					r = __bscf(child_mask);
-					int c5 = __float_as_int(cnodes[r]);
-					float d5 = ((float*)&dist)[r];
-					if(child_mask == 0) {
-						++stack_ptr;
-						kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-						traversal_stack[stack_ptr].addr = c5;
-						traversal_stack[stack_ptr].dist = d5;
-						++stack_ptr;
-						kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-						traversal_stack[stack_ptr].addr = c4;
-						traversal_stack[stack_ptr].dist = d4;
-						obvh_stack_sort(&traversal_stack[stack_ptr],
-						                &traversal_stack[stack_ptr - 1],
-						                &traversal_stack[stack_ptr - 2],
-						                &traversal_stack[stack_ptr - 3],
-						                &traversal_stack[stack_ptr - 4],
-						                &traversal_stack[stack_ptr - 5]);
-						node_addr = traversal_stack[stack_ptr].addr;
-						--stack_ptr;
-						continue;
-					}
+          /* Five children are hit, push all onto stack and sort 5
+           * stack items, continue with closest child
+           */
+          r = __bscf(child_mask);
+          int c4 = __float_as_int(cnodes[r]);
+          float d4 = ((float *)&dist)[r];
+          if (child_mask == 0) {
+            ++stack_ptr;
+            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+            traversal_stack[stack_ptr].addr = c4;
+            traversal_stack[stack_ptr].dist = d4;
+            obvh_stack_sort(&traversal_stack[stack_ptr],
+                            &traversal_stack[stack_ptr - 1],
+                            &traversal_stack[stack_ptr - 2],
+                            &traversal_stack[stack_ptr - 3],
+                            &traversal_stack[stack_ptr - 4]);
+            node_addr = traversal_stack[stack_ptr].addr;
+            --stack_ptr;
+            continue;
+          }
+          /* Six children are hit, push all onto stack and sort 6
+           * stack items, continue with closest child.
+           */
+          r = __bscf(child_mask);
+          int c5 = __float_as_int(cnodes[r]);
+          float d5 = ((float *)&dist)[r];
+          if (child_mask == 0) {
+            ++stack_ptr;
+            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+            traversal_stack[stack_ptr].addr = c5;
+            traversal_stack[stack_ptr].dist = d5;
+            ++stack_ptr;
+            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+            traversal_stack[stack_ptr].addr = c4;
+            traversal_stack[stack_ptr].dist = d4;
+            obvh_stack_sort(&traversal_stack[stack_ptr],
+                            &traversal_stack[stack_ptr - 1],
+                            &traversal_stack[stack_ptr - 2],
+                            &traversal_stack[stack_ptr - 3],
+                            &traversal_stack[stack_ptr - 4],
+                            &traversal_stack[stack_ptr - 5]);
+            node_addr = traversal_stack[stack_ptr].addr;
+            --stack_ptr;
+            continue;
+          }
 
-					++stack_ptr;
-					kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-					traversal_stack[stack_ptr].addr = c5;
-					traversal_stack[stack_ptr].dist = d5;
-					++stack_ptr;
-					kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-					traversal_stack[stack_ptr].addr = c4;
-					traversal_stack[stack_ptr].dist = d4;
+          ++stack_ptr;
+          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+          traversal_stack[stack_ptr].addr = c5;
+          traversal_stack[stack_ptr].dist = d5;
+          ++stack_ptr;
+          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+          traversal_stack[stack_ptr].addr = c4;
+          traversal_stack[stack_ptr].dist = d4;
 
-					/* Seven children are hit, push all onto stack and sort 7
-					 * stack items, continue with closest child.
-					 */
-					r = __bscf(child_mask);
-					int c6 = __float_as_int(cnodes[r]);
-					float d6 = ((float*)&dist)[r];
-					if(child_mask == 0) {
-						++stack_ptr;
-						kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-						traversal_stack[stack_ptr].addr = c6;
-						traversal_stack[stack_ptr].dist = d6;
-						obvh_stack_sort(&traversal_stack[stack_ptr],
-						                &traversal_stack[stack_ptr - 1],
-						                &traversal_stack[stack_ptr - 2],
-						                &traversal_stack[stack_ptr - 3],
-						                &traversal_stack[stack_ptr - 4],
-						                &traversal_stack[stack_ptr - 5],
-						                &traversal_stack[stack_ptr - 6]);
-						node_addr = traversal_stack[stack_ptr].addr;
-						--stack_ptr;
-						continue;
-					}
-					/* Eight children are hit, push all onto stack and sort 8
-					 * stack items, continue with closest child.
-					 */
-					r = __bscf(child_mask);
-					int c7 = __float_as_int(cnodes[r]);
-					float d7 = ((float*)&dist)[r];
-					++stack_ptr;
-					kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-					traversal_stack[stack_ptr].addr = c7;
-					traversal_stack[stack_ptr].dist = d7;
-					++stack_ptr;
-					kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-					traversal_stack[stack_ptr].addr = c6;
-					traversal_stack[stack_ptr].dist = d6;
-					obvh_stack_sort(&traversal_stack[stack_ptr],
-					                &traversal_stack[stack_ptr - 1],
-					                &traversal_stack[stack_ptr - 2],
-					                &traversal_stack[stack_ptr - 3],
-					                &traversal_stack[stack_ptr - 4],
-					                &traversal_stack[stack_ptr - 5],
-					                &traversal_stack[stack_ptr - 6],
-					                &traversal_stack[stack_ptr - 7]);
-					node_addr = traversal_stack[stack_ptr].addr;
-					--stack_ptr;
-					continue;
-				}
+          /* Seven children are hit, push all onto stack and sort 7
+           * stack items, continue with closest child.
+           */
+          r = __bscf(child_mask);
+          int c6 = __float_as_int(cnodes[r]);
+          float d6 = ((float *)&dist)[r];
+          if (child_mask == 0) {
+            ++stack_ptr;
+            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+            traversal_stack[stack_ptr].addr = c6;
+            traversal_stack[stack_ptr].dist = d6;
+            obvh_stack_sort(&traversal_stack[stack_ptr],
+                            &traversal_stack[stack_ptr - 1],
+                            &traversal_stack[stack_ptr - 2],
+                            &traversal_stack[stack_ptr - 3],
+                            &traversal_stack[stack_ptr - 4],
+                            &traversal_stack[stack_ptr - 5],
+                            &traversal_stack[stack_ptr - 6]);
+            node_addr = traversal_stack[stack_ptr].addr;
+            --stack_ptr;
+            continue;
+          }
+          /* Eight children are hit, push all onto stack and sort 8
+           * stack items, continue with closest child.
+           */
+          r = __bscf(child_mask);
+          int c7 = __float_as_int(cnodes[r]);
+          float d7 = ((float *)&dist)[r];
+          ++stack_ptr;
+          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+          traversal_stack[stack_ptr].addr = c7;
+          traversal_stack[stack_ptr].dist = d7;
+          ++stack_ptr;
+          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+          traversal_stack[stack_ptr].addr = c6;
+          traversal_stack[stack_ptr].dist = d6;
+          obvh_stack_sort(&traversal_stack[stack_ptr],
+                          &traversal_stack[stack_ptr - 1],
+                          &traversal_stack[stack_ptr - 2],
+                          &traversal_stack[stack_ptr - 3],
+                          &traversal_stack[stack_ptr - 4],
+                          &traversal_stack[stack_ptr - 5],
+                          &traversal_stack[stack_ptr - 6],
+                          &traversal_stack[stack_ptr - 7]);
+          node_addr = traversal_stack[stack_ptr].addr;
+          --stack_ptr;
+          continue;
+        }
 
-				node_addr = traversal_stack[stack_ptr].addr;
-				--stack_ptr;
-			}
+        node_addr = traversal_stack[stack_ptr].addr;
+        --stack_ptr;
+      }
 
-			/* If node is leaf, fetch triangle list. */
-			if(node_addr < 0) {
-				float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1));
-				int prim_addr = __float_as_int(leaf.x);
+      /* If node is leaf, fetch triangle list. */
+      if (node_addr < 0) {
+        float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
+        int prim_addr = __float_as_int(leaf.x);
 
-				int prim_addr2 = __float_as_int(leaf.y);
-				const uint type = __float_as_int(leaf.w);
+        int prim_addr2 = __float_as_int(leaf.y);
+        const uint type = __float_as_int(leaf.w);
 
-				/* Pop. */
-				node_addr = traversal_stack[stack_ptr].addr;
-				--stack_ptr;
+        /* Pop. */
+        node_addr = traversal_stack[stack_ptr].addr;
+        --stack_ptr;
 
-				/* Primitive intersection. */
-				switch(type & PRIMITIVE_ALL) {
-					case PRIMITIVE_TRIANGLE: {
-						/* Intersect ray against primitive, */
-						for(; prim_addr < prim_addr2; prim_addr++) {
-							kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
-							if(triangle_intersect_local(kg,
-							                            local_isect,
-							                            P,
-							                            dir,
-							                            object,
-							                            local_object,
-							                            prim_addr,
-							                            isect_t,
-							                            lcg_state,
-							                            max_hits))
-							{
-								return true;
-							}
-						}
-						break;
-					}
+        /* Primitive intersection. */
+        switch (type & PRIMITIVE_ALL) {
+          case PRIMITIVE_TRIANGLE: {
+            /* Intersect ray against primitive, */
+            for (; prim_addr < prim_addr2; prim_addr++) {
+              kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
+              if (triangle_intersect_local(kg,
+                                           local_isect,
+                                           P,
+                                           dir,
+                                           object,
+                                           local_object,
+                                           prim_addr,
+                                           isect_t,
+                                           lcg_state,
+                                           max_hits)) {
+                return true;
+              }
+            }
+            break;
+          }
 #if BVH_FEATURE(BVH_MOTION)
-					case PRIMITIVE_MOTION_TRIANGLE: {
-						/* Intersect ray against primitive. */
-						for(; prim_addr < prim_addr2; prim_addr++) {
-							kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
-							if(motion_triangle_intersect_local(kg,
-							                                   local_isect,
-							                                   P,
-							                                   dir,
-							                                   ray->time,
-							                                   object,
-							                                   local_object,
-							                                   prim_addr,
-							                                   isect_t,
-							                                   lcg_state,
-							                                   max_hits))
-							{
-								return true;
-							}
-						}
-						break;
-					}
+          case PRIMITIVE_MOTION_TRIANGLE: {
+            /* Intersect ray against primitive. */
+            for (; prim_addr < prim_addr2; prim_addr++) {
+              kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
+              if (motion_triangle_intersect_local(kg,
+                                                  local_isect,
+                                                  P,
+                                                  dir,
+                                                  ray->time,
+                                                  object,
+                                                  local_object,
+                                                  prim_addr,
+                                                  isect_t,
+                                                  lcg_state,
+                                                  max_hits)) {
+                return true;
+              }
+            }
+            break;
+          }
 #endif
-					default:
-						break;
-				}
-			}
-		} while(node_addr != ENTRYPOINT_SENTINEL);
-	} while(node_addr != ENTRYPOINT_SENTINEL);
-	return false;
+          default:
+            break;
+        }
+      }
+    } while (node_addr != ENTRYPOINT_SENTINEL);
+  } while (node_addr != ENTRYPOINT_SENTINEL);
+  return false;
 }
 
 #undef NODE_INTERSECT
diff --git a/intern/cycles/kernel/bvh/obvh_nodes.h b/intern/cycles/kernel/bvh/obvh_nodes.h
index 93f35f6dffb..6831562cade 100644
--- a/intern/cycles/kernel/bvh/obvh_nodes.h
+++ b/intern/cycles/kernel/bvh/obvh_nodes.h
@@ -17,11 +17,11 @@
  */
 
 struct OBVHStackItem {
-	int addr;
-	float dist;
+  int addr;
+  float dist;
 };
 
-ccl_device_inline void obvh_near_far_idx_calc(const float3& idir,
+ccl_device_inline void obvh_near_far_idx_calc(const float3 &idir,
                                               int *ccl_restrict near_x,
                                               int *ccl_restrict near_y,
                                               int *ccl_restrict near_z,
@@ -31,41 +31,73 @@ ccl_device_inline void obvh_near_far_idx_calc(const float3& idir,
 
 {
 #ifdef __KERNEL_SSE__
-	*near_x = 0; *far_x = 1;
-	*near_y = 2; *far_y = 3;
-	*near_z = 4; *far_z = 5;
-
-	const size_t mask = movemask(ssef(idir.m128));
-
-	const int mask_x = mask & 1;
-	const int mask_y = (mask & 2) >> 1;
-	const int mask_z = (mask & 4) >> 2;
-
-	*near_x += mask_x; *far_x -= mask_x;
-	*near_y += mask_y; *far_y -= mask_y;
-	*near_z += mask_z; *far_z -= mask_z;
+  *near_x = 0;
+  *far_x = 1;
+  *near_y = 2;
+  *far_y = 3;
+  *near_z = 4;
+  *far_z = 5;
+
+  const size_t mask = movemask(ssef(idir.m128));
+
+  const int mask_x = mask & 1;
+  const int mask_y = (mask & 2) >> 1;
+  const int mask_z = (mask & 4) >> 2;
+
+  *near_x += mask_x;
+  *far_x -= mask_x;
+  *near_y += mask_y;
+  *far_y -= mask_y;
+  *near_z += mask_z;
+  *far_z -= mask_z;
 #else
-	if(idir.x >= 0.0f) { *near_x = 0; *far_x = 1; } else { *near_x = 1; *far_x = 0; }
-	if(idir.y >= 0.0f) { *near_y = 2; *far_y = 3; } else { *near_y = 3; *far_y = 2; }
-	if(idir.z >= 0.0f) { *near_z = 4; *far_z = 5; } else { *near_z = 5; *far_z = 4; }
+  if (idir.x >= 0.0f) {
+    *near_x = 0;
+    *far_x = 1;
+  }
+  else {
+    *near_x = 1;
+    *far_x = 0;
+  }
+  if (idir.y >= 0.0f) {
+    *near_y = 2;
+    *far_y = 3;
+  }
+  else {
+    *near_y = 3;
+    *far_y = 2;
+  }
+  if (idir.z >= 0.0f) {
+    *near_z = 4;
+    *far_z = 5;
+  }
+  else {
+    *near_z = 5;
+    *far_z = 4;
+  }
 #endif
 }
 
-ccl_device_inline void obvh_item_swap(OBVHStackItem *ccl_restrict a,
-                                      OBVHStackItem *ccl_restrict b)
+ccl_device_inline void obvh_item_swap(OBVHStackItem *ccl_restrict a, OBVHStackItem *ccl_restrict b)
 {
-	OBVHStackItem tmp = *a;
-	*a = *b;
-	*b = tmp;
+  OBVHStackItem tmp = *a;
+  *a = *b;
+  *b = tmp;
 }
 
 ccl_device_inline void obvh_stack_sort(OBVHStackItem *ccl_restrict s1,
                                        OBVHStackItem *ccl_restrict s2,
                                        OBVHStackItem *ccl_restrict s3)
 {
-	if(s2->dist < s1->dist) { obvh_item_swap(s2, s1); }
-	if(s3->dist < s2->dist) { obvh_item_swap(s3, s2); }
-	if(s2->dist < s1->dist) { obvh_item_swap(s2, s1); }
+  if (s2->dist < s1->dist) {
+    obvh_item_swap(s2, s1);
+  }
+  if (s3->dist < s2->dist) {
+    obvh_item_swap(s3, s2);
+  }
+  if (s2->dist < s1->dist) {
+    obvh_item_swap(s2, s1);
+  }
 }
 
 ccl_device_inline void obvh_stack_sort(OBVHStackItem *ccl_restrict s1,
@@ -73,11 +105,21 @@ ccl_device_inline void obvh_stack_sort(OBVHStackItem *ccl_restrict s1,
                                        OBVHStackItem *ccl_restrict s3,
                                        OBVHStackItem *ccl_restrict s4)
 {
-	if(s2->dist < s1->dist) { obvh_item_swap(s2, s1); }
-	if(s4->dist < s3->dist) { obvh_item_swap(s4, s3); }
-	if(s3->dist < s1->dist) { obvh_item_swap(s3, s1); }
-	if(s4->dist < s2->dist) { obvh_item_swap(s4, s2); }
-	if(s3->dist < s2->dist) { obvh_item_swap(s3, s2); }
+  if (s2->dist < s1->dist) {
+    obvh_item_swap(s2, s1);
+  }
+  if (s4->dist < s3->dist) {
+    obvh_item_swap(s4, s3);
+  }
+  if (s3->dist < s1->dist) {
+    obvh_item_swap(s3, s1);
+  }
+  if (s4->dist < s2->dist) {
+    obvh_item_swap(s4, s2);
+  }
+  if (s3->dist < s2->dist) {
+    obvh_item_swap(s3, s2);
+  }
 }
 
 ccl_device_inline void obvh_stack_sort(OBVHStackItem *ccl_restrict s1,
@@ -86,19 +128,19 @@ ccl_device_inline void obvh_stack_sort(OBVHStackItem *ccl_restrict s1,
                                        OBVHStackItem *ccl_restrict s4,
                                        OBVHStackItem *ccl_restrict s5)
 {
-	obvh_stack_sort(s1, s2, s3, s4);
-	if(s5->dist < s4->dist) {
-		obvh_item_swap(s4, s5);
-		if(s4->dist < s3->dist) {
-			obvh_item_swap(s3, s4);
-			if(s3->dist < s2->dist) {
-				obvh_item_swap(s2, s3);
-				if(s2->dist < s1->dist) {
-					obvh_item_swap(s1, s2);
-				}
-			}
-		}
-	}
+  obvh_stack_sort(s1, s2, s3, s4);
+  if (s5->dist < s4->dist) {
+    obvh_item_swap(s4, s5);
+    if (s4->dist < s3->dist) {
+      obvh_item_swap(s3, s4);
+      if (s3->dist < s2->dist) {
+        obvh_item_swap(s2, s3);
+        if (s2->dist < s1->dist) {
+          obvh_item_swap(s1, s2);
+        }
+      }
+    }
+  }
 }
 
 ccl_device_inline void obvh_stack_sort(OBVHStackItem *ccl_restrict s1,
@@ -108,22 +150,22 @@ ccl_device_inline void obvh_stack_sort(OBVHStackItem *ccl_restrict s1,
                                        OBVHStackItem *ccl_restrict s5,
                                        OBVHStackItem *ccl_restrict s6)
 {
-	obvh_stack_sort(s1, s2, s3, s4, s5);
-	if(s6->dist < s5->dist) {
-		obvh_item_swap(s5, s6);
-		if(s5->dist < s4->dist) {
-			obvh_item_swap(s4, s5);
-			if(s4->dist < s3->dist) {
-				obvh_item_swap(s3, s4);
-				if(s3->dist < s2->dist) {
-					obvh_item_swap(s2, s3);
-					if(s2->dist < s1->dist) {
-						obvh_item_swap(s1, s2);
-					}
-				}
-			}
-		}
-	}
+  obvh_stack_sort(s1, s2, s3, s4, s5);
+  if (s6->dist < s5->dist) {
+    obvh_item_swap(s5, s6);
+    if (s5->dist < s4->dist) {
+      obvh_item_swap(s4, s5);
+      if (s4->dist < s3->dist) {
+        obvh_item_swap(s3, s4);
+        if (s3->dist < s2->dist) {
+          obvh_item_swap(s2, s3);
+          if (s2->dist < s1->dist) {
+            obvh_item_swap(s1, s2);
+          }
+        }
+      }
+    }
+  }
 }
 
 ccl_device_inline void obvh_stack_sort(OBVHStackItem *ccl_restrict s1,
@@ -134,25 +176,25 @@ ccl_device_inline void obvh_stack_sort(OBVHStackItem *ccl_restrict s1,
                                        OBVHStackItem *ccl_restrict s6,
                                        OBVHStackItem *ccl_restrict s7)
 {
-	obvh_stack_sort(s1, s2, s3, s4, s5, s6);
-	if(s7->dist < s6->dist) {
-		obvh_item_swap(s6, s7);
-		if(s6->dist < s5->dist) {
-			obvh_item_swap(s5, s6);
-			if(s5->dist < s4->dist) {
-				obvh_item_swap(s4, s5);
-				if(s4->dist < s3->dist) {
-					obvh_item_swap(s3, s4);
-					if(s3->dist < s2->dist) {
-						obvh_item_swap(s2, s3);
-						if(s2->dist < s1->dist) {
-							obvh_item_swap(s1, s2);
-						}
-					}
-				}
-			}
-		}
-	}
+  obvh_stack_sort(s1, s2, s3, s4, s5, s6);
+  if (s7->dist < s6->dist) {
+    obvh_item_swap(s6, s7);
+    if (s6->dist < s5->dist) {
+      obvh_item_swap(s5, s6);
+      if (s5->dist < s4->dist) {
+        obvh_item_swap(s4, s5);
+        if (s4->dist < s3->dist) {
+          obvh_item_swap(s3, s4);
+          if (s3->dist < s2->dist) {
+            obvh_item_swap(s2, s3);
+            if (s2->dist < s1->dist) {
+              obvh_item_swap(s1, s2);
+            }
+          }
+        }
+      }
+    }
+  }
 }
 
 ccl_device_inline void obvh_stack_sort(OBVHStackItem *ccl_restrict s1,
@@ -164,41 +206,41 @@ ccl_device_inline void obvh_stack_sort(OBVHStackItem *ccl_restrict s1,
                                        OBVHStackItem *ccl_restrict s7,
                                        OBVHStackItem *ccl_restrict s8)
 {
-	obvh_stack_sort(s1, s2, s3, s4, s5, s6, s7);
-	if(s8->dist < s7->dist) {
-		obvh_item_swap(s7, s8);
-		if(s7->dist < s6->dist) {
-			obvh_item_swap(s6, s7);
-			if(s6->dist < s5->dist) {
-				obvh_item_swap(s5, s6);
-				if(s5->dist < s4->dist) {
-					obvh_item_swap(s4, s5);
-					if(s4->dist < s3->dist) {
-						obvh_item_swap(s3, s4);
-						if(s3->dist < s2->dist) {
-							obvh_item_swap(s2, s3);
-							if(s2->dist < s1->dist) {
-								obvh_item_swap(s1, s2);
-							}
-						}
-					}
-				}
-			}
-		}
-	}
+  obvh_stack_sort(s1, s2, s3, s4, s5, s6, s7);
+  if (s8->dist < s7->dist) {
+    obvh_item_swap(s7, s8);
+    if (s7->dist < s6->dist) {
+      obvh_item_swap(s6, s7);
+      if (s6->dist < s5->dist) {
+        obvh_item_swap(s5, s6);
+        if (s5->dist < s4->dist) {
+          obvh_item_swap(s4, s5);
+          if (s4->dist < s3->dist) {
+            obvh_item_swap(s3, s4);
+            if (s3->dist < s2->dist) {
+              obvh_item_swap(s2, s3);
+              if (s2->dist < s1->dist) {
+                obvh_item_swap(s1, s2);
+              }
+            }
+          }
+        }
+      }
+    }
+  }
 }
 
 /* Axis-aligned nodes intersection */
 
 ccl_device_inline int obvh_aligned_node_intersect(KernelGlobals *ccl_restrict kg,
-                                                  const avxf& isect_near,
-                                                  const avxf& isect_far,
+                                                  const avxf &isect_near,
+                                                  const avxf &isect_far,
 #ifdef __KERNEL_AVX2__
-                                                  const avx3f& org_idir,
+                                                  const avx3f &org_idir,
 #else
-                                                  const avx3f& org,
+                                                  const avx3f &org,
 #endif
-                                                  const avx3f& idir,
+                                                  const avx3f &idir,
                                                   const int near_x,
                                                   const int near_y,
                                                   const int near_z,
@@ -208,213 +250,216 @@ ccl_device_inline int obvh_aligned_node_intersect(KernelGlobals *ccl_restrict kg
                                                   const int node_addr,
                                                   avxf *ccl_restrict dist)
 {
-	const int offset = node_addr + 2;
+  const int offset = node_addr + 2;
 #ifdef __KERNEL_AVX2__
-	const avxf tnear_x = msub(kernel_tex_fetch_avxf(__bvh_nodes, offset+near_x*2), idir.x, org_idir.x);
-	const avxf tnear_y = msub(kernel_tex_fetch_avxf(__bvh_nodes, offset+near_y*2), idir.y, org_idir.y);
-	const avxf tnear_z = msub(kernel_tex_fetch_avxf(__bvh_nodes, offset+near_z*2), idir.z, org_idir.z);
-	const avxf tfar_x = msub(kernel_tex_fetch_avxf(__bvh_nodes, offset+far_x*2), idir.x, org_idir.x);
-	const avxf tfar_y = msub(kernel_tex_fetch_avxf(__bvh_nodes, offset+far_y*2), idir.y, org_idir.y);
-	const avxf tfar_z = msub(kernel_tex_fetch_avxf(__bvh_nodes, offset+far_z*2), idir.z, org_idir.z);
-
-	const avxf tnear = max4(tnear_x, tnear_y, tnear_z, isect_near);
-	const avxf tfar = min4(tfar_x, tfar_y, tfar_z, isect_far);
-	const avxb vmask = tnear <= tfar;
-	int mask = (int)movemask(vmask);
-	*dist = tnear;
-	return mask;
+  const avxf tnear_x = msub(
+      kernel_tex_fetch_avxf(__bvh_nodes, offset + near_x * 2), idir.x, org_idir.x);
+  const avxf tnear_y = msub(
+      kernel_tex_fetch_avxf(__bvh_nodes, offset + near_y * 2), idir.y, org_idir.y);
+  const avxf tnear_z = msub(
+      kernel_tex_fetch_avxf(__bvh_nodes, offset + near_z * 2), idir.z, org_idir.z);
+  const avxf tfar_x = msub(
+      kernel_tex_fetch_avxf(__bvh_nodes, offset + far_x * 2), idir.x, org_idir.x);
+  const avxf tfar_y = msub(
+      kernel_tex_fetch_avxf(__bvh_nodes, offset + far_y * 2), idir.y, org_idir.y);
+  const avxf tfar_z = msub(
+      kernel_tex_fetch_avxf(__bvh_nodes, offset + far_z * 2), idir.z, org_idir.z);
+
+  const avxf tnear = max4(tnear_x, tnear_y, tnear_z, isect_near);
+  const avxf tfar = min4(tfar_x, tfar_y, tfar_z, isect_far);
+  const avxb vmask = tnear <= tfar;
+  int mask = (int)movemask(vmask);
+  *dist = tnear;
+  return mask;
 #else
-	return 0;
+  return 0;
 #endif
 }
 
-ccl_device_inline int obvh_aligned_node_intersect_robust(
-        KernelGlobals *ccl_restrict kg,
-        const avxf& isect_near,
-        const avxf& isect_far,
+ccl_device_inline int obvh_aligned_node_intersect_robust(KernelGlobals *ccl_restrict kg,
+                                                         const avxf &isect_near,
+                                                         const avxf &isect_far,
 #ifdef __KERNEL_AVX2__
-        const avx3f& P_idir,
+                                                         const avx3f &P_idir,
 #else
-        const avx3f& P,
+                                                         const avx3f &P,
 #endif
-        const avx3f& idir,
-        const int near_x,
-        const int near_y,
-        const int near_z,
-        const int far_x,
-        const int far_y,
-        const int far_z,
-        const int node_addr,
-        const float difl,
-        avxf *ccl_restrict dist)
+                                                         const avx3f &idir,
+                                                         const int near_x,
+                                                         const int near_y,
+                                                         const int near_z,
+                                                         const int far_x,
+                                                         const int far_y,
+                                                         const int far_z,
+                                                         const int node_addr,
+                                                         const float difl,
+                                                         avxf *ccl_restrict dist)
 {
-	const int offset = node_addr + 2;
+  const int offset = node_addr + 2;
 #ifdef __KERNEL_AVX2__
-	const avxf tnear_x = msub(kernel_tex_fetch_avxf(__bvh_nodes, offset + near_x * 2), idir.x, P_idir.x);
-	const avxf tfar_x = msub(kernel_tex_fetch_avxf(__bvh_nodes, offset + far_x * 2), idir.x, P_idir.x);
-	const avxf tnear_y = msub(kernel_tex_fetch_avxf(__bvh_nodes, offset + near_y * 2), idir.y, P_idir.y);
-	const avxf tfar_y = msub(kernel_tex_fetch_avxf(__bvh_nodes, offset + far_y * 2), idir.y, P_idir.y);
-	const avxf tnear_z = msub(kernel_tex_fetch_avxf(__bvh_nodes, offset + near_z * 2), idir.z, P_idir.z);
-	const avxf tfar_z = msub(kernel_tex_fetch_avxf(__bvh_nodes, offset + far_z * 2), idir.z, P_idir.z);
-
-	const float round_down = 1.0f - difl;
-	const float round_up = 1.0f + difl;
-	const avxf tnear = max4(tnear_x, tnear_y, tnear_z, isect_near);
-	const avxf tfar = min4(tfar_x, tfar_y, tfar_z, isect_far);
-	const avxb vmask = round_down*tnear <= round_up*tfar;
-	int mask = (int)movemask(vmask);
-	*dist = tnear;
-	return mask;
+  const avxf tnear_x = msub(
+      kernel_tex_fetch_avxf(__bvh_nodes, offset + near_x * 2), idir.x, P_idir.x);
+  const avxf tfar_x = msub(
+      kernel_tex_fetch_avxf(__bvh_nodes, offset + far_x * 2), idir.x, P_idir.x);
+  const avxf tnear_y = msub(
+      kernel_tex_fetch_avxf(__bvh_nodes, offset + near_y * 2), idir.y, P_idir.y);
+  const avxf tfar_y = msub(
+      kernel_tex_fetch_avxf(__bvh_nodes, offset + far_y * 2), idir.y, P_idir.y);
+  const avxf tnear_z = msub(
+      kernel_tex_fetch_avxf(__bvh_nodes, offset + near_z * 2), idir.z, P_idir.z);
+  const avxf tfar_z = msub(
+      kernel_tex_fetch_avxf(__bvh_nodes, offset + far_z * 2), idir.z, P_idir.z);
+
+  const float round_down = 1.0f - difl;
+  const float round_up = 1.0f + difl;
+  const avxf tnear = max4(tnear_x, tnear_y, tnear_z, isect_near);
+  const avxf tfar = min4(tfar_x, tfar_y, tfar_z, isect_far);
+  const avxb vmask = round_down * tnear <= round_up * tfar;
+  int mask = (int)movemask(vmask);
+  *dist = tnear;
+  return mask;
 #else
-	return 0;
+  return 0;
 #endif
 }
 
 /* Unaligned nodes intersection */
 
-ccl_device_inline int obvh_unaligned_node_intersect(
-        KernelGlobals *ccl_restrict kg,
-        const avxf& isect_near,
-        const avxf& isect_far,
+ccl_device_inline int obvh_unaligned_node_intersect(KernelGlobals *ccl_restrict kg,
+                                                    const avxf &isect_near,
+                                                    const avxf &isect_far,
 #ifdef __KERNEL_AVX2__
-        const avx3f& org_idir,
+                                                    const avx3f &org_idir,
 #endif
-        const avx3f& org,
-        const avx3f& dir,
-        const avx3f& idir,
-        const int near_x,
-        const int near_y,
-        const int near_z,
-        const int far_x,
-        const int far_y,
-        const int far_z,
-        const int node_addr,
-        avxf *ccl_restrict dist)
+                                                    const avx3f &org,
+                                                    const avx3f &dir,
+                                                    const avx3f &idir,
+                                                    const int near_x,
+                                                    const int near_y,
+                                                    const int near_z,
+                                                    const int far_x,
+                                                    const int far_y,
+                                                    const int far_z,
+                                                    const int node_addr,
+                                                    avxf *ccl_restrict dist)
 {
-	const int offset = node_addr;
-	const avxf tfm_x_x = kernel_tex_fetch_avxf(__bvh_nodes, offset+2);
-	const avxf tfm_x_y = kernel_tex_fetch_avxf(__bvh_nodes, offset+4);
-	const avxf tfm_x_z = kernel_tex_fetch_avxf(__bvh_nodes, offset+6);
-
-	const avxf tfm_y_x = kernel_tex_fetch_avxf(__bvh_nodes, offset+8);
-	const avxf tfm_y_y = kernel_tex_fetch_avxf(__bvh_nodes, offset+10);
-	const avxf tfm_y_z = kernel_tex_fetch_avxf(__bvh_nodes, offset+12);
-
-	const avxf tfm_z_x = kernel_tex_fetch_avxf(__bvh_nodes, offset+14);
-	const avxf tfm_z_y = kernel_tex_fetch_avxf(__bvh_nodes, offset+16);
-	const avxf tfm_z_z = kernel_tex_fetch_avxf(__bvh_nodes, offset+18);
-
-	const avxf tfm_t_x = kernel_tex_fetch_avxf(__bvh_nodes, offset+20);
-	const avxf tfm_t_y = kernel_tex_fetch_avxf(__bvh_nodes, offset+22);
-	const avxf tfm_t_z = kernel_tex_fetch_avxf(__bvh_nodes, offset+24);
-
-	const avxf aligned_dir_x = dir.x*tfm_x_x + dir.y*tfm_x_y + dir.z*tfm_x_z,
-	           aligned_dir_y = dir.x*tfm_y_x + dir.y*tfm_y_y + dir.z*tfm_y_z,
-	           aligned_dir_z = dir.x*tfm_z_x + dir.y*tfm_z_y + dir.z*tfm_z_z;
-
-	const avxf aligned_P_x = org.x*tfm_x_x + org.y*tfm_x_y + org.z*tfm_x_z + tfm_t_x,
-	           aligned_P_y = org.x*tfm_y_x + org.y*tfm_y_y + org.z*tfm_y_z + tfm_t_y,
-	           aligned_P_z = org.x*tfm_z_x + org.y*tfm_z_y + org.z*tfm_z_z + tfm_t_z;
-
-	const avxf neg_one(-1.0f);
-	const avxf nrdir_x = neg_one / aligned_dir_x,
-	           nrdir_y = neg_one / aligned_dir_y,
-	           nrdir_z = neg_one / aligned_dir_z;
-
-	const avxf tlower_x = aligned_P_x * nrdir_x,
-	           tlower_y = aligned_P_y * nrdir_y,
-	           tlower_z = aligned_P_z * nrdir_z;
-
-	const avxf tupper_x = tlower_x - nrdir_x,
-	           tupper_y = tlower_y - nrdir_y,
-	           tupper_z = tlower_z - nrdir_z;
-
-	const avxf tnear_x = min(tlower_x, tupper_x);
-	const avxf tnear_y = min(tlower_y, tupper_y);
-	const avxf tnear_z = min(tlower_z, tupper_z);
-	const avxf tfar_x = max(tlower_x, tupper_x);
-	const avxf tfar_y = max(tlower_y, tupper_y);
-	const avxf tfar_z = max(tlower_z, tupper_z);
-	const avxf tnear = max4(isect_near, tnear_x, tnear_y, tnear_z);
-	const avxf tfar = min4(isect_far, tfar_x, tfar_y, tfar_z);
-	const avxb vmask = tnear <= tfar;
-	*dist = tnear;
-	return movemask(vmask);
+  const int offset = node_addr;
+  const avxf tfm_x_x = kernel_tex_fetch_avxf(__bvh_nodes, offset + 2);
+  const avxf tfm_x_y = kernel_tex_fetch_avxf(__bvh_nodes, offset + 4);
+  const avxf tfm_x_z = kernel_tex_fetch_avxf(__bvh_nodes, offset + 6);
+
+  const avxf tfm_y_x = kernel_tex_fetch_avxf(__bvh_nodes, offset + 8);
+  const avxf tfm_y_y = kernel_tex_fetch_avxf(__bvh_nodes, offset + 10);
+  const avxf tfm_y_z = kernel_tex_fetch_avxf(__bvh_nodes, offset + 12);
+
+  const avxf tfm_z_x = kernel_tex_fetch_avxf(__bvh_nodes, offset + 14);
+  const avxf tfm_z_y = kernel_tex_fetch_avxf(__bvh_nodes, offset + 16);
+  const avxf tfm_z_z = kernel_tex_fetch_avxf(__bvh_nodes, offset + 18);
+
+  const avxf tfm_t_x = kernel_tex_fetch_avxf(__bvh_nodes, offset + 20);
+  const avxf tfm_t_y = kernel_tex_fetch_avxf(__bvh_nodes, offset + 22);
+  const avxf tfm_t_z = kernel_tex_fetch_avxf(__bvh_nodes, offset + 24);
+
+  const avxf aligned_dir_x = dir.x * tfm_x_x + dir.y * tfm_x_y + dir.z * tfm_x_z,
+             aligned_dir_y = dir.x * tfm_y_x + dir.y * tfm_y_y + dir.z * tfm_y_z,
+             aligned_dir_z = dir.x * tfm_z_x + dir.y * tfm_z_y + dir.z * tfm_z_z;
+
+  const avxf aligned_P_x = org.x * tfm_x_x + org.y * tfm_x_y + org.z * tfm_x_z + tfm_t_x,
+             aligned_P_y = org.x * tfm_y_x + org.y * tfm_y_y + org.z * tfm_y_z + tfm_t_y,
+             aligned_P_z = org.x * tfm_z_x + org.y * tfm_z_y + org.z * tfm_z_z + tfm_t_z;
+
+  const avxf neg_one(-1.0f);
+  const avxf nrdir_x = neg_one / aligned_dir_x, nrdir_y = neg_one / aligned_dir_y,
+             nrdir_z = neg_one / aligned_dir_z;
+
+  const avxf tlower_x = aligned_P_x * nrdir_x, tlower_y = aligned_P_y * nrdir_y,
+             tlower_z = aligned_P_z * nrdir_z;
+
+  const avxf tupper_x = tlower_x - nrdir_x, tupper_y = tlower_y - nrdir_y,
+             tupper_z = tlower_z - nrdir_z;
+
+  const avxf tnear_x = min(tlower_x, tupper_x);
+  const avxf tnear_y = min(tlower_y, tupper_y);
+  const avxf tnear_z = min(tlower_z, tupper_z);
+  const avxf tfar_x = max(tlower_x, tupper_x);
+  const avxf tfar_y = max(tlower_y, tupper_y);
+  const avxf tfar_z = max(tlower_z, tupper_z);
+  const avxf tnear = max4(isect_near, tnear_x, tnear_y, tnear_z);
+  const avxf tfar = min4(isect_far, tfar_x, tfar_y, tfar_z);
+  const avxb vmask = tnear <= tfar;
+  *dist = tnear;
+  return movemask(vmask);
 }
 
-ccl_device_inline int obvh_unaligned_node_intersect_robust(
-        KernelGlobals *ccl_restrict kg,
-        const avxf& isect_near,
-        const avxf& isect_far,
+ccl_device_inline int obvh_unaligned_node_intersect_robust(KernelGlobals *ccl_restrict kg,
+                                                           const avxf &isect_near,
+                                                           const avxf &isect_far,
 #ifdef __KERNEL_AVX2__
-        const avx3f& P_idir,
+                                                           const avx3f &P_idir,
 #endif
-        const avx3f& P,
-        const avx3f& dir,
-        const avx3f& idir,
-        const int near_x,
-        const int near_y,
-        const int near_z,
-        const int far_x,
-        const int far_y,
-        const int far_z,
-        const int node_addr,
-        const float difl,
-        avxf *ccl_restrict dist)
+                                                           const avx3f &P,
+                                                           const avx3f &dir,
+                                                           const avx3f &idir,
+                                                           const int near_x,
+                                                           const int near_y,
+                                                           const int near_z,
+                                                           const int far_x,
+                                                           const int far_y,
+                                                           const int far_z,
+                                                           const int node_addr,
+                                                           const float difl,
+                                                           avxf *ccl_restrict dist)
 {
-	const int offset = node_addr;
-	const avxf tfm_x_x = kernel_tex_fetch_avxf(__bvh_nodes, offset+2);
-	const avxf tfm_x_y = kernel_tex_fetch_avxf(__bvh_nodes, offset+4);
-	const avxf tfm_x_z = kernel_tex_fetch_avxf(__bvh_nodes, offset+6);
-
-	const avxf tfm_y_x = kernel_tex_fetch_avxf(__bvh_nodes, offset+8);
-	const avxf tfm_y_y = kernel_tex_fetch_avxf(__bvh_nodes, offset+10);
-	const avxf tfm_y_z = kernel_tex_fetch_avxf(__bvh_nodes, offset+12);
-
-	const avxf tfm_z_x = kernel_tex_fetch_avxf(__bvh_nodes, offset+14);
-	const avxf tfm_z_y = kernel_tex_fetch_avxf(__bvh_nodes, offset+16);
-	const avxf tfm_z_z = kernel_tex_fetch_avxf(__bvh_nodes, offset+18);
-
-	const avxf tfm_t_x = kernel_tex_fetch_avxf(__bvh_nodes, offset+20);
-	const avxf tfm_t_y = kernel_tex_fetch_avxf(__bvh_nodes, offset+22);
-	const avxf tfm_t_z = kernel_tex_fetch_avxf(__bvh_nodes, offset+24);
-
-	const avxf aligned_dir_x = dir.x*tfm_x_x + dir.y*tfm_x_y + dir.z*tfm_x_z,
-	           aligned_dir_y = dir.x*tfm_y_x + dir.y*tfm_y_y + dir.z*tfm_y_z,
-	           aligned_dir_z = dir.x*tfm_z_x + dir.y*tfm_z_y + dir.z*tfm_z_z;
-
-	const avxf aligned_P_x = P.x*tfm_x_x + P.y*tfm_x_y + P.z*tfm_x_z + tfm_t_x,
-	           aligned_P_y = P.x*tfm_y_x + P.y*tfm_y_y + P.z*tfm_y_z + tfm_t_y,
-	           aligned_P_z = P.x*tfm_z_x + P.y*tfm_z_y + P.z*tfm_z_z + tfm_t_z;
-
-	const avxf neg_one(-1.0f);
-	const avxf nrdir_x = neg_one / aligned_dir_x,
-	           nrdir_y = neg_one / aligned_dir_y,
-	           nrdir_z = neg_one / aligned_dir_z;
-
-	const avxf tlower_x = aligned_P_x * nrdir_x,
-	           tlower_y = aligned_P_y * nrdir_y,
-	           tlower_z = aligned_P_z * nrdir_z;
-
-	const avxf tupper_x = tlower_x - nrdir_x,
-	           tupper_y = tlower_y - nrdir_y,
-	           tupper_z = tlower_z - nrdir_z;
-
-	const float round_down = 1.0f - difl;
-	const float round_up = 1.0f + difl;
-
-	const avxf tnear_x = min(tlower_x, tupper_x);
-	const avxf tnear_y = min(tlower_y, tupper_y);
-	const avxf tnear_z = min(tlower_z, tupper_z);
-	const avxf tfar_x = max(tlower_x, tupper_x);
-	const avxf tfar_y = max(tlower_y, tupper_y);
-	const avxf tfar_z = max(tlower_z, tupper_z);
-
-	const avxf tnear = max4(isect_near, tnear_x, tnear_y, tnear_z);
-	const avxf tfar = min4(isect_far, tfar_x, tfar_y, tfar_z);
-	const avxb vmask = round_down*tnear <= round_up*tfar;
-	*dist = tnear;
-	return movemask(vmask);
+  const int offset = node_addr;
+  const avxf tfm_x_x = kernel_tex_fetch_avxf(__bvh_nodes, offset + 2);
+  const avxf tfm_x_y = kernel_tex_fetch_avxf(__bvh_nodes, offset + 4);
+  const avxf tfm_x_z = kernel_tex_fetch_avxf(__bvh_nodes, offset + 6);
+
+  const avxf tfm_y_x = kernel_tex_fetch_avxf(__bvh_nodes, offset + 8);
+  const avxf tfm_y_y = kernel_tex_fetch_avxf(__bvh_nodes, offset + 10);
+  const avxf tfm_y_z = kernel_tex_fetch_avxf(__bvh_nodes, offset + 12);
+
+  const avxf tfm_z_x = kernel_tex_fetch_avxf(__bvh_nodes, offset + 14);
+  const avxf tfm_z_y = kernel_tex_fetch_avxf(__bvh_nodes, offset + 16);
+  const avxf tfm_z_z = kernel_tex_fetch_avxf(__bvh_nodes, offset + 18);
+
+  const avxf tfm_t_x = kernel_tex_fetch_avxf(__bvh_nodes, offset + 20);
+  const avxf tfm_t_y = kernel_tex_fetch_avxf(__bvh_nodes, offset + 22);
+  const avxf tfm_t_z = kernel_tex_fetch_avxf(__bvh_nodes, offset + 24);
+
+  const avxf aligned_dir_x = dir.x * tfm_x_x + dir.y * tfm_x_y + dir.z * tfm_x_z,
+             aligned_dir_y = dir.x * tfm_y_x + dir.y * tfm_y_y + dir.z * tfm_y_z,
+             aligned_dir_z = dir.x * tfm_z_x + dir.y * tfm_z_y + dir.z * tfm_z_z;
+
+  const avxf aligned_P_x = P.x * tfm_x_x + P.y * tfm_x_y + P.z * tfm_x_z + tfm_t_x,
+             aligned_P_y = P.x * tfm_y_x + P.y * tfm_y_y + P.z * tfm_y_z + tfm_t_y,
+             aligned_P_z = P.x * tfm_z_x + P.y * tfm_z_y + P.z * tfm_z_z + tfm_t_z;
+
+  const avxf neg_one(-1.0f);
+  const avxf nrdir_x = neg_one / aligned_dir_x, nrdir_y = neg_one / aligned_dir_y,
+             nrdir_z = neg_one / aligned_dir_z;
+
+  const avxf tlower_x = aligned_P_x * nrdir_x, tlower_y = aligned_P_y * nrdir_y,
+             tlower_z = aligned_P_z * nrdir_z;
+
+  const avxf tupper_x = tlower_x - nrdir_x, tupper_y = tlower_y - nrdir_y,
+             tupper_z = tlower_z - nrdir_z;
+
+  const float round_down = 1.0f - difl;
+  const float round_up = 1.0f + difl;
+
+  const avxf tnear_x = min(tlower_x, tupper_x);
+  const avxf tnear_y = min(tlower_y, tupper_y);
+  const avxf tnear_z = min(tlower_z, tupper_z);
+  const avxf tfar_x = max(tlower_x, tupper_x);
+  const avxf tfar_y = max(tlower_y, tupper_y);
+  const avxf tfar_z = max(tlower_z, tupper_z);
+
+  const avxf tnear = max4(isect_near, tnear_x, tnear_y, tnear_z);
+  const avxf tfar = min4(isect_far, tfar_x, tfar_y, tfar_z);
+  const avxb vmask = round_down * tnear <= round_up * tfar;
+  *dist = tnear;
+  return movemask(vmask);
 }
 
 /* Intersectors wrappers.
@@ -422,111 +467,125 @@ ccl_device_inline int obvh_unaligned_node_intersect_robust(
  * They'll check node type and call appropriate intersection code.
  */
 
-ccl_device_inline int obvh_node_intersect(
-        KernelGlobals *ccl_restrict kg,
-        const avxf& isect_near,
-        const avxf& isect_far,
+ccl_device_inline int obvh_node_intersect(KernelGlobals *ccl_restrict kg,
+                                          const avxf &isect_near,
+                                          const avxf &isect_far,
 #ifdef __KERNEL_AVX2__
-        const avx3f& org_idir,
+                                          const avx3f &org_idir,
 #endif
-        const avx3f& org,
-        const avx3f& dir,
-        const avx3f& idir,
-        const int near_x,
-        const int near_y,
-        const int near_z,
-        const int far_x,
-        const int far_y,
-        const int far_z,
-        const int node_addr,
-        avxf *ccl_restrict dist)
+                                          const avx3f &org,
+                                          const avx3f &dir,
+                                          const avx3f &idir,
+                                          const int near_x,
+                                          const int near_y,
+                                          const int near_z,
+                                          const int far_x,
+                                          const int far_y,
+                                          const int far_z,
+                                          const int node_addr,
+                                          avxf *ccl_restrict dist)
 {
-	const int offset = node_addr;
-	const float4 node = kernel_tex_fetch(__bvh_nodes, offset);
-	if(__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
-		return obvh_unaligned_node_intersect(kg,
-		                                     isect_near,
-		                                     isect_far,
+  const int offset = node_addr;
+  const float4 node = kernel_tex_fetch(__bvh_nodes, offset);
+  if (__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
+    return obvh_unaligned_node_intersect(kg,
+                                         isect_near,
+                                         isect_far,
 #ifdef __KERNEL_AVX2__
-		                                     org_idir,
+                                         org_idir,
 #endif
-		                                     org,
-		                                     dir,
-		                                     idir,
-		                                     near_x, near_y, near_z,
-		                                     far_x, far_y, far_z,
-		                                     node_addr,
-		                                     dist);
-	}
-	else {
-		return obvh_aligned_node_intersect(kg,
-		                                   isect_near,
-		                                   isect_far,
+                                         org,
+                                         dir,
+                                         idir,
+                                         near_x,
+                                         near_y,
+                                         near_z,
+                                         far_x,
+                                         far_y,
+                                         far_z,
+                                         node_addr,
+                                         dist);
+  }
+  else {
+    return obvh_aligned_node_intersect(kg,
+                                       isect_near,
+                                       isect_far,
 #ifdef __KERNEL_AVX2__
-		                                   org_idir,
+                                       org_idir,
 #else
-		                                   org,
+                                       org,
 #endif
-		                                   idir,
-		                                   near_x, near_y, near_z,
-		                                   far_x, far_y, far_z,
-		                                   node_addr,
-		                                   dist);
-	}
+                                       idir,
+                                       near_x,
+                                       near_y,
+                                       near_z,
+                                       far_x,
+                                       far_y,
+                                       far_z,
+                                       node_addr,
+                                       dist);
+  }
 }
 
-ccl_device_inline int obvh_node_intersect_robust(
-        KernelGlobals *ccl_restrict kg,
-        const avxf& isect_near,
-        const avxf& isect_far,
+ccl_device_inline int obvh_node_intersect_robust(KernelGlobals *ccl_restrict kg,
+                                                 const avxf &isect_near,
+                                                 const avxf &isect_far,
 #ifdef __KERNEL_AVX2__
-        const avx3f& P_idir,
+                                                 const avx3f &P_idir,
 #endif
-        const avx3f& P,
-        const avx3f& dir,
-        const avx3f& idir,
-        const int near_x,
-        const int near_y,
-        const int near_z,
-        const int far_x,
-        const int far_y,
-        const int far_z,
-        const int node_addr,
-        const float difl,
-        avxf *ccl_restrict dist)
+                                                 const avx3f &P,
+                                                 const avx3f &dir,
+                                                 const avx3f &idir,
+                                                 const int near_x,
+                                                 const int near_y,
+                                                 const int near_z,
+                                                 const int far_x,
+                                                 const int far_y,
+                                                 const int far_z,
+                                                 const int node_addr,
+                                                 const float difl,
+                                                 avxf *ccl_restrict dist)
 {
-	const int offset = node_addr;
-	const float4 node = kernel_tex_fetch(__bvh_nodes, offset);
-	if(__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
-		return obvh_unaligned_node_intersect_robust(kg,
-		                                            isect_near,
-		                                            isect_far,
+  const int offset = node_addr;
+  const float4 node = kernel_tex_fetch(__bvh_nodes, offset);
+  if (__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
+    return obvh_unaligned_node_intersect_robust(kg,
+                                                isect_near,
+                                                isect_far,
 #ifdef __KERNEL_AVX2__
-		                                            P_idir,
+                                                P_idir,
 #endif
-		                                            P,
-		                                            dir,
-		                                            idir,
-		                                            near_x, near_y, near_z,
-		                                            far_x, far_y, far_z,
-		                                            node_addr,
-		                                            difl,
-		                                            dist);
-	}
-	else {
-		return obvh_aligned_node_intersect_robust(kg,
-		                                          isect_near,
-		                                          isect_far,
+                                                P,
+                                                dir,
+                                                idir,
+                                                near_x,
+                                                near_y,
+                                                near_z,
+                                                far_x,
+                                                far_y,
+                                                far_z,
+                                                node_addr,
+                                                difl,
+                                                dist);
+  }
+  else {
+    return obvh_aligned_node_intersect_robust(kg,
+                                              isect_near,
+                                              isect_far,
 #ifdef __KERNEL_AVX2__
-		                                          P_idir,
+                                              P_idir,
 #else
-		                                          P,
+                                              P,
 #endif
-		                                          idir,
-		                                          near_x, near_y, near_z,
-		                                          far_x, far_y, far_z,
-		                                          node_addr,
-		                                          difl,
-		                                          dist);
-	}
+                                              idir,
+                                              near_x,
+                                              near_y,
+                                              near_z,
+                                              far_x,
+                                              far_y,
+                                              far_z,
+                                              node_addr,
+                                              difl,
+                                              dist);
+  }
 }
diff --git a/intern/cycles/kernel/bvh/obvh_shadow_all.h b/intern/cycles/kernel/bvh/obvh_shadow_all.h
index 10d5422c31c..98efb003788 100644
--- a/intern/cycles/kernel/bvh/obvh_shadow_all.h
+++ b/intern/cycles/kernel/bvh/obvh_shadow_all.h
@@ -36,645 +36,635 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
                                              const uint max_hits,
                                              uint *num_hits)
 {
-	/* TODO(sergey):
-	 *  - Test if pushing distance on the stack helps.
-	 * - Likely and unlikely for if() statements.
-	 * - Test restrict attribute for pointers.
-	 */
-
-	/* Traversal stack in CUDA thread-local memory. */
-	OBVHStackItem traversal_stack[BVH_OSTACK_SIZE];
-	traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
-
-	/* Traversal variables in registers. */
-	int stack_ptr = 0;
-	int node_addr = kernel_data.bvh.root;
-
-	/* Ray parameters in registers. */
-	const float tmax = ray->t;
-	float3 P = ray->P;
-	float3 dir = bvh_clamp_direction(ray->D);
-	float3 idir = bvh_inverse_direction(dir);
-	int object = OBJECT_NONE;
-	float isect_t = tmax;
+  /* TODO(sergey):
+   *  - Test if pushing distance on the stack helps.
+   * - Likely and unlikely for if() statements.
+   * - Test restrict attribute for pointers.
+   */
+
+  /* Traversal stack in CUDA thread-local memory. */
+  OBVHStackItem traversal_stack[BVH_OSTACK_SIZE];
+  traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
+
+  /* Traversal variables in registers. */
+  int stack_ptr = 0;
+  int node_addr = kernel_data.bvh.root;
+
+  /* Ray parameters in registers. */
+  const float tmax = ray->t;
+  float3 P = ray->P;
+  float3 dir = bvh_clamp_direction(ray->D);
+  float3 idir = bvh_inverse_direction(dir);
+  int object = OBJECT_NONE;
+  float isect_t = tmax;
 
 #if BVH_FEATURE(BVH_MOTION)
-	Transform ob_itfm;
+  Transform ob_itfm;
 #endif
 
-	*num_hits = 0;
-	isect_array->t = tmax;
+  *num_hits = 0;
+  isect_array->t = tmax;
 
 #if BVH_FEATURE(BVH_INSTANCING)
-	int num_hits_in_instance = 0;
+  int num_hits_in_instance = 0;
 #endif
 
-	avxf tnear(0.0f), tfar(isect_t);
+  avxf tnear(0.0f), tfar(isect_t);
 #if BVH_FEATURE(BVH_HAIR)
-	avx3f dir4(avxf(dir.x), avxf(dir.y), avxf(dir.z));
+  avx3f dir4(avxf(dir.x), avxf(dir.y), avxf(dir.z));
 #endif
-	avx3f idir4(avxf(idir.x), avxf(idir.y), avxf(idir.z));
+  avx3f idir4(avxf(idir.x), avxf(idir.y), avxf(idir.z));
 
 #ifdef __KERNEL_AVX2__
-	float3 P_idir = P*idir;
-	avx3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
+  float3 P_idir = P * idir;
+  avx3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
 #endif
 #if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
-	avx3f org4(avxf(P.x), avxf(P.y), avxf(P.z));
+  avx3f org4(avxf(P.x), avxf(P.y), avxf(P.z));
 #endif
 
-	/* Offsets to select the side that becomes the lower or upper bound. */
-	int near_x, near_y, near_z;
-	int far_x, far_y, far_z;
-	obvh_near_far_idx_calc(idir,
-	                       &near_x, &near_y, &near_z,
-	                       &far_x, &far_y, &far_z);
-
-	/* Traversal loop. */
-	do {
-		do {
-			/* Traverse internal nodes. */
-			while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
-				float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
-				(void) inodes;
-
-				if(false
+  /* Offsets to select the side that becomes the lower or upper bound. */
+  int near_x, near_y, near_z;
+  int far_x, far_y, far_z;
+  obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
+
+  /* Traversal loop. */
+  do {
+    do {
+      /* Traverse internal nodes. */
+      while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
+        float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
+        (void)inodes;
+
+        if (false
 #ifdef __VISIBILITY_FLAG__
-				   || ((__float_as_uint(inodes.x) & PATH_RAY_SHADOW) == 0)
+            || ((__float_as_uint(inodes.x) & PATH_RAY_SHADOW) == 0)
 #endif
 #if BVH_FEATURE(BVH_MOTION)
-				   || UNLIKELY(ray->time < inodes.y)
-				   || UNLIKELY(ray->time > inodes.z)
+            || UNLIKELY(ray->time < inodes.y) || UNLIKELY(ray->time > inodes.z)
 #endif
-				) {
-					/* Pop. */
-					node_addr = traversal_stack[stack_ptr].addr;
-					--stack_ptr;
-					continue;
-				}
-
-				avxf dist;
-				int child_mask = NODE_INTERSECT(kg,
-				                                tnear,
-				                                tfar,
+        ) {
+          /* Pop. */
+          node_addr = traversal_stack[stack_ptr].addr;
+          --stack_ptr;
+          continue;
+        }
+
+        avxf dist;
+        int child_mask = NODE_INTERSECT(kg,
+                                        tnear,
+                                        tfar,
 #ifdef __KERNEL_AVX2__
-					                            P_idir4,
+                                        P_idir4,
 #endif
 #if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
-//#if !defined(__KERNEL_AVX2__)
-					                            org4,
+                                        //#if !defined(__KERNEL_AVX2__)
+                                        org4,
 #endif
 #if BVH_FEATURE(BVH_HAIR)
-					                            dir4,
+                                        dir4,
 #endif
-					                            idir4,
-					                            near_x, near_y, near_z,
-					                            far_x, far_y, far_z,
-					                            node_addr,
-					                            &dist);
-
-				if(child_mask != 0) {
-					avxf cnodes;
+                                        idir4,
+                                        near_x,
+                                        near_y,
+                                        near_z,
+                                        far_x,
+                                        far_y,
+                                        far_z,
+                                        node_addr,
+                                        &dist);
+
+        if (child_mask != 0) {
+          avxf cnodes;
 #if BVH_FEATURE(BVH_HAIR)
-					if(__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
-						cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr+26);
-					}
-					else
+          if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
+            cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 26);
+          }
+          else
 #endif
-					{
-						cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr+14);
-					}
-
-					/* One child is hit, continue with that child. */
-					int r = __bscf(child_mask);
-					if(child_mask == 0) {
-						node_addr = __float_as_int(cnodes[r]);
-						continue;
-					}
-
-					/* Two children are hit, push far child, and continue with
-					 * closer child.
-					 */
-					int c0 = __float_as_int(cnodes[r]);
-					float d0 = ((float*)&dist)[r];
-					r = __bscf(child_mask);
-					int c1 = __float_as_int(cnodes[r]);
-					float d1 = ((float*)&dist)[r];
-					if(child_mask == 0) {
-						if(d1 < d0) {
-							node_addr = c1;
-							++stack_ptr;
-							kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-							traversal_stack[stack_ptr].addr = c0;
-							traversal_stack[stack_ptr].dist = d0;
-							continue;
-						}
-						else {
-							node_addr = c0;
-							++stack_ptr;
-							kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-							traversal_stack[stack_ptr].addr = c1;
-							traversal_stack[stack_ptr].dist = d1;
-							continue;
-						}
-					}
-
-					/* Here starts the slow path for 3 or 4 hit children. We push
-					 * all nodes onto the stack to sort them there.
-					 */
-					++stack_ptr;
-					kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-					traversal_stack[stack_ptr].addr = c1;
-					traversal_stack[stack_ptr].dist = d1;
-					++stack_ptr;
-					kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-					traversal_stack[stack_ptr].addr = c0;
-					traversal_stack[stack_ptr].dist = d0;
-
-					/* Three children are hit, push all onto stack and sort 3
-					 * stack items, continue with closest child.
-					 */
-					r = __bscf(child_mask);
-					int c2 = __float_as_int(cnodes[r]);
-					float d2 = ((float*)&dist)[r];
-					if(child_mask == 0) {
-						++stack_ptr;
-						kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-						traversal_stack[stack_ptr].addr = c2;
-						traversal_stack[stack_ptr].dist = d2;
-						obvh_stack_sort(&traversal_stack[stack_ptr],
-						                &traversal_stack[stack_ptr - 1],
-						                &traversal_stack[stack_ptr - 2]);
-						node_addr = traversal_stack[stack_ptr].addr;
-						--stack_ptr;
-						continue;
-					}
-
-					/* Four children are hit, push all onto stack and sort 4
-					 * stack items, continue with closest child.
-					 */
-					r = __bscf(child_mask);
-					int c3 = __float_as_int(cnodes[r]);
-					float d3 = ((float*)&dist)[r];
-					if(child_mask == 0) {
-						++stack_ptr;
-						kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-						traversal_stack[stack_ptr].addr = c3;
-						traversal_stack[stack_ptr].dist = d3;
-						++stack_ptr;
-						kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-						traversal_stack[stack_ptr].addr = c2;
-						traversal_stack[stack_ptr].dist = d2;
-						obvh_stack_sort(&traversal_stack[stack_ptr],
-						                &traversal_stack[stack_ptr - 1],
-						                &traversal_stack[stack_ptr - 2],
-						                &traversal_stack[stack_ptr - 3]);
-						node_addr = traversal_stack[stack_ptr].addr;
-						--stack_ptr;
-						continue;
-					}
-
-					++stack_ptr;
-					kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-					traversal_stack[stack_ptr].addr = c3;
-					traversal_stack[stack_ptr].dist = d3;
-					++stack_ptr;
-					kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-					traversal_stack[stack_ptr].addr = c2;
-					traversal_stack[stack_ptr].dist = d2;
-
-					/* Five children are hit, push all onto stack and sort 5
-					 * stack items, continue with closest child
-					 */
-					r = __bscf(child_mask);
-					int c4 = __float_as_int(cnodes[r]);
-					float d4 = ((float*)&dist)[r];
-					if(child_mask == 0) {
-						++stack_ptr;
-						kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-						traversal_stack[stack_ptr].addr = c4;
-						traversal_stack[stack_ptr].dist = d4;
-						obvh_stack_sort(&traversal_stack[stack_ptr],
-						                &traversal_stack[stack_ptr - 1],
-						                &traversal_stack[stack_ptr - 2],
-						                &traversal_stack[stack_ptr - 3],
-						                &traversal_stack[stack_ptr - 4]);
-						node_addr = traversal_stack[stack_ptr].addr;
-						--stack_ptr;
-						continue;
-					}
-
-					/* Six children are hit, push all onto stack and sort 6
-					 * stack items, continue with closest child.
-					 */
-					r = __bscf(child_mask);
-					int c5 = __float_as_int(cnodes[r]);
-					float d5 = ((float*)&dist)[r];
-					if(child_mask == 0) {
-						++stack_ptr;
-						kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-						traversal_stack[stack_ptr].addr = c5;
-						traversal_stack[stack_ptr].dist = d5;
-						++stack_ptr;
-						kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-						traversal_stack[stack_ptr].addr = c4;
-						traversal_stack[stack_ptr].dist = d4;
-						obvh_stack_sort(&traversal_stack[stack_ptr],
-						                &traversal_stack[stack_ptr - 1],
-						                &traversal_stack[stack_ptr - 2],
-						                &traversal_stack[stack_ptr - 3],
-						                &traversal_stack[stack_ptr - 4],
-						                &traversal_stack[stack_ptr - 5]);
-						node_addr = traversal_stack[stack_ptr].addr;
-						--stack_ptr;
-						continue;
-					}
-
-					++stack_ptr;
-					kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-					traversal_stack[stack_ptr].addr = c5;
-					traversal_stack[stack_ptr].dist = d5;
-					++stack_ptr;
-					kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-					traversal_stack[stack_ptr].addr = c4;
-					traversal_stack[stack_ptr].dist = d4;
-
-					/* Seven children are hit, push all onto stack and sort 7
-					 * stack items, continue with closest child.
-					 */
-					r = __bscf(child_mask);
-					int c6 = __float_as_int(cnodes[r]);
-					float d6 = ((float*)&dist)[r];
-					if(child_mask == 0) {
-						++stack_ptr;
-						kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-						traversal_stack[stack_ptr].addr = c6;
-						traversal_stack[stack_ptr].dist = d6;
-						obvh_stack_sort(&traversal_stack[stack_ptr],
-						                &traversal_stack[stack_ptr - 1],
-						                &traversal_stack[stack_ptr - 2],
-						                &traversal_stack[stack_ptr - 3],
-						                &traversal_stack[stack_ptr - 4],
-						                &traversal_stack[stack_ptr - 5],
-						                &traversal_stack[stack_ptr - 6]);
-						node_addr = traversal_stack[stack_ptr].addr;
-						--stack_ptr;
-						continue;
-					}
-
-					/* Eight children are hit, push all onto stack and sort 8
-					 * stack items, continue with closest child.
-					 */
-					r = __bscf(child_mask);
-					int c7 = __float_as_int(cnodes[r]);
-					float d7 = ((float*)&dist)[r];
-					++stack_ptr;
-					kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-					traversal_stack[stack_ptr].addr = c7;
-					traversal_stack[stack_ptr].dist = d7;
-					++stack_ptr;
-					kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-					traversal_stack[stack_ptr].addr = c6;
-					traversal_stack[stack_ptr].dist = d6;
-					obvh_stack_sort(&traversal_stack[stack_ptr],
-					                &traversal_stack[stack_ptr - 1],
-					                &traversal_stack[stack_ptr - 2],
-					                &traversal_stack[stack_ptr - 3],
-					                &traversal_stack[stack_ptr - 4],
-					                &traversal_stack[stack_ptr - 5],
-					                &traversal_stack[stack_ptr - 6],
-					                &traversal_stack[stack_ptr - 7]);
-					node_addr = traversal_stack[stack_ptr].addr;
-					--stack_ptr;
-					continue;
-				}
-
-				node_addr = traversal_stack[stack_ptr].addr;
-				--stack_ptr;
-			}
-
-			/* If node is leaf, fetch triangle list. */
-			if(node_addr < 0) {
-				float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1));
+          {
+            cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 14);
+          }
+
+          /* One child is hit, continue with that child. */
+          int r = __bscf(child_mask);
+          if (child_mask == 0) {
+            node_addr = __float_as_int(cnodes[r]);
+            continue;
+          }
+
+          /* Two children are hit, push far child, and continue with
+           * closer child.
+           */
+          int c0 = __float_as_int(cnodes[r]);
+          float d0 = ((float *)&dist)[r];
+          r = __bscf(child_mask);
+          int c1 = __float_as_int(cnodes[r]);
+          float d1 = ((float *)&dist)[r];
+          if (child_mask == 0) {
+            if (d1 < d0) {
+              node_addr = c1;
+              ++stack_ptr;
+              kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+              traversal_stack[stack_ptr].addr = c0;
+              traversal_stack[stack_ptr].dist = d0;
+              continue;
+            }
+            else {
+              node_addr = c0;
+              ++stack_ptr;
+              kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+              traversal_stack[stack_ptr].addr = c1;
+              traversal_stack[stack_ptr].dist = d1;
+              continue;
+            }
+          }
+
+          /* Here starts the slow path for 3 or 4 hit children. We push
+           * all nodes onto the stack to sort them there.
+           */
+          ++stack_ptr;
+          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+          traversal_stack[stack_ptr].addr = c1;
+          traversal_stack[stack_ptr].dist = d1;
+          ++stack_ptr;
+          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+          traversal_stack[stack_ptr].addr = c0;
+          traversal_stack[stack_ptr].dist = d0;
+
+          /* Three children are hit, push all onto stack and sort 3
+           * stack items, continue with closest child.
+           */
+          r = __bscf(child_mask);
+          int c2 = __float_as_int(cnodes[r]);
+          float d2 = ((float *)&dist)[r];
+          if (child_mask == 0) {
+            ++stack_ptr;
+            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+            traversal_stack[stack_ptr].addr = c2;
+            traversal_stack[stack_ptr].dist = d2;
+            obvh_stack_sort(&traversal_stack[stack_ptr],
+                            &traversal_stack[stack_ptr - 1],
+                            &traversal_stack[stack_ptr - 2]);
+            node_addr = traversal_stack[stack_ptr].addr;
+            --stack_ptr;
+            continue;
+          }
+
+          /* Four children are hit, push all onto stack and sort 4
+           * stack items, continue with closest child.
+           */
+          r = __bscf(child_mask);
+          int c3 = __float_as_int(cnodes[r]);
+          float d3 = ((float *)&dist)[r];
+          if (child_mask == 0) {
+            ++stack_ptr;
+            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+            traversal_stack[stack_ptr].addr = c3;
+            traversal_stack[stack_ptr].dist = d3;
+            ++stack_ptr;
+            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+            traversal_stack[stack_ptr].addr = c2;
+            traversal_stack[stack_ptr].dist = d2;
+            obvh_stack_sort(&traversal_stack[stack_ptr],
+                            &traversal_stack[stack_ptr - 1],
+                            &traversal_stack[stack_ptr - 2],
+                            &traversal_stack[stack_ptr - 3]);
+            node_addr = traversal_stack[stack_ptr].addr;
+            --stack_ptr;
+            continue;
+          }
+
+          ++stack_ptr;
+          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+          traversal_stack[stack_ptr].addr = c3;
+          traversal_stack[stack_ptr].dist = d3;
+          ++stack_ptr;
+          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+          traversal_stack[stack_ptr].addr = c2;
+          traversal_stack[stack_ptr].dist = d2;
+
+          /* Five children are hit, push all onto stack and sort 5
+           * stack items, continue with closest child
+           */
+          r = __bscf(child_mask);
+          int c4 = __float_as_int(cnodes[r]);
+          float d4 = ((float *)&dist)[r];
+          if (child_mask == 0) {
+            ++stack_ptr;
+            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+            traversal_stack[stack_ptr].addr = c4;
+            traversal_stack[stack_ptr].dist = d4;
+            obvh_stack_sort(&traversal_stack[stack_ptr],
+                            &traversal_stack[stack_ptr - 1],
+                            &traversal_stack[stack_ptr - 2],
+                            &traversal_stack[stack_ptr - 3],
+                            &traversal_stack[stack_ptr - 4]);
+            node_addr = traversal_stack[stack_ptr].addr;
+            --stack_ptr;
+            continue;
+          }
+
+          /* Six children are hit, push all onto stack and sort 6
+           * stack items, continue with closest child.
+           */
+          r = __bscf(child_mask);
+          int c5 = __float_as_int(cnodes[r]);
+          float d5 = ((float *)&dist)[r];
+          if (child_mask == 0) {
+            ++stack_ptr;
+            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+            traversal_stack[stack_ptr].addr = c5;
+            traversal_stack[stack_ptr].dist = d5;
+            ++stack_ptr;
+            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+            traversal_stack[stack_ptr].addr = c4;
+            traversal_stack[stack_ptr].dist = d4;
+            obvh_stack_sort(&traversal_stack[stack_ptr],
+                            &traversal_stack[stack_ptr - 1],
+                            &traversal_stack[stack_ptr - 2],
+                            &traversal_stack[stack_ptr - 3],
+                            &traversal_stack[stack_ptr - 4],
+                            &traversal_stack[stack_ptr - 5]);
+            node_addr = traversal_stack[stack_ptr].addr;
+            --stack_ptr;
+            continue;
+          }
+
+          ++stack_ptr;
+          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+          traversal_stack[stack_ptr].addr = c5;
+          traversal_stack[stack_ptr].dist = d5;
+          ++stack_ptr;
+          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+          traversal_stack[stack_ptr].addr = c4;
+          traversal_stack[stack_ptr].dist = d4;
+
+          /* Seven children are hit, push all onto stack and sort 7
+           * stack items, continue with closest child.
+           */
+          r = __bscf(child_mask);
+          int c6 = __float_as_int(cnodes[r]);
+          float d6 = ((float *)&dist)[r];
+          if (child_mask == 0) {
+            ++stack_ptr;
+            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+            traversal_stack[stack_ptr].addr = c6;
+            traversal_stack[stack_ptr].dist = d6;
+            obvh_stack_sort(&traversal_stack[stack_ptr],
+                            &traversal_stack[stack_ptr - 1],
+                            &traversal_stack[stack_ptr - 2],
+                            &traversal_stack[stack_ptr - 3],
+                            &traversal_stack[stack_ptr - 4],
+                            &traversal_stack[stack_ptr - 5],
+                            &traversal_stack[stack_ptr - 6]);
+            node_addr = traversal_stack[stack_ptr].addr;
+            --stack_ptr;
+            continue;
+          }
+
+          /* Eight children are hit, push all onto stack and sort 8
+           * stack items, continue with closest child.
+           */
+          r = __bscf(child_mask);
+          int c7 = __float_as_int(cnodes[r]);
+          float d7 = ((float *)&dist)[r];
+          ++stack_ptr;
+          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+          traversal_stack[stack_ptr].addr = c7;
+          traversal_stack[stack_ptr].dist = d7;
+          ++stack_ptr;
+          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+          traversal_stack[stack_ptr].addr = c6;
+          traversal_stack[stack_ptr].dist = d6;
+          obvh_stack_sort(&traversal_stack[stack_ptr],
+                          &traversal_stack[stack_ptr - 1],
+                          &traversal_stack[stack_ptr - 2],
+                          &traversal_stack[stack_ptr - 3],
+                          &traversal_stack[stack_ptr - 4],
+                          &traversal_stack[stack_ptr - 5],
+                          &traversal_stack[stack_ptr - 6],
+                          &traversal_stack[stack_ptr - 7]);
+          node_addr = traversal_stack[stack_ptr].addr;
+          --stack_ptr;
+          continue;
+        }
+
+        node_addr = traversal_stack[stack_ptr].addr;
+        --stack_ptr;
+      }
+
+      /* If node is leaf, fetch triangle list. */
+      if (node_addr < 0) {
+        float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
 #ifdef __VISIBILITY_FLAG__
-				if((__float_as_uint(leaf.z) & PATH_RAY_SHADOW) == 0) {
-					/* Pop. */
-					node_addr = traversal_stack[stack_ptr].addr;
-					--stack_ptr;
-					continue;
-				}
+        if ((__float_as_uint(leaf.z) & PATH_RAY_SHADOW) == 0) {
+          /* Pop. */
+          node_addr = traversal_stack[stack_ptr].addr;
+          --stack_ptr;
+          continue;
+        }
 #endif
 
-				int prim_addr = __float_as_int(leaf.x);
+        int prim_addr = __float_as_int(leaf.x);
 
 #if BVH_FEATURE(BVH_INSTANCING)
-				if(prim_addr >= 0) {
+        if (prim_addr >= 0) {
 #endif
-					int prim_addr2 = __float_as_int(leaf.y);
-					const uint type = __float_as_int(leaf.w);
-					const uint p_type = type & PRIMITIVE_ALL;
-
-					/* Pop. */
-					node_addr = traversal_stack[stack_ptr].addr;
-					--stack_ptr;
-
-					/* Primitive intersection. */
-					if(p_type == PRIMITIVE_TRIANGLE) {
-						int prim_count = prim_addr2 - prim_addr;
-						if(prim_count < 3) {
-							while(prim_addr < prim_addr2) {
-								kernel_assert((kernel_tex_fetch(__prim_type, prim_addr) & PRIMITIVE_ALL) == p_type);
-								int hit = triangle_intersect(kg,
-								                             isect_array,
-								                             P,
-								                             dir,
-								                             PATH_RAY_SHADOW,
-								                             object,
-								                             prim_addr);
-								/* Shadow ray early termination. */
-								if(hit) {
-									/* detect if this surface has a shader with transparent shadows */
-
-									/* todo: optimize so primitive visibility flag indicates if
-									 * the primitive has a transparent shadow shader? */
-									int prim = kernel_tex_fetch(__prim_index, isect_array->prim);
-									int shader = 0;
+          int prim_addr2 = __float_as_int(leaf.y);
+          const uint type = __float_as_int(leaf.w);
+          const uint p_type = type & PRIMITIVE_ALL;
+
+          /* Pop. */
+          node_addr = traversal_stack[stack_ptr].addr;
+          --stack_ptr;
+
+          /* Primitive intersection. */
+          if (p_type == PRIMITIVE_TRIANGLE) {
+            int prim_count = prim_addr2 - prim_addr;
+            if (prim_count < 3) {
+              while (prim_addr < prim_addr2) {
+                kernel_assert((kernel_tex_fetch(__prim_type, prim_addr) & PRIMITIVE_ALL) ==
+                              p_type);
+                int hit = triangle_intersect(
+                    kg, isect_array, P, dir, PATH_RAY_SHADOW, object, prim_addr);
+                /* Shadow ray early termination. */
+                if (hit) {
+                  /* detect if this surface has a shader with transparent shadows */
+
+                  /* todo: optimize so primitive visibility flag indicates if
+                   * the primitive has a transparent shadow shader? */
+                  int prim = kernel_tex_fetch(__prim_index, isect_array->prim);
+                  int shader = 0;
 
 #ifdef __HAIR__
-									if(kernel_tex_fetch(__prim_type, isect_array->prim) & PRIMITIVE_ALL_TRIANGLE)
+                  if (kernel_tex_fetch(__prim_type, isect_array->prim) & PRIMITIVE_ALL_TRIANGLE)
 #endif
-									{
-										shader = kernel_tex_fetch(__tri_shader, prim);
-									}
+                  {
+                    shader = kernel_tex_fetch(__tri_shader, prim);
+                  }
 #ifdef __HAIR__
-									else {
-										float4 str = kernel_tex_fetch(__curves, prim);
-										shader = __float_as_int(str.z);
-									}
+                  else {
+                    float4 str = kernel_tex_fetch(__curves, prim);
+                    shader = __float_as_int(str.z);
+                  }
 #endif
-									int flag = kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags;
-
-									/* if no transparent shadows, all light is blocked */
-									if(!(flag & SD_HAS_TRANSPARENT_SHADOW)) {
-										return true;
-									}
-									/* if maximum number of hits reached, block all light */
-									else if(*num_hits == max_hits) {
-										return true;
-									}
-
-									/* move on to next entry in intersections array */
-									isect_array++;
-									(*num_hits)++;
+                  int flag = kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags;
+
+                  /* if no transparent shadows, all light is blocked */
+                  if (!(flag & SD_HAS_TRANSPARENT_SHADOW)) {
+                    return true;
+                  }
+                  /* if maximum number of hits reached, block all light */
+                  else if (*num_hits == max_hits) {
+                    return true;
+                  }
+
+                  /* move on to next entry in intersections array */
+                  isect_array++;
+                  (*num_hits)++;
 #if BVH_FEATURE(BVH_INSTANCING)
-									num_hits_in_instance++;
+                  num_hits_in_instance++;
 #endif
 
-									isect_array->t = isect_t;
-								}
+                  isect_array->t = isect_t;
+                }
 
-								prim_addr++;
-							} //while
-					} else {
-							kernel_assert((kernel_tex_fetch(__prim_type, (prim_addr)) & PRIMITIVE_ALL) == p_type);
+                prim_addr++;
+              }  //while
+            }
+            else {
+              kernel_assert((kernel_tex_fetch(__prim_type, (prim_addr)) & PRIMITIVE_ALL) ==
+                            p_type);
 
 #if BVH_FEATURE(BVH_INSTANCING)
-							int* nhiptr = &num_hits_in_instance;
+              int *nhiptr = &num_hits_in_instance;
 #else
-							int nhi= 0;
-							int *nhiptr = &nhi;
+            int nhi = 0;
+            int *nhiptr = &nhi;
 #endif
 
-							int result = triangle_intersect8(kg,
-							                                 &isect_array,
-							                                 P,
-							                                 dir,
-							                                 PATH_RAY_SHADOW,
-							                                 object,
-							                                 prim_addr,
-							                                 prim_count,
-							                                 num_hits,
-							                                 max_hits,
-							                                 nhiptr,
-							                                 isect_t);
-							if(result == 2) {
-								return true;
-							}
-						}   // prim_count
-					}  // PRIMITIVE_TRIANGLE
-					else {
-							while(prim_addr < prim_addr2) {
-							kernel_assert((kernel_tex_fetch(__prim_type, prim_addr) & PRIMITIVE_ALL) == p_type);
+              int result = triangle_intersect8(kg,
+                                               &isect_array,
+                                               P,
+                                               dir,
+                                               PATH_RAY_SHADOW,
+                                               object,
+                                               prim_addr,
+                                               prim_count,
+                                               num_hits,
+                                               max_hits,
+                                               nhiptr,
+                                               isect_t);
+              if (result == 2) {
+                return true;
+              }
+            }  // prim_count
+          }    // PRIMITIVE_TRIANGLE
+          else {
+            while (prim_addr < prim_addr2) {
+              kernel_assert((kernel_tex_fetch(__prim_type, prim_addr) & PRIMITIVE_ALL) == p_type);
 
 #ifdef __SHADOW_TRICKS__
-							uint tri_object = (object == OBJECT_NONE)
-									? kernel_tex_fetch(__prim_object, prim_addr)
-									: object;
-							if(tri_object == skip_object) {
-								++prim_addr;
-								continue;
-							}
+              uint tri_object = (object == OBJECT_NONE) ?
+                                    kernel_tex_fetch(__prim_object, prim_addr) :
+                                    object;
+              if (tri_object == skip_object) {
+                ++prim_addr;
+                continue;
+              }
 #endif
 
-							bool hit;
+              bool hit;
 
-							/* todo: specialized intersect functions which don't fill in
-							 * isect unless needed and check SD_HAS_TRANSPARENT_SHADOW?
-							 * might give a few % performance improvement */
+              /* todo: specialized intersect functions which don't fill in
+               * isect unless needed and check SD_HAS_TRANSPARENT_SHADOW?
+               * might give a few % performance improvement */
 
-							switch(p_type) {
+              switch (p_type) {
 
 #if BVH_FEATURE(BVH_MOTION)
-								case PRIMITIVE_MOTION_TRIANGLE: {
-									hit = motion_triangle_intersect(kg,
-									                                isect_array,
-									                                P,
-									                                dir,
-									                                ray->time,
-									                                PATH_RAY_SHADOW,
-									                                object,
-									                                prim_addr);
-									break;
-								}
+                case PRIMITIVE_MOTION_TRIANGLE: {
+                  hit = motion_triangle_intersect(
+                      kg, isect_array, P, dir, ray->time, PATH_RAY_SHADOW, object, prim_addr);
+                  break;
+                }
 #endif
 #if BVH_FEATURE(BVH_HAIR)
-								case PRIMITIVE_CURVE:
-								case PRIMITIVE_MOTION_CURVE: {
-									const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr);
-									if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
-										hit = cardinal_curve_intersect(kg,
-										                               isect_array,
-										                               P,
-										                               dir,
-										                               PATH_RAY_SHADOW,
-										                               object,
-										                               prim_addr,
-										                               ray->time,
-										                               curve_type,
-										                               NULL,
-										                               0, 0);
-									}
-									else {
-										hit = curve_intersect(kg,
-										                      isect_array,
-										                      P,
-										                      dir,
-										                      PATH_RAY_SHADOW,
-										                      object,
-										                      prim_addr,
-										                      ray->time,
-										                      curve_type,
-										                      NULL,
-										                      0, 0);
-									}
-									break;
-								}
+                case PRIMITIVE_CURVE:
+                case PRIMITIVE_MOTION_CURVE: {
+                  const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr);
+                  if (kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
+                    hit = cardinal_curve_intersect(kg,
+                                                   isect_array,
+                                                   P,
+                                                   dir,
+                                                   PATH_RAY_SHADOW,
+                                                   object,
+                                                   prim_addr,
+                                                   ray->time,
+                                                   curve_type,
+                                                   NULL,
+                                                   0,
+                                                   0);
+                  }
+                  else {
+                    hit = curve_intersect(kg,
+                                          isect_array,
+                                          P,
+                                          dir,
+                                          PATH_RAY_SHADOW,
+                                          object,
+                                          prim_addr,
+                                          ray->time,
+                                          curve_type,
+                                          NULL,
+                                          0,
+                                          0);
+                  }
+                  break;
+                }
 #endif
-								default: {
-									hit = false;
-									break;
-								}
-							}
+                default: {
+                  hit = false;
+                  break;
+                }
+              }
 
-							/* Shadow ray early termination. */
-							if(hit) {
-								/* detect if this surface has a shader with transparent shadows */
+              /* Shadow ray early termination. */
+              if (hit) {
+                /* detect if this surface has a shader with transparent shadows */
 
-								/* todo: optimize so primitive visibility flag indicates if
-								 * the primitive has a transparent shadow shader? */
-								int prim = kernel_tex_fetch(__prim_index, isect_array->prim);
-								int shader = 0;
+                /* todo: optimize so primitive visibility flag indicates if
+                 * the primitive has a transparent shadow shader? */
+                int prim = kernel_tex_fetch(__prim_index, isect_array->prim);
+                int shader = 0;
 
 #ifdef __HAIR__
-								if(kernel_tex_fetch(__prim_type, isect_array->prim) & PRIMITIVE_ALL_TRIANGLE)
+                if (kernel_tex_fetch(__prim_type, isect_array->prim) & PRIMITIVE_ALL_TRIANGLE)
 #endif
-								{
-									shader = kernel_tex_fetch(__tri_shader, prim);
-								}
+                {
+                  shader = kernel_tex_fetch(__tri_shader, prim);
+                }
 #ifdef __HAIR__
-								else {
-									float4 str = kernel_tex_fetch(__curves, prim);
-									shader = __float_as_int(str.z);
-								}
+                else {
+                  float4 str = kernel_tex_fetch(__curves, prim);
+                  shader = __float_as_int(str.z);
+                }
 #endif
-								int flag = kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags;
-
-								/* if no transparent shadows, all light is blocked */
-								if(!(flag & SD_HAS_TRANSPARENT_SHADOW)) {
-									return true;
-								}
-								/* if maximum number of hits reached, block all light */
-								else if(*num_hits == max_hits) {
-									return true;
-								}
-
-								/* move on to next entry in intersections array */
-								isect_array++;
-								(*num_hits)++;
+                int flag = kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags;
+
+                /* if no transparent shadows, all light is blocked */
+                if (!(flag & SD_HAS_TRANSPARENT_SHADOW)) {
+                  return true;
+                }
+                /* if maximum number of hits reached, block all light */
+                else if (*num_hits == max_hits) {
+                  return true;
+                }
+
+                /* move on to next entry in intersections array */
+                isect_array++;
+                (*num_hits)++;
 #if BVH_FEATURE(BVH_INSTANCING)
-								num_hits_in_instance++;
+                num_hits_in_instance++;
 #endif
 
-								isect_array->t = isect_t;
-							}
+                isect_array->t = isect_t;
+              }
 
-							prim_addr++;
-						}//while prim
-					}
-				}
+              prim_addr++;
+            }  //while prim
+          }
+        }
 #if BVH_FEATURE(BVH_INSTANCING)
-				else {
-					/* Instance push. */
-					object = kernel_tex_fetch(__prim_object, -prim_addr-1);
+        else {
+          /* Instance push. */
+          object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
 
 #  if BVH_FEATURE(BVH_MOTION)
-					isect_t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
+          isect_t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
 #  else
-					isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
+          isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
 #  endif
 
-					num_hits_in_instance = 0;
-					isect_array->t = isect_t;
+          num_hits_in_instance = 0;
+          isect_array->t = isect_t;
 
-					obvh_near_far_idx_calc(idir,
-					                       &near_x, &near_y, &near_z,
-					                       &far_x, &far_y, &far_z);
-					tfar = avxf(isect_t);
+          obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
+          tfar = avxf(isect_t);
 #  if BVH_FEATURE(BVH_HAIR)
-					dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));
+          dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));
 #  endif
-					idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z));
+          idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z));
 #  ifdef __KERNEL_AVX2__
-					P_idir = P*idir;
-					P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
+          P_idir = P * idir;
+          P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
 #  endif
 #  if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
-					org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z));
+          org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z));
 #  endif
 
-					++stack_ptr;
-					kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-					traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
-
-					node_addr = kernel_tex_fetch(__object_node, object);
+          ++stack_ptr;
+          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+          traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
 
-				}
-			}
-#endif  /* FEATURE(BVH_INSTANCING) */
-		} while(node_addr != ENTRYPOINT_SENTINEL);
+          node_addr = kernel_tex_fetch(__object_node, object);
+        }
+      }
+#endif /* FEATURE(BVH_INSTANCING) */
+    } while (node_addr != ENTRYPOINT_SENTINEL);
 
 #if BVH_FEATURE(BVH_INSTANCING)
-		if(stack_ptr >= 0) {
-			kernel_assert(object != OBJECT_NONE);
+    if (stack_ptr >= 0) {
+      kernel_assert(object != OBJECT_NONE);
 
-			/* Instance pop. */
-			if(num_hits_in_instance) {
-				float t_fac;
+      /* Instance pop. */
+      if (num_hits_in_instance) {
+        float t_fac;
 #  if BVH_FEATURE(BVH_MOTION)
-				bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm);
+        bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm);
 #  else
-				bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
+        bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
 #  endif
-				/* Scale isect->t to adjust for instancing. */
-				for(int i = 0; i < num_hits_in_instance; i++) {
-					(isect_array-i-1)->t *= t_fac;
-				}
-			}
-			else {
+        /* Scale isect->t to adjust for instancing. */
+        for (int i = 0; i < num_hits_in_instance; i++) {
+          (isect_array - i - 1)->t *= t_fac;
+        }
+      }
+      else {
 #  if BVH_FEATURE(BVH_MOTION)
-				bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm);
+        bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm);
 #  else
-				bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
+        bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
 #  endif
-			}
+      }
 
-			isect_t = tmax;
-			isect_array->t = isect_t;
+      isect_t = tmax;
+      isect_array->t = isect_t;
 
-			obvh_near_far_idx_calc(idir,
-			                       &near_x, &near_y, &near_z,
-			                       &far_x, &far_y, &far_z);
-			tfar = avxf(isect_t);
+      obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
+      tfar = avxf(isect_t);
 #  if BVH_FEATURE(BVH_HAIR)
-			dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));
+      dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));
 #  endif
-			idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z));
+      idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z));
 #  ifdef __KERNEL_AVX2__
-			P_idir = P*idir;
-			P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
+      P_idir = P * idir;
+      P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
 #  endif
 #  if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
-			org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z));
+      org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z));
 #  endif
 
-			object = OBJECT_NONE;
-			node_addr = traversal_stack[stack_ptr].addr;
-			--stack_ptr;
-		}
-#endif  /* FEATURE(BVH_INSTANCING) */
-	} while(node_addr != ENTRYPOINT_SENTINEL);
+      object = OBJECT_NONE;
+      node_addr = traversal_stack[stack_ptr].addr;
+      --stack_ptr;
+    }
+#endif /* FEATURE(BVH_INSTANCING) */
+  } while (node_addr != ENTRYPOINT_SENTINEL);
 
-	return false;
+  return false;
 }
 
 #undef NODE_INTERSECT
diff --git a/intern/cycles/kernel/bvh/obvh_traversal.h b/intern/cycles/kernel/bvh/obvh_traversal.h
index 5df7a3be515..86b1de48aaa 100644
--- a/intern/cycles/kernel/bvh/obvh_traversal.h
+++ b/intern/cycles/kernel/bvh/obvh_traversal.h
@@ -37,598 +37,583 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
                                              Intersection *isect,
                                              const uint visibility
 #if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
-                                             ,uint *lcg_state,
+                                             ,
+                                             uint *lcg_state,
                                              float difl,
                                              float extmax
 #endif
-                                             )
+)
 {
-	/* Traversal stack in CUDA thread-local memory. */
-	OBVHStackItem traversal_stack[BVH_OSTACK_SIZE];
-	traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
-	traversal_stack[0].dist = -FLT_MAX;
-
-	/* Traversal variables in registers. */
-	int stack_ptr = 0;
-	int node_addr = kernel_data.bvh.root;
-	float node_dist = -FLT_MAX;
-
-	/* Ray parameters in registers. */
-	float3 P = ray->P;
-	float3 dir = bvh_clamp_direction(ray->D);
-	float3 idir = bvh_inverse_direction(dir);
-	int object = OBJECT_NONE;
+  /* Traversal stack in CUDA thread-local memory. */
+  OBVHStackItem traversal_stack[BVH_OSTACK_SIZE];
+  traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
+  traversal_stack[0].dist = -FLT_MAX;
+
+  /* Traversal variables in registers. */
+  int stack_ptr = 0;
+  int node_addr = kernel_data.bvh.root;
+  float node_dist = -FLT_MAX;
+
+  /* Ray parameters in registers. */
+  float3 P = ray->P;
+  float3 dir = bvh_clamp_direction(ray->D);
+  float3 idir = bvh_inverse_direction(dir);
+  int object = OBJECT_NONE;
 
 #if BVH_FEATURE(BVH_MOTION)
-	Transform ob_itfm;
+  Transform ob_itfm;
 #endif
 
-	isect->t = ray->t;
-	isect->u = 0.0f;
-	isect->v = 0.0f;
-	isect->prim = PRIM_NONE;
-	isect->object = OBJECT_NONE;
+  isect->t = ray->t;
+  isect->u = 0.0f;
+  isect->v = 0.0f;
+  isect->prim = PRIM_NONE;
+  isect->object = OBJECT_NONE;
 
-	BVH_DEBUG_INIT();
-	avxf tnear(0.0f), tfar(ray->t);
+  BVH_DEBUG_INIT();
+  avxf tnear(0.0f), tfar(ray->t);
 #if BVH_FEATURE(BVH_HAIR)
-	avx3f dir4(avxf(dir.x), avxf(dir.y), avxf(dir.z));
+  avx3f dir4(avxf(dir.x), avxf(dir.y), avxf(dir.z));
 #endif
-	avx3f idir4(avxf(idir.x), avxf(idir.y), avxf(idir.z));
+  avx3f idir4(avxf(idir.x), avxf(idir.y), avxf(idir.z));
 
 #ifdef __KERNEL_AVX2__
-	float3 P_idir = P*idir;
-	avx3f P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
+  float3 P_idir = P * idir;
+  avx3f P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
 #endif
 #if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
-	avx3f org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z));
+  avx3f org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z));
 #endif
 
-	/* Offsets to select the side that becomes the lower or upper bound. */
-	int near_x, near_y, near_z;
-	int far_x, far_y, far_z;
-	obvh_near_far_idx_calc(idir,
-	                       &near_x, &near_y, &near_z,
-	                       &far_x, &far_y, &far_z);
-	/* Traversal loop. */
-	do {
-		do {
-			/* Traverse internal nodes. */
-			while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
-				float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
-				(void) inodes;
-
-				if(UNLIKELY(node_dist > isect->t)
+  /* Offsets to select the side that becomes the lower or upper bound. */
+  int near_x, near_y, near_z;
+  int far_x, far_y, far_z;
+  obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
+  /* Traversal loop. */
+  do {
+    do {
+      /* Traverse internal nodes. */
+      while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
+        float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
+        (void)inodes;
+
+        if (UNLIKELY(node_dist > isect->t)
 #if BVH_FEATURE(BVH_MOTION)
-				   || UNLIKELY(ray->time < inodes.y)
-				   || UNLIKELY(ray->time > inodes.z)
+            || UNLIKELY(ray->time < inodes.y) || UNLIKELY(ray->time > inodes.z)
 #endif
 #ifdef __VISIBILITY_FLAG__
-				   || (__float_as_uint(inodes.x) & visibility) == 0
+            || (__float_as_uint(inodes.x) & visibility) == 0
 #endif
-				 )
-				{
-					/* Pop. */
-					node_addr = traversal_stack[stack_ptr].addr;
-					node_dist = traversal_stack[stack_ptr].dist;
-					--stack_ptr;
-					continue;
-				}
+        ) {
+          /* Pop. */
+          node_addr = traversal_stack[stack_ptr].addr;
+          node_dist = traversal_stack[stack_ptr].dist;
+          --stack_ptr;
+          continue;
+        }
 
-				int child_mask;
-				avxf dist;
+        int child_mask;
+        avxf dist;
 
-				BVH_DEBUG_NEXT_NODE();
+        BVH_DEBUG_NEXT_NODE();
 
 #if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
-				if(difl != 0.0f) {
-					/* NOTE: We extend all the child BB instead of fetching
-					 * and checking visibility flags for each of the,
-					 *
-					 * Need to test if doing opposite would be any faster.
-					 */
-					child_mask = NODE_INTERSECT_ROBUST(kg,
-					                                   tnear,
-					                                   tfar,
+        if (difl != 0.0f) {
+          /* NOTE: We extend all the child BB instead of fetching
+           * and checking visibility flags for each of the,
+           *
+           * Need to test if doing opposite would be any faster.
+           */
+          child_mask = NODE_INTERSECT_ROBUST(kg,
+                                             tnear,
+                                             tfar,
 #  ifdef __KERNEL_AVX2__
-					                                   P_idir4,
+                                             P_idir4,
 #  endif
 #  if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
-					                                   org4,
+                                             org4,
 #  endif
 #  if BVH_FEATURE(BVH_HAIR)
-					                                   dir4,
+                                             dir4,
 #  endif
-					                                   idir4,
-					                                   near_x, near_y, near_z,
-					                                   far_x, far_y, far_z,
-					                                   node_addr,
-					                                   difl,
-					                                   &dist);
-				}
-				else
-#endif  /* BVH_HAIR_MINIMUM_WIDTH */
-				{
-					child_mask = NODE_INTERSECT(kg,
-					                            tnear,
-					                            tfar,
+                                             idir4,
+                                             near_x,
+                                             near_y,
+                                             near_z,
+                                             far_x,
+                                             far_y,
+                                             far_z,
+                                             node_addr,
+                                             difl,
+                                             &dist);
+        }
+        else
+#endif /* BVH_HAIR_MINIMUM_WIDTH */
+        {
+          child_mask = NODE_INTERSECT(kg,
+                                      tnear,
+                                      tfar,
 #ifdef __KERNEL_AVX2__
-					                            P_idir4,
+                                      P_idir4,
 #endif
 #if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
-					                            org4,
+                                      org4,
 #endif
 #if BVH_FEATURE(BVH_HAIR)
-					                            dir4,
+                                      dir4,
 #endif
-					                            idir4,
-					                            near_x, near_y, near_z,
-					                            far_x, far_y, far_z,
-					                            node_addr,
-					                            &dist);
-				}
-
-				if(child_mask != 0) {
-					avxf cnodes;
-					/* TODO(sergey): Investigate whether moving cnodes upwards
-					 * gives a speedup (will be different cache pattern but will
-					 * avoid extra check here).
-					 */
+                                      idir4,
+                                      near_x,
+                                      near_y,
+                                      near_z,
+                                      far_x,
+                                      far_y,
+                                      far_z,
+                                      node_addr,
+                                      &dist);
+        }
+
+        if (child_mask != 0) {
+          avxf cnodes;
+          /* TODO(sergey): Investigate whether moving cnodes upwards
+           * gives a speedup (will be different cache pattern but will
+           * avoid extra check here).
+           */
 #if BVH_FEATURE(BVH_HAIR)
-					if(__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
-						cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr+26);
-					}
-					else
+          if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
+            cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 26);
+          }
+          else
 #endif
-					{
-						cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr+14);
-					}
-
-					/* One child is hit, continue with that child. */
-					int r = __bscf(child_mask);
-					float d0 = ((float*)&dist)[r];
-					if(child_mask == 0) {
-						node_addr = __float_as_int(cnodes[r]);
-						node_dist = d0;
-						continue;
-					}
-
-					/* Two children are hit, push far child, and continue with
-					 * closer child.
-					 */
-					int c0 = __float_as_int(cnodes[r]);
-					r = __bscf(child_mask);
-					int c1 = __float_as_int(cnodes[r]);
-					float d1 = ((float*)&dist)[r];
-					if(child_mask == 0) {
-						if(d1 < d0) {
-							node_addr = c1;
-							node_dist = d1;
-							++stack_ptr;
-							kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-							traversal_stack[stack_ptr].addr = c0;
-							traversal_stack[stack_ptr].dist = d0;
-							continue;
-						}
-						else {
-							node_addr = c0;
-							node_dist = d0;
-							++stack_ptr;
-							kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-							traversal_stack[stack_ptr].addr = c1;
-							traversal_stack[stack_ptr].dist = d1;
-							continue;
-						}
-					}
-
-					/* Here starts the slow path for 3 or 4 hit children. We push
-					 * all nodes onto the stack to sort them there.
-					 */
-					++stack_ptr;
-					kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-					traversal_stack[stack_ptr].addr = c1;
-					traversal_stack[stack_ptr].dist = d1;
-					++stack_ptr;
-					kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-					traversal_stack[stack_ptr].addr = c0;
-					traversal_stack[stack_ptr].dist = d0;
-
-					/* Three children are hit, push all onto stack and sort 3
-					 * stack items, continue with closest child.
-					 */
-					r = __bscf(child_mask);
-					int c2 = __float_as_int(cnodes[r]);
-					float d2 = ((float*)&dist)[r];
-					if(child_mask == 0) {
-						++stack_ptr;
-						kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-						traversal_stack[stack_ptr].addr = c2;
-						traversal_stack[stack_ptr].dist = d2;
-						obvh_stack_sort(&traversal_stack[stack_ptr],
-						                &traversal_stack[stack_ptr - 1],
-						                &traversal_stack[stack_ptr - 2]);
-						node_addr = traversal_stack[stack_ptr].addr;
-						node_dist = traversal_stack[stack_ptr].dist;
-						--stack_ptr;
-						continue;
-					}
-
-					/* Four children are hit, push all onto stack and sort 4
-					 * stack items, continue with closest child.
-					 */
-					r = __bscf(child_mask);
-					int c3 = __float_as_int(cnodes[r]);
-					float d3 = ((float*)&dist)[r];
-					if(child_mask == 0) {
-						++stack_ptr;
-						kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-						traversal_stack[stack_ptr].addr = c3;
-						traversal_stack[stack_ptr].dist = d3;
-						++stack_ptr;
-						kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-						traversal_stack[stack_ptr].addr = c2;
-						traversal_stack[stack_ptr].dist = d2;
-						obvh_stack_sort(&traversal_stack[stack_ptr],
-						                &traversal_stack[stack_ptr - 1],
-						                &traversal_stack[stack_ptr - 2],
-						                &traversal_stack[stack_ptr - 3]);
-						node_addr = traversal_stack[stack_ptr].addr;
-						node_dist = traversal_stack[stack_ptr].dist;
-						--stack_ptr;
-						continue;
-					}
-
-					++stack_ptr;
-					kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-					traversal_stack[stack_ptr].addr = c3;
-					traversal_stack[stack_ptr].dist = d3;
-					++stack_ptr;
-					kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-					traversal_stack[stack_ptr].addr = c2;
-					traversal_stack[stack_ptr].dist = d2;
-
-					/* Five children are hit, push all onto stack and sort 5
-					 * stack items, continue with closest child.
-					 */
-					r = __bscf(child_mask);
-					int c4 = __float_as_int(cnodes[r]);
-					float d4 = ((float*)&dist)[r];
-					if(child_mask == 0) {
-						++stack_ptr;
-						kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-						traversal_stack[stack_ptr].addr = c4;
-						traversal_stack[stack_ptr].dist = d4;
-						obvh_stack_sort(&traversal_stack[stack_ptr],
-						                &traversal_stack[stack_ptr - 1],
-						                &traversal_stack[stack_ptr - 2],
-						                &traversal_stack[stack_ptr - 3],
-						                &traversal_stack[stack_ptr - 4]);
-						node_addr = traversal_stack[stack_ptr].addr;
-						node_dist = traversal_stack[stack_ptr].dist;
-						--stack_ptr;
-						continue;
-					}
-
-					/* Six children are hit, push all onto stack and sort 6
-					 * stack items, continue with closest child.
-					 */
-					r = __bscf(child_mask);
-					int c5 = __float_as_int(cnodes[r]);
-					float d5 = ((float*)&dist)[r];
-					if(child_mask == 0) {
-						++stack_ptr;
-						kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-						traversal_stack[stack_ptr].addr = c5;
-						traversal_stack[stack_ptr].dist = d5;
-						++stack_ptr;
-						kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-						traversal_stack[stack_ptr].addr = c4;
-						traversal_stack[stack_ptr].dist = d4;
-						obvh_stack_sort(&traversal_stack[stack_ptr],
-						                &traversal_stack[stack_ptr - 1],
-						                &traversal_stack[stack_ptr - 2],
-						                &traversal_stack[stack_ptr - 3],
-						                &traversal_stack[stack_ptr - 4],
-						                &traversal_stack[stack_ptr - 5]);
-						node_addr = traversal_stack[stack_ptr].addr;
-						node_dist = traversal_stack[stack_ptr].dist;
-						--stack_ptr;
-						continue;
-					}
-
-					++stack_ptr;
-					kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-					traversal_stack[stack_ptr].addr = c5;
-					traversal_stack[stack_ptr].dist = d5;
-					++stack_ptr;
-					kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-					traversal_stack[stack_ptr].addr = c4;
-					traversal_stack[stack_ptr].dist = d4;
-
-					/* Seven children are hit, push all onto stack and sort 7
-					 * stack items, continue with closest child.
-					 */
-					r = __bscf(child_mask);
-					int c6 = __float_as_int(cnodes[r]);
-					float d6 = ((float*)&dist)[r];
-					if(child_mask == 0) {
-						++stack_ptr;
-						kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-						traversal_stack[stack_ptr].addr = c6;
-						traversal_stack[stack_ptr].dist = d6;
-						obvh_stack_sort(&traversal_stack[stack_ptr],
-						                &traversal_stack[stack_ptr - 1],
-						                &traversal_stack[stack_ptr - 2],
-						                &traversal_stack[stack_ptr - 3],
-						                &traversal_stack[stack_ptr - 4],
-						                &traversal_stack[stack_ptr - 5],
-						                &traversal_stack[stack_ptr - 6]);
-						node_addr = traversal_stack[stack_ptr].addr;
-						node_dist = traversal_stack[stack_ptr].dist;
-						--stack_ptr;
-						continue;
-					}
-
-					/* Eight children are hit, push all onto stack and sort 8
-					* stack items, continue with closest child.
-					*/
-					r = __bscf(child_mask);
-					int c7 = __float_as_int(cnodes[r]);
-					float d7 = ((float*)&dist)[r];
-					++stack_ptr;
-					kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-					traversal_stack[stack_ptr].addr = c7;
-					traversal_stack[stack_ptr].dist = d7;
-					++stack_ptr;
-					kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-					traversal_stack[stack_ptr].addr = c6;
-					traversal_stack[stack_ptr].dist = d6;
-					obvh_stack_sort(&traversal_stack[stack_ptr],
-					                &traversal_stack[stack_ptr - 1],
-					                &traversal_stack[stack_ptr - 2],
-					                &traversal_stack[stack_ptr - 3],
-					                &traversal_stack[stack_ptr - 4],
-					                &traversal_stack[stack_ptr - 5],
-					                &traversal_stack[stack_ptr - 6],
-					                &traversal_stack[stack_ptr - 7]);
-					node_addr = traversal_stack[stack_ptr].addr;
-					node_dist = traversal_stack[stack_ptr].dist;
-					--stack_ptr;
-					continue;
-				}
-
-
-				node_addr = traversal_stack[stack_ptr].addr;
-				node_dist = traversal_stack[stack_ptr].dist;
-				--stack_ptr;
-			}
-
-			/* If node is leaf, fetch triangle list. */
-			if(node_addr < 0) {
-				float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1));
+          {
+            cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 14);
+          }
+
+          /* One child is hit, continue with that child. */
+          int r = __bscf(child_mask);
+          float d0 = ((float *)&dist)[r];
+          if (child_mask == 0) {
+            node_addr = __float_as_int(cnodes[r]);
+            node_dist = d0;
+            continue;
+          }
+
+          /* Two children are hit, push far child, and continue with
+           * closer child.
+           */
+          int c0 = __float_as_int(cnodes[r]);
+          r = __bscf(child_mask);
+          int c1 = __float_as_int(cnodes[r]);
+          float d1 = ((float *)&dist)[r];
+          if (child_mask == 0) {
+            if (d1 < d0) {
+              node_addr = c1;
+              node_dist = d1;
+              ++stack_ptr;
+              kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+              traversal_stack[stack_ptr].addr = c0;
+              traversal_stack[stack_ptr].dist = d0;
+              continue;
+            }
+            else {
+              node_addr = c0;
+              node_dist = d0;
+              ++stack_ptr;
+              kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+              traversal_stack[stack_ptr].addr = c1;
+              traversal_stack[stack_ptr].dist = d1;
+              continue;
+            }
+          }
+
+          /* Here starts the slow path for 3 or 4 hit children. We push
+           * all nodes onto the stack to sort them there.
+           */
+          ++stack_ptr;
+          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+          traversal_stack[stack_ptr].addr = c1;
+          traversal_stack[stack_ptr].dist = d1;
+          ++stack_ptr;
+          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+          traversal_stack[stack_ptr].addr = c0;
+          traversal_stack[stack_ptr].dist = d0;
+
+          /* Three children are hit, push all onto stack and sort 3
+           * stack items, continue with closest child.
+           */
+          r = __bscf(child_mask);
+          int c2 = __float_as_int(cnodes[r]);
+          float d2 = ((float *)&dist)[r];
+          if (child_mask == 0) {
+            ++stack_ptr;
+            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+            traversal_stack[stack_ptr].addr = c2;
+            traversal_stack[stack_ptr].dist = d2;
+            obvh_stack_sort(&traversal_stack[stack_ptr],
+                            &traversal_stack[stack_ptr - 1],
+                            &traversal_stack[stack_ptr - 2]);
+            node_addr = traversal_stack[stack_ptr].addr;
+            node_dist = traversal_stack[stack_ptr].dist;
+            --stack_ptr;
+            continue;
+          }
+
+          /* Four children are hit, push all onto stack and sort 4
+           * stack items, continue with closest child.
+           */
+          r = __bscf(child_mask);
+          int c3 = __float_as_int(cnodes[r]);
+          float d3 = ((float *)&dist)[r];
+          if (child_mask == 0) {
+            ++stack_ptr;
+            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+            traversal_stack[stack_ptr].addr = c3;
+            traversal_stack[stack_ptr].dist = d3;
+            ++stack_ptr;
+            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+            traversal_stack[stack_ptr].addr = c2;
+            traversal_stack[stack_ptr].dist = d2;
+            obvh_stack_sort(&traversal_stack[stack_ptr],
+                            &traversal_stack[stack_ptr - 1],
+                            &traversal_stack[stack_ptr - 2],
+                            &traversal_stack[stack_ptr - 3]);
+            node_addr = traversal_stack[stack_ptr].addr;
+            node_dist = traversal_stack[stack_ptr].dist;
+            --stack_ptr;
+            continue;
+          }
+
+          ++stack_ptr;
+          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+          traversal_stack[stack_ptr].addr = c3;
+          traversal_stack[stack_ptr].dist = d3;
+          ++stack_ptr;
+          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+          traversal_stack[stack_ptr].addr = c2;
+          traversal_stack[stack_ptr].dist = d2;
+
+          /* Five children are hit, push all onto stack and sort 5
+           * stack items, continue with closest child.
+           */
+          r = __bscf(child_mask);
+          int c4 = __float_as_int(cnodes[r]);
+          float d4 = ((float *)&dist)[r];
+          if (child_mask == 0) {
+            ++stack_ptr;
+            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+            traversal_stack[stack_ptr].addr = c4;
+            traversal_stack[stack_ptr].dist = d4;
+            obvh_stack_sort(&traversal_stack[stack_ptr],
+                            &traversal_stack[stack_ptr - 1],
+                            &traversal_stack[stack_ptr - 2],
+                            &traversal_stack[stack_ptr - 3],
+                            &traversal_stack[stack_ptr - 4]);
+            node_addr = traversal_stack[stack_ptr].addr;
+            node_dist = traversal_stack[stack_ptr].dist;
+            --stack_ptr;
+            continue;
+          }
+
+          /* Six children are hit, push all onto stack and sort 6
+           * stack items, continue with closest child.
+           */
+          r = __bscf(child_mask);
+          int c5 = __float_as_int(cnodes[r]);
+          float d5 = ((float *)&dist)[r];
+          if (child_mask == 0) {
+            ++stack_ptr;
+            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+            traversal_stack[stack_ptr].addr = c5;
+            traversal_stack[stack_ptr].dist = d5;
+            ++stack_ptr;
+            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+            traversal_stack[stack_ptr].addr = c4;
+            traversal_stack[stack_ptr].dist = d4;
+            obvh_stack_sort(&traversal_stack[stack_ptr],
+                            &traversal_stack[stack_ptr - 1],
+                            &traversal_stack[stack_ptr - 2],
+                            &traversal_stack[stack_ptr - 3],
+                            &traversal_stack[stack_ptr - 4],
+                            &traversal_stack[stack_ptr - 5]);
+            node_addr = traversal_stack[stack_ptr].addr;
+            node_dist = traversal_stack[stack_ptr].dist;
+            --stack_ptr;
+            continue;
+          }
+
+          ++stack_ptr;
+          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+          traversal_stack[stack_ptr].addr = c5;
+          traversal_stack[stack_ptr].dist = d5;
+          ++stack_ptr;
+          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+          traversal_stack[stack_ptr].addr = c4;
+          traversal_stack[stack_ptr].dist = d4;
+
+          /* Seven children are hit, push all onto stack and sort 7
+           * stack items, continue with closest child.
+           */
+          r = __bscf(child_mask);
+          int c6 = __float_as_int(cnodes[r]);
+          float d6 = ((float *)&dist)[r];
+          if (child_mask == 0) {
+            ++stack_ptr;
+            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+            traversal_stack[stack_ptr].addr = c6;
+            traversal_stack[stack_ptr].dist = d6;
+            obvh_stack_sort(&traversal_stack[stack_ptr],
+                            &traversal_stack[stack_ptr - 1],
+                            &traversal_stack[stack_ptr - 2],
+                            &traversal_stack[stack_ptr - 3],
+                            &traversal_stack[stack_ptr - 4],
+                            &traversal_stack[stack_ptr - 5],
+                            &traversal_stack[stack_ptr - 6]);
+            node_addr = traversal_stack[stack_ptr].addr;
+            node_dist = traversal_stack[stack_ptr].dist;
+            --stack_ptr;
+            continue;
+          }
+
+          /* Eight children are hit, push all onto stack and sort 8
+           * stack items, continue with closest child.
+           */
+          r = __bscf(child_mask);
+          int c7 = __float_as_int(cnodes[r]);
+          float d7 = ((float *)&dist)[r];
+          ++stack_ptr;
+          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+          traversal_stack[stack_ptr].addr = c7;
+          traversal_stack[stack_ptr].dist = d7;
+          ++stack_ptr;
+          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+          traversal_stack[stack_ptr].addr = c6;
+          traversal_stack[stack_ptr].dist = d6;
+          obvh_stack_sort(&traversal_stack[stack_ptr],
+                          &traversal_stack[stack_ptr - 1],
+                          &traversal_stack[stack_ptr - 2],
+                          &traversal_stack[stack_ptr - 3],
+                          &traversal_stack[stack_ptr - 4],
+                          &traversal_stack[stack_ptr - 5],
+                          &traversal_stack[stack_ptr - 6],
+                          &traversal_stack[stack_ptr - 7]);
+          node_addr = traversal_stack[stack_ptr].addr;
+          node_dist = traversal_stack[stack_ptr].dist;
+          --stack_ptr;
+          continue;
+        }
+
+        node_addr = traversal_stack[stack_ptr].addr;
+        node_dist = traversal_stack[stack_ptr].dist;
+        --stack_ptr;
+      }
+
+      /* If node is leaf, fetch triangle list. */
+      if (node_addr < 0) {
+        float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
 
 #ifdef __VISIBILITY_FLAG__
-				if(UNLIKELY((node_dist > isect->t) ||
-				            ((__float_as_uint(leaf.z) & visibility) == 0)))
+        if (UNLIKELY((node_dist > isect->t) || ((__float_as_uint(leaf.z) & visibility) == 0)))
 #else
-				if(UNLIKELY((node_dist > isect->t)))
+        if (UNLIKELY((node_dist > isect->t)))
 #endif
-				{
-					/* Pop. */
-					node_addr = traversal_stack[stack_ptr].addr;
-					node_dist = traversal_stack[stack_ptr].dist;
-					--stack_ptr;
-					continue;
-				}
-				int prim_addr = __float_as_int(leaf.x);
+        {
+          /* Pop. */
+          node_addr = traversal_stack[stack_ptr].addr;
+          node_dist = traversal_stack[stack_ptr].dist;
+          --stack_ptr;
+          continue;
+        }
+        int prim_addr = __float_as_int(leaf.x);
 
 #if BVH_FEATURE(BVH_INSTANCING)
-				if(prim_addr >= 0) {
+        if (prim_addr >= 0) {
 #endif
-					int prim_addr2 = __float_as_int(leaf.y);
-					const uint type = __float_as_int(leaf.w);
-
-					/* Pop. */
-					node_addr = traversal_stack[stack_ptr].addr;
-					node_dist = traversal_stack[stack_ptr].dist;
-					--stack_ptr;
-
-					/* Primitive intersection. */
-					switch(type & PRIMITIVE_ALL) {
-					case PRIMITIVE_TRIANGLE: {
-						int prim_count = prim_addr2 - prim_addr;
-						if(prim_count < 3) {
-							for(; prim_addr < prim_addr2; prim_addr++) {
-								 BVH_DEBUG_NEXT_INTERSECTION();
-								 kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
-								 if(triangle_intersect(kg,
-								                       isect,
-								                       P,
-								                       dir,
-								                       visibility,
-								                       object,
-								                       prim_addr))
-								 {
-									 tfar = avxf(isect->t);
-									 /* Shadow ray early termination. */
-									 if(visibility == PATH_RAY_SHADOW_OPAQUE) {
-										 return true;
-									 }
-								 }
-							}//for
-						}
-						else {
-							kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
-							if(triangle_intersect8(kg,
-							                       &isect,
-							                       P,
-							                       dir,
-							                       visibility,
-							                       object,
-							                       prim_addr,
-							                       prim_count,
-							                       0,
-							                       0,
-							                       NULL,
-							                       0.0f))
-							{
-								tfar = avxf(isect->t);
-								if(visibility == PATH_RAY_SHADOW_OPAQUE) {
-									return true;
-								}
-							}
-						}//prim count
-						break;
-					}
+          int prim_addr2 = __float_as_int(leaf.y);
+          const uint type = __float_as_int(leaf.w);
+
+          /* Pop. */
+          node_addr = traversal_stack[stack_ptr].addr;
+          node_dist = traversal_stack[stack_ptr].dist;
+          --stack_ptr;
+
+          /* Primitive intersection. */
+          switch (type & PRIMITIVE_ALL) {
+            case PRIMITIVE_TRIANGLE: {
+              int prim_count = prim_addr2 - prim_addr;
+              if (prim_count < 3) {
+                for (; prim_addr < prim_addr2; prim_addr++) {
+                  BVH_DEBUG_NEXT_INTERSECTION();
+                  kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
+                  if (triangle_intersect(kg, isect, P, dir, visibility, object, prim_addr)) {
+                    tfar = avxf(isect->t);
+                    /* Shadow ray early termination. */
+                    if (visibility == PATH_RAY_SHADOW_OPAQUE) {
+                      return true;
+                    }
+                  }
+                }  //for
+              }
+              else {
+                kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
+                if (triangle_intersect8(kg,
+                                        &isect,
+                                        P,
+                                        dir,
+                                        visibility,
+                                        object,
+                                        prim_addr,
+                                        prim_count,
+                                        0,
+                                        0,
+                                        NULL,
+                                        0.0f)) {
+                  tfar = avxf(isect->t);
+                  if (visibility == PATH_RAY_SHADOW_OPAQUE) {
+                    return true;
+                  }
+                }
+              }  //prim count
+              break;
+            }
 #if BVH_FEATURE(BVH_MOTION)
-						case PRIMITIVE_MOTION_TRIANGLE: {
-							for(; prim_addr < prim_addr2; prim_addr++) {
-								BVH_DEBUG_NEXT_INTERSECTION();
-								kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
-								if(motion_triangle_intersect(kg,
-								                             isect,
-								                             P,
-								                             dir,
-								                             ray->time,
-								                             visibility,
-								                             object,
-								                             prim_addr))
-								{
-									tfar = avxf(isect->t);
-									/* Shadow ray early termination. */
-									if(visibility == PATH_RAY_SHADOW_OPAQUE) {
-										return true;
-									}
-								}
-							}
-							break;
-						}
-#endif  /* BVH_FEATURE(BVH_MOTION) */
+            case PRIMITIVE_MOTION_TRIANGLE: {
+              for (; prim_addr < prim_addr2; prim_addr++) {
+                BVH_DEBUG_NEXT_INTERSECTION();
+                kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
+                if (motion_triangle_intersect(
+                        kg, isect, P, dir, ray->time, visibility, object, prim_addr)) {
+                  tfar = avxf(isect->t);
+                  /* Shadow ray early termination. */
+                  if (visibility == PATH_RAY_SHADOW_OPAQUE) {
+                    return true;
+                  }
+                }
+              }
+              break;
+            }
+#endif /* BVH_FEATURE(BVH_MOTION) */
 #if BVH_FEATURE(BVH_HAIR)
-						case PRIMITIVE_CURVE:
-						case PRIMITIVE_MOTION_CURVE: {
-							for(; prim_addr < prim_addr2; prim_addr++) {
-								BVH_DEBUG_NEXT_INTERSECTION();
-								const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr);
-								kernel_assert((curve_type & PRIMITIVE_ALL) == (type & PRIMITIVE_ALL));
-								bool hit;
-								if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
-									hit = cardinal_curve_intersect(kg,
-									                               isect,
-									                               P,
-									                               dir,
-									                               visibility,
-									                               object,
-									                               prim_addr,
-									                               ray->time,
-									                               curve_type,
-									                               lcg_state,
-									                               difl,
-									                               extmax);
-								}
-								else {
-									hit = curve_intersect(kg,
-									                      isect,
-									                      P,
-									                      dir,
-									                      visibility,
-									                      object,
-									                      prim_addr,
-									                      ray->time,
-									                      curve_type,
-									                      lcg_state,
-									                      difl,
-									                      extmax);
-								}
-								if(hit) {
-									tfar = avxf(isect->t);
-									/* Shadow ray early termination. */
-									if(visibility == PATH_RAY_SHADOW_OPAQUE) {
-										return true;
-									}
-								}
-							}
-							break;
-						}
-#endif  /* BVH_FEATURE(BVH_HAIR) */
-					}
-				}
+            case PRIMITIVE_CURVE:
+            case PRIMITIVE_MOTION_CURVE: {
+              for (; prim_addr < prim_addr2; prim_addr++) {
+                BVH_DEBUG_NEXT_INTERSECTION();
+                const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr);
+                kernel_assert((curve_type & PRIMITIVE_ALL) == (type & PRIMITIVE_ALL));
+                bool hit;
+                if (kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
+                  hit = cardinal_curve_intersect(kg,
+                                                 isect,
+                                                 P,
+                                                 dir,
+                                                 visibility,
+                                                 object,
+                                                 prim_addr,
+                                                 ray->time,
+                                                 curve_type,
+                                                 lcg_state,
+                                                 difl,
+                                                 extmax);
+                }
+                else {
+                  hit = curve_intersect(kg,
+                                        isect,
+                                        P,
+                                        dir,
+                                        visibility,
+                                        object,
+                                        prim_addr,
+                                        ray->time,
+                                        curve_type,
+                                        lcg_state,
+                                        difl,
+                                        extmax);
+                }
+                if (hit) {
+                  tfar = avxf(isect->t);
+                  /* Shadow ray early termination. */
+                  if (visibility == PATH_RAY_SHADOW_OPAQUE) {
+                    return true;
+                  }
+                }
+              }
+              break;
+            }
+#endif /* BVH_FEATURE(BVH_HAIR) */
+          }
+        }
 #if BVH_FEATURE(BVH_INSTANCING)
-				else {
-					/* Instance push. */
-					object = kernel_tex_fetch(__prim_object, -prim_addr-1);
+        else {
+          /* Instance push. */
+          object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
 
 #  if BVH_FEATURE(BVH_MOTION)
-					qbvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect->t, &node_dist, &ob_itfm);
+          qbvh_instance_motion_push(
+              kg, object, ray, &P, &dir, &idir, &isect->t, &node_dist, &ob_itfm);
 #  else
-					qbvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t, &node_dist);
+          qbvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t, &node_dist);
 #  endif
 
-					obvh_near_far_idx_calc(idir,
-					                       &near_x, &near_y, &near_z,
-					                       &far_x, &far_y, &far_z);
-					tfar = avxf(isect->t);
+          obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
+          tfar = avxf(isect->t);
 #  if BVH_FEATURE(BVH_HAIR)
-					dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));
+          dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));
 #  endif
-					idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z));
+          idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z));
 #  ifdef __KERNEL_AVX2__
-					P_idir = P*idir;
-					P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
+          P_idir = P * idir;
+          P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
 #  endif
 #  if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
-					org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z));
+          org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z));
 #  endif
 
-					++stack_ptr;
-					kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-					traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
-					traversal_stack[stack_ptr].dist = -FLT_MAX;
+          ++stack_ptr;
+          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+          traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
+          traversal_stack[stack_ptr].dist = -FLT_MAX;
 
-					node_addr = kernel_tex_fetch(__object_node, object);
+          node_addr = kernel_tex_fetch(__object_node, object);
 
-					BVH_DEBUG_NEXT_INSTANCE();
-				}
-			}
-#endif  /* FEATURE(BVH_INSTANCING) */
-		} while(node_addr != ENTRYPOINT_SENTINEL);
+          BVH_DEBUG_NEXT_INSTANCE();
+        }
+      }
+#endif /* BVH_FEATURE(BVH_INSTANCING) */
+    } while (node_addr != ENTRYPOINT_SENTINEL);
 
 #if BVH_FEATURE(BVH_INSTANCING)
-		if(stack_ptr >= 0) {
-			kernel_assert(object != OBJECT_NONE);
+    if (stack_ptr >= 0) {
+      kernel_assert(object != OBJECT_NONE);
 
-			/* Instance pop. */
+      /* Instance pop. */
 #  if BVH_FEATURE(BVH_MOTION)
-			isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
+      isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
 #  else
-			isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
+      isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
 #  endif
 
-			obvh_near_far_idx_calc(idir,
-			                       &near_x, &near_y, &near_z,
-			                       &far_x, &far_y, &far_z);
-			tfar = avxf(isect->t);
+      obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
+      tfar = avxf(isect->t);
 #  if BVH_FEATURE(BVH_HAIR)
-			dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));
+      dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));
 #  endif
-			idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z));
+      idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z));
 #  ifdef __KERNEL_AVX2__
-			P_idir = P*idir;
-			P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
+      P_idir = P * idir;
+      P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
 #  endif
 #  if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
-			org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z));
+      org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z));
 #  endif
 
-			object = OBJECT_NONE;
-			node_addr = traversal_stack[stack_ptr].addr;
-			node_dist = traversal_stack[stack_ptr].dist;
-			--stack_ptr;
-		}
-#endif  /* FEATURE(BVH_INSTANCING) */
-	} while(node_addr != ENTRYPOINT_SENTINEL);
+      object = OBJECT_NONE;
+      node_addr = traversal_stack[stack_ptr].addr;
+      node_dist = traversal_stack[stack_ptr].dist;
+      --stack_ptr;
+    }
+#endif /* BVH_FEATURE(BVH_INSTANCING) */
+  } while (node_addr != ENTRYPOINT_SENTINEL);
 
-	return (isect->prim != PRIM_NONE);
+  return (isect->prim != PRIM_NONE);
 }
 
 #undef NODE_INTERSECT
diff --git a/intern/cycles/kernel/bvh/obvh_volume.h b/intern/cycles/kernel/bvh/obvh_volume.h
index e66d499dccc..fb41ae783ab 100644
--- a/intern/cycles/kernel/bvh/obvh_volume.h
+++ b/intern/cycles/kernel/bvh/obvh_volume.h
@@ -33,444 +33,448 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
                                              Intersection *isect,
                                              const uint visibility)
 {
-	/* Traversal stack in CUDA thread-local memory. */
-	OBVHStackItem traversal_stack[BVH_OSTACK_SIZE];
-	traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
+  /* Traversal stack in CUDA thread-local memory. */
+  OBVHStackItem traversal_stack[BVH_OSTACK_SIZE];
+  traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
 
-	/* Traversal variables in registers. */
-	int stack_ptr = 0;
-	int node_addr = kernel_data.bvh.root;
+  /* Traversal variables in registers. */
+  int stack_ptr = 0;
+  int node_addr = kernel_data.bvh.root;
 
-	/* Ray parameters in registers. */
-	float3 P = ray->P;
-	float3 dir = bvh_clamp_direction(ray->D);
-	float3 idir = bvh_inverse_direction(dir);
-	int object = OBJECT_NONE;
+  /* Ray parameters in registers. */
+  float3 P = ray->P;
+  float3 dir = bvh_clamp_direction(ray->D);
+  float3 idir = bvh_inverse_direction(dir);
+  int object = OBJECT_NONE;
 
 #if BVH_FEATURE(BVH_MOTION)
-	Transform ob_itfm;
+  Transform ob_itfm;
 #endif
 
-	isect->t = ray->t;
-	isect->u = 0.0f;
-	isect->v = 0.0f;
-	isect->prim = PRIM_NONE;
-	isect->object = OBJECT_NONE;
+  isect->t = ray->t;
+  isect->u = 0.0f;
+  isect->v = 0.0f;
+  isect->prim = PRIM_NONE;
+  isect->object = OBJECT_NONE;
 
-	avxf tnear(0.0f), tfar(ray->t);
+  avxf tnear(0.0f), tfar(ray->t);
 #if BVH_FEATURE(BVH_HAIR)
-	avx3f dir4(avxf(dir.x), avxf(dir.y), avxf(dir.z));
+  avx3f dir4(avxf(dir.x), avxf(dir.y), avxf(dir.z));
 #endif
-	avx3f idir4(avxf(idir.x), avxf(idir.y), avxf(idir.z));
+  avx3f idir4(avxf(idir.x), avxf(idir.y), avxf(idir.z));
 
 #ifdef __KERNEL_AVX2__
-	float3 P_idir = P*idir;
-	avx3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
+  float3 P_idir = P * idir;
+  avx3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
 #endif
 #if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
-	avx3f org4(avxf(P.x), avxf(P.y), avxf(P.z));
+  avx3f org4(avxf(P.x), avxf(P.y), avxf(P.z));
 #endif
 
-	/* Offsets to select the side that becomes the lower or upper bound. */
-	int near_x, near_y, near_z;
-	int far_x, far_y, far_z;
-	obvh_near_far_idx_calc(idir,
-	                       &near_x, &near_y, &near_z,
-	                       &far_x, &far_y, &far_z);
+  /* Offsets to select the side that becomes the lower or upper bound. */
+  int near_x, near_y, near_z;
+  int far_x, far_y, far_z;
+  obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
 
-	/* Traversal loop. */
-	do {
-		do {
-			/* Traverse internal nodes. */
-			while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
-				float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
+  /* Traversal loop. */
+  do {
+    do {
+      /* Traverse internal nodes. */
+      while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
+        float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
 
 #ifdef __VISIBILITY_FLAG__
-				if((__float_as_uint(inodes.x) & visibility) == 0) {
-					/* Pop. */
-					node_addr = traversal_stack[stack_ptr].addr;
-					--stack_ptr;
-					continue;
-				}
+        if ((__float_as_uint(inodes.x) & visibility) == 0) {
+          /* Pop. */
+          node_addr = traversal_stack[stack_ptr].addr;
+          --stack_ptr;
+          continue;
+        }
 #endif
 
-				avxf dist;
-				int child_mask = NODE_INTERSECT(kg,
-				                                tnear,
-				                                tfar,
+        avxf dist;
+        int child_mask = NODE_INTERSECT(kg,
+                                        tnear,
+                                        tfar,
 #ifdef __KERNEL_AVX2__
-				                                P_idir4,
+                                        P_idir4,
 #endif
 #if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
-				                                org4,
+                                        org4,
 #endif
 #if BVH_FEATURE(BVH_HAIR)
-				                                dir4,
+                                        dir4,
 #endif
-				                                idir4,
-				                                near_x, near_y, near_z,
-				                                far_x, far_y, far_z,
-				                                node_addr,
-				                                &dist);
-
-				if(child_mask != 0) {
-					avxf cnodes;
+                                        idir4,
+                                        near_x,
+                                        near_y,
+                                        near_z,
+                                        far_x,
+                                        far_y,
+                                        far_z,
+                                        node_addr,
+                                        &dist);
+
+        if (child_mask != 0) {
+          avxf cnodes;
 #if BVH_FEATURE(BVH_HAIR)
-					if(__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
-						cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr+26);
-					}
-					else
+          if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
+            cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 26);
+          }
+          else
 #endif
-					{
-						cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr+14);
-					}
-
-					/* One child is hit, continue with that child. */
-					int r = __bscf(child_mask);
-					if(child_mask == 0) {
-						node_addr = __float_as_int(cnodes[r]);
-						continue;
-					}
-
-					/* Two children are hit, push far child, and continue with
-					 * closer child.
-					 */
-					int c0 = __float_as_int(cnodes[r]);
-					float d0 = ((float*)&dist)[r];
-					r = __bscf(child_mask);
-					int c1 = __float_as_int(cnodes[r]);
-					float d1 = ((float*)&dist)[r];
-					if(child_mask == 0) {
-						if(d1 < d0) {
-							node_addr = c1;
-							++stack_ptr;
-							kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-							traversal_stack[stack_ptr].addr = c0;
-							traversal_stack[stack_ptr].dist = d0;
-							continue;
-						}
-						else {
-							node_addr = c0;
-							++stack_ptr;
-							kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-							traversal_stack[stack_ptr].addr = c1;
-							traversal_stack[stack_ptr].dist = d1;
-							continue;
-						}
-					}
-
-					/* Here starts the slow path for 3 or 4 hit children. We push
-					 * all nodes onto the stack to sort them there.
-					 */
-					++stack_ptr;
-					kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-					traversal_stack[stack_ptr].addr = c1;
-					traversal_stack[stack_ptr].dist = d1;
-					++stack_ptr;
-					kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-					traversal_stack[stack_ptr].addr = c0;
-					traversal_stack[stack_ptr].dist = d0;
-
-					/* Three children are hit, push all onto stack and sort 3
-					 * stack items, continue with closest child.
-					 */
-					r = __bscf(child_mask);
-					int c2 = __float_as_int(cnodes[r]);
-					float d2 = ((float*)&dist)[r];
-					if(child_mask == 0) {
-						++stack_ptr;
-						kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-						traversal_stack[stack_ptr].addr = c2;
-						traversal_stack[stack_ptr].dist = d2;
-						obvh_stack_sort(&traversal_stack[stack_ptr],
-						                &traversal_stack[stack_ptr - 1],
-						                &traversal_stack[stack_ptr - 2]);
-						node_addr = traversal_stack[stack_ptr].addr;
-						--stack_ptr;
-						continue;
-					}
-
-					/* Four children are hit, push all onto stack and sort 4
-					 * stack items, continue with closest child.
-					 */
-					r = __bscf(child_mask);
-					int c3 = __float_as_int(cnodes[r]);
-					float d3 = ((float*)&dist)[r];
-					if(child_mask == 0) {
-						++stack_ptr;
-						kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-						traversal_stack[stack_ptr].addr = c3;
-						traversal_stack[stack_ptr].dist = d3;
-						++stack_ptr;
-						kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-						traversal_stack[stack_ptr].addr = c2;
-						traversal_stack[stack_ptr].dist = d2;
-						obvh_stack_sort(&traversal_stack[stack_ptr],
-						                &traversal_stack[stack_ptr - 1],
-						                &traversal_stack[stack_ptr - 2],
-						                &traversal_stack[stack_ptr - 3]);
-						node_addr = traversal_stack[stack_ptr].addr;
-						--stack_ptr;
-						continue;
-					}
-
-					++stack_ptr;
-					kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-					traversal_stack[stack_ptr].addr = c3;
-					traversal_stack[stack_ptr].dist = d3;
-					++stack_ptr;
-					kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-					traversal_stack[stack_ptr].addr = c2;
-					traversal_stack[stack_ptr].dist = d2;
-
-					/* Five children are hit, push all onto stack and sort 5
-					 * stack items, continue with closest child
-					 */
-					r = __bscf(child_mask);
-					int c4 = __float_as_int(cnodes[r]);
-					float d4 = ((float*)&dist)[r];
-					if(child_mask == 0) {
-						++stack_ptr;
-						kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-						traversal_stack[stack_ptr].addr = c4;
-						traversal_stack[stack_ptr].dist = d4;
-						obvh_stack_sort(&traversal_stack[stack_ptr],
-						                &traversal_stack[stack_ptr - 1],
-						                &traversal_stack[stack_ptr - 2],
-						                &traversal_stack[stack_ptr - 3],
-						                &traversal_stack[stack_ptr - 4]);
-						node_addr = traversal_stack[stack_ptr].addr;
-						--stack_ptr;
-						continue;
-					}
-
-					/* Six children are hit, push all onto stack and sort 6
-					 * stack items, continue with closest child.
-					 */
-					r = __bscf(child_mask);
-					int c5 = __float_as_int(cnodes[r]);
-					float d5 = ((float*)&dist)[r];
-					if(child_mask == 0) {
-						++stack_ptr;
-						kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-						traversal_stack[stack_ptr].addr = c5;
-						traversal_stack[stack_ptr].dist = d5;
-						++stack_ptr;
-						kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-						traversal_stack[stack_ptr].addr = c4;
-						traversal_stack[stack_ptr].dist = d4;
-						obvh_stack_sort(&traversal_stack[stack_ptr],
-						                &traversal_stack[stack_ptr - 1],
-						                &traversal_stack[stack_ptr - 2],
-						                &traversal_stack[stack_ptr - 3],
-						                &traversal_stack[stack_ptr - 4],
-						                &traversal_stack[stack_ptr - 5]);
-						node_addr = traversal_stack[stack_ptr].addr;
-						--stack_ptr;
-						continue;
-					}
-
-					++stack_ptr;
-					kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-					traversal_stack[stack_ptr].addr = c5;
-					traversal_stack[stack_ptr].dist = d5;
-					++stack_ptr;
-					kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-					traversal_stack[stack_ptr].addr = c4;
-					traversal_stack[stack_ptr].dist = d4;
-
-					/* Seven children are hit, push all onto stack and sort 7
-					 * stack items, continue with closest child.
-					 */
-					r = __bscf(child_mask);
-					int c6 = __float_as_int(cnodes[r]);
-					float d6 = ((float*)&dist)[r];
-					if(child_mask == 0) {
-						++stack_ptr;
-						kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-						traversal_stack[stack_ptr].addr = c6;
-						traversal_stack[stack_ptr].dist = d6;
-						obvh_stack_sort(&traversal_stack[stack_ptr],
-						                &traversal_stack[stack_ptr - 1],
-						                &traversal_stack[stack_ptr - 2],
-						                &traversal_stack[stack_ptr - 3],
-						                &traversal_stack[stack_ptr - 4],
-						                &traversal_stack[stack_ptr - 5],
-						                &traversal_stack[stack_ptr - 6]);
-						node_addr = traversal_stack[stack_ptr].addr;
-						--stack_ptr;
-						continue;
-					}
-
-					/* Eight children are hit, push all onto stack and sort 8
-					 * stack items, continue with closest child.
-					 */
-					r = __bscf(child_mask);
-					int c7 = __float_as_int(cnodes[r]);
-					float d7 = ((float*)&dist)[r];
-					++stack_ptr;
-					kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-					traversal_stack[stack_ptr].addr = c7;
-					traversal_stack[stack_ptr].dist = d7;
-					++stack_ptr;
-					kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-					traversal_stack[stack_ptr].addr = c6;
-					traversal_stack[stack_ptr].dist = d6;
-					obvh_stack_sort(&traversal_stack[stack_ptr],
-					                &traversal_stack[stack_ptr - 1],
-					                &traversal_stack[stack_ptr - 2],
-					                &traversal_stack[stack_ptr - 3],
-					                &traversal_stack[stack_ptr - 4],
-					                &traversal_stack[stack_ptr - 5],
-					                &traversal_stack[stack_ptr - 6],
-					                &traversal_stack[stack_ptr - 7]);
-					node_addr = traversal_stack[stack_ptr].addr;
-					--stack_ptr;
-					continue;
-				}
-
-				node_addr = traversal_stack[stack_ptr].addr;
-				--stack_ptr;
-			}
-
-			/* If node is leaf, fetch triangle list. */
-			if(node_addr < 0) {
-				float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1));
-
-				if((__float_as_uint(leaf.z) & visibility) == 0) {
-					/* Pop. */
-					node_addr = traversal_stack[stack_ptr].addr;
-					--stack_ptr;
-					continue;
-				}
-
-				int prim_addr = __float_as_int(leaf.x);
+          {
+            cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 14);
+          }
+
+          /* One child is hit, continue with that child. */
+          int r = __bscf(child_mask);
+          if (child_mask == 0) {
+            node_addr = __float_as_int(cnodes[r]);
+            continue;
+          }
+
+          /* Two children are hit, push far child, and continue with
+           * closer child.
+           */
+          int c0 = __float_as_int(cnodes[r]);
+          float d0 = ((float *)&dist)[r];
+          r = __bscf(child_mask);
+          int c1 = __float_as_int(cnodes[r]);
+          float d1 = ((float *)&dist)[r];
+          if (child_mask == 0) {
+            if (d1 < d0) {
+              node_addr = c1;
+              ++stack_ptr;
+              kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+              traversal_stack[stack_ptr].addr = c0;
+              traversal_stack[stack_ptr].dist = d0;
+              continue;
+            }
+            else {
+              node_addr = c0;
+              ++stack_ptr;
+              kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+              traversal_stack[stack_ptr].addr = c1;
+              traversal_stack[stack_ptr].dist = d1;
+              continue;
+            }
+          }
+
+          /* Here starts the slow path for 3 or 4 hit children. We push
+           * all nodes onto the stack to sort them there.
+           */
+          ++stack_ptr;
+          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+          traversal_stack[stack_ptr].addr = c1;
+          traversal_stack[stack_ptr].dist = d1;
+          ++stack_ptr;
+          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+          traversal_stack[stack_ptr].addr = c0;
+          traversal_stack[stack_ptr].dist = d0;
+
+          /* Three children are hit, push all onto stack and sort 3
+           * stack items, continue with closest child.
+           */
+          r = __bscf(child_mask);
+          int c2 = __float_as_int(cnodes[r]);
+          float d2 = ((float *)&dist)[r];
+          if (child_mask == 0) {
+            ++stack_ptr;
+            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+            traversal_stack[stack_ptr].addr = c2;
+            traversal_stack[stack_ptr].dist = d2;
+            obvh_stack_sort(&traversal_stack[stack_ptr],
+                            &traversal_stack[stack_ptr - 1],
+                            &traversal_stack[stack_ptr - 2]);
+            node_addr = traversal_stack[stack_ptr].addr;
+            --stack_ptr;
+            continue;
+          }
+
+          /* Four children are hit, push all onto stack and sort 4
+           * stack items, continue with closest child.
+           */
+          r = __bscf(child_mask);
+          int c3 = __float_as_int(cnodes[r]);
+          float d3 = ((float *)&dist)[r];
+          if (child_mask == 0) {
+            ++stack_ptr;
+            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+            traversal_stack[stack_ptr].addr = c3;
+            traversal_stack[stack_ptr].dist = d3;
+            ++stack_ptr;
+            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+            traversal_stack[stack_ptr].addr = c2;
+            traversal_stack[stack_ptr].dist = d2;
+            obvh_stack_sort(&traversal_stack[stack_ptr],
+                            &traversal_stack[stack_ptr - 1],
+                            &traversal_stack[stack_ptr - 2],
+                            &traversal_stack[stack_ptr - 3]);
+            node_addr = traversal_stack[stack_ptr].addr;
+            --stack_ptr;
+            continue;
+          }
+
+          ++stack_ptr;
+          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+          traversal_stack[stack_ptr].addr = c3;
+          traversal_stack[stack_ptr].dist = d3;
+          ++stack_ptr;
+          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+          traversal_stack[stack_ptr].addr = c2;
+          traversal_stack[stack_ptr].dist = d2;
+
+          /* Five children are hit, push all onto stack and sort 5
+           * stack items, continue with closest child
+           */
+          r = __bscf(child_mask);
+          int c4 = __float_as_int(cnodes[r]);
+          float d4 = ((float *)&dist)[r];
+          if (child_mask == 0) {
+            ++stack_ptr;
+            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+            traversal_stack[stack_ptr].addr = c4;
+            traversal_stack[stack_ptr].dist = d4;
+            obvh_stack_sort(&traversal_stack[stack_ptr],
+                            &traversal_stack[stack_ptr - 1],
+                            &traversal_stack[stack_ptr - 2],
+                            &traversal_stack[stack_ptr - 3],
+                            &traversal_stack[stack_ptr - 4]);
+            node_addr = traversal_stack[stack_ptr].addr;
+            --stack_ptr;
+            continue;
+          }
+
+          /* Six children are hit, push all onto stack and sort 6
+           * stack items, continue with closest child.
+           */
+          r = __bscf(child_mask);
+          int c5 = __float_as_int(cnodes[r]);
+          float d5 = ((float *)&dist)[r];
+          if (child_mask == 0) {
+            ++stack_ptr;
+            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+            traversal_stack[stack_ptr].addr = c5;
+            traversal_stack[stack_ptr].dist = d5;
+            ++stack_ptr;
+            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+            traversal_stack[stack_ptr].addr = c4;
+            traversal_stack[stack_ptr].dist = d4;
+            obvh_stack_sort(&traversal_stack[stack_ptr],
+                            &traversal_stack[stack_ptr - 1],
+                            &traversal_stack[stack_ptr - 2],
+                            &traversal_stack[stack_ptr - 3],
+                            &traversal_stack[stack_ptr - 4],
+                            &traversal_stack[stack_ptr - 5]);
+            node_addr = traversal_stack[stack_ptr].addr;
+            --stack_ptr;
+            continue;
+          }
+
+          ++stack_ptr;
+          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+          traversal_stack[stack_ptr].addr = c5;
+          traversal_stack[stack_ptr].dist = d5;
+          ++stack_ptr;
+          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+          traversal_stack[stack_ptr].addr = c4;
+          traversal_stack[stack_ptr].dist = d4;
+
+          /* Seven children are hit, push all onto stack and sort 7
+           * stack items, continue with closest child.
+           */
+          r = __bscf(child_mask);
+          int c6 = __float_as_int(cnodes[r]);
+          float d6 = ((float *)&dist)[r];
+          if (child_mask == 0) {
+            ++stack_ptr;
+            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+            traversal_stack[stack_ptr].addr = c6;
+            traversal_stack[stack_ptr].dist = d6;
+            obvh_stack_sort(&traversal_stack[stack_ptr],
+                            &traversal_stack[stack_ptr - 1],
+                            &traversal_stack[stack_ptr - 2],
+                            &traversal_stack[stack_ptr - 3],
+                            &traversal_stack[stack_ptr - 4],
+                            &traversal_stack[stack_ptr - 5],
+                            &traversal_stack[stack_ptr - 6]);
+            node_addr = traversal_stack[stack_ptr].addr;
+            --stack_ptr;
+            continue;
+          }
+
+          /* Eight children are hit, push all onto stack and sort 8
+           * stack items, continue with closest child.
+           */
+          r = __bscf(child_mask);
+          int c7 = __float_as_int(cnodes[r]);
+          float d7 = ((float *)&dist)[r];
+          ++stack_ptr;
+          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+          traversal_stack[stack_ptr].addr = c7;
+          traversal_stack[stack_ptr].dist = d7;
+          ++stack_ptr;
+          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+          traversal_stack[stack_ptr].addr = c6;
+          traversal_stack[stack_ptr].dist = d6;
+          obvh_stack_sort(&traversal_stack[stack_ptr],
+                          &traversal_stack[stack_ptr - 1],
+                          &traversal_stack[stack_ptr - 2],
+                          &traversal_stack[stack_ptr - 3],
+                          &traversal_stack[stack_ptr - 4],
+                          &traversal_stack[stack_ptr - 5],
+                          &traversal_stack[stack_ptr - 6],
+                          &traversal_stack[stack_ptr - 7]);
+          node_addr = traversal_stack[stack_ptr].addr;
+          --stack_ptr;
+          continue;
+        }
+
+        node_addr = traversal_stack[stack_ptr].addr;
+        --stack_ptr;
+      }
+
+      /* If node is leaf, fetch triangle list. */
+      if (node_addr < 0) {
+        float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
+
+        if ((__float_as_uint(leaf.z) & visibility) == 0) {
+          /* Pop. */
+          node_addr = traversal_stack[stack_ptr].addr;
+          --stack_ptr;
+          continue;
+        }
+
+        int prim_addr = __float_as_int(leaf.x);
 
 #if BVH_FEATURE(BVH_INSTANCING)
-				if(prim_addr >= 0) {
+        if (prim_addr >= 0) {
 #endif
-					int prim_addr2 = __float_as_int(leaf.y);
-					const uint type = __float_as_int(leaf.w);
-					const uint p_type = type & PRIMITIVE_ALL;
-
-					/* Pop. */
-					node_addr = traversal_stack[stack_ptr].addr;
-					--stack_ptr;
-
-					/* Primitive intersection. */
-					switch(p_type) {
-						case PRIMITIVE_TRIANGLE: {
-							for(; prim_addr < prim_addr2; prim_addr++) {
-								kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
-								/* Only primitives from volume object. */
-								uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, prim_addr): object;
-								int object_flag = kernel_tex_fetch(__object_flag, tri_object);
-								if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
-									continue;
-								}
-								/* Intersect ray against primitive. */
-								triangle_intersect(kg, isect, P, dir, visibility, object, prim_addr);
-							}
-							break;
-						}
+          int prim_addr2 = __float_as_int(leaf.y);
+          const uint type = __float_as_int(leaf.w);
+          const uint p_type = type & PRIMITIVE_ALL;
+
+          /* Pop. */
+          node_addr = traversal_stack[stack_ptr].addr;
+          --stack_ptr;
+
+          /* Primitive intersection. */
+          switch (p_type) {
+            case PRIMITIVE_TRIANGLE: {
+              for (; prim_addr < prim_addr2; prim_addr++) {
+                kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
+                /* Only primitives from volume object. */
+                uint tri_object = (object == OBJECT_NONE) ?
+                                      kernel_tex_fetch(__prim_object, prim_addr) :
+                                      object;
+                int object_flag = kernel_tex_fetch(__object_flag, tri_object);
+                if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
+                  continue;
+                }
+                /* Intersect ray against primitive. */
+                triangle_intersect(kg, isect, P, dir, visibility, object, prim_addr);
+              }
+              break;
+            }
 #if BVH_FEATURE(BVH_MOTION)
-						case PRIMITIVE_MOTION_TRIANGLE: {
-							for(; prim_addr < prim_addr2; prim_addr++) {
-								kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
-								/* Only primitives from volume object. */
-								uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, prim_addr): object;
-								int object_flag = kernel_tex_fetch(__object_flag, tri_object);
-								if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
-									continue;
-								}
-								/* Intersect ray against primitive. */
-								motion_triangle_intersect(kg, isect, P, dir, ray->time, visibility, object, prim_addr);
-							}
-							break;
-						}
+            case PRIMITIVE_MOTION_TRIANGLE: {
+              for (; prim_addr < prim_addr2; prim_addr++) {
+                kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
+                /* Only primitives from volume object. */
+                uint tri_object = (object == OBJECT_NONE) ?
+                                      kernel_tex_fetch(__prim_object, prim_addr) :
+                                      object;
+                int object_flag = kernel_tex_fetch(__object_flag, tri_object);
+                if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
+                  continue;
+                }
+                /* Intersect ray against primitive. */
+                motion_triangle_intersect(
+                    kg, isect, P, dir, ray->time, visibility, object, prim_addr);
+              }
+              break;
+            }
 #endif
-					}
-				}
+          }
+        }
 #if BVH_FEATURE(BVH_INSTANCING)
-				else {
-					/* Instance push. */
-					object = kernel_tex_fetch(__prim_object, -prim_addr-1);
-					int object_flag = kernel_tex_fetch(__object_flag, object);
-					if(object_flag & SD_OBJECT_HAS_VOLUME) {
+        else {
+          /* Instance push. */
+          object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
+          int object_flag = kernel_tex_fetch(__object_flag, object);
+          if (object_flag & SD_OBJECT_HAS_VOLUME) {
 #  if BVH_FEATURE(BVH_MOTION)
-						isect->t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
+            isect->t = bvh_instance_motion_push(
+                kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
 #  else
-						isect->t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect->t);
+            isect->t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect->t);
 #  endif
 
-						obvh_near_far_idx_calc(idir,
-						                       &near_x, &near_y, &near_z,
-						                       &far_x, &far_y, &far_z);
-						tfar = avxf(isect->t);
+            obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
+            tfar = avxf(isect->t);
 #  if BVH_FEATURE(BVH_HAIR)
-						dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));
+            dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));
 #  endif
-						idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z));
+            idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z));
 #  ifdef __KERNEL_AVX2__
-						P_idir = P*idir;
-						P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
+            P_idir = P * idir;
+            P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
 #  endif
 #  if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
-						org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z));
+            org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z));
 #  endif
 
-						++stack_ptr;
-						kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-						traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
-
-						node_addr = kernel_tex_fetch(__object_node, object);
-					}
-					else {
-						/* Pop. */
-						object = OBJECT_NONE;
-						node_addr = traversal_stack[stack_ptr].addr;
-						--stack_ptr;
-					}
-				}
-			}
-#endif  /* FEATURE(BVH_INSTANCING) */
-		} while(node_addr != ENTRYPOINT_SENTINEL);
+            ++stack_ptr;
+            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+            traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
+
+            node_addr = kernel_tex_fetch(__object_node, object);
+          }
+          else {
+            /* Pop. */
+            object = OBJECT_NONE;
+            node_addr = traversal_stack[stack_ptr].addr;
+            --stack_ptr;
+          }
+        }
+      }
+#endif /* FEATURE(BVH_INSTANCING) */
+    } while (node_addr != ENTRYPOINT_SENTINEL);
 
 #if BVH_FEATURE(BVH_INSTANCING)
-		if(stack_ptr >= 0) {
-			kernel_assert(object != OBJECT_NONE);
+    if (stack_ptr >= 0) {
+      kernel_assert(object != OBJECT_NONE);
 
-			/* Instance pop. */
+      /* Instance pop. */
 #  if BVH_FEATURE(BVH_MOTION)
-			isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
+      isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
 #  else
-			isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
+      isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
 #  endif
 
-			obvh_near_far_idx_calc(idir,
-			                       &near_x, &near_y, &near_z,
-			                       &far_x, &far_y, &far_z);
-			tfar = avxf(isect->t);
+      obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
+      tfar = avxf(isect->t);
 #  if BVH_FEATURE(BVH_HAIR)
-			dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));
+      dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));
 #  endif
-			idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z));
+      idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z));
 #  ifdef __KERNEL_AVX2__
-			P_idir = P*idir;
-			P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
+      P_idir = P * idir;
+      P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
 #  endif
 #  if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
-			org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z));
+      org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z));
 #  endif
 
-			object = OBJECT_NONE;
-			node_addr = traversal_stack[stack_ptr].addr;
-			--stack_ptr;
-		}
-#endif  /* FEATURE(BVH_INSTANCING) */
-	} while(node_addr != ENTRYPOINT_SENTINEL);
+      object = OBJECT_NONE;
+      node_addr = traversal_stack[stack_ptr].addr;
+      --stack_ptr;
+    }
+#endif /* FEATURE(BVH_INSTANCING) */
+  } while (node_addr != ENTRYPOINT_SENTINEL);
 
-	return (isect->prim != PRIM_NONE);
+  return (isect->prim != PRIM_NONE);
 }
 
 #undef NODE_INTERSECT
diff --git a/intern/cycles/kernel/bvh/obvh_volume_all.h b/intern/cycles/kernel/bvh/obvh_volume_all.h
index 5476f79712a..56e2afd4a11 100644
--- a/intern/cycles/kernel/bvh/obvh_volume_all.h
+++ b/intern/cycles/kernel/bvh/obvh_volume_all.h
@@ -34,514 +34,518 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
                                              const uint max_hits,
                                              const uint visibility)
 {
-	/* Traversal stack in CUDA thread-local memory. */
-	OBVHStackItem traversal_stack[BVH_OSTACK_SIZE];
-	traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
-
-	/* Traversal variables in registers. */
-	int stack_ptr = 0;
-	int node_addr = kernel_data.bvh.root;
-
-	/* Ray parameters in registers. */
-	const float tmax = ray->t;
-	float3 P = ray->P;
-	float3 dir = bvh_clamp_direction(ray->D);
-	float3 idir = bvh_inverse_direction(dir);
-	int object = OBJECT_NONE;
-	float isect_t = tmax;
+  /* Traversal stack in CUDA thread-local memory. */
+  OBVHStackItem traversal_stack[BVH_OSTACK_SIZE];
+  traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
+
+  /* Traversal variables in registers. */
+  int stack_ptr = 0;
+  int node_addr = kernel_data.bvh.root;
+
+  /* Ray parameters in registers. */
+  const float tmax = ray->t;
+  float3 P = ray->P;
+  float3 dir = bvh_clamp_direction(ray->D);
+  float3 idir = bvh_inverse_direction(dir);
+  int object = OBJECT_NONE;
+  float isect_t = tmax;
 
 #if BVH_FEATURE(BVH_MOTION)
-	Transform ob_itfm;
+  Transform ob_itfm;
 #endif
 
-	uint num_hits = 0;
-	isect_array->t = tmax;
+  uint num_hits = 0;
+  isect_array->t = tmax;
 
 #if BVH_FEATURE(BVH_INSTANCING)
-	int num_hits_in_instance = 0;
+  int num_hits_in_instance = 0;
 #endif
 
-	avxf tnear(0.0f), tfar(isect_t);
+  avxf tnear(0.0f), tfar(isect_t);
 #if BVH_FEATURE(BVH_HAIR)
-	avx3f dir4(avxf(dir.x), avxf(dir.y), avxf(dir.z));
+  avx3f dir4(avxf(dir.x), avxf(dir.y), avxf(dir.z));
 #endif
-	avx3f idir4(avxf(idir.x), avxf(idir.y), avxf(idir.z));
+  avx3f idir4(avxf(idir.x), avxf(idir.y), avxf(idir.z));
 
 #ifdef __KERNEL_AVX2__
-	float3 P_idir = P*idir;
-	avx3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
+  float3 P_idir = P * idir;
+  avx3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
 #endif
 #if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
-	avx3f org4(avxf(P.x), avxf(P.y), avxf(P.z));
+  avx3f org4(avxf(P.x), avxf(P.y), avxf(P.z));
 #endif
 
-	/* Offsets to select the side that becomes the lower or upper bound. */
-	int near_x, near_y, near_z;
-	int far_x, far_y, far_z;
-	obvh_near_far_idx_calc(idir,
-	                       &near_x, &near_y, &near_z,
-	                       &far_x, &far_y, &far_z);
+  /* Offsets to select the side that becomes the lower or upper bound. */
+  int near_x, near_y, near_z;
+  int far_x, far_y, far_z;
+  obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
 
-	/* Traversal loop. */
-	do {
-		do {
-			/* Traverse internal nodes. */
-			while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
-				float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
+  /* Traversal loop. */
+  do {
+    do {
+      /* Traverse internal nodes. */
+      while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
+        float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
 
 #ifdef __VISIBILITY_FLAG__
-				if((__float_as_uint(inodes.x) & visibility) == 0) {
-					/* Pop. */
-					node_addr = traversal_stack[stack_ptr].addr;
-					--stack_ptr;
-					continue;
-				}
+        if ((__float_as_uint(inodes.x) & visibility) == 0) {
+          /* Pop. */
+          node_addr = traversal_stack[stack_ptr].addr;
+          --stack_ptr;
+          continue;
+        }
 #endif
 
-				avxf dist;
-				int child_mask = NODE_INTERSECT(kg,
-					                            tnear,
-					                            tfar,
+        avxf dist;
+        int child_mask = NODE_INTERSECT(kg,
+                                        tnear,
+                                        tfar,
 #ifdef __KERNEL_AVX2__
-					                            P_idir4,
+                                        P_idir4,
 #endif
 #if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
-					                            org4,
+                                        org4,
 #endif
 #if BVH_FEATURE(BVH_HAIR)
-					                            dir4,
+                                        dir4,
 #endif
-					                            idir4,
-					                            near_x, near_y, near_z,
-					                            far_x, far_y, far_z,
-					                            node_addr,
-					                            &dist);
-
-				if(child_mask != 0) {
-					avxf cnodes;
+                                        idir4,
+                                        near_x,
+                                        near_y,
+                                        near_z,
+                                        far_x,
+                                        far_y,
+                                        far_z,
+                                        node_addr,
+                                        &dist);
+
+        if (child_mask != 0) {
+          avxf cnodes;
 #if BVH_FEATURE(BVH_HAIR)
-					if(__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
-						cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr+26);
-					}
-					else
+          if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
+            cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 26);
+          }
+          else
 #endif
-					{
-						cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr+14);
-					}
-
-					/* One child is hit, continue with that child. */
-					int r = __bscf(child_mask);
-					if(child_mask == 0) {
-						node_addr = __float_as_int(cnodes[r]);
-						continue;
-					}
-
-					/* Two children are hit, push far child, and continue with
-					 * closer child.
-					 */
-					int c0 = __float_as_int(cnodes[r]);
-					float d0 = ((float*)&dist)[r];
-					r = __bscf(child_mask);
-					int c1 = __float_as_int(cnodes[r]);
-					float d1 = ((float*)&dist)[r];
-					if(child_mask == 0) {
-						if(d1 < d0) {
-							node_addr = c1;
-							++stack_ptr;
-							kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-							traversal_stack[stack_ptr].addr = c0;
-							traversal_stack[stack_ptr].dist = d0;
-							continue;
-						}
-						else {
-							node_addr = c0;
-							++stack_ptr;
-							kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-							traversal_stack[stack_ptr].addr = c1;
-							traversal_stack[stack_ptr].dist = d1;
-							continue;
-						}
-					}
-
-					/* Here starts the slow path for 3 or 4 hit children. We push
-					 * all nodes onto the stack to sort them there.
-					 */
-					++stack_ptr;
-					kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-					traversal_stack[stack_ptr].addr = c1;
-					traversal_stack[stack_ptr].dist = d1;
-					++stack_ptr;
-					kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-					traversal_stack[stack_ptr].addr = c0;
-					traversal_stack[stack_ptr].dist = d0;
-
-					/* Three children are hit, push all onto stack and sort 3
-					 * stack items, continue with closest child.
-					 */
-					r = __bscf(child_mask);
-					int c2 = __float_as_int(cnodes[r]);
-					float d2 = ((float*)&dist)[r];
-					if(child_mask == 0) {
-						++stack_ptr;
-						kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-						traversal_stack[stack_ptr].addr = c2;
-						traversal_stack[stack_ptr].dist = d2;
-						obvh_stack_sort(&traversal_stack[stack_ptr],
-						                &traversal_stack[stack_ptr - 1],
-						                &traversal_stack[stack_ptr - 2]);
-						node_addr = traversal_stack[stack_ptr].addr;
-						--stack_ptr;
-						continue;
-					}
-
-					/* Four children are hit, push all onto stack and sort 4
-					 * stack items, continue with closest child.
-					 */
-					r = __bscf(child_mask);
-					int c3 = __float_as_int(cnodes[r]);
-					float d3 = ((float*)&dist)[r];
-					if(child_mask == 0) {
-						++stack_ptr;
-						kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-						traversal_stack[stack_ptr].addr = c3;
-						traversal_stack[stack_ptr].dist = d3;
-						++stack_ptr;
-						kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-						traversal_stack[stack_ptr].addr = c2;
-						traversal_stack[stack_ptr].dist = d2;
-						obvh_stack_sort(&traversal_stack[stack_ptr],
-						                &traversal_stack[stack_ptr - 1],
-						                &traversal_stack[stack_ptr - 2],
-						                &traversal_stack[stack_ptr - 3]);
-						node_addr = traversal_stack[stack_ptr].addr;
-						--stack_ptr;
-						continue;
-					}
-
-					++stack_ptr;
-					kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-					traversal_stack[stack_ptr].addr = c3;
-					traversal_stack[stack_ptr].dist = d3;
-					++stack_ptr;
-					kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-					traversal_stack[stack_ptr].addr = c2;
-					traversal_stack[stack_ptr].dist = d2;
-
-					/* Five children are hit, push all onto stack and sort 5
-					 * stack items, continue with closest child.
-					 */
-					r = __bscf(child_mask);
-					int c4 = __float_as_int(cnodes[r]);
-					float d4 = ((float*)&dist)[r];
-					if(child_mask == 0) {
-						++stack_ptr;
-						kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-						traversal_stack[stack_ptr].addr = c4;
-						traversal_stack[stack_ptr].dist = d4;
-						obvh_stack_sort(&traversal_stack[stack_ptr],
-						                &traversal_stack[stack_ptr - 1],
-						                &traversal_stack[stack_ptr - 2],
-						                &traversal_stack[stack_ptr - 3],
-						                &traversal_stack[stack_ptr - 4]);
-						node_addr = traversal_stack[stack_ptr].addr;
-						--stack_ptr;
-						continue;
-					}
-
-					/* Six children are hit, push all onto stack and sort 6
-					 * stack items, continue with closest child.
-					 */
-					r = __bscf(child_mask);
-					int c5 = __float_as_int(cnodes[r]);
-					float d5 = ((float*)&dist)[r];
-					if(child_mask == 0) {
-						++stack_ptr;
-						kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-						traversal_stack[stack_ptr].addr = c5;
-						traversal_stack[stack_ptr].dist = d5;
-						++stack_ptr;
-						kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-						traversal_stack[stack_ptr].addr = c4;
-						traversal_stack[stack_ptr].dist = d4;
-						obvh_stack_sort(&traversal_stack[stack_ptr],
-						                &traversal_stack[stack_ptr - 1],
-						                &traversal_stack[stack_ptr - 2],
-						                &traversal_stack[stack_ptr - 3],
-						                &traversal_stack[stack_ptr - 4],
-						                &traversal_stack[stack_ptr - 5]);
-						node_addr = traversal_stack[stack_ptr].addr;
-						--stack_ptr;
-						continue;
-					}
-
-					++stack_ptr;
-					kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-					traversal_stack[stack_ptr].addr = c5;
-					traversal_stack[stack_ptr].dist = d5;
-					++stack_ptr;
-					kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-					traversal_stack[stack_ptr].addr = c4;
-					traversal_stack[stack_ptr].dist = d4;
-
-					/* Seven children are hit, push all onto stack and sort 7
-					 * stack items, continue with closest child.
-					 */
-					r = __bscf(child_mask);
-					int c6 = __float_as_int(cnodes[r]);
-					float d6 = ((float*)&dist)[r];
-					if(child_mask == 0) {
-						++stack_ptr;
-						kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-						traversal_stack[stack_ptr].addr = c6;
-						traversal_stack[stack_ptr].dist = d6;
-						obvh_stack_sort(&traversal_stack[stack_ptr],
-						                &traversal_stack[stack_ptr - 1],
-						                &traversal_stack[stack_ptr - 2],
-						                &traversal_stack[stack_ptr - 3],
-						                &traversal_stack[stack_ptr - 4],
-						                &traversal_stack[stack_ptr - 5],
-						                &traversal_stack[stack_ptr - 6]);
-						node_addr = traversal_stack[stack_ptr].addr;
-						--stack_ptr;
-						continue;
-					}
-
-					/* Eight children are hit, push all onto stack and sort 8
-					 * stack items, continue with closest child.
-					 */
-					r = __bscf(child_mask);
-					int c7 = __float_as_int(cnodes[r]);
-					float d7 = ((float*)&dist)[r];
-					++stack_ptr;
-					kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-					traversal_stack[stack_ptr].addr = c7;
-					traversal_stack[stack_ptr].dist = d7;
-					++stack_ptr;
-					kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-					traversal_stack[stack_ptr].addr = c6;
-					traversal_stack[stack_ptr].dist = d6;
-					obvh_stack_sort(&traversal_stack[stack_ptr],
-					                &traversal_stack[stack_ptr - 1],
-					                &traversal_stack[stack_ptr - 2],
-					                &traversal_stack[stack_ptr - 3],
-					                &traversal_stack[stack_ptr - 4],
-					                &traversal_stack[stack_ptr - 5],
-					                &traversal_stack[stack_ptr - 6],
-					                &traversal_stack[stack_ptr - 7]);
-					node_addr = traversal_stack[stack_ptr].addr;
-					--stack_ptr;
-					continue;
-				}
-
-				node_addr = traversal_stack[stack_ptr].addr;
-				--stack_ptr;
-			}
-
-			/* If node is leaf, fetch triangle list. */
-			if(node_addr < 0) {
-				float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1));
-
-				if((__float_as_uint(leaf.z) & visibility) == 0) {
-					/* Pop. */
-					node_addr = traversal_stack[stack_ptr].addr;
-					--stack_ptr;
-					continue;
-				}
-
-				int prim_addr = __float_as_int(leaf.x);
+          {
+            cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 14);
+          }
+
+          /* One child is hit, continue with that child. */
+          int r = __bscf(child_mask);
+          if (child_mask == 0) {
+            node_addr = __float_as_int(cnodes[r]);
+            continue;
+          }
+
+          /* Two children are hit, push far child, and continue with
+           * closer child.
+           */
+          int c0 = __float_as_int(cnodes[r]);
+          float d0 = ((float *)&dist)[r];
+          r = __bscf(child_mask);
+          int c1 = __float_as_int(cnodes[r]);
+          float d1 = ((float *)&dist)[r];
+          if (child_mask == 0) {
+            if (d1 < d0) {
+              node_addr = c1;
+              ++stack_ptr;
+              kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+              traversal_stack[stack_ptr].addr = c0;
+              traversal_stack[stack_ptr].dist = d0;
+              continue;
+            }
+            else {
+              node_addr = c0;
+              ++stack_ptr;
+              kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+              traversal_stack[stack_ptr].addr = c1;
+              traversal_stack[stack_ptr].dist = d1;
+              continue;
+            }
+          }
+
+          /* Here starts the slow path for 3 to 8 hit children. We push
+           * all nodes onto the stack to sort them there.
+           */
+          ++stack_ptr;
+          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+          traversal_stack[stack_ptr].addr = c1;
+          traversal_stack[stack_ptr].dist = d1;
+          ++stack_ptr;
+          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+          traversal_stack[stack_ptr].addr = c0;
+          traversal_stack[stack_ptr].dist = d0;
+
+          /* Three children are hit, push all onto stack and sort 3
+           * stack items, continue with closest child.
+           */
+          r = __bscf(child_mask);
+          int c2 = __float_as_int(cnodes[r]);
+          float d2 = ((float *)&dist)[r];
+          if (child_mask == 0) {
+            ++stack_ptr;
+            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+            traversal_stack[stack_ptr].addr = c2;
+            traversal_stack[stack_ptr].dist = d2;
+            obvh_stack_sort(&traversal_stack[stack_ptr],
+                            &traversal_stack[stack_ptr - 1],
+                            &traversal_stack[stack_ptr - 2]);
+            node_addr = traversal_stack[stack_ptr].addr;
+            --stack_ptr;
+            continue;
+          }
+
+          /* Four children are hit, push all onto stack and sort 4
+           * stack items, continue with closest child.
+           */
+          r = __bscf(child_mask);
+          int c3 = __float_as_int(cnodes[r]);
+          float d3 = ((float *)&dist)[r];
+          if (child_mask == 0) {
+            ++stack_ptr;
+            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+            traversal_stack[stack_ptr].addr = c3;
+            traversal_stack[stack_ptr].dist = d3;
+            ++stack_ptr;
+            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+            traversal_stack[stack_ptr].addr = c2;
+            traversal_stack[stack_ptr].dist = d2;
+            obvh_stack_sort(&traversal_stack[stack_ptr],
+                            &traversal_stack[stack_ptr - 1],
+                            &traversal_stack[stack_ptr - 2],
+                            &traversal_stack[stack_ptr - 3]);
+            node_addr = traversal_stack[stack_ptr].addr;
+            --stack_ptr;
+            continue;
+          }
+
+          ++stack_ptr;
+          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+          traversal_stack[stack_ptr].addr = c3;
+          traversal_stack[stack_ptr].dist = d3;
+          ++stack_ptr;
+          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+          traversal_stack[stack_ptr].addr = c2;
+          traversal_stack[stack_ptr].dist = d2;
+
+          /* Five children are hit, push all onto stack and sort 5
+           * stack items, continue with closest child.
+           */
+          r = __bscf(child_mask);
+          int c4 = __float_as_int(cnodes[r]);
+          float d4 = ((float *)&dist)[r];
+          if (child_mask == 0) {
+            ++stack_ptr;
+            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+            traversal_stack[stack_ptr].addr = c4;
+            traversal_stack[stack_ptr].dist = d4;
+            obvh_stack_sort(&traversal_stack[stack_ptr],
+                            &traversal_stack[stack_ptr - 1],
+                            &traversal_stack[stack_ptr - 2],
+                            &traversal_stack[stack_ptr - 3],
+                            &traversal_stack[stack_ptr - 4]);
+            node_addr = traversal_stack[stack_ptr].addr;
+            --stack_ptr;
+            continue;
+          }
+
+          /* Six children are hit, push all onto stack and sort 6
+           * stack items, continue with closest child.
+           */
+          r = __bscf(child_mask);
+          int c5 = __float_as_int(cnodes[r]);
+          float d5 = ((float *)&dist)[r];
+          if (child_mask == 0) {
+            ++stack_ptr;
+            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+            traversal_stack[stack_ptr].addr = c5;
+            traversal_stack[stack_ptr].dist = d5;
+            ++stack_ptr;
+            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+            traversal_stack[stack_ptr].addr = c4;
+            traversal_stack[stack_ptr].dist = d4;
+            obvh_stack_sort(&traversal_stack[stack_ptr],
+                            &traversal_stack[stack_ptr - 1],
+                            &traversal_stack[stack_ptr - 2],
+                            &traversal_stack[stack_ptr - 3],
+                            &traversal_stack[stack_ptr - 4],
+                            &traversal_stack[stack_ptr - 5]);
+            node_addr = traversal_stack[stack_ptr].addr;
+            --stack_ptr;
+            continue;
+          }
+
+          ++stack_ptr;
+          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+          traversal_stack[stack_ptr].addr = c5;
+          traversal_stack[stack_ptr].dist = d5;
+          ++stack_ptr;
+          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+          traversal_stack[stack_ptr].addr = c4;
+          traversal_stack[stack_ptr].dist = d4;
+
+          /* Seven children are hit, push all onto stack and sort 7
+           * stack items, continue with closest child.
+           */
+          r = __bscf(child_mask);
+          int c6 = __float_as_int(cnodes[r]);
+          float d6 = ((float *)&dist)[r];
+          if (child_mask == 0) {
+            ++stack_ptr;
+            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+            traversal_stack[stack_ptr].addr = c6;
+            traversal_stack[stack_ptr].dist = d6;
+            obvh_stack_sort(&traversal_stack[stack_ptr],
+                            &traversal_stack[stack_ptr - 1],
+                            &traversal_stack[stack_ptr - 2],
+                            &traversal_stack[stack_ptr - 3],
+                            &traversal_stack[stack_ptr - 4],
+                            &traversal_stack[stack_ptr - 5],
+                            &traversal_stack[stack_ptr - 6]);
+            node_addr = traversal_stack[stack_ptr].addr;
+            --stack_ptr;
+            continue;
+          }
+
+          /* Eight children are hit, push all onto stack and sort 8
+           * stack items, continue with closest child.
+           */
+          r = __bscf(child_mask);
+          int c7 = __float_as_int(cnodes[r]);
+          float d7 = ((float *)&dist)[r];
+          ++stack_ptr;
+          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+          traversal_stack[stack_ptr].addr = c7;
+          traversal_stack[stack_ptr].dist = d7;
+          ++stack_ptr;
+          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+          traversal_stack[stack_ptr].addr = c6;
+          traversal_stack[stack_ptr].dist = d6;
+          obvh_stack_sort(&traversal_stack[stack_ptr],
+                          &traversal_stack[stack_ptr - 1],
+                          &traversal_stack[stack_ptr - 2],
+                          &traversal_stack[stack_ptr - 3],
+                          &traversal_stack[stack_ptr - 4],
+                          &traversal_stack[stack_ptr - 5],
+                          &traversal_stack[stack_ptr - 6],
+                          &traversal_stack[stack_ptr - 7]);
+          node_addr = traversal_stack[stack_ptr].addr;
+          --stack_ptr;
+          continue;
+        }
+
+        node_addr = traversal_stack[stack_ptr].addr;
+        --stack_ptr;
+      }
+
+      /* If node is leaf, fetch triangle list. */
+      if (node_addr < 0) {
+        float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
+
+        if ((__float_as_uint(leaf.z) & visibility) == 0) {
+          /* Pop. */
+          node_addr = traversal_stack[stack_ptr].addr;
+          --stack_ptr;
+          continue;
+        }
+
+        int prim_addr = __float_as_int(leaf.x);
 
 #if BVH_FEATURE(BVH_INSTANCING)
-				if(prim_addr >= 0) {
+        if (prim_addr >= 0) {
 #endif
-					int prim_addr2 = __float_as_int(leaf.y);
-					const uint type = __float_as_int(leaf.w);
-					const uint p_type = type & PRIMITIVE_ALL;
-					bool hit;
-
-					/* Pop. */
-					node_addr = traversal_stack[stack_ptr].addr;
-					--stack_ptr;
-
-					/* Primitive intersection. */
-					switch(p_type) {
-						case PRIMITIVE_TRIANGLE: {
-							for(; prim_addr < prim_addr2; prim_addr++) {
-								kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
-								/* Only primitives from volume object. */
-								uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, prim_addr): object;
-								int object_flag = kernel_tex_fetch(__object_flag, tri_object);
-								if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
-									continue;
-								}
-								/* Intersect ray against primitive. */
-								hit = triangle_intersect(kg, isect_array, P, dir, visibility, object, prim_addr);
-								if(hit) {
-									/* Move on to next entry in intersections array. */
-									isect_array++;
-									num_hits++;
+          int prim_addr2 = __float_as_int(leaf.y);
+          const uint type = __float_as_int(leaf.w);
+          const uint p_type = type & PRIMITIVE_ALL;
+          bool hit;
+
+          /* Pop. */
+          node_addr = traversal_stack[stack_ptr].addr;
+          --stack_ptr;
+
+          /* Primitive intersection. */
+          switch (p_type) {
+            case PRIMITIVE_TRIANGLE: {
+              for (; prim_addr < prim_addr2; prim_addr++) {
+                kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
+                /* Only primitives from volume object. */
+                uint tri_object = (object == OBJECT_NONE) ?
+                                      kernel_tex_fetch(__prim_object, prim_addr) :
+                                      object;
+                int object_flag = kernel_tex_fetch(__object_flag, tri_object);
+                if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
+                  continue;
+                }
+                /* Intersect ray against primitive. */
+                hit = triangle_intersect(kg, isect_array, P, dir, visibility, object, prim_addr);
+                if (hit) {
+                  /* Move on to next entry in intersections array. */
+                  isect_array++;
+                  num_hits++;
 #if BVH_FEATURE(BVH_INSTANCING)
-									num_hits_in_instance++;
+                  num_hits_in_instance++;
 #endif
-									isect_array->t = isect_t;
-									if(num_hits == max_hits) {
+                  isect_array->t = isect_t;
+                  if (num_hits == max_hits) {
 #if BVH_FEATURE(BVH_INSTANCING)
 #  if BVH_FEATURE(BVH_MOTION)
-										float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir));
+                    float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir));
 #  else
-										Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
-										float t_fac = 1.0f / len(transform_direction(&itfm, dir));
+                    Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
+                    float t_fac = 1.0f / len(transform_direction(&itfm, dir));
 #  endif
-										for(int i = 0; i < num_hits_in_instance; i++) {
-											(isect_array-i-1)->t *= t_fac;
-										}
-#endif  /* BVH_FEATURE(BVH_INSTANCING) */
-										return num_hits;
-									}
-								}
-							}
-							break;
-						}
+                    for (int i = 0; i < num_hits_in_instance; i++) {
+                      (isect_array - i - 1)->t *= t_fac;
+                    }
+#endif /* BVH_FEATURE(BVH_INSTANCING) */
+                    return num_hits;
+                  }
+                }
+              }
+              break;
+            }
 #if BVH_FEATURE(BVH_MOTION)
-						case PRIMITIVE_MOTION_TRIANGLE: {
-							for(; prim_addr < prim_addr2; prim_addr++) {
-								kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
-								/* Only primitives from volume object. */
-								uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, prim_addr): object;
-								int object_flag = kernel_tex_fetch(__object_flag, tri_object);
-								if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
-									continue;
-								}
-								/* Intersect ray against primitive. */
-								hit = motion_triangle_intersect(kg, isect_array, P, dir, ray->time, visibility, object, prim_addr);
-								if(hit) {
-									/* Move on to next entry in intersections array. */
-									isect_array++;
-									num_hits++;
+            case PRIMITIVE_MOTION_TRIANGLE: {
+              for (; prim_addr < prim_addr2; prim_addr++) {
+                kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
+                /* Only primitives from volume object. */
+                uint tri_object = (object == OBJECT_NONE) ?
+                                      kernel_tex_fetch(__prim_object, prim_addr) :
+                                      object;
+                int object_flag = kernel_tex_fetch(__object_flag, tri_object);
+                if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
+                  continue;
+                }
+                /* Intersect ray against primitive. */
+                hit = motion_triangle_intersect(
+                    kg, isect_array, P, dir, ray->time, visibility, object, prim_addr);
+                if (hit) {
+                  /* Move on to next entry in intersections array. */
+                  isect_array++;
+                  num_hits++;
 #  if BVH_FEATURE(BVH_INSTANCING)
-									num_hits_in_instance++;
+                  num_hits_in_instance++;
 #  endif
-									isect_array->t = isect_t;
-									if(num_hits == max_hits) {
+                  isect_array->t = isect_t;
+                  if (num_hits == max_hits) {
 #  if BVH_FEATURE(BVH_INSTANCING)
 #    if BVH_FEATURE(BVH_MOTION)
-										float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir));
+                    float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir));
 #    else
-										Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
-										float t_fac = 1.0f / len(transform_direction(&itfm, dir));
+                    Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
+                    float t_fac = 1.0f / len(transform_direction(&itfm, dir));
 #    endif
-										for(int i = 0; i < num_hits_in_instance; i++) {
-											(isect_array-i-1)->t *= t_fac;
-										}
-#  endif  /* BVH_FEATURE(BVH_INSTANCING) */
-										return num_hits;
-									}
-								}
-							}
-							break;
-						}
+                    for (int i = 0; i < num_hits_in_instance; i++) {
+                      (isect_array - i - 1)->t *= t_fac;
+                    }
+#  endif /* BVH_FEATURE(BVH_INSTANCING) */
+                    return num_hits;
+                  }
+                }
+              }
+              break;
+            }
 #endif
-					}
-				}
+          }
+        }
 #if BVH_FEATURE(BVH_INSTANCING)
-				else {
-					/* Instance push. */
-					object = kernel_tex_fetch(__prim_object, -prim_addr-1);
-					int object_flag = kernel_tex_fetch(__object_flag, object);
-					if(object_flag & SD_OBJECT_HAS_VOLUME) {
+        else {
+          /* Instance push. */
+          object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
+          int object_flag = kernel_tex_fetch(__object_flag, object);
+          if (object_flag & SD_OBJECT_HAS_VOLUME) {
 #  if BVH_FEATURE(BVH_MOTION)
-						isect_t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
+            isect_t = bvh_instance_motion_push(
+                kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
 #  else
-						isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
+            isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
 #  endif
 
-						obvh_near_far_idx_calc(idir,
-						                       &near_x, &near_y, &near_z,
-						                       &far_x, &far_y, &far_z);
-						tfar = avxf(isect_t);
-						idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z));
+            obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
+            tfar = avxf(isect_t);
+            idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z));
 #  if BVH_FEATURE(BVH_HAIR)
-						dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));
+            dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));
 #  endif
 #  ifdef __KERNEL_AVX2__
-						P_idir = P*idir;
-						P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
+            P_idir = P * idir;
+            P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
 #  endif
 #  if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
-						org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z));
+            org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z));
 #  endif
 
-						num_hits_in_instance = 0;
-						isect_array->t = isect_t;
-
-						++stack_ptr;
-						kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-						traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
-
-						node_addr = kernel_tex_fetch(__object_node, object);
-					}
-					else {
-						/* Pop. */
-						object = OBJECT_NONE;
-						node_addr = traversal_stack[stack_ptr].addr;
-						--stack_ptr;
-					}
-				}
-			}
-#endif  /* FEATURE(BVH_INSTANCING) */
-		} while(node_addr != ENTRYPOINT_SENTINEL);
+            num_hits_in_instance = 0;
+            isect_array->t = isect_t;
+
+            ++stack_ptr;
+            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
+            traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
+
+            node_addr = kernel_tex_fetch(__object_node, object);
+          }
+          else {
+            /* Pop. */
+            object = OBJECT_NONE;
+            node_addr = traversal_stack[stack_ptr].addr;
+            --stack_ptr;
+          }
+        }
+      }
+#endif /* BVH_FEATURE(BVH_INSTANCING) */
+    } while (node_addr != ENTRYPOINT_SENTINEL);
 
 #if BVH_FEATURE(BVH_INSTANCING)
-		if(stack_ptr >= 0) {
-			kernel_assert(object != OBJECT_NONE);
+    if (stack_ptr >= 0) {
+      kernel_assert(object != OBJECT_NONE);
 
-			/* Instance pop. */
-			if(num_hits_in_instance) {
-				float t_fac;
+      /* Instance pop. */
+      if (num_hits_in_instance) {
+        float t_fac;
 #  if BVH_FEATURE(BVH_MOTION)
-				bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm);
+        bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm);
 #  else
-				bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
+        bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
 #  endif
-				/* Scale isect->t to adjust for instancing. */
-				for(int i = 0; i < num_hits_in_instance; i++) {
-					(isect_array-i-1)->t *= t_fac;
-				}
-			}
-			else {
+        /* Scale isect->t to adjust for instancing. */
+        for (int i = 0; i < num_hits_in_instance; i++) {
+          (isect_array - i - 1)->t *= t_fac;
+        }
+      }
+      else {
 #  if BVH_FEATURE(BVH_MOTION)
-				bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm);
+        bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm);
 #  else
-				bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
+        bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
 #  endif
-			}
+      }
 
-			isect_t = tmax;
-			isect_array->t = isect_t;
+      isect_t = tmax;
+      isect_array->t = isect_t;
 
-			obvh_near_far_idx_calc(idir,
-			                       &near_x, &near_y, &near_z,
-			                       &far_x, &far_y, &far_z);
-			tfar = avxf(isect_t);
+      obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
+      tfar = avxf(isect_t);
 #  if BVH_FEATURE(BVH_HAIR)
-			dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));
+      dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));
 #  endif
-			idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z));
+      idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z));
 #  ifdef __KERNEL_AVX2__
-			P_idir = P*idir;
-			P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
+      P_idir = P * idir;
+      P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
 #  endif
 #  if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
-			org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z));
+      org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z));
 #  endif
 
-			object = OBJECT_NONE;
-			node_addr = traversal_stack[stack_ptr].addr;
-			--stack_ptr;
-		}
-#endif  /* FEATURE(BVH_INSTANCING) */
-	} while(node_addr != ENTRYPOINT_SENTINEL);
+      object = OBJECT_NONE;
+      node_addr = traversal_stack[stack_ptr].addr;
+      --stack_ptr;
+    }
+#endif /* BVH_FEATURE(BVH_INSTANCING) */
+  } while (node_addr != ENTRYPOINT_SENTINEL);
 
-	return num_hits;
+  return num_hits;
 }
 
 #undef NODE_INTERSECT
diff --git a/intern/cycles/kernel/bvh/qbvh_local.h b/intern/cycles/kernel/bvh/qbvh_local.h
index 661182e31b3..b21f79bd3a0 100644
--- a/intern/cycles/kernel/bvh/qbvh_local.h
+++ b/intern/cycles/kernel/bvh/qbvh_local.h
@@ -35,262 +35,257 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
                                              uint *lcg_state,
                                              int max_hits)
 {
-	/* TODO(sergey):
-	 * - Test if pushing distance on the stack helps (for non shadow rays).
-	 * - Separate version for shadow rays.
-	 * - Likely and unlikely for if() statements.
-	 * - SSE for hair.
-	 * - Test restrict attribute for pointers.
-	 */
+  /* TODO(sergey):
+   * - Test if pushing distance on the stack helps (for non shadow rays).
+   * - Separate version for shadow rays.
+   * - Likely and unlikely for if() statements.
+   * - SSE for hair.
+   * - Test restrict attribute for pointers.
+   */
 
-	/* Traversal stack in CUDA thread-local memory. */
-	QBVHStackItem traversal_stack[BVH_QSTACK_SIZE];
-	traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
+  /* Traversal stack in CUDA thread-local memory. */
+  QBVHStackItem traversal_stack[BVH_QSTACK_SIZE];
+  traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
 
-	/* Traversal variables in registers. */
-	int stack_ptr = 0;
-	int node_addr = kernel_tex_fetch(__object_node, local_object);
+  /* Traversal variables in registers. */
+  int stack_ptr = 0;
+  int node_addr = kernel_tex_fetch(__object_node, local_object);
 
-	/* Ray parameters in registers. */
-	float3 P = ray->P;
-	float3 dir = bvh_clamp_direction(ray->D);
-	float3 idir = bvh_inverse_direction(dir);
-	int object = OBJECT_NONE;
-	float isect_t = ray->t;
+  /* Ray parameters in registers. */
+  float3 P = ray->P;
+  float3 dir = bvh_clamp_direction(ray->D);
+  float3 idir = bvh_inverse_direction(dir);
+  int object = OBJECT_NONE;
+  float isect_t = ray->t;
 
-	if(local_isect != NULL) {
-		local_isect->num_hits = 0;
-	}
-	kernel_assert((local_isect == NULL) == (max_hits == 0));
+  if (local_isect != NULL) {
+    local_isect->num_hits = 0;
+  }
+  kernel_assert((local_isect == NULL) == (max_hits == 0));
 
-	const int object_flag = kernel_tex_fetch(__object_flag, local_object);
-	if(!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
+  const int object_flag = kernel_tex_fetch(__object_flag, local_object);
+  if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
 #if BVH_FEATURE(BVH_MOTION)
-		Transform ob_itfm;
-		isect_t = bvh_instance_motion_push(kg,
-		                                   local_object,
-		                                   ray,
-		                                   &P,
-		                                   &dir,
-		                                   &idir,
-		                                   isect_t,
-		                                   &ob_itfm);
+    Transform ob_itfm;
+    isect_t = bvh_instance_motion_push(kg, local_object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
 #else
-		isect_t = bvh_instance_push(kg, local_object, ray, &P, &dir, &idir, isect_t);
+    isect_t = bvh_instance_push(kg, local_object, ray, &P, &dir, &idir, isect_t);
 #endif
-		object = local_object;
-	}
+    object = local_object;
+  }
 
-	ssef tnear(0.0f), tfar(isect_t);
+  ssef tnear(0.0f), tfar(isect_t);
 #if BVH_FEATURE(BVH_HAIR)
-	sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z));
+  sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z));
 #endif
-	sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z));
+  sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z));
 
 #ifdef __KERNEL_AVX2__
-	float3 P_idir = P*idir;
-	sse3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
+  float3 P_idir = P * idir;
+  sse3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
 #endif
 #if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
-	sse3f org4(ssef(P.x), ssef(P.y), ssef(P.z));
+  sse3f org4(ssef(P.x), ssef(P.y), ssef(P.z));
 #endif
 
-	/* Offsets to select the side that becomes the lower or upper bound. */
-	int near_x, near_y, near_z;
-	int far_x, far_y, far_z;
-	qbvh_near_far_idx_calc(idir,
-	                       &near_x, &near_y, &near_z,
-	                       &far_x, &far_y, &far_z);
+  /* Offsets to select the side that becomes the lower or upper bound. */
+  int near_x, near_y, near_z;
+  int far_x, far_y, far_z;
+  qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
 
-	/* Traversal loop. */
-	do {
-		do {
-			/* Traverse internal nodes. */
-			while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
-				ssef dist;
-				int child_mask = NODE_INTERSECT(kg,
-				                                tnear,
-				                                tfar,
+  /* Traversal loop. */
+  do {
+    do {
+      /* Traverse internal nodes. */
+      while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
+        ssef dist;
+        int child_mask = NODE_INTERSECT(kg,
+                                        tnear,
+                                        tfar,
 #ifdef __KERNEL_AVX2__
-				                                P_idir4,
+                                        P_idir4,
 #endif
 #if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
-				                                org4,
+                                        org4,
 #endif
 #if BVH_FEATURE(BVH_HAIR)
-				                                dir4,
+                                        dir4,
 #endif
-				                                idir4,
-				                                near_x, near_y, near_z,
-				                                far_x, far_y, far_z,
-				                                node_addr,
-				                                &dist);
+                                        idir4,
+                                        near_x,
+                                        near_y,
+                                        near_z,
+                                        far_x,
+                                        far_y,
+                                        far_z,
+                                        node_addr,
+                                        &dist);
 
-				if(child_mask != 0) {
-					float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
-					float4 cnodes;
+        if (child_mask != 0) {
+          float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
+          float4 cnodes;
 #if BVH_FEATURE(BVH_HAIR)
-					if(__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
-						cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+13);
-					}
-					else
+          if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
+            cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 13);
+          }
+          else
 #endif
-					{
-						cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+7);
-					}
+          {
+            cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 7);
+          }
 
-					/* One child is hit, continue with that child. */
-					int r = __bscf(child_mask);
-					if(child_mask == 0) {
-						node_addr = __float_as_int(cnodes[r]);
-						continue;
-					}
+          /* One child is hit, continue with that child. */
+          int r = __bscf(child_mask);
+          if (child_mask == 0) {
+            node_addr = __float_as_int(cnodes[r]);
+            continue;
+          }
 
-					/* Two children are hit, push far child, and continue with
-					 * closer child.
-					 */
-					int c0 = __float_as_int(cnodes[r]);
-					float d0 = ((float*)&dist)[r];
-					r = __bscf(child_mask);
-					int c1 = __float_as_int(cnodes[r]);
-					float d1 = ((float*)&dist)[r];
-					if(child_mask == 0) {
-						if(d1 < d0) {
-							node_addr = c1;
-							++stack_ptr;
-							kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
-							traversal_stack[stack_ptr].addr = c0;
-							traversal_stack[stack_ptr].dist = d0;
-							continue;
-						}
-						else {
-							node_addr = c0;
-							++stack_ptr;
-							kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
-							traversal_stack[stack_ptr].addr = c1;
-							traversal_stack[stack_ptr].dist = d1;
-							continue;
-						}
-					}
+          /* Two children are hit, push far child, and continue with
+           * closer child.
+           */
+          int c0 = __float_as_int(cnodes[r]);
+          float d0 = ((float *)&dist)[r];
+          r = __bscf(child_mask);
+          int c1 = __float_as_int(cnodes[r]);
+          float d1 = ((float *)&dist)[r];
+          if (child_mask == 0) {
+            if (d1 < d0) {
+              node_addr = c1;
+              ++stack_ptr;
+              kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+              traversal_stack[stack_ptr].addr = c0;
+              traversal_stack[stack_ptr].dist = d0;
+              continue;
+            }
+            else {
+              node_addr = c0;
+              ++stack_ptr;
+              kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+              traversal_stack[stack_ptr].addr = c1;
+              traversal_stack[stack_ptr].dist = d1;
+              continue;
+            }
+          }
 
-					/* Here starts the slow path for 3 or 4 hit children. We push
-					 * all nodes onto the stack to sort them there.
-					 */
-					++stack_ptr;
-					kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
-					traversal_stack[stack_ptr].addr = c1;
-					traversal_stack[stack_ptr].dist = d1;
-					++stack_ptr;
-					kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
-					traversal_stack[stack_ptr].addr = c0;
-					traversal_stack[stack_ptr].dist = d0;
+          /* Here starts the slow path for 3 or 4 hit children. We push
+           * all nodes onto the stack to sort them there.
+           */
+          ++stack_ptr;
+          kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+          traversal_stack[stack_ptr].addr = c1;
+          traversal_stack[stack_ptr].dist = d1;
+          ++stack_ptr;
+          kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+          traversal_stack[stack_ptr].addr = c0;
+          traversal_stack[stack_ptr].dist = d0;
 
-					/* Three children are hit, push all onto stack and sort 3
-					 * stack items, continue with closest child.
-					 */
-					r = __bscf(child_mask);
-					int c2 = __float_as_int(cnodes[r]);
-					float d2 = ((float*)&dist)[r];
-					if(child_mask == 0) {
-						++stack_ptr;
-						kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
-						traversal_stack[stack_ptr].addr = c2;
-						traversal_stack[stack_ptr].dist = d2;
-						qbvh_stack_sort(&traversal_stack[stack_ptr],
-						                &traversal_stack[stack_ptr - 1],
-						                &traversal_stack[stack_ptr - 2]);
-						node_addr = traversal_stack[stack_ptr].addr;
-						--stack_ptr;
-						continue;
-					}
+          /* Three children are hit, push all onto stack and sort 3
+           * stack items, continue with closest child.
+           */
+          r = __bscf(child_mask);
+          int c2 = __float_as_int(cnodes[r]);
+          float d2 = ((float *)&dist)[r];
+          if (child_mask == 0) {
+            ++stack_ptr;
+            kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+            traversal_stack[stack_ptr].addr = c2;
+            traversal_stack[stack_ptr].dist = d2;
+            qbvh_stack_sort(&traversal_stack[stack_ptr],
+                            &traversal_stack[stack_ptr - 1],
+                            &traversal_stack[stack_ptr - 2]);
+            node_addr = traversal_stack[stack_ptr].addr;
+            --stack_ptr;
+            continue;
+          }
 
-					/* Four children are hit, push all onto stack and sort 4
-					 * stack items, continue with closest child.
-					 */
-					r = __bscf(child_mask);
-					int c3 = __float_as_int(cnodes[r]);
-					float d3 = ((float*)&dist)[r];
-					++stack_ptr;
-					kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
-					traversal_stack[stack_ptr].addr = c3;
-					traversal_stack[stack_ptr].dist = d3;
-					++stack_ptr;
-					kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
-					traversal_stack[stack_ptr].addr = c2;
-					traversal_stack[stack_ptr].dist = d2;
-					qbvh_stack_sort(&traversal_stack[stack_ptr],
-					                &traversal_stack[stack_ptr - 1],
-					                &traversal_stack[stack_ptr - 2],
-					                &traversal_stack[stack_ptr - 3]);
-				}
+          /* Four children are hit, push all onto stack and sort 4
+           * stack items, continue with closest child.
+           */
+          r = __bscf(child_mask);
+          int c3 = __float_as_int(cnodes[r]);
+          float d3 = ((float *)&dist)[r];
+          ++stack_ptr;
+          kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+          traversal_stack[stack_ptr].addr = c3;
+          traversal_stack[stack_ptr].dist = d3;
+          ++stack_ptr;
+          kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+          traversal_stack[stack_ptr].addr = c2;
+          traversal_stack[stack_ptr].dist = d2;
+          qbvh_stack_sort(&traversal_stack[stack_ptr],
+                          &traversal_stack[stack_ptr - 1],
+                          &traversal_stack[stack_ptr - 2],
+                          &traversal_stack[stack_ptr - 3]);
+        }
 
-				node_addr = traversal_stack[stack_ptr].addr;
-				--stack_ptr;
-			}
+        node_addr = traversal_stack[stack_ptr].addr;
+        --stack_ptr;
+      }
 
-			/* If node is leaf, fetch triangle list. */
-			if(node_addr < 0) {
-				float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1));
-				int prim_addr = __float_as_int(leaf.x);
+      /* If node is leaf, fetch triangle list. */
+      if (node_addr < 0) {
+        float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
+        int prim_addr = __float_as_int(leaf.x);
 
-				int prim_addr2 = __float_as_int(leaf.y);
-				const uint type = __float_as_int(leaf.w);
+        int prim_addr2 = __float_as_int(leaf.y);
+        const uint type = __float_as_int(leaf.w);
 
-				/* Pop. */
-				node_addr = traversal_stack[stack_ptr].addr;
-				--stack_ptr;
+        /* Pop. */
+        node_addr = traversal_stack[stack_ptr].addr;
+        --stack_ptr;
 
-				/* Primitive intersection. */
-				switch(type & PRIMITIVE_ALL) {
-					case PRIMITIVE_TRIANGLE: {
-						/* Intersect ray against primitive, */
-						for(; prim_addr < prim_addr2; prim_addr++) {
-							kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
-							if(triangle_intersect_local(kg,
-							                            local_isect,
-							                            P,
-							                            dir,
-							                            object,
-							                            local_object,
-							                            prim_addr,
-							                            isect_t,
-							                            lcg_state,
-							                            max_hits)) {
-								return true;
-							}
-						}
-						break;
-					}
+        /* Primitive intersection. */
+        switch (type & PRIMITIVE_ALL) {
+          case PRIMITIVE_TRIANGLE: {
+            /* Intersect ray against primitive, */
+            for (; prim_addr < prim_addr2; prim_addr++) {
+              kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
+              if (triangle_intersect_local(kg,
+                                           local_isect,
+                                           P,
+                                           dir,
+                                           object,
+                                           local_object,
+                                           prim_addr,
+                                           isect_t,
+                                           lcg_state,
+                                           max_hits)) {
+                return true;
+              }
+            }
+            break;
+          }
 #if BVH_FEATURE(BVH_MOTION)
-					case PRIMITIVE_MOTION_TRIANGLE: {
-						/* Intersect ray against primitive. */
-						for(; prim_addr < prim_addr2; prim_addr++) {
-							kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
-							if(motion_triangle_intersect_local(kg,
-							                                   local_isect,
-							                                   P,
-							                                   dir,
-							                                   ray->time,
-							                                   object,
-							                                   local_object,
-							                                   prim_addr,
-							                                   isect_t,
-							                                   lcg_state,
-							                                   max_hits)) {
-								return true;
-							}
-						}
-						break;
-					}
+          case PRIMITIVE_MOTION_TRIANGLE: {
+            /* Intersect ray against primitive. */
+            for (; prim_addr < prim_addr2; prim_addr++) {
+              kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
+              if (motion_triangle_intersect_local(kg,
+                                                  local_isect,
+                                                  P,
+                                                  dir,
+                                                  ray->time,
+                                                  object,
+                                                  local_object,
+                                                  prim_addr,
+                                                  isect_t,
+                                                  lcg_state,
+                                                  max_hits)) {
+                return true;
+              }
+            }
+            break;
+          }
 #endif
-					default:
-						break;
-				}
-			}
-		} while(node_addr != ENTRYPOINT_SENTINEL);
-	} while(node_addr != ENTRYPOINT_SENTINEL);
+          default:
+            break;
+        }
+      }
+    } while (node_addr != ENTRYPOINT_SENTINEL);
+  } while (node_addr != ENTRYPOINT_SENTINEL);
 
-	return false;
+  return false;
 }
 
 #undef NODE_INTERSECT
diff --git a/intern/cycles/kernel/bvh/qbvh_nodes.h b/intern/cycles/kernel/bvh/qbvh_nodes.h
index 2e622af1758..7c1d8c8c72e 100644
--- a/intern/cycles/kernel/bvh/qbvh_nodes.h
+++ b/intern/cycles/kernel/bvh/qbvh_nodes.h
@@ -17,11 +17,11 @@
  */
 
 struct QBVHStackItem {
-	int addr;
-	float dist;
+  int addr;
+  float dist;
 };
 
-ccl_device_inline void qbvh_near_far_idx_calc(const float3& idir,
+ccl_device_inline void qbvh_near_far_idx_calc(const float3 &idir,
                                               int *ccl_restrict near_x,
                                               int *ccl_restrict near_y,
                                               int *ccl_restrict near_z,
@@ -31,44 +31,76 @@ ccl_device_inline void qbvh_near_far_idx_calc(const float3& idir,
 
 {
 #ifdef __KERNEL_SSE__
-	*near_x = 0; *far_x = 1;
-	*near_y = 2; *far_y = 3;
-	*near_z = 4; *far_z = 5;
-
-	const size_t mask = movemask(ssef(idir.m128));
-
-	const int mask_x = mask & 1;
-	const int mask_y = (mask & 2) >> 1;
-	const int mask_z = (mask & 4) >> 2;
-
-	*near_x += mask_x; *far_x -= mask_x;
-	*near_y += mask_y; *far_y -= mask_y;
-	*near_z += mask_z; *far_z -= mask_z;
+  *near_x = 0;
+  *far_x = 1;
+  *near_y = 2;
+  *far_y = 3;
+  *near_z = 4;
+  *far_z = 5;
+
+  const size_t mask = movemask(ssef(idir.m128));
+
+  const int mask_x = mask & 1;
+  const int mask_y = (mask & 2) >> 1;
+  const int mask_z = (mask & 4) >> 2;
+
+  *near_x += mask_x;
+  *far_x -= mask_x;
+  *near_y += mask_y;
+  *far_y -= mask_y;
+  *near_z += mask_z;
+  *far_z -= mask_z;
 #else
-	if(idir.x >= 0.0f) { *near_x = 0; *far_x = 1; } else { *near_x = 1; *far_x = 0; }
-	if(idir.y >= 0.0f) { *near_y = 2; *far_y = 3; } else { *near_y = 3; *far_y = 2; }
-	if(idir.z >= 0.0f) { *near_z = 4; *far_z = 5; } else { *near_z = 5; *far_z = 4; }
+  if (idir.x >= 0.0f) {
+    *near_x = 0;
+    *far_x = 1;
+  }
+  else {
+    *near_x = 1;
+    *far_x = 0;
+  }
+  if (idir.y >= 0.0f) {
+    *near_y = 2;
+    *far_y = 3;
+  }
+  else {
+    *near_y = 3;
+    *far_y = 2;
+  }
+  if (idir.z >= 0.0f) {
+    *near_z = 4;
+    *far_z = 5;
+  }
+  else {
+    *near_z = 5;
+    *far_z = 4;
+  }
 #endif
 }
 
 /* TOOD(sergey): Investigate if using intrinsics helps for both
  * stack item swap and float comparison.
  */
-ccl_device_inline void qbvh_item_swap(QBVHStackItem *ccl_restrict a,
-                                      QBVHStackItem *ccl_restrict b)
+ccl_device_inline void qbvh_item_swap(QBVHStackItem *ccl_restrict a, QBVHStackItem *ccl_restrict b)
 {
-	QBVHStackItem tmp = *a;
-	*a = *b;
-	*b = tmp;
+  QBVHStackItem tmp = *a;
+  *a = *b;
+  *b = tmp;
 }
 
 ccl_device_inline void qbvh_stack_sort(QBVHStackItem *ccl_restrict s1,
                                        QBVHStackItem *ccl_restrict s2,
                                        QBVHStackItem *ccl_restrict s3)
 {
-	if(s2->dist < s1->dist) { qbvh_item_swap(s2, s1); }
-	if(s3->dist < s2->dist) { qbvh_item_swap(s3, s2); }
-	if(s2->dist < s1->dist) { qbvh_item_swap(s2, s1); }
+  if (s2->dist < s1->dist) {
+    qbvh_item_swap(s2, s1);
+  }
+  if (s3->dist < s2->dist) {
+    qbvh_item_swap(s3, s2);
+  }
+  if (s2->dist < s1->dist) {
+    qbvh_item_swap(s2, s1);
+  }
 }
 
 ccl_device_inline void qbvh_stack_sort(QBVHStackItem *ccl_restrict s1,
@@ -76,279 +108,283 @@ ccl_device_inline void qbvh_stack_sort(QBVHStackItem *ccl_restrict s1,
                                        QBVHStackItem *ccl_restrict s3,
                                        QBVHStackItem *ccl_restrict s4)
 {
-	if(s2->dist < s1->dist) { qbvh_item_swap(s2, s1); }
-	if(s4->dist < s3->dist) { qbvh_item_swap(s4, s3); }
-	if(s3->dist < s1->dist) { qbvh_item_swap(s3, s1); }
-	if(s4->dist < s2->dist) { qbvh_item_swap(s4, s2); }
-	if(s3->dist < s2->dist) { qbvh_item_swap(s3, s2); }
+  if (s2->dist < s1->dist) {
+    qbvh_item_swap(s2, s1);
+  }
+  if (s4->dist < s3->dist) {
+    qbvh_item_swap(s4, s3);
+  }
+  if (s3->dist < s1->dist) {
+    qbvh_item_swap(s3, s1);
+  }
+  if (s4->dist < s2->dist) {
+    qbvh_item_swap(s4, s2);
+  }
+  if (s3->dist < s2->dist) {
+    qbvh_item_swap(s3, s2);
+  }
 }
 
 /* Axis-aligned nodes intersection */
 
 //ccl_device_inline int qbvh_aligned_node_intersect(KernelGlobals *ccl_restrict kg,
 static int qbvh_aligned_node_intersect(KernelGlobals *ccl_restrict kg,
-                                                  const ssef& isect_near,
-                                                  const ssef& isect_far,
+                                       const ssef &isect_near,
+                                       const ssef &isect_far,
 #ifdef __KERNEL_AVX2__
-                                                  const sse3f& org_idir,
+                                       const sse3f &org_idir,
 #else
-                                                  const sse3f& org,
+                                       const sse3f &org,
 #endif
-                                                  const sse3f& idir,
-                                                  const int near_x,
-                                                  const int near_y,
-                                                  const int near_z,
-                                                  const int far_x,
-                                                  const int far_y,
-                                                  const int far_z,
-                                                  const int node_addr,
-                                                  ssef *ccl_restrict dist)
+                                       const sse3f &idir,
+                                       const int near_x,
+                                       const int near_y,
+                                       const int near_z,
+                                       const int far_x,
+                                       const int far_y,
+                                       const int far_z,
+                                       const int node_addr,
+                                       ssef *ccl_restrict dist)
 {
-	const int offset = node_addr + 1;
+  const int offset = node_addr + 1;
 #ifdef __KERNEL_AVX2__
-	const ssef tnear_x = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_x), idir.x, org_idir.x);
-	const ssef tnear_y = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_y), idir.y, org_idir.y);
-	const ssef tnear_z = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_z), idir.z, org_idir.z);
-	const ssef tfar_x = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_x), idir.x, org_idir.x);
-	const ssef tfar_y = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_y), idir.y, org_idir.y);
-	const ssef tfar_z = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_z), idir.z, org_idir.z);
+  const ssef tnear_x = msub(
+      kernel_tex_fetch_ssef(__bvh_nodes, offset + near_x), idir.x, org_idir.x);
+  const ssef tnear_y = msub(
+      kernel_tex_fetch_ssef(__bvh_nodes, offset + near_y), idir.y, org_idir.y);
+  const ssef tnear_z = msub(
+      kernel_tex_fetch_ssef(__bvh_nodes, offset + near_z), idir.z, org_idir.z);
+  const ssef tfar_x = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset + far_x), idir.x, org_idir.x);
+  const ssef tfar_y = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset + far_y), idir.y, org_idir.y);
+  const ssef tfar_z = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset + far_z), idir.z, org_idir.z);
 #else
-	const ssef tnear_x = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_x) - org.x) * idir.x;
-	const ssef tnear_y = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_y) - org.y) * idir.y;
-	const ssef tnear_z = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_z) - org.z) * idir.z;
-	const ssef tfar_x = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_x) - org.x) * idir.x;
-	const ssef tfar_y = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_y) - org.y) * idir.y;
-	const ssef tfar_z = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_z) - org.z) * idir.z;
+  const ssef tnear_x = (kernel_tex_fetch_ssef(__bvh_nodes, offset + near_x) - org.x) * idir.x;
+  const ssef tnear_y = (kernel_tex_fetch_ssef(__bvh_nodes, offset + near_y) - org.y) * idir.y;
+  const ssef tnear_z = (kernel_tex_fetch_ssef(__bvh_nodes, offset + near_z) - org.z) * idir.z;
+  const ssef tfar_x = (kernel_tex_fetch_ssef(__bvh_nodes, offset + far_x) - org.x) * idir.x;
+  const ssef tfar_y = (kernel_tex_fetch_ssef(__bvh_nodes, offset + far_y) - org.y) * idir.y;
+  const ssef tfar_z = (kernel_tex_fetch_ssef(__bvh_nodes, offset + far_z) - org.z) * idir.z;
 #endif
 
 #ifdef __KERNEL_SSE41__
-	const ssef tnear = maxi(maxi(tnear_x, tnear_y), maxi(tnear_z, isect_near));
-	const ssef tfar = mini(mini(tfar_x, tfar_y), mini(tfar_z, isect_far));
-	const sseb vmask = cast(tnear) > cast(tfar);
-	int mask = (int)movemask(vmask)^0xf;
+  const ssef tnear = maxi(maxi(tnear_x, tnear_y), maxi(tnear_z, isect_near));
+  const ssef tfar = mini(mini(tfar_x, tfar_y), mini(tfar_z, isect_far));
+  const sseb vmask = cast(tnear) > cast(tfar);
+  int mask = (int)movemask(vmask) ^ 0xf;
 #else
-	const ssef tnear = max4(isect_near, tnear_x, tnear_y, tnear_z);
-	const ssef tfar = min4(isect_far, tfar_x, tfar_y, tfar_z);
-	const sseb vmask = tnear <= tfar;
-	int mask = (int)movemask(vmask);
+  const ssef tnear = max4(isect_near, tnear_x, tnear_y, tnear_z);
+  const ssef tfar = min4(isect_far, tfar_x, tfar_y, tfar_z);
+  const sseb vmask = tnear <= tfar;
+  int mask = (int)movemask(vmask);
 #endif
-	*dist = tnear;
-	return mask;
+  *dist = tnear;
+  return mask;
 }
 
-ccl_device_inline int qbvh_aligned_node_intersect_robust(
-        KernelGlobals *ccl_restrict kg,
-        const ssef& isect_near,
-        const ssef& isect_far,
+ccl_device_inline int qbvh_aligned_node_intersect_robust(KernelGlobals *ccl_restrict kg,
+                                                         const ssef &isect_near,
+                                                         const ssef &isect_far,
 #ifdef __KERNEL_AVX2__
-        const sse3f& P_idir,
+                                                         const sse3f &P_idir,
 #else
-        const sse3f& P,
+                                                         const sse3f &P,
 #endif
-        const sse3f& idir,
-        const int near_x,
-        const int near_y,
-        const int near_z,
-        const int far_x,
-        const int far_y,
-        const int far_z,
-        const int node_addr,
-        const float difl,
-        ssef *ccl_restrict dist)
+                                                         const sse3f &idir,
+                                                         const int near_x,
+                                                         const int near_y,
+                                                         const int near_z,
+                                                         const int far_x,
+                                                         const int far_y,
+                                                         const int far_z,
+                                                         const int node_addr,
+                                                         const float difl,
+                                                         ssef *ccl_restrict dist)
 {
-	const int offset = node_addr + 1;
+  const int offset = node_addr + 1;
 #ifdef __KERNEL_AVX2__
-	const ssef tnear_x = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_x), idir.x, P_idir.x);
-	const ssef tnear_y = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_y), idir.y, P_idir.y);
-	const ssef tnear_z = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_z), idir.z, P_idir.z);
-	const ssef tfar_x = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_x), idir.x, P_idir.x);
-	const ssef tfar_y = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_y), idir.y, P_idir.y);
-	const ssef tfar_z = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_z), idir.z, P_idir.z);
+  const ssef tnear_x = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset + near_x), idir.x, P_idir.x);
+  const ssef tnear_y = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset + near_y), idir.y, P_idir.y);
+  const ssef tnear_z = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset + near_z), idir.z, P_idir.z);
+  const ssef tfar_x = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset + far_x), idir.x, P_idir.x);
+  const ssef tfar_y = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset + far_y), idir.y, P_idir.y);
+  const ssef tfar_z = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset + far_z), idir.z, P_idir.z);
 #else
-	const ssef tnear_x = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_x) - P.x) * idir.x;
-	const ssef tnear_y = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_y) - P.y) * idir.y;
-	const ssef tnear_z = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_z) - P.z) * idir.z;
-	const ssef tfar_x = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_x) - P.x) * idir.x;
-	const ssef tfar_y = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_y) - P.y) * idir.y;
-	const ssef tfar_z = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_z) - P.z) * idir.z;
+  const ssef tnear_x = (kernel_tex_fetch_ssef(__bvh_nodes, offset + near_x) - P.x) * idir.x;
+  const ssef tnear_y = (kernel_tex_fetch_ssef(__bvh_nodes, offset + near_y) - P.y) * idir.y;
+  const ssef tnear_z = (kernel_tex_fetch_ssef(__bvh_nodes, offset + near_z) - P.z) * idir.z;
+  const ssef tfar_x = (kernel_tex_fetch_ssef(__bvh_nodes, offset + far_x) - P.x) * idir.x;
+  const ssef tfar_y = (kernel_tex_fetch_ssef(__bvh_nodes, offset + far_y) - P.y) * idir.y;
+  const ssef tfar_z = (kernel_tex_fetch_ssef(__bvh_nodes, offset + far_z) - P.z) * idir.z;
 #endif
 
-	const float round_down = 1.0f - difl;
-	const float round_up = 1.0f + difl;
-	const ssef tnear = max4(isect_near, tnear_x, tnear_y, tnear_z);
-	const ssef tfar = min4(isect_far, tfar_x, tfar_y, tfar_z);
-	const sseb vmask = round_down*tnear <= round_up*tfar;
-	*dist = tnear;
-	return (int)movemask(vmask);
+  const float round_down = 1.0f - difl;
+  const float round_up = 1.0f + difl;
+  const ssef tnear = max4(isect_near, tnear_x, tnear_y, tnear_z);
+  const ssef tfar = min4(isect_far, tfar_x, tfar_y, tfar_z);
+  const sseb vmask = round_down * tnear <= round_up * tfar;
+  *dist = tnear;
+  return (int)movemask(vmask);
 }
 
 /* Unaligned nodes intersection */
 
-ccl_device_inline int qbvh_unaligned_node_intersect(
-        KernelGlobals *ccl_restrict kg,
-        const ssef& isect_near,
-        const ssef& isect_far,
+ccl_device_inline int qbvh_unaligned_node_intersect(KernelGlobals *ccl_restrict kg,
+                                                    const ssef &isect_near,
+                                                    const ssef &isect_far,
 #ifdef __KERNEL_AVX2__
-        const sse3f& org_idir,
+                                                    const sse3f &org_idir,
 #endif
-        const sse3f& org,
-        const sse3f& dir,
-        const sse3f& idir,
-        const int near_x,
-        const int near_y,
-        const int near_z,
-        const int far_x,
-        const int far_y,
-        const int far_z,
-        const int node_addr,
-        ssef *ccl_restrict dist)
+                                                    const sse3f &org,
+                                                    const sse3f &dir,
+                                                    const sse3f &idir,
+                                                    const int near_x,
+                                                    const int near_y,
+                                                    const int near_z,
+                                                    const int far_x,
+                                                    const int far_y,
+                                                    const int far_z,
+                                                    const int node_addr,
+                                                    ssef *ccl_restrict dist)
 {
-	const int offset = node_addr;
-	const ssef tfm_x_x = kernel_tex_fetch_ssef(__bvh_nodes, offset+1);
-	const ssef tfm_x_y = kernel_tex_fetch_ssef(__bvh_nodes, offset+2);
-	const ssef tfm_x_z = kernel_tex_fetch_ssef(__bvh_nodes, offset+3);
+  const int offset = node_addr;
+  const ssef tfm_x_x = kernel_tex_fetch_ssef(__bvh_nodes, offset + 1);
+  const ssef tfm_x_y = kernel_tex_fetch_ssef(__bvh_nodes, offset + 2);
+  const ssef tfm_x_z = kernel_tex_fetch_ssef(__bvh_nodes, offset + 3);
 
-	const ssef tfm_y_x = kernel_tex_fetch_ssef(__bvh_nodes, offset+4);
-	const ssef tfm_y_y = kernel_tex_fetch_ssef(__bvh_nodes, offset+5);
-	const ssef tfm_y_z = kernel_tex_fetch_ssef(__bvh_nodes, offset+6);
+  const ssef tfm_y_x = kernel_tex_fetch_ssef(__bvh_nodes, offset + 4);
+  const ssef tfm_y_y = kernel_tex_fetch_ssef(__bvh_nodes, offset + 5);
+  const ssef tfm_y_z = kernel_tex_fetch_ssef(__bvh_nodes, offset + 6);
 
-	const ssef tfm_z_x = kernel_tex_fetch_ssef(__bvh_nodes, offset+7);
-	const ssef tfm_z_y = kernel_tex_fetch_ssef(__bvh_nodes, offset+8);
-	const ssef tfm_z_z = kernel_tex_fetch_ssef(__bvh_nodes, offset+9);
+  const ssef tfm_z_x = kernel_tex_fetch_ssef(__bvh_nodes, offset + 7);
+  const ssef tfm_z_y = kernel_tex_fetch_ssef(__bvh_nodes, offset + 8);
+  const ssef tfm_z_z = kernel_tex_fetch_ssef(__bvh_nodes, offset + 9);
 
-	const ssef tfm_t_x = kernel_tex_fetch_ssef(__bvh_nodes, offset+10);
-	const ssef tfm_t_y = kernel_tex_fetch_ssef(__bvh_nodes, offset+11);
-	const ssef tfm_t_z = kernel_tex_fetch_ssef(__bvh_nodes, offset+12);
+  const ssef tfm_t_x = kernel_tex_fetch_ssef(__bvh_nodes, offset + 10);
+  const ssef tfm_t_y = kernel_tex_fetch_ssef(__bvh_nodes, offset + 11);
+  const ssef tfm_t_z = kernel_tex_fetch_ssef(__bvh_nodes, offset + 12);
 
-	const ssef aligned_dir_x = dir.x*tfm_x_x + dir.y*tfm_x_y + dir.z*tfm_x_z,
-	           aligned_dir_y = dir.x*tfm_y_x + dir.y*tfm_y_y + dir.z*tfm_y_z,
-	           aligned_dir_z = dir.x*tfm_z_x + dir.y*tfm_z_y + dir.z*tfm_z_z;
+  const ssef aligned_dir_x = dir.x * tfm_x_x + dir.y * tfm_x_y + dir.z * tfm_x_z,
+             aligned_dir_y = dir.x * tfm_y_x + dir.y * tfm_y_y + dir.z * tfm_y_z,
+             aligned_dir_z = dir.x * tfm_z_x + dir.y * tfm_z_y + dir.z * tfm_z_z;
 
-	const ssef aligned_P_x = org.x*tfm_x_x + org.y*tfm_x_y + org.z*tfm_x_z + tfm_t_x,
-	           aligned_P_y = org.x*tfm_y_x + org.y*tfm_y_y + org.z*tfm_y_z + tfm_t_y,
-	           aligned_P_z = org.x*tfm_z_x + org.y*tfm_z_y + org.z*tfm_z_z + tfm_t_z;
+  const ssef aligned_P_x = org.x * tfm_x_x + org.y * tfm_x_y + org.z * tfm_x_z + tfm_t_x,
+             aligned_P_y = org.x * tfm_y_x + org.y * tfm_y_y + org.z * tfm_y_z + tfm_t_y,
+             aligned_P_z = org.x * tfm_z_x + org.y * tfm_z_y + org.z * tfm_z_z + tfm_t_z;
 
-	const ssef neg_one(-1.0f, -1.0f, -1.0f, -1.0f);
-	const ssef nrdir_x = neg_one / aligned_dir_x,
-	           nrdir_y = neg_one / aligned_dir_y,
-	           nrdir_z = neg_one / aligned_dir_z;
+  const ssef neg_one(-1.0f, -1.0f, -1.0f, -1.0f);
+  const ssef nrdir_x = neg_one / aligned_dir_x, nrdir_y = neg_one / aligned_dir_y,
+             nrdir_z = neg_one / aligned_dir_z;
 
-	const ssef tlower_x = aligned_P_x * nrdir_x,
-	           tlower_y = aligned_P_y * nrdir_y,
-	           tlower_z = aligned_P_z * nrdir_z;
+  const ssef tlower_x = aligned_P_x * nrdir_x, tlower_y = aligned_P_y * nrdir_y,
+             tlower_z = aligned_P_z * nrdir_z;
 
-	const ssef tupper_x = tlower_x - nrdir_x,
-	           tupper_y = tlower_y - nrdir_y,
-	           tupper_z = tlower_z - nrdir_z;
+  const ssef tupper_x = tlower_x - nrdir_x, tupper_y = tlower_y - nrdir_y,
+             tupper_z = tlower_z - nrdir_z;
 
 #ifdef __KERNEL_SSE41__
-	const ssef tnear_x = mini(tlower_x, tupper_x);
-	const ssef tnear_y = mini(tlower_y, tupper_y);
-	const ssef tnear_z = mini(tlower_z, tupper_z);
-	const ssef tfar_x = maxi(tlower_x, tupper_x);
-	const ssef tfar_y = maxi(tlower_y, tupper_y);
-	const ssef tfar_z = maxi(tlower_z, tupper_z);
-	const ssef tnear = max4(isect_near, tnear_x, tnear_y, tnear_z);
-	const ssef tfar = min4(isect_far, tfar_x, tfar_y, tfar_z);
-	const sseb vmask = tnear <= tfar;
-	*dist = tnear;
-	return movemask(vmask);
+  const ssef tnear_x = mini(tlower_x, tupper_x);
+  const ssef tnear_y = mini(tlower_y, tupper_y);
+  const ssef tnear_z = mini(tlower_z, tupper_z);
+  const ssef tfar_x = maxi(tlower_x, tupper_x);
+  const ssef tfar_y = maxi(tlower_y, tupper_y);
+  const ssef tfar_z = maxi(tlower_z, tupper_z);
+  const ssef tnear = max4(isect_near, tnear_x, tnear_y, tnear_z);
+  const ssef tfar = min4(isect_far, tfar_x, tfar_y, tfar_z);
+  const sseb vmask = tnear <= tfar;
+  *dist = tnear;
+  return movemask(vmask);
 #else
-	const ssef tnear_x = min(tlower_x, tupper_x);
-	const ssef tnear_y = min(tlower_y, tupper_y);
-	const ssef tnear_z = min(tlower_z, tupper_z);
-	const ssef tfar_x = max(tlower_x, tupper_x);
-	const ssef tfar_y = max(tlower_y, tupper_y);
-	const ssef tfar_z = max(tlower_z, tupper_z);
-	const ssef tnear = max4(isect_near, tnear_x, tnear_y, tnear_z);
-	const ssef tfar = min4(isect_far, tfar_x, tfar_y, tfar_z);
-	const sseb vmask = tnear <= tfar;
-	*dist = tnear;
-	return movemask(vmask);
+  const ssef tnear_x = min(tlower_x, tupper_x);
+  const ssef tnear_y = min(tlower_y, tupper_y);
+  const ssef tnear_z = min(tlower_z, tupper_z);
+  const ssef tfar_x = max(tlower_x, tupper_x);
+  const ssef tfar_y = max(tlower_y, tupper_y);
+  const ssef tfar_z = max(tlower_z, tupper_z);
+  const ssef tnear = max4(isect_near, tnear_x, tnear_y, tnear_z);
+  const ssef tfar = min4(isect_far, tfar_x, tfar_y, tfar_z);
+  const sseb vmask = tnear <= tfar;
+  *dist = tnear;
+  return movemask(vmask);
 #endif
 }
 
-ccl_device_inline int qbvh_unaligned_node_intersect_robust(
-        KernelGlobals *ccl_restrict kg,
-        const ssef& isect_near,
-        const ssef& isect_far,
+ccl_device_inline int qbvh_unaligned_node_intersect_robust(KernelGlobals *ccl_restrict kg,
+                                                           const ssef &isect_near,
+                                                           const ssef &isect_far,
 #ifdef __KERNEL_AVX2__
-        const sse3f& P_idir,
+                                                           const sse3f &P_idir,
 #endif
-        const sse3f& P,
-        const sse3f& dir,
-        const sse3f& idir,
-        const int near_x,
-        const int near_y,
-        const int near_z,
-        const int far_x,
-        const int far_y,
-        const int far_z,
-        const int node_addr,
-        const float difl,
-        ssef *ccl_restrict dist)
+                                                           const sse3f &P,
+                                                           const sse3f &dir,
+                                                           const sse3f &idir,
+                                                           const int near_x,
+                                                           const int near_y,
+                                                           const int near_z,
+                                                           const int far_x,
+                                                           const int far_y,
+                                                           const int far_z,
+                                                           const int node_addr,
+                                                           const float difl,
+                                                           ssef *ccl_restrict dist)
 {
-	const int offset = node_addr;
-	const ssef tfm_x_x = kernel_tex_fetch_ssef(__bvh_nodes, offset+1);
-	const ssef tfm_x_y = kernel_tex_fetch_ssef(__bvh_nodes, offset+2);
-	const ssef tfm_x_z = kernel_tex_fetch_ssef(__bvh_nodes, offset+3);
+  const int offset = node_addr;
+  const ssef tfm_x_x = kernel_tex_fetch_ssef(__bvh_nodes, offset + 1);
+  const ssef tfm_x_y = kernel_tex_fetch_ssef(__bvh_nodes, offset + 2);
+  const ssef tfm_x_z = kernel_tex_fetch_ssef(__bvh_nodes, offset + 3);
 
-	const ssef tfm_y_x = kernel_tex_fetch_ssef(__bvh_nodes, offset+4);
-	const ssef tfm_y_y = kernel_tex_fetch_ssef(__bvh_nodes, offset+5);
-	const ssef tfm_y_z = kernel_tex_fetch_ssef(__bvh_nodes, offset+6);
+  const ssef tfm_y_x = kernel_tex_fetch_ssef(__bvh_nodes, offset + 4);
+  const ssef tfm_y_y = kernel_tex_fetch_ssef(__bvh_nodes, offset + 5);
+  const ssef tfm_y_z = kernel_tex_fetch_ssef(__bvh_nodes, offset + 6);
 
-	const ssef tfm_z_x = kernel_tex_fetch_ssef(__bvh_nodes, offset+7);
-	const ssef tfm_z_y = kernel_tex_fetch_ssef(__bvh_nodes, offset+8);
-	const ssef tfm_z_z = kernel_tex_fetch_ssef(__bvh_nodes, offset+9);
+  const ssef tfm_z_x = kernel_tex_fetch_ssef(__bvh_nodes, offset + 7);
+  const ssef tfm_z_y = kernel_tex_fetch_ssef(__bvh_nodes, offset + 8);
+  const ssef tfm_z_z = kernel_tex_fetch_ssef(__bvh_nodes, offset + 9);
 
-	const ssef tfm_t_x = kernel_tex_fetch_ssef(__bvh_nodes, offset+10);
-	const ssef tfm_t_y = kernel_tex_fetch_ssef(__bvh_nodes, offset+11);
-	const ssef tfm_t_z = kernel_tex_fetch_ssef(__bvh_nodes, offset+12);
+  const ssef tfm_t_x = kernel_tex_fetch_ssef(__bvh_nodes, offset + 10);
+  const ssef tfm_t_y = kernel_tex_fetch_ssef(__bvh_nodes, offset + 11);
+  const ssef tfm_t_z = kernel_tex_fetch_ssef(__bvh_nodes, offset + 12);
 
-	const ssef aligned_dir_x = dir.x*tfm_x_x + dir.y*tfm_x_y + dir.z*tfm_x_z,
-	           aligned_dir_y = dir.x*tfm_y_x + dir.y*tfm_y_y + dir.z*tfm_y_z,
-	           aligned_dir_z = dir.x*tfm_z_x + dir.y*tfm_z_y + dir.z*tfm_z_z;
+  const ssef aligned_dir_x = dir.x * tfm_x_x + dir.y * tfm_x_y + dir.z * tfm_x_z,
+             aligned_dir_y = dir.x * tfm_y_x + dir.y * tfm_y_y + dir.z * tfm_y_z,
+             aligned_dir_z = dir.x * tfm_z_x + dir.y * tfm_z_y + dir.z * tfm_z_z;
 
-	const ssef aligned_P_x = P.x*tfm_x_x + P.y*tfm_x_y + P.z*tfm_x_z + tfm_t_x,
-	           aligned_P_y = P.x*tfm_y_x + P.y*tfm_y_y + P.z*tfm_y_z + tfm_t_y,
-	           aligned_P_z = P.x*tfm_z_x + P.y*tfm_z_y + P.z*tfm_z_z + tfm_t_z;
+  const ssef aligned_P_x = P.x * tfm_x_x + P.y * tfm_x_y + P.z * tfm_x_z + tfm_t_x,
+             aligned_P_y = P.x * tfm_y_x + P.y * tfm_y_y + P.z * tfm_y_z + tfm_t_y,
+             aligned_P_z = P.x * tfm_z_x + P.y * tfm_z_y + P.z * tfm_z_z + tfm_t_z;
 
-	const ssef neg_one(-1.0f, -1.0f, -1.0f, -1.0f);
-	const ssef nrdir_x = neg_one / aligned_dir_x,
-	           nrdir_y = neg_one / aligned_dir_y,
-	           nrdir_z = neg_one / aligned_dir_z;
+  const ssef neg_one(-1.0f, -1.0f, -1.0f, -1.0f);
+  const ssef nrdir_x = neg_one / aligned_dir_x, nrdir_y = neg_one / aligned_dir_y,
+             nrdir_z = neg_one / aligned_dir_z;
 
-	const ssef tlower_x = aligned_P_x * nrdir_x,
-	           tlower_y = aligned_P_y * nrdir_y,
-	           tlower_z = aligned_P_z * nrdir_z;
+  const ssef tlower_x = aligned_P_x * nrdir_x, tlower_y = aligned_P_y * nrdir_y,
+             tlower_z = aligned_P_z * nrdir_z;
 
-	const ssef tupper_x = tlower_x - nrdir_x,
-	           tupper_y = tlower_y - nrdir_y,
-	           tupper_z = tlower_z - nrdir_z;
+  const ssef tupper_x = tlower_x - nrdir_x, tupper_y = tlower_y - nrdir_y,
+             tupper_z = tlower_z - nrdir_z;
 
-	const float round_down = 1.0f - difl;
-	const float round_up = 1.0f + difl;
+  const float round_down = 1.0f - difl;
+  const float round_up = 1.0f + difl;
 
 #ifdef __KERNEL_SSE41__
-	const ssef tnear_x = mini(tlower_x, tupper_x);
-	const ssef tnear_y = mini(tlower_y, tupper_y);
-	const ssef tnear_z = mini(tlower_z, tupper_z);
-	const ssef tfar_x = maxi(tlower_x, tupper_x);
-	const ssef tfar_y = maxi(tlower_y, tupper_y);
-	const ssef tfar_z = maxi(tlower_z, tupper_z);
+  const ssef tnear_x = mini(tlower_x, tupper_x);
+  const ssef tnear_y = mini(tlower_y, tupper_y);
+  const ssef tnear_z = mini(tlower_z, tupper_z);
+  const ssef tfar_x = maxi(tlower_x, tupper_x);
+  const ssef tfar_y = maxi(tlower_y, tupper_y);
+  const ssef tfar_z = maxi(tlower_z, tupper_z);
 #else
-	const ssef tnear_x = min(tlower_x, tupper_x);
-	const ssef tnear_y = min(tlower_y, tupper_y);
-	const ssef tnear_z = min(tlower_z, tupper_z);
-	const ssef tfar_x = max(tlower_x, tupper_x);
-	const ssef tfar_y = max(tlower_y, tupper_y);
-	const ssef tfar_z = max(tlower_z, tupper_z);
+  const ssef tnear_x = min(tlower_x, tupper_x);
+  const ssef tnear_y = min(tlower_y, tupper_y);
+  const ssef tnear_z = min(tlower_z, tupper_z);
+  const ssef tfar_x = max(tlower_x, tupper_x);
+  const ssef tfar_y = max(tlower_y, tupper_y);
+  const ssef tfar_z = max(tlower_z, tupper_z);
 #endif
-	const ssef tnear = max4(isect_near, tnear_x, tnear_y, tnear_z);
-	const ssef tfar = min4(isect_far, tfar_x, tfar_y, tfar_z);
-	const sseb vmask = round_down*tnear <= round_up*tfar;
-	*dist = tnear;
-	return movemask(vmask);
+  const ssef tnear = max4(isect_near, tnear_x, tnear_y, tnear_z);
+  const ssef tfar = min4(isect_far, tfar_x, tfar_y, tfar_z);
+  const sseb vmask = round_down * tnear <= round_up * tfar;
+  *dist = tnear;
+  return movemask(vmask);
 }
 
 /* Intersectors wrappers.
@@ -356,111 +392,125 @@ ccl_device_inline int qbvh_unaligned_node_intersect_robust(
  * They'll check node type and call appropriate intersection code.
  */
 
-ccl_device_inline int qbvh_node_intersect(
-        KernelGlobals *ccl_restrict kg,
-        const ssef& isect_near,
-        const ssef& isect_far,
+ccl_device_inline int qbvh_node_intersect(KernelGlobals *ccl_restrict kg,
+                                          const ssef &isect_near,
+                                          const ssef &isect_far,
 #ifdef __KERNEL_AVX2__
-        const sse3f& org_idir,
+                                          const sse3f &org_idir,
 #endif
-        const sse3f& org,
-        const sse3f& dir,
-        const sse3f& idir,
-        const int near_x,
-        const int near_y,
-        const int near_z,
-        const int far_x,
-        const int far_y,
-        const int far_z,
-        const int node_addr,
-        ssef *ccl_restrict dist)
+                                          const sse3f &org,
+                                          const sse3f &dir,
+                                          const sse3f &idir,
+                                          const int near_x,
+                                          const int near_y,
+                                          const int near_z,
+                                          const int far_x,
+                                          const int far_y,
+                                          const int far_z,
+                                          const int node_addr,
+                                          ssef *ccl_restrict dist)
 {
-	const int offset = node_addr;
-	const float4 node = kernel_tex_fetch(__bvh_nodes, offset);
-	if(__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
-		return qbvh_unaligned_node_intersect(kg,
-		                                     isect_near,
-		                                     isect_far,
+  const int offset = node_addr;
+  const float4 node = kernel_tex_fetch(__bvh_nodes, offset);
+  if (__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
+    return qbvh_unaligned_node_intersect(kg,
+                                         isect_near,
+                                         isect_far,
 #ifdef __KERNEL_AVX2__
-		                                     org_idir,
+                                         org_idir,
 #endif
-		                                     org,
-		                                     dir,
-		                                     idir,
-		                                     near_x, near_y, near_z,
-		                                     far_x, far_y, far_z,
-		                                     node_addr,
-		                                     dist);
-	}
-	else {
-		return qbvh_aligned_node_intersect(kg,
-		                                   isect_near,
-		                                   isect_far,
+                                         org,
+                                         dir,
+                                         idir,
+                                         near_x,
+                                         near_y,
+                                         near_z,
+                                         far_x,
+                                         far_y,
+                                         far_z,
+                                         node_addr,
+                                         dist);
+  }
+  else {
+    return qbvh_aligned_node_intersect(kg,
+                                       isect_near,
+                                       isect_far,
 #ifdef __KERNEL_AVX2__
-		                                   org_idir,
+                                       org_idir,
 #else
-		                                   org,
+                                       org,
 #endif
-		                                   idir,
-		                                   near_x, near_y, near_z,
-		                                   far_x, far_y, far_z,
-		                                   node_addr,
-		                                   dist);
-	}
+                                       idir,
+                                       near_x,
+                                       near_y,
+                                       near_z,
+                                       far_x,
+                                       far_y,
+                                       far_z,
+                                       node_addr,
+                                       dist);
+  }
 }
 
-ccl_device_inline int qbvh_node_intersect_robust(
-        KernelGlobals *ccl_restrict kg,
-        const ssef& isect_near,
-        const ssef& isect_far,
+ccl_device_inline int qbvh_node_intersect_robust(KernelGlobals *ccl_restrict kg,
+                                                 const ssef &isect_near,
+                                                 const ssef &isect_far,
 #ifdef __KERNEL_AVX2__
-        const sse3f& P_idir,
+                                                 const sse3f &P_idir,
 #endif
-        const sse3f& P,
-        const sse3f& dir,
-        const sse3f& idir,
-        const int near_x,
-        const int near_y,
-        const int near_z,
-        const int far_x,
-        const int far_y,
-        const int far_z,
-        const int node_addr,
-        const float difl,
-        ssef *ccl_restrict dist)
+                                                 const sse3f &P,
+                                                 const sse3f &dir,
+                                                 const sse3f &idir,
+                                                 const int near_x,
+                                                 const int near_y,
+                                                 const int near_z,
+                                                 const int far_x,
+                                                 const int far_y,
+                                                 const int far_z,
+                                                 const int node_addr,
+                                                 const float difl,
+                                                 ssef *ccl_restrict dist)
 {
-	const int offset = node_addr;
-	const float4 node = kernel_tex_fetch(__bvh_nodes, offset);
-	if(__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
-		return qbvh_unaligned_node_intersect_robust(kg,
-		                                            isect_near,
-		                                            isect_far,
+  const int offset = node_addr;
+  const float4 node = kernel_tex_fetch(__bvh_nodes, offset);
+  if (__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
+    return qbvh_unaligned_node_intersect_robust(kg,
+                                                isect_near,
+                                                isect_far,
 #ifdef __KERNEL_AVX2__
-		                                            P_idir,
+                                                P_idir,
 #endif
-		                                            P,
-		                                            dir,
-		                                            idir,
-		                                            near_x, near_y, near_z,
-		                                            far_x, far_y, far_z,
-		                                            node_addr,
-		                                            difl,
-		                                            dist);
-	}
-	else {
-		return qbvh_aligned_node_intersect_robust(kg,
-		                                          isect_near,
-		                                          isect_far,
+                                                P,
+                                                dir,
+                                                idir,
+                                                near_x,
+                                                near_y,
+                                                near_z,
+                                                far_x,
+                                                far_y,
+                                                far_z,
+                                                node_addr,
+                                                difl,
+                                                dist);
+  }
+  else {
+    return qbvh_aligned_node_intersect_robust(kg,
+                                              isect_near,
+                                              isect_far,
 #ifdef __KERNEL_AVX2__
-		                                          P_idir,
+                                              P_idir,
 #else
-		                                          P,
+                                              P,
 #endif
-		                                          idir,
-		                                          near_x, near_y, near_z,
-		                                          far_x, far_y, far_z,
-		                                          node_addr,
-		                                          difl,
-		                                          dist);
-	}
+                                              idir,
+                                              near_x,
+                                              near_y,
+                                              near_z,
+                                              far_x,
+                                              far_y,
+                                              far_z,
+                                              node_addr,
+                                              difl,
+                                              dist);
+  }
 }
diff --git a/intern/cycles/kernel/bvh/qbvh_shadow_all.h b/intern/cycles/kernel/bvh/qbvh_shadow_all.h
index dd977fb9e74..49e607bfbd0 100644
--- a/intern/cycles/kernel/bvh/qbvh_shadow_all.h
+++ b/intern/cycles/kernel/bvh/qbvh_shadow_all.h
@@ -36,439 +36,424 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
                                              const uint max_hits,
                                              uint *num_hits)
 {
-	/* TODO(sergey):
-	*  - Test if pushing distance on the stack helps.
-	 * - Likely and unlikely for if() statements.
-	 * - Test restrict attribute for pointers.
-	 */
-
-	/* Traversal stack in CUDA thread-local memory. */
-	QBVHStackItem traversal_stack[BVH_QSTACK_SIZE];
-	traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
-
-	/* Traversal variables in registers. */
-	int stack_ptr = 0;
-	int node_addr = kernel_data.bvh.root;
-
-	/* Ray parameters in registers. */
-	const float tmax = ray->t;
-	float3 P = ray->P;
-	float3 dir = bvh_clamp_direction(ray->D);
-	float3 idir = bvh_inverse_direction(dir);
-	int object = OBJECT_NONE;
-	float isect_t = tmax;
+  /* TODO(sergey):
+  *  - Test if pushing distance on the stack helps.
+   * - Likely and unlikely for if() statements.
+   * - Test restrict attribute for pointers.
+   */
+
+  /* Traversal stack in CUDA thread-local memory. */
+  QBVHStackItem traversal_stack[BVH_QSTACK_SIZE];
+  traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
+
+  /* Traversal variables in registers. */
+  int stack_ptr = 0;
+  int node_addr = kernel_data.bvh.root;
+
+  /* Ray parameters in registers. */
+  const float tmax = ray->t;
+  float3 P = ray->P;
+  float3 dir = bvh_clamp_direction(ray->D);
+  float3 idir = bvh_inverse_direction(dir);
+  int object = OBJECT_NONE;
+  float isect_t = tmax;
 
 #if BVH_FEATURE(BVH_MOTION)
-	Transform ob_itfm;
+  Transform ob_itfm;
 #endif
 
-	*num_hits = 0;
-	isect_array->t = tmax;
-
+  *num_hits = 0;
+  isect_array->t = tmax;
 
 #if BVH_FEATURE(BVH_INSTANCING)
-	int num_hits_in_instance = 0;
+  int num_hits_in_instance = 0;
 #endif
 
-	ssef tnear(0.0f), tfar(isect_t);
+  ssef tnear(0.0f), tfar(isect_t);
 #if BVH_FEATURE(BVH_HAIR)
-	sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z));
+  sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z));
 #endif
-	sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z));
+  sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z));
 
 #ifdef __KERNEL_AVX2__
-	float3 P_idir = P*idir;
-	sse3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
+  float3 P_idir = P * idir;
+  sse3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
 #endif
 #if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
-	sse3f org4(ssef(P.x), ssef(P.y), ssef(P.z));
+  sse3f org4(ssef(P.x), ssef(P.y), ssef(P.z));
 #endif
 
-	/* Offsets to select the side that becomes the lower or upper bound. */
-	int near_x, near_y, near_z;
-	int far_x, far_y, far_z;
-	qbvh_near_far_idx_calc(idir,
-	                       &near_x, &near_y, &near_z,
-	                       &far_x, &far_y, &far_z);
-
-	/* Traversal loop. */
-	do {
-		do {
-			/* Traverse internal nodes. */
-			while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
-				float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
-				(void) inodes;
-
-				if(false
+  /* Offsets to select the side that becomes the lower or upper bound. */
+  int near_x, near_y, near_z;
+  int far_x, far_y, far_z;
+  qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
+
+  /* Traversal loop. */
+  do {
+    do {
+      /* Traverse internal nodes. */
+      while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
+        float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
+        (void)inodes;
+
+        if (false
 #ifdef __VISIBILITY_FLAG__
-				   || ((__float_as_uint(inodes.x) & visibility) == 0)
+            || ((__float_as_uint(inodes.x) & visibility) == 0)
 #endif
 #if BVH_FEATURE(BVH_MOTION)
-				   || UNLIKELY(ray->time < inodes.y)
-				   || UNLIKELY(ray->time > inodes.z)
+            || UNLIKELY(ray->time < inodes.y) || UNLIKELY(ray->time > inodes.z)
 #endif
-				) {
-					/* Pop. */
-					node_addr = traversal_stack[stack_ptr].addr;
-					--stack_ptr;
-					continue;
-				}
-
-				ssef dist;
-				int child_mask = NODE_INTERSECT(kg,
-				                                tnear,
-				                                tfar,
+        ) {
+          /* Pop. */
+          node_addr = traversal_stack[stack_ptr].addr;
+          --stack_ptr;
+          continue;
+        }
+
+        ssef dist;
+        int child_mask = NODE_INTERSECT(kg,
+                                        tnear,
+                                        tfar,
 #ifdef __KERNEL_AVX2__
-				                                P_idir4,
+                                        P_idir4,
 #endif
 #if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
-				                                org4,
+                                        org4,
 #endif
 #if BVH_FEATURE(BVH_HAIR)
-				                                dir4,
+                                        dir4,
 #endif
-				                                idir4,
-				                                near_x, near_y, near_z,
-				                                far_x, far_y, far_z,
-				                                node_addr,
-				                                &dist);
-
-				if(child_mask != 0) {
-					float4 cnodes;
+                                        idir4,
+                                        near_x,
+                                        near_y,
+                                        near_z,
+                                        far_x,
+                                        far_y,
+                                        far_z,
+                                        node_addr,
+                                        &dist);
+
+        if (child_mask != 0) {
+          float4 cnodes;
 #if BVH_FEATURE(BVH_HAIR)
-					if(__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
-						cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+13);
-					}
-					else
+          if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
+            cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 13);
+          }
+          else
 #endif
-					{
-						cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+7);
-					}
-
-					/* One child is hit, continue with that child. */
-					int r = __bscf(child_mask);
-					if(child_mask == 0) {
-						node_addr = __float_as_int(cnodes[r]);
-						continue;
-					}
-
-					/* Two children are hit, push far child, and continue with
-					 * closer child.
-					 */
-					int c0 = __float_as_int(cnodes[r]);
-					float d0 = ((float*)&dist)[r];
-					r = __bscf(child_mask);
-					int c1 = __float_as_int(cnodes[r]);
-					float d1 = ((float*)&dist)[r];
-					if(child_mask == 0) {
-						if(d1 < d0) {
-							node_addr = c1;
-							++stack_ptr;
-							kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
-							traversal_stack[stack_ptr].addr = c0;
-							traversal_stack[stack_ptr].dist = d0;
-							continue;
-						}
-						else {
-							node_addr = c0;
-							++stack_ptr;
-							kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
-							traversal_stack[stack_ptr].addr = c1;
-							traversal_stack[stack_ptr].dist = d1;
-							continue;
-						}
-					}
-
-					/* Here starts the slow path for 3 or 4 hit children. We push
-					 * all nodes onto the stack to sort them there.
-					 */
-					++stack_ptr;
-					kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
-					traversal_stack[stack_ptr].addr = c1;
-					traversal_stack[stack_ptr].dist = d1;
-					++stack_ptr;
-					kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
-					traversal_stack[stack_ptr].addr = c0;
-					traversal_stack[stack_ptr].dist = d0;
-
-					/* Three children are hit, push all onto stack and sort 3
-					 * stack items, continue with closest child.
-					 */
-					r = __bscf(child_mask);
-					int c2 = __float_as_int(cnodes[r]);
-					float d2 = ((float*)&dist)[r];
-					if(child_mask == 0) {
-						++stack_ptr;
-						kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
-						traversal_stack[stack_ptr].addr = c2;
-						traversal_stack[stack_ptr].dist = d2;
-						qbvh_stack_sort(&traversal_stack[stack_ptr],
-						                &traversal_stack[stack_ptr - 1],
-						                &traversal_stack[stack_ptr - 2]);
-						node_addr = traversal_stack[stack_ptr].addr;
-						--stack_ptr;
-						continue;
-					}
-
-					/* Four children are hit, push all onto stack and sort 4
-					 * stack items, continue with closest child.
-					 */
-					r = __bscf(child_mask);
-					int c3 = __float_as_int(cnodes[r]);
-					float d3 = ((float*)&dist)[r];
-					++stack_ptr;
-					kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
-					traversal_stack[stack_ptr].addr = c3;
-					traversal_stack[stack_ptr].dist = d3;
-					++stack_ptr;
-					kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
-					traversal_stack[stack_ptr].addr = c2;
-					traversal_stack[stack_ptr].dist = d2;
-					qbvh_stack_sort(&traversal_stack[stack_ptr],
-					                &traversal_stack[stack_ptr - 1],
-					                &traversal_stack[stack_ptr - 2],
-					                &traversal_stack[stack_ptr - 3]);
-				}
-
-				node_addr = traversal_stack[stack_ptr].addr;
-				--stack_ptr;
-			}
-
-			/* If node is leaf, fetch triangle list. */
-			if(node_addr < 0) {
-				float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1));
+          {
+            cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 7);
+          }
+
+          /* One child is hit, continue with that child. */
+          int r = __bscf(child_mask);
+          if (child_mask == 0) {
+            node_addr = __float_as_int(cnodes[r]);
+            continue;
+          }
+
+          /* Two children are hit, push far child, and continue with
+           * closer child.
+           */
+          int c0 = __float_as_int(cnodes[r]);
+          float d0 = ((float *)&dist)[r];
+          r = __bscf(child_mask);
+          int c1 = __float_as_int(cnodes[r]);
+          float d1 = ((float *)&dist)[r];
+          if (child_mask == 0) {
+            if (d1 < d0) {
+              node_addr = c1;
+              ++stack_ptr;
+              kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+              traversal_stack[stack_ptr].addr = c0;
+              traversal_stack[stack_ptr].dist = d0;
+              continue;
+            }
+            else {
+              node_addr = c0;
+              ++stack_ptr;
+              kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+              traversal_stack[stack_ptr].addr = c1;
+              traversal_stack[stack_ptr].dist = d1;
+              continue;
+            }
+          }
+
+          /* Here starts the slow path for 3 or 4 hit children. We push
+           * all nodes onto the stack to sort them there.
+           */
+          ++stack_ptr;
+          kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+          traversal_stack[stack_ptr].addr = c1;
+          traversal_stack[stack_ptr].dist = d1;
+          ++stack_ptr;
+          kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+          traversal_stack[stack_ptr].addr = c0;
+          traversal_stack[stack_ptr].dist = d0;
+
+          /* Three children are hit, push all onto stack and sort 3
+           * stack items, continue with closest child.
+           */
+          r = __bscf(child_mask);
+          int c2 = __float_as_int(cnodes[r]);
+          float d2 = ((float *)&dist)[r];
+          if (child_mask == 0) {
+            ++stack_ptr;
+            kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+            traversal_stack[stack_ptr].addr = c2;
+            traversal_stack[stack_ptr].dist = d2;
+            qbvh_stack_sort(&traversal_stack[stack_ptr],
+                            &traversal_stack[stack_ptr - 1],
+                            &traversal_stack[stack_ptr - 2]);
+            node_addr = traversal_stack[stack_ptr].addr;
+            --stack_ptr;
+            continue;
+          }
+
+          /* Four children are hit, push all onto stack and sort 4
+           * stack items, continue with closest child.
+           */
+          r = __bscf(child_mask);
+          int c3 = __float_as_int(cnodes[r]);
+          float d3 = ((float *)&dist)[r];
+          ++stack_ptr;
+          kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+          traversal_stack[stack_ptr].addr = c3;
+          traversal_stack[stack_ptr].dist = d3;
+          ++stack_ptr;
+          kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+          traversal_stack[stack_ptr].addr = c2;
+          traversal_stack[stack_ptr].dist = d2;
+          qbvh_stack_sort(&traversal_stack[stack_ptr],
+                          &traversal_stack[stack_ptr - 1],
+                          &traversal_stack[stack_ptr - 2],
+                          &traversal_stack[stack_ptr - 3]);
+        }
+
+        node_addr = traversal_stack[stack_ptr].addr;
+        --stack_ptr;
+      }
+
+      /* If node is leaf, fetch triangle list. */
+      if (node_addr < 0) {
+        float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
 #ifdef __VISIBILITY_FLAG__
-				if((__float_as_uint(leaf.z) & visibility) == 0) {
-					/* Pop. */
-					node_addr = traversal_stack[stack_ptr].addr;
-					--stack_ptr;
-					continue;
-				}
+        if ((__float_as_uint(leaf.z) & visibility) == 0) {
+          /* Pop. */
+          node_addr = traversal_stack[stack_ptr].addr;
+          --stack_ptr;
+          continue;
+        }
 #endif
 
-				int prim_addr = __float_as_int(leaf.x);
+        int prim_addr = __float_as_int(leaf.x);
 
 #if BVH_FEATURE(BVH_INSTANCING)
-				if(prim_addr >= 0) {
+        if (prim_addr >= 0) {
 #endif
-					int prim_addr2 = __float_as_int(leaf.y);
-					const uint type = __float_as_int(leaf.w);
-					const uint p_type = type & PRIMITIVE_ALL;
-
-					/* Pop. */
-					node_addr = traversal_stack[stack_ptr].addr;
-					--stack_ptr;
-
-					/* Primitive intersection. */
-					while(prim_addr < prim_addr2) {
-						kernel_assert((kernel_tex_fetch(__prim_type, prim_addr) & PRIMITIVE_ALL) == p_type);
-						bool hit;
-
-						/* todo: specialized intersect functions which don't fill in
-						 * isect unless needed and check SD_HAS_TRANSPARENT_SHADOW?
-						 * might give a few % performance improvement */
-
-						switch(p_type) {
-							case PRIMITIVE_TRIANGLE: {
-								hit = triangle_intersect(kg,
-								                         isect_array,
-								                         P,
-								                         dir,
-								                         visibility,
-								                         object,
-								                         prim_addr);
-								break;
-							}
+          int prim_addr2 = __float_as_int(leaf.y);
+          const uint type = __float_as_int(leaf.w);
+          const uint p_type = type & PRIMITIVE_ALL;
+
+          /* Pop. */
+          node_addr = traversal_stack[stack_ptr].addr;
+          --stack_ptr;
+
+          /* Primitive intersection. */
+          while (prim_addr < prim_addr2) {
+            kernel_assert((kernel_tex_fetch(__prim_type, prim_addr) & PRIMITIVE_ALL) == p_type);
+            bool hit;
+
+            /* todo: specialized intersect functions which don't fill in
+             * isect unless needed and check SD_HAS_TRANSPARENT_SHADOW?
+             * might give a few % performance improvement */
+
+            switch (p_type) {
+              case PRIMITIVE_TRIANGLE: {
+                hit = triangle_intersect(kg, isect_array, P, dir, visibility, object, prim_addr);
+                break;
+              }
 #if BVH_FEATURE(BVH_MOTION)
-							case PRIMITIVE_MOTION_TRIANGLE: {
-								hit = motion_triangle_intersect(kg,
-								                                isect_array,
-								                                P,
-								                                dir,
-								                                ray->time,
-								                                visibility,
-								                                object,
-								                                prim_addr);
-								break;
-							}
+              case PRIMITIVE_MOTION_TRIANGLE: {
+                hit = motion_triangle_intersect(
+                    kg, isect_array, P, dir, ray->time, visibility, object, prim_addr);
+                break;
+              }
 #endif
 #if BVH_FEATURE(BVH_HAIR)
-							case PRIMITIVE_CURVE:
-							case PRIMITIVE_MOTION_CURVE: {
-								const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr);
-								if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
-									hit = cardinal_curve_intersect(kg,
-									                               isect_array,
-									                               P,
-									                               dir,
-									                               visibility,
-									                               object,
-									                               prim_addr,
-									                               ray->time,
-									                               curve_type,
-									                               NULL,
-									                               0, 0);
-								}
-								else {
-									hit = curve_intersect(kg,
-									                      isect_array,
-									                      P,
-									                      dir,
-									                      visibility,
-									                      object,
-									                      prim_addr,
-									                      ray->time,
-									                      curve_type,
-									                      NULL,
-									                      0, 0);
-								}
-								break;
-							}
+              case PRIMITIVE_CURVE:
+              case PRIMITIVE_MOTION_CURVE: {
+                const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr);
+                if (kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
+                  hit = cardinal_curve_intersect(kg,
+                                                 isect_array,
+                                                 P,
+                                                 dir,
+                                                 visibility,
+                                                 object,
+                                                 prim_addr,
+                                                 ray->time,
+                                                 curve_type,
+                                                 NULL,
+                                                 0,
+                                                 0);
+                }
+                else {
+                  hit = curve_intersect(kg,
+                                        isect_array,
+                                        P,
+                                        dir,
+                                        visibility,
+                                        object,
+                                        prim_addr,
+                                        ray->time,
+                                        curve_type,
+                                        NULL,
+                                        0,
+                                        0);
+                }
+                break;
+              }
 #endif
-							default: {
-								hit = false;
-								break;
-							}
-						}
+              default: {
+                hit = false;
+                break;
+              }
+            }
 
-						/* Shadow ray early termination. */
-						if(hit) {
-							/* detect if this surface has a shader with transparent shadows */
+            /* Shadow ray early termination. */
+            if (hit) {
+              /* detect if this surface has a shader with transparent shadows */
 
-							/* todo: optimize so primitive visibility flag indicates if
-							 * the primitive has a transparent shadow shader? */
-							int prim = kernel_tex_fetch(__prim_index, isect_array->prim);
-							int shader = 0;
+              /* todo: optimize so primitive visibility flag indicates if
+               * the primitive has a transparent shadow shader? */
+              int prim = kernel_tex_fetch(__prim_index, isect_array->prim);
+              int shader = 0;
 
 #ifdef __HAIR__
-							if(kernel_tex_fetch(__prim_type, isect_array->prim) & PRIMITIVE_ALL_TRIANGLE)
+              if (kernel_tex_fetch(__prim_type, isect_array->prim) & PRIMITIVE_ALL_TRIANGLE)
 #endif
-							{
-								shader = kernel_tex_fetch(__tri_shader, prim);
-							}
+              {
+                shader = kernel_tex_fetch(__tri_shader, prim);
+              }
 #ifdef __HAIR__
-							else {
-								float4 str = kernel_tex_fetch(__curves, prim);
-								shader = __float_as_int(str.z);
-							}
+              else {
+                float4 str = kernel_tex_fetch(__curves, prim);
+                shader = __float_as_int(str.z);
+              }
 #endif
-							int flag = kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags;
-
-							/* if no transparent shadows, all light is blocked */
-							if(!(flag & SD_HAS_TRANSPARENT_SHADOW)) {
-								return true;
-							}
-							/* if maximum number of hits reached, block all light */
-							else if(*num_hits == max_hits) {
-								return true;
-							}
-
-							/* move on to next entry in intersections array */
-							isect_array++;
-							(*num_hits)++;
+              int flag = kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags;
+
+              /* if no transparent shadows, all light is blocked */
+              if (!(flag & SD_HAS_TRANSPARENT_SHADOW)) {
+                return true;
+              }
+              /* if maximum number of hits reached, block all light */
+              else if (*num_hits == max_hits) {
+                return true;
+              }
+
+              /* move on to next entry in intersections array */
+              isect_array++;
+              (*num_hits)++;
 #if BVH_FEATURE(BVH_INSTANCING)
-							num_hits_in_instance++;
+              num_hits_in_instance++;
 #endif
 
-							isect_array->t = isect_t;
-						}
+              isect_array->t = isect_t;
+            }
 
-						prim_addr++;
-					}
-				}
+            prim_addr++;
+          }
+        }
 #if BVH_FEATURE(BVH_INSTANCING)
-				else {
-					/* Instance push. */
-					object = kernel_tex_fetch(__prim_object, -prim_addr-1);
+        else {
+          /* Instance push. */
+          object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
 
 #  if BVH_FEATURE(BVH_MOTION)
-					isect_t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
+          isect_t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
 #  else
-					isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
+          isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
 #  endif
 
-					num_hits_in_instance = 0;
-					isect_array->t = isect_t;
+          num_hits_in_instance = 0;
+          isect_array->t = isect_t;
 
-					qbvh_near_far_idx_calc(idir,
-					                       &near_x, &near_y, &near_z,
-					                       &far_x, &far_y, &far_z);
-					tfar = ssef(isect_t);
+          qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
+          tfar = ssef(isect_t);
 #  if BVH_FEATURE(BVH_HAIR)
-					dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
+          dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
 #  endif
-					idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
+          idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
 #  ifdef __KERNEL_AVX2__
-					P_idir = P*idir;
-					P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
+          P_idir = P * idir;
+          P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
 #  endif
 #  if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
-					org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
+          org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
 #  endif
 
-					++stack_ptr;
-					kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
-					traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
-
-					node_addr = kernel_tex_fetch(__object_node, object);
+          ++stack_ptr;
+          kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+          traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
 
-				}
-			}
-#endif  /* FEATURE(BVH_INSTANCING) */
-		} while(node_addr != ENTRYPOINT_SENTINEL);
+          node_addr = kernel_tex_fetch(__object_node, object);
+        }
+      }
+#endif /* FEATURE(BVH_INSTANCING) */
+    } while (node_addr != ENTRYPOINT_SENTINEL);
 
 #if BVH_FEATURE(BVH_INSTANCING)
-		if(stack_ptr >= 0) {
-			kernel_assert(object != OBJECT_NONE);
+    if (stack_ptr >= 0) {
+      kernel_assert(object != OBJECT_NONE);
 
-			/* Instance pop. */
-			if(num_hits_in_instance) {
-				float t_fac;
+      /* Instance pop. */
+      if (num_hits_in_instance) {
+        float t_fac;
 #  if BVH_FEATURE(BVH_MOTION)
-				bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm);
+        bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm);
 #  else
-				bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
+        bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
 #  endif
-				/* Scale isect->t to adjust for instancing. */
-				for(int i = 0; i < num_hits_in_instance; i++) {
-					(isect_array-i-1)->t *= t_fac;
-				}
-			}
-			else {
+        /* Scale isect->t to adjust for instancing. */
+        for (int i = 0; i < num_hits_in_instance; i++) {
+          (isect_array - i - 1)->t *= t_fac;
+        }
+      }
+      else {
 #  if BVH_FEATURE(BVH_MOTION)
-				bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm);
+        bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm);
 #  else
-				bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
+        bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
 #  endif
-			}
+      }
 
-			isect_t = tmax;
-			isect_array->t = isect_t;
+      isect_t = tmax;
+      isect_array->t = isect_t;
 
-			qbvh_near_far_idx_calc(idir,
-			                       &near_x, &near_y, &near_z,
-			                       &far_x, &far_y, &far_z);
-			tfar = ssef(isect_t);
+      qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
+      tfar = ssef(isect_t);
 #  if BVH_FEATURE(BVH_HAIR)
-			dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
+      dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
 #  endif
-			idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
+      idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
 #  ifdef __KERNEL_AVX2__
-			P_idir = P*idir;
-			P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
+      P_idir = P * idir;
+      P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
 #  endif
 #  if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
-			org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
+      org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
 #  endif
 
-			object = OBJECT_NONE;
-			node_addr = traversal_stack[stack_ptr].addr;
-			--stack_ptr;
-		}
-#endif  /* FEATURE(BVH_INSTANCING) */
-	} while(node_addr != ENTRYPOINT_SENTINEL);
+      object = OBJECT_NONE;
+      node_addr = traversal_stack[stack_ptr].addr;
+      --stack_ptr;
+    }
+#endif /* FEATURE(BVH_INSTANCING) */
+  } while (node_addr != ENTRYPOINT_SENTINEL);
 
-	return false;
+  return false;
 }
 
 #undef NODE_INTERSECT
diff --git a/intern/cycles/kernel/bvh/qbvh_traversal.h b/intern/cycles/kernel/bvh/qbvh_traversal.h
index 40cd57aad34..9ee0f7b5933 100644
--- a/intern/cycles/kernel/bvh/qbvh_traversal.h
+++ b/intern/cycles/kernel/bvh/qbvh_traversal.h
@@ -37,457 +37,446 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
                                              Intersection *isect,
                                              const uint visibility
 #if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
-                                             ,uint *lcg_state,
+                                             ,
+                                             uint *lcg_state,
                                              float difl,
                                              float extmax
 #endif
-                                             )
+)
 {
-	/* TODO(sergey):
-	 * - Test if pushing distance on the stack helps (for non shadow rays).
-	 * - Separate version for shadow rays.
-	 * - Likely and unlikely for if() statements.
-	 * - Test restrict attribute for pointers.
-	 */
-
-	/* Traversal stack in CUDA thread-local memory. */
-	QBVHStackItem traversal_stack[BVH_QSTACK_SIZE];
-	traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
-	traversal_stack[0].dist = -FLT_MAX;
-
-	/* Traversal variables in registers. */
-	int stack_ptr = 0;
-	int node_addr = kernel_data.bvh.root;
-	float node_dist = -FLT_MAX;
-
-	/* Ray parameters in registers. */
-	float3 P = ray->P;
-	float3 dir = bvh_clamp_direction(ray->D);
-	float3 idir = bvh_inverse_direction(dir);
-	int object = OBJECT_NONE;
+  /* TODO(sergey):
+   * - Test if pushing distance on the stack helps (for non shadow rays).
+   * - Separate version for shadow rays.
+   * - Likely and unlikely for if() statements.
+   * - Test restrict attribute for pointers.
+   */
+
+  /* Traversal stack in CUDA thread-local memory. */
+  QBVHStackItem traversal_stack[BVH_QSTACK_SIZE];
+  traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
+  traversal_stack[0].dist = -FLT_MAX;
+
+  /* Traversal variables in registers. */
+  int stack_ptr = 0;
+  int node_addr = kernel_data.bvh.root;
+  float node_dist = -FLT_MAX;
+
+  /* Ray parameters in registers. */
+  float3 P = ray->P;
+  float3 dir = bvh_clamp_direction(ray->D);
+  float3 idir = bvh_inverse_direction(dir);
+  int object = OBJECT_NONE;
 
 #if BVH_FEATURE(BVH_MOTION)
-	Transform ob_itfm;
+  Transform ob_itfm;
 #endif
 
-	isect->t = ray->t;
-	isect->u = 0.0f;
-	isect->v = 0.0f;
-	isect->prim = PRIM_NONE;
-	isect->object = OBJECT_NONE;
+  isect->t = ray->t;
+  isect->u = 0.0f;
+  isect->v = 0.0f;
+  isect->prim = PRIM_NONE;
+  isect->object = OBJECT_NONE;
 
-	BVH_DEBUG_INIT();
+  BVH_DEBUG_INIT();
 
-	ssef tnear(0.0f), tfar(ray->t);
+  ssef tnear(0.0f), tfar(ray->t);
 #if BVH_FEATURE(BVH_HAIR)
-	sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z));
+  sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z));
 #endif
-	sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z));
+  sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z));
 
 #ifdef __KERNEL_AVX2__
-	float3 P_idir = P*idir;
-	sse3f P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
+  float3 P_idir = P * idir;
+  sse3f P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
 #endif
 #if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
-	sse3f org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
+  sse3f org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
 #endif
 
-	/* Offsets to select the side that becomes the lower or upper bound. */
-	int near_x, near_y, near_z;
-	int far_x, far_y, far_z;
-	qbvh_near_far_idx_calc(idir,
-	                       &near_x, &near_y, &near_z,
-	                       &far_x, &far_y, &far_z);
-
-	/* Traversal loop. */
-	do {
-		do {
-			/* Traverse internal nodes. */
-			while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
-				float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
-				(void) inodes;
-
-				if(UNLIKELY(node_dist > isect->t)
+  /* Offsets to select the side that becomes the lower or upper bound. */
+  int near_x, near_y, near_z;
+  int far_x, far_y, far_z;
+  qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
+
+  /* Traversal loop. */
+  do {
+    do {
+      /* Traverse internal nodes. */
+      while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
+        float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
+        (void)inodes;
+
+        if (UNLIKELY(node_dist > isect->t)
 #if BVH_FEATURE(BVH_MOTION)
-				   || UNLIKELY(ray->time < inodes.y)
-				   || UNLIKELY(ray->time > inodes.z)
+            || UNLIKELY(ray->time < inodes.y) || UNLIKELY(ray->time > inodes.z)
 #endif
 #ifdef __VISIBILITY_FLAG__
-				   || (__float_as_uint(inodes.x) & visibility) == 0
+            || (__float_as_uint(inodes.x) & visibility) == 0
 #endif
-				 )
-				{
-					/* Pop. */
-					node_addr = traversal_stack[stack_ptr].addr;
-					node_dist = traversal_stack[stack_ptr].dist;
-					--stack_ptr;
-					continue;
-				}
+        ) {
+          /* Pop. */
+          node_addr = traversal_stack[stack_ptr].addr;
+          node_dist = traversal_stack[stack_ptr].dist;
+          --stack_ptr;
+          continue;
+        }
 
-				int child_mask;
-				ssef dist;
+        int child_mask;
+        ssef dist;
 
-				BVH_DEBUG_NEXT_NODE();
+        BVH_DEBUG_NEXT_NODE();
 
 #if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
-				if(difl != 0.0f) {
-					/* NOTE: We extend all the child BB instead of fetching
-					 * and checking visibility flags for each of the,
-					 *
-					 * Need to test if doing opposite would be any faster.
-					 */
-					child_mask = NODE_INTERSECT_ROBUST(kg,
-					                                   tnear,
-					                                   tfar,
+        if (difl != 0.0f) {
+          /* NOTE: We extend all the child BB instead of fetching
+           * and checking visibility flags for each of the,
+           *
+           * Need to test if doing opposite would be any faster.
+           */
+          child_mask = NODE_INTERSECT_ROBUST(kg,
+                                             tnear,
+                                             tfar,
 #  ifdef __KERNEL_AVX2__
-					                                   P_idir4,
+                                             P_idir4,
 #  endif
 #  if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
-					                                   org4,
+                                             org4,
 #  endif
 #  if BVH_FEATURE(BVH_HAIR)
-					                                   dir4,
+                                             dir4,
 #  endif
-					                                   idir4,
-					                                   near_x, near_y, near_z,
-					                                   far_x, far_y, far_z,
-					                                   node_addr,
-					                                   difl,
-					                                   &dist);
-				}
-				else
-#endif  /* BVH_HAIR_MINIMUM_WIDTH */
-				{
-					child_mask = NODE_INTERSECT(kg,
-					                            tnear,
-					                            tfar,
+                                             idir4,
+                                             near_x,
+                                             near_y,
+                                             near_z,
+                                             far_x,
+                                             far_y,
+                                             far_z,
+                                             node_addr,
+                                             difl,
+                                             &dist);
+        }
+        else
+#endif /* BVH_HAIR_MINIMUM_WIDTH */
+        {
+          child_mask = NODE_INTERSECT(kg,
+                                      tnear,
+                                      tfar,
 #ifdef __KERNEL_AVX2__
-					                            P_idir4,
+                                      P_idir4,
 #endif
 #if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
-					                            org4,
+                                      org4,
 #endif
 #if BVH_FEATURE(BVH_HAIR)
-					                            dir4,
+                                      dir4,
 #endif
-					                            idir4,
-					                            near_x, near_y, near_z,
-					                            far_x, far_y, far_z,
-					                            node_addr,
-					                            &dist);
-				}
-
-				if(child_mask != 0) {
-					float4 cnodes;
-					/* TODO(sergey): Investigate whether moving cnodes upwards
-					 * gives a speedup (will be different cache pattern but will
-					 * avoid extra check here).
-					 */
+                                      idir4,
+                                      near_x,
+                                      near_y,
+                                      near_z,
+                                      far_x,
+                                      far_y,
+                                      far_z,
+                                      node_addr,
+                                      &dist);
+        }
+
+        if (child_mask != 0) {
+          float4 cnodes;
+          /* TODO(sergey): Investigate whether moving cnodes upwards
+           * gives a speedup (will be different cache pattern but will
+           * avoid extra check here).
+           */
 #if BVH_FEATURE(BVH_HAIR)
-					if(__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
-						cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+13);
-					}
-					else
+          if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
+            cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 13);
+          }
+          else
 #endif
-					{
-						cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+7);
-					}
-
-					/* One child is hit, continue with that child. */
-					int r = __bscf(child_mask);
-					float d0 = ((float*)&dist)[r];
-					if(child_mask == 0) {
-						node_addr = __float_as_int(cnodes[r]);
-						node_dist = d0;
-						continue;
-					}
-
-					/* Two children are hit, push far child, and continue with
-					 * closer child.
-					 */
-					int c0 = __float_as_int(cnodes[r]);
-					r = __bscf(child_mask);
-					int c1 = __float_as_int(cnodes[r]);
-					float d1 = ((float*)&dist)[r];
-					if(child_mask == 0) {
-						if(d1 < d0) {
-							node_addr = c1;
-							node_dist = d1;
-							++stack_ptr;
-							kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
-							traversal_stack[stack_ptr].addr = c0;
-							traversal_stack[stack_ptr].dist = d0;
-							continue;
-						}
-						else {
-							node_addr = c0;
-							node_dist = d0;
-							++stack_ptr;
-							kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
-							traversal_stack[stack_ptr].addr = c1;
-							traversal_stack[stack_ptr].dist = d1;
-							continue;
-						}
-					}
-
-					/* Here starts the slow path for 3 or 4 hit children. We push
-					 * all nodes onto the stack to sort them there.
-					 */
-					++stack_ptr;
-					kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
-					traversal_stack[stack_ptr].addr = c1;
-					traversal_stack[stack_ptr].dist = d1;
-					++stack_ptr;
-					kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
-					traversal_stack[stack_ptr].addr = c0;
-					traversal_stack[stack_ptr].dist = d0;
-
-					/* Three children are hit, push all onto stack and sort 3
-					 * stack items, continue with closest child.
-					 */
-					r = __bscf(child_mask);
-					int c2 = __float_as_int(cnodes[r]);
-					float d2 = ((float*)&dist)[r];
-					if(child_mask == 0) {
-						++stack_ptr;
-						kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
-						traversal_stack[stack_ptr].addr = c2;
-						traversal_stack[stack_ptr].dist = d2;
-						qbvh_stack_sort(&traversal_stack[stack_ptr],
-						                &traversal_stack[stack_ptr - 1],
-						                &traversal_stack[stack_ptr - 2]);
-						node_addr = traversal_stack[stack_ptr].addr;
-						node_dist = traversal_stack[stack_ptr].dist;
-						--stack_ptr;
-						continue;
-					}
-
-					/* Four children are hit, push all onto stack and sort 4
-					 * stack items, continue with closest child.
-					 */
-					r = __bscf(child_mask);
-					int c3 = __float_as_int(cnodes[r]);
-					float d3 = ((float*)&dist)[r];
-					++stack_ptr;
-					kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
-					traversal_stack[stack_ptr].addr = c3;
-					traversal_stack[stack_ptr].dist = d3;
-					++stack_ptr;
-					kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
-					traversal_stack[stack_ptr].addr = c2;
-					traversal_stack[stack_ptr].dist = d2;
-					qbvh_stack_sort(&traversal_stack[stack_ptr],
-					                &traversal_stack[stack_ptr - 1],
-					                &traversal_stack[stack_ptr - 2],
-					                &traversal_stack[stack_ptr - 3]);
-				}
-
-				node_addr = traversal_stack[stack_ptr].addr;
-				node_dist = traversal_stack[stack_ptr].dist;
-				--stack_ptr;
-			}
-
-			/* If node is leaf, fetch triangle list. */
-			if(node_addr < 0) {
-				float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1));
+          {
+            cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 7);
+          }
+
+          /* One child is hit, continue with that child. */
+          int r = __bscf(child_mask);
+          float d0 = ((float *)&dist)[r];
+          if (child_mask == 0) {
+            node_addr = __float_as_int(cnodes[r]);
+            node_dist = d0;
+            continue;
+          }
+
+          /* Two children are hit, push far child, and continue with
+           * closer child.
+           */
+          int c0 = __float_as_int(cnodes[r]);
+          r = __bscf(child_mask);
+          int c1 = __float_as_int(cnodes[r]);
+          float d1 = ((float *)&dist)[r];
+          if (child_mask == 0) {
+            if (d1 < d0) {
+              node_addr = c1;
+              node_dist = d1;
+              ++stack_ptr;
+              kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+              traversal_stack[stack_ptr].addr = c0;
+              traversal_stack[stack_ptr].dist = d0;
+              continue;
+            }
+            else {
+              node_addr = c0;
+              node_dist = d0;
+              ++stack_ptr;
+              kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+              traversal_stack[stack_ptr].addr = c1;
+              traversal_stack[stack_ptr].dist = d1;
+              continue;
+            }
+          }
+
+          /* Here starts the slow path for 3 or 4 hit children. We push
+           * all nodes onto the stack to sort them there.
+           */
+          ++stack_ptr;
+          kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+          traversal_stack[stack_ptr].addr = c1;
+          traversal_stack[stack_ptr].dist = d1;
+          ++stack_ptr;
+          kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+          traversal_stack[stack_ptr].addr = c0;
+          traversal_stack[stack_ptr].dist = d0;
+
+          /* Three children are hit, push all onto stack and sort 3
+           * stack items, continue with closest child.
+           */
+          r = __bscf(child_mask);
+          int c2 = __float_as_int(cnodes[r]);
+          float d2 = ((float *)&dist)[r];
+          if (child_mask == 0) {
+            ++stack_ptr;
+            kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+            traversal_stack[stack_ptr].addr = c2;
+            traversal_stack[stack_ptr].dist = d2;
+            qbvh_stack_sort(&traversal_stack[stack_ptr],
+                            &traversal_stack[stack_ptr - 1],
+                            &traversal_stack[stack_ptr - 2]);
+            node_addr = traversal_stack[stack_ptr].addr;
+            node_dist = traversal_stack[stack_ptr].dist;
+            --stack_ptr;
+            continue;
+          }
+
+          /* Four children are hit, push all onto stack and sort 4
+           * stack items, continue with closest child.
+           */
+          r = __bscf(child_mask);
+          int c3 = __float_as_int(cnodes[r]);
+          float d3 = ((float *)&dist)[r];
+          ++stack_ptr;
+          kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+          traversal_stack[stack_ptr].addr = c3;
+          traversal_stack[stack_ptr].dist = d3;
+          ++stack_ptr;
+          kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+          traversal_stack[stack_ptr].addr = c2;
+          traversal_stack[stack_ptr].dist = d2;
+          qbvh_stack_sort(&traversal_stack[stack_ptr],
+                          &traversal_stack[stack_ptr - 1],
+                          &traversal_stack[stack_ptr - 2],
+                          &traversal_stack[stack_ptr - 3]);
+        }
+
+        node_addr = traversal_stack[stack_ptr].addr;
+        node_dist = traversal_stack[stack_ptr].dist;
+        --stack_ptr;
+      }
+
+      /* If node is leaf, fetch triangle list. */
+      if (node_addr < 0) {
+        float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
 
 #ifdef __VISIBILITY_FLAG__
-				if(UNLIKELY((node_dist > isect->t) ||
-				            ((__float_as_uint(leaf.z) & visibility) == 0)))
+        if (UNLIKELY((node_dist > isect->t) || ((__float_as_uint(leaf.z) & visibility) == 0)))
 #else
-				if(UNLIKELY((node_dist > isect->t)))
+        if (UNLIKELY((node_dist > isect->t)))
 #endif
-				{
-					/* Pop. */
-					node_addr = traversal_stack[stack_ptr].addr;
-					node_dist = traversal_stack[stack_ptr].dist;
-					--stack_ptr;
-					continue;
-				}
+        {
+          /* Pop. */
+          node_addr = traversal_stack[stack_ptr].addr;
+          node_dist = traversal_stack[stack_ptr].dist;
+          --stack_ptr;
+          continue;
+        }
 
-				int prim_addr = __float_as_int(leaf.x);
+        int prim_addr = __float_as_int(leaf.x);
 
 #if BVH_FEATURE(BVH_INSTANCING)
-				if(prim_addr >= 0) {
+        if (prim_addr >= 0) {
 #endif
-					int prim_addr2 = __float_as_int(leaf.y);
-					const uint type = __float_as_int(leaf.w);
-
-					/* Pop. */
-					node_addr = traversal_stack[stack_ptr].addr;
-					node_dist = traversal_stack[stack_ptr].dist;
-					--stack_ptr;
-
-					/* Primitive intersection. */
-					switch(type & PRIMITIVE_ALL) {
-						case PRIMITIVE_TRIANGLE: {
-							for(; prim_addr < prim_addr2; prim_addr++) {
-								BVH_DEBUG_NEXT_INTERSECTION();
-								kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
-								if(triangle_intersect(kg,
-								                      isect,
-								                      P,
-								                      dir,
-								                      visibility,
-								                      object,
-								                      prim_addr)) {
-									tfar = ssef(isect->t);
-									/* Shadow ray early termination. */
-									if(visibility & PATH_RAY_SHADOW_OPAQUE) {
-										return true;
-									}
-								}
-							}
-							break;
-						}
+          int prim_addr2 = __float_as_int(leaf.y);
+          const uint type = __float_as_int(leaf.w);
+
+          /* Pop. */
+          node_addr = traversal_stack[stack_ptr].addr;
+          node_dist = traversal_stack[stack_ptr].dist;
+          --stack_ptr;
+
+          /* Primitive intersection. */
+          switch (type & PRIMITIVE_ALL) {
+            case PRIMITIVE_TRIANGLE: {
+              for (; prim_addr < prim_addr2; prim_addr++) {
+                BVH_DEBUG_NEXT_INTERSECTION();
+                kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
+                if (triangle_intersect(kg, isect, P, dir, visibility, object, prim_addr)) {
+                  tfar = ssef(isect->t);
+                  /* Shadow ray early termination. */
+                  if (visibility & PATH_RAY_SHADOW_OPAQUE) {
+                    return true;
+                  }
+                }
+              }
+              break;
+            }
 #if BVH_FEATURE(BVH_MOTION)
-						case PRIMITIVE_MOTION_TRIANGLE: {
-							for(; prim_addr < prim_addr2; prim_addr++) {
-								BVH_DEBUG_NEXT_INTERSECTION();
-								kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
-								if(motion_triangle_intersect(kg,
-								                             isect,
-								                             P,
-								                             dir,
-								                             ray->time,
-								                             visibility,
-								                             object,
-								                             prim_addr)) {
-									tfar = ssef(isect->t);
-									/* Shadow ray early termination. */
-									if(visibility & PATH_RAY_SHADOW_OPAQUE) {
-										return true;
-									}
-								}
-							}
-							break;
-						}
-#endif  /* BVH_FEATURE(BVH_MOTION) */
+            case PRIMITIVE_MOTION_TRIANGLE: {
+              for (; prim_addr < prim_addr2; prim_addr++) {
+                BVH_DEBUG_NEXT_INTERSECTION();
+                kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
+                if (motion_triangle_intersect(
+                        kg, isect, P, dir, ray->time, visibility, object, prim_addr)) {
+                  tfar = ssef(isect->t);
+                  /* Shadow ray early termination. */
+                  if (visibility & PATH_RAY_SHADOW_OPAQUE) {
+                    return true;
+                  }
+                }
+              }
+              break;
+            }
+#endif /* BVH_FEATURE(BVH_MOTION) */
 #if BVH_FEATURE(BVH_HAIR)
-						case PRIMITIVE_CURVE:
-						case PRIMITIVE_MOTION_CURVE: {
-							for(; prim_addr < prim_addr2; prim_addr++) {
-								BVH_DEBUG_NEXT_INTERSECTION();
-								const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr);
-								kernel_assert((curve_type & PRIMITIVE_ALL) == (type & PRIMITIVE_ALL));
-								bool hit;
-								if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
-									hit = cardinal_curve_intersect(kg,
-									                               isect,
-									                               P,
-									                               dir,
-									                               visibility,
-									                               object,
-									                               prim_addr,
-									                               ray->time,
-									                               curve_type,
-									                               lcg_state,
-									                               difl,
-									                               extmax);
-								}
-								else {
-									hit = curve_intersect(kg,
-									                      isect,
-									                      P,
-									                      dir,
-									                      visibility,
-									                      object,
-									                      prim_addr,
-									                      ray->time,
-									                      curve_type,
-									                      lcg_state,
-									                      difl,
-									                      extmax);
-								}
-								if(hit) {
-									tfar = ssef(isect->t);
-									/* Shadow ray early termination. */
-									if(visibility & PATH_RAY_SHADOW_OPAQUE) {
-										return true;
-									}
-								}
-							}
-							break;
-						}
-#endif  /* BVH_FEATURE(BVH_HAIR) */
-					}
-				}
+            case PRIMITIVE_CURVE:
+            case PRIMITIVE_MOTION_CURVE: {
+              for (; prim_addr < prim_addr2; prim_addr++) {
+                BVH_DEBUG_NEXT_INTERSECTION();
+                const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr);
+                kernel_assert((curve_type & PRIMITIVE_ALL) == (type & PRIMITIVE_ALL));
+                bool hit;
+                if (kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
+                  hit = cardinal_curve_intersect(kg,
+                                                 isect,
+                                                 P,
+                                                 dir,
+                                                 visibility,
+                                                 object,
+                                                 prim_addr,
+                                                 ray->time,
+                                                 curve_type,
+                                                 lcg_state,
+                                                 difl,
+                                                 extmax);
+                }
+                else {
+                  hit = curve_intersect(kg,
+                                        isect,
+                                        P,
+                                        dir,
+                                        visibility,
+                                        object,
+                                        prim_addr,
+                                        ray->time,
+                                        curve_type,
+                                        lcg_state,
+                                        difl,
+                                        extmax);
+                }
+                if (hit) {
+                  tfar = ssef(isect->t);
+                  /* Shadow ray early termination. */
+                  if (visibility & PATH_RAY_SHADOW_OPAQUE) {
+                    return true;
+                  }
+                }
+              }
+              break;
+            }
+#endif /* BVH_FEATURE(BVH_HAIR) */
+          }
+        }
 #if BVH_FEATURE(BVH_INSTANCING)
-				else {
-					/* Instance push. */
-					object = kernel_tex_fetch(__prim_object, -prim_addr-1);
+        else {
+          /* Instance push. */
+          object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
 
 #  if BVH_FEATURE(BVH_MOTION)
-					qbvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect->t, &node_dist, &ob_itfm);
+          qbvh_instance_motion_push(
+              kg, object, ray, &P, &dir, &idir, &isect->t, &node_dist, &ob_itfm);
 #  else
-					qbvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t, &node_dist);
+          qbvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t, &node_dist);
 #  endif
 
-					qbvh_near_far_idx_calc(idir,
-					                       &near_x, &near_y, &near_z,
-					                       &far_x, &far_y, &far_z);
-					tfar = ssef(isect->t);
+          qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
+          tfar = ssef(isect->t);
 #  if BVH_FEATURE(BVH_HAIR)
-					dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
+          dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
 #  endif
-					idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
+          idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
 #  ifdef __KERNEL_AVX2__
-					P_idir = P*idir;
-					P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
+          P_idir = P * idir;
+          P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
 #  endif
 #  if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
-					org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
+          org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
 #  endif
 
-					++stack_ptr;
-					kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
-					traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
-					traversal_stack[stack_ptr].dist = -FLT_MAX;
+          ++stack_ptr;
+          kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+          traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
+          traversal_stack[stack_ptr].dist = -FLT_MAX;
 
-					node_addr = kernel_tex_fetch(__object_node, object);
+          node_addr = kernel_tex_fetch(__object_node, object);
 
-					BVH_DEBUG_NEXT_INSTANCE();
-				}
-			}
-#endif  /* FEATURE(BVH_INSTANCING) */
-		} while(node_addr != ENTRYPOINT_SENTINEL);
+          BVH_DEBUG_NEXT_INSTANCE();
+        }
+      }
+#endif /* FEATURE(BVH_INSTANCING) */
+    } while (node_addr != ENTRYPOINT_SENTINEL);
 
 #if BVH_FEATURE(BVH_INSTANCING)
-		if(stack_ptr >= 0) {
-			kernel_assert(object != OBJECT_NONE);
+    if (stack_ptr >= 0) {
+      kernel_assert(object != OBJECT_NONE);
 
-			/* Instance pop. */
+      /* Instance pop. */
 #  if BVH_FEATURE(BVH_MOTION)
-			isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
+      isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
 #  else
-			isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
+      isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
 #  endif
 
-			qbvh_near_far_idx_calc(idir,
-			                       &near_x, &near_y, &near_z,
-			                       &far_x, &far_y, &far_z);
-			tfar = ssef(isect->t);
+      qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
+      tfar = ssef(isect->t);
 #  if BVH_FEATURE(BVH_HAIR)
-			dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
+      dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
 #  endif
-			idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
+      idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
 #  ifdef __KERNEL_AVX2__
-			P_idir = P*idir;
-			P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
+      P_idir = P * idir;
+      P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
 #  endif
 #  if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
-			org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
+      org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
 #  endif
 
-			object = OBJECT_NONE;
-			node_addr = traversal_stack[stack_ptr].addr;
-			node_dist = traversal_stack[stack_ptr].dist;
-			--stack_ptr;
-		}
-#endif  /* FEATURE(BVH_INSTANCING) */
-	} while(node_addr != ENTRYPOINT_SENTINEL);
+      object = OBJECT_NONE;
+      node_addr = traversal_stack[stack_ptr].addr;
+      node_dist = traversal_stack[stack_ptr].dist;
+      --stack_ptr;
+    }
+#endif /* FEATURE(BVH_INSTANCING) */
+  } while (node_addr != ENTRYPOINT_SENTINEL);
 
-	return (isect->prim != PRIM_NONE);
+  return (isect->prim != PRIM_NONE);
 }
 
 #undef NODE_INTERSECT
diff --git a/intern/cycles/kernel/bvh/qbvh_volume.h b/intern/cycles/kernel/bvh/qbvh_volume.h
index 6790bfa6c83..e4eaed04467 100644
--- a/intern/cycles/kernel/bvh/qbvh_volume.h
+++ b/intern/cycles/kernel/bvh/qbvh_volume.h
@@ -33,331 +33,335 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
                                              Intersection *isect,
                                              const uint visibility)
 {
-	/* TODO(sergey):
-	 * - Test if pushing distance on the stack helps.
-	 * - Likely and unlikely for if() statements.
-	 * - Test restrict attribute for pointers.
-	 */
-
-	/* Traversal stack in CUDA thread-local memory. */
-	QBVHStackItem traversal_stack[BVH_QSTACK_SIZE];
-	traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
-
-	/* Traversal variables in registers. */
-	int stack_ptr = 0;
-	int node_addr = kernel_data.bvh.root;
-
-	/* Ray parameters in registers. */
-	float3 P = ray->P;
-	float3 dir = bvh_clamp_direction(ray->D);
-	float3 idir = bvh_inverse_direction(dir);
-	int object = OBJECT_NONE;
+  /* TODO(sergey):
+   * - Test if pushing distance on the stack helps.
+   * - Likely and unlikely for if() statements.
+   * - Test restrict attribute for pointers.
+   */
+
+  /* Traversal stack in CUDA thread-local memory. */
+  QBVHStackItem traversal_stack[BVH_QSTACK_SIZE];
+  traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
+
+  /* Traversal variables in registers. */
+  int stack_ptr = 0;
+  int node_addr = kernel_data.bvh.root;
+
+  /* Ray parameters in registers. */
+  float3 P = ray->P;
+  float3 dir = bvh_clamp_direction(ray->D);
+  float3 idir = bvh_inverse_direction(dir);
+  int object = OBJECT_NONE;
 
 #if BVH_FEATURE(BVH_MOTION)
-	Transform ob_itfm;
+  Transform ob_itfm;
 #endif
 
-	isect->t = ray->t;
-	isect->u = 0.0f;
-	isect->v = 0.0f;
-	isect->prim = PRIM_NONE;
-	isect->object = OBJECT_NONE;
+  isect->t = ray->t;
+  isect->u = 0.0f;
+  isect->v = 0.0f;
+  isect->prim = PRIM_NONE;
+  isect->object = OBJECT_NONE;
 
-	ssef tnear(0.0f), tfar(ray->t);
+  ssef tnear(0.0f), tfar(ray->t);
 #if BVH_FEATURE(BVH_HAIR)
-	sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z));
+  sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z));
 #endif
-	sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z));
+  sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z));
 
 #ifdef __KERNEL_AVX2__
-	float3 P_idir = P*idir;
-	sse3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
+  float3 P_idir = P * idir;
+  sse3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
 #endif
 #if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
-	sse3f org4(ssef(P.x), ssef(P.y), ssef(P.z));
+  sse3f org4(ssef(P.x), ssef(P.y), ssef(P.z));
 #endif
 
-	/* Offsets to select the side that becomes the lower or upper bound. */
-	int near_x, near_y, near_z;
-	int far_x, far_y, far_z;
-	qbvh_near_far_idx_calc(idir,
-	                       &near_x, &near_y, &near_z,
-	                       &far_x, &far_y, &far_z);
+  /* Offsets to select the side that becomes the lower or upper bound. */
+  int near_x, near_y, near_z;
+  int far_x, far_y, far_z;
+  qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
 
-	/* Traversal loop. */
-	do {
-		do {
-			/* Traverse internal nodes. */
-			while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
-				float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
+  /* Traversal loop. */
+  do {
+    do {
+      /* Traverse internal nodes. */
+      while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
+        float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
 
 #ifdef __VISIBILITY_FLAG__
-				if((__float_as_uint(inodes.x) & visibility) == 0) {
-					/* Pop. */
-					node_addr = traversal_stack[stack_ptr].addr;
-					--stack_ptr;
-					continue;
-				}
+        if ((__float_as_uint(inodes.x) & visibility) == 0) {
+          /* Pop. */
+          node_addr = traversal_stack[stack_ptr].addr;
+          --stack_ptr;
+          continue;
+        }
 #endif
 
-				ssef dist;
-				int child_mask = NODE_INTERSECT(kg,
-				                                tnear,
-				                                tfar,
+        ssef dist;
+        int child_mask = NODE_INTERSECT(kg,
+                                        tnear,
+                                        tfar,
 #ifdef __KERNEL_AVX2__
-				                                P_idir4,
+                                        P_idir4,
 #endif
 #if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
-				                                org4,
+                                        org4,
 #endif
 #if BVH_FEATURE(BVH_HAIR)
-				                                dir4,
+                                        dir4,
 #endif
-				                                idir4,
-				                                near_x, near_y, near_z,
-				                                far_x, far_y, far_z,
-				                                node_addr,
-				                                &dist);
-
-				if(child_mask != 0) {
-					float4 cnodes;
+                                        idir4,
+                                        near_x,
+                                        near_y,
+                                        near_z,
+                                        far_x,
+                                        far_y,
+                                        far_z,
+                                        node_addr,
+                                        &dist);
+
+        if (child_mask != 0) {
+          float4 cnodes;
 #if BVH_FEATURE(BVH_HAIR)
-					if(__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
-						cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+13);
-					}
-					else
+          if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
+            cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 13);
+          }
+          else
 #endif
-					{
-						cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+7);
-					}
-
-					/* One child is hit, continue with that child. */
-					int r = __bscf(child_mask);
-					if(child_mask == 0) {
-						node_addr = __float_as_int(cnodes[r]);
-						continue;
-					}
-
-					/* Two children are hit, push far child, and continue with
-					 * closer child.
-					 */
-					int c0 = __float_as_int(cnodes[r]);
-					float d0 = ((float*)&dist)[r];
-					r = __bscf(child_mask);
-					int c1 = __float_as_int(cnodes[r]);
-					float d1 = ((float*)&dist)[r];
-					if(child_mask == 0) {
-						if(d1 < d0) {
-							node_addr = c1;
-							++stack_ptr;
-							kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
-							traversal_stack[stack_ptr].addr = c0;
-							traversal_stack[stack_ptr].dist = d0;
-							continue;
-						}
-						else {
-							node_addr = c0;
-							++stack_ptr;
-							kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
-							traversal_stack[stack_ptr].addr = c1;
-							traversal_stack[stack_ptr].dist = d1;
-							continue;
-						}
-					}
-
-					/* Here starts the slow path for 3 or 4 hit children. We push
-					 * all nodes onto the stack to sort them there.
-					 */
-					++stack_ptr;
-					kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
-					traversal_stack[stack_ptr].addr = c1;
-					traversal_stack[stack_ptr].dist = d1;
-					++stack_ptr;
-					kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
-					traversal_stack[stack_ptr].addr = c0;
-					traversal_stack[stack_ptr].dist = d0;
-
-					/* Three children are hit, push all onto stack and sort 3
-					 * stack items, continue with closest child.
-					 */
-					r = __bscf(child_mask);
-					int c2 = __float_as_int(cnodes[r]);
-					float d2 = ((float*)&dist)[r];
-					if(child_mask == 0) {
-						++stack_ptr;
-						kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
-						traversal_stack[stack_ptr].addr = c2;
-						traversal_stack[stack_ptr].dist = d2;
-						qbvh_stack_sort(&traversal_stack[stack_ptr],
-						                &traversal_stack[stack_ptr - 1],
-						                &traversal_stack[stack_ptr - 2]);
-						node_addr = traversal_stack[stack_ptr].addr;
-						--stack_ptr;
-						continue;
-					}
-
-					/* Four children are hit, push all onto stack and sort 4
-					 * stack items, continue with closest child.
-					 */
-					r = __bscf(child_mask);
-					int c3 = __float_as_int(cnodes[r]);
-					float d3 = ((float*)&dist)[r];
-					++stack_ptr;
-					kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
-					traversal_stack[stack_ptr].addr = c3;
-					traversal_stack[stack_ptr].dist = d3;
-					++stack_ptr;
-					kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
-					traversal_stack[stack_ptr].addr = c2;
-					traversal_stack[stack_ptr].dist = d2;
-					qbvh_stack_sort(&traversal_stack[stack_ptr],
-					                &traversal_stack[stack_ptr - 1],
-					                &traversal_stack[stack_ptr - 2],
-					                &traversal_stack[stack_ptr - 3]);
-				}
-
-				node_addr = traversal_stack[stack_ptr].addr;
-				--stack_ptr;
-			}
-
-			/* If node is leaf, fetch triangle list. */
-			if(node_addr < 0) {
-				float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1));
-
-				if((__float_as_uint(leaf.z) & visibility) == 0) {
-					/* Pop. */
-					node_addr = traversal_stack[stack_ptr].addr;
-					--stack_ptr;
-					continue;
-				}
-
-				int prim_addr = __float_as_int(leaf.x);
+          {
+            cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 7);
+          }
+
+          /* One child is hit, continue with that child. */
+          int r = __bscf(child_mask);
+          if (child_mask == 0) {
+            node_addr = __float_as_int(cnodes[r]);
+            continue;
+          }
+
+          /* Two children are hit, push far child, and continue with
+           * closer child.
+           */
+          int c0 = __float_as_int(cnodes[r]);
+          float d0 = ((float *)&dist)[r];
+          r = __bscf(child_mask);
+          int c1 = __float_as_int(cnodes[r]);
+          float d1 = ((float *)&dist)[r];
+          if (child_mask == 0) {
+            if (d1 < d0) {
+              node_addr = c1;
+              ++stack_ptr;
+              kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+              traversal_stack[stack_ptr].addr = c0;
+              traversal_stack[stack_ptr].dist = d0;
+              continue;
+            }
+            else {
+              node_addr = c0;
+              ++stack_ptr;
+              kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+              traversal_stack[stack_ptr].addr = c1;
+              traversal_stack[stack_ptr].dist = d1;
+              continue;
+            }
+          }
+
+          /* Here starts the slow path for 3 or 4 hit children. We push
+           * all nodes onto the stack to sort them there.
+           */
+          ++stack_ptr;
+          kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+          traversal_stack[stack_ptr].addr = c1;
+          traversal_stack[stack_ptr].dist = d1;
+          ++stack_ptr;
+          kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+          traversal_stack[stack_ptr].addr = c0;
+          traversal_stack[stack_ptr].dist = d0;
+
+          /* Three children are hit, push all onto stack and sort 3
+           * stack items, continue with closest child.
+           */
+          r = __bscf(child_mask);
+          int c2 = __float_as_int(cnodes[r]);
+          float d2 = ((float *)&dist)[r];
+          if (child_mask == 0) {
+            ++stack_ptr;
+            kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+            traversal_stack[stack_ptr].addr = c2;
+            traversal_stack[stack_ptr].dist = d2;
+            qbvh_stack_sort(&traversal_stack[stack_ptr],
+                            &traversal_stack[stack_ptr - 1],
+                            &traversal_stack[stack_ptr - 2]);
+            node_addr = traversal_stack[stack_ptr].addr;
+            --stack_ptr;
+            continue;
+          }
+
+          /* Four children are hit, push all onto stack and sort 4
+           * stack items, continue with closest child.
+           */
+          r = __bscf(child_mask);
+          int c3 = __float_as_int(cnodes[r]);
+          float d3 = ((float *)&dist)[r];
+          ++stack_ptr;
+          kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+          traversal_stack[stack_ptr].addr = c3;
+          traversal_stack[stack_ptr].dist = d3;
+          ++stack_ptr;
+          kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+          traversal_stack[stack_ptr].addr = c2;
+          traversal_stack[stack_ptr].dist = d2;
+          qbvh_stack_sort(&traversal_stack[stack_ptr],
+                          &traversal_stack[stack_ptr - 1],
+                          &traversal_stack[stack_ptr - 2],
+                          &traversal_stack[stack_ptr - 3]);
+        }
+
+        node_addr = traversal_stack[stack_ptr].addr;
+        --stack_ptr;
+      }
+
+      /* If node is leaf, fetch triangle list. */
+      if (node_addr < 0) {
+        float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
+
+        if ((__float_as_uint(leaf.z) & visibility) == 0) {
+          /* Pop. */
+          node_addr = traversal_stack[stack_ptr].addr;
+          --stack_ptr;
+          continue;
+        }
+
+        int prim_addr = __float_as_int(leaf.x);
 
 #if BVH_FEATURE(BVH_INSTANCING)
-				if(prim_addr >= 0) {
+        if (prim_addr >= 0) {
 #endif
-					int prim_addr2 = __float_as_int(leaf.y);
-					const uint type = __float_as_int(leaf.w);
-					const uint p_type = type & PRIMITIVE_ALL;
-
-					/* Pop. */
-					node_addr = traversal_stack[stack_ptr].addr;
-					--stack_ptr;
-
-					/* Primitive intersection. */
-					switch(p_type) {
-						case PRIMITIVE_TRIANGLE: {
-							for(; prim_addr < prim_addr2; prim_addr++) {
-								kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
-								/* Only primitives from volume object. */
-								uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, prim_addr): object;
-								int object_flag = kernel_tex_fetch(__object_flag, tri_object);
-								if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
-									continue;
-								}
-								/* Intersect ray against primitive. */
-								triangle_intersect(kg, isect, P, dir, visibility, object, prim_addr);
-							}
-							break;
-						}
+          int prim_addr2 = __float_as_int(leaf.y);
+          const uint type = __float_as_int(leaf.w);
+          const uint p_type = type & PRIMITIVE_ALL;
+
+          /* Pop. */
+          node_addr = traversal_stack[stack_ptr].addr;
+          --stack_ptr;
+
+          /* Primitive intersection. */
+          switch (p_type) {
+            case PRIMITIVE_TRIANGLE: {
+              for (; prim_addr < prim_addr2; prim_addr++) {
+                kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
+                /* Only primitives from volume object. */
+                uint tri_object = (object == OBJECT_NONE) ?
+                                      kernel_tex_fetch(__prim_object, prim_addr) :
+                                      object;
+                int object_flag = kernel_tex_fetch(__object_flag, tri_object);
+                if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
+                  continue;
+                }
+                /* Intersect ray against primitive. */
+                triangle_intersect(kg, isect, P, dir, visibility, object, prim_addr);
+              }
+              break;
+            }
 #if BVH_FEATURE(BVH_MOTION)
-						case PRIMITIVE_MOTION_TRIANGLE: {
-							for(; prim_addr < prim_addr2; prim_addr++) {
-								kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
-								/* Only primitives from volume object. */
-								uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, prim_addr): object;
-								int object_flag = kernel_tex_fetch(__object_flag, tri_object);
-								if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
-									continue;
-								}
-								/* Intersect ray against primitive. */
-								motion_triangle_intersect(kg, isect, P, dir, ray->time, visibility, object, prim_addr);
-							}
-							break;
-						}
+            case PRIMITIVE_MOTION_TRIANGLE: {
+              for (; prim_addr < prim_addr2; prim_addr++) {
+                kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
+                /* Only primitives from volume object. */
+                uint tri_object = (object == OBJECT_NONE) ?
+                                      kernel_tex_fetch(__prim_object, prim_addr) :
+                                      object;
+                int object_flag = kernel_tex_fetch(__object_flag, tri_object);
+                if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
+                  continue;
+                }
+                /* Intersect ray against primitive. */
+                motion_triangle_intersect(
+                    kg, isect, P, dir, ray->time, visibility, object, prim_addr);
+              }
+              break;
+            }
 #endif
-					}
-				}
+          }
+        }
 #if BVH_FEATURE(BVH_INSTANCING)
-				else {
-					/* Instance push. */
-					object = kernel_tex_fetch(__prim_object, -prim_addr-1);
-					int object_flag = kernel_tex_fetch(__object_flag, object);
-					if(object_flag & SD_OBJECT_HAS_VOLUME) {
+        else {
+          /* Instance push. */
+          object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
+          int object_flag = kernel_tex_fetch(__object_flag, object);
+          if (object_flag & SD_OBJECT_HAS_VOLUME) {
 #  if BVH_FEATURE(BVH_MOTION)
-						isect->t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
+            isect->t = bvh_instance_motion_push(
+                kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
 #  else
-						isect->t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect->t);
+            isect->t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect->t);
 #  endif
 
-						qbvh_near_far_idx_calc(idir,
-						                       &near_x, &near_y, &near_z,
-						                       &far_x, &far_y, &far_z);
-						tfar = ssef(isect->t);
+            qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
+            tfar = ssef(isect->t);
 #  if BVH_FEATURE(BVH_HAIR)
-						dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
+            dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
 #  endif
-						idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
+            idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
 #  ifdef __KERNEL_AVX2__
-						P_idir = P*idir;
-						P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
+            P_idir = P * idir;
+            P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
 #  endif
 #  if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
-						org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
+            org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
 #  endif
 
-						++stack_ptr;
-						kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
-						traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
-
-						node_addr = kernel_tex_fetch(__object_node, object);
-					}
-					else {
-						/* Pop. */
-						object = OBJECT_NONE;
-						node_addr = traversal_stack[stack_ptr].addr;
-						--stack_ptr;
-					}
-				}
-			}
-#endif  /* FEATURE(BVH_INSTANCING) */
-		} while(node_addr != ENTRYPOINT_SENTINEL);
+            ++stack_ptr;
+            kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+            traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
+
+            node_addr = kernel_tex_fetch(__object_node, object);
+          }
+          else {
+            /* Pop. */
+            object = OBJECT_NONE;
+            node_addr = traversal_stack[stack_ptr].addr;
+            --stack_ptr;
+          }
+        }
+      }
+#endif /* FEATURE(BVH_INSTANCING) */
+    } while (node_addr != ENTRYPOINT_SENTINEL);
 
 #if BVH_FEATURE(BVH_INSTANCING)
-		if(stack_ptr >= 0) {
-			kernel_assert(object != OBJECT_NONE);
+    if (stack_ptr >= 0) {
+      kernel_assert(object != OBJECT_NONE);
 
-			/* Instance pop. */
+      /* Instance pop. */
 #  if BVH_FEATURE(BVH_MOTION)
-			isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
+      isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
 #  else
-			isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
+      isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
 #  endif
 
-			qbvh_near_far_idx_calc(idir,
-			                       &near_x, &near_y, &near_z,
-			                       &far_x, &far_y, &far_z);
-			tfar = ssef(isect->t);
+      qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
+      tfar = ssef(isect->t);
 #  if BVH_FEATURE(BVH_HAIR)
-			dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
+      dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
 #  endif
-			idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
+      idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
 #  ifdef __KERNEL_AVX2__
-			P_idir = P*idir;
-			P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
+      P_idir = P * idir;
+      P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
 #  endif
 #  if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
-			org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
+      org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
 #  endif
 
-			object = OBJECT_NONE;
-			node_addr = traversal_stack[stack_ptr].addr;
-			--stack_ptr;
-		}
-#endif  /* FEATURE(BVH_INSTANCING) */
-	} while(node_addr != ENTRYPOINT_SENTINEL);
+      object = OBJECT_NONE;
+      node_addr = traversal_stack[stack_ptr].addr;
+      --stack_ptr;
+    }
+#endif /* FEATURE(BVH_INSTANCING) */
+  } while (node_addr != ENTRYPOINT_SENTINEL);
 
-	return (isect->prim != PRIM_NONE);
+  return (isect->prim != PRIM_NONE);
 }
 
 #undef NODE_INTERSECT
diff --git a/intern/cycles/kernel/bvh/qbvh_volume_all.h b/intern/cycles/kernel/bvh/qbvh_volume_all.h
index 63d79b6fe34..eddc48c487e 100644
--- a/intern/cycles/kernel/bvh/qbvh_volume_all.h
+++ b/intern/cycles/kernel/bvh/qbvh_volume_all.h
@@ -34,405 +34,411 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
                                              const uint max_hits,
                                              const uint visibility)
 {
-	/* TODO(sergey):
-	 * - Test if pushing distance on the stack helps.
-	 * - Likely and unlikely for if() statements.
-	 * - Test restrict attribute for pointers.
-	 */
-
-	/* Traversal stack in CUDA thread-local memory. */
-	QBVHStackItem traversal_stack[BVH_QSTACK_SIZE];
-	traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
-
-	/* Traversal variables in registers. */
-	int stack_ptr = 0;
-	int node_addr = kernel_data.bvh.root;
-
-	/* Ray parameters in registers. */
-	const float tmax = ray->t;
-	float3 P = ray->P;
-	float3 dir = bvh_clamp_direction(ray->D);
-	float3 idir = bvh_inverse_direction(dir);
-	int object = OBJECT_NONE;
-	float isect_t = tmax;
+  /* TODO(sergey):
+   * - Test if pushing distance on the stack helps.
+   * - Likely and unlikely for if() statements.
+   * - Test restrict attribute for pointers.
+   */
+
+  /* Traversal stack in CUDA thread-local memory. */
+  QBVHStackItem traversal_stack[BVH_QSTACK_SIZE];
+  traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
+
+  /* Traversal variables in registers. */
+  int stack_ptr = 0;
+  int node_addr = kernel_data.bvh.root;
+
+  /* Ray parameters in registers. */
+  const float tmax = ray->t;
+  float3 P = ray->P;
+  float3 dir = bvh_clamp_direction(ray->D);
+  float3 idir = bvh_inverse_direction(dir);
+  int object = OBJECT_NONE;
+  float isect_t = tmax;
 
 #if BVH_FEATURE(BVH_MOTION)
-	Transform ob_itfm;
+  Transform ob_itfm;
 #endif
 
-	uint num_hits = 0;
-	isect_array->t = tmax;
+  uint num_hits = 0;
+  isect_array->t = tmax;
 
 #if BVH_FEATURE(BVH_INSTANCING)
-	int num_hits_in_instance = 0;
+  int num_hits_in_instance = 0;
 #endif
 
-	ssef tnear(0.0f), tfar(isect_t);
+  ssef tnear(0.0f), tfar(isect_t);
 #if BVH_FEATURE(BVH_HAIR)
-	sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z));
+  sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z));
 #endif
-	sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z));
+  sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z));
 
 #ifdef __KERNEL_AVX2__
-	float3 P_idir = P*idir;
-	sse3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
+  float3 P_idir = P * idir;
+  sse3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
 #endif
 #if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
-	sse3f org4(ssef(P.x), ssef(P.y), ssef(P.z));
+  sse3f org4(ssef(P.x), ssef(P.y), ssef(P.z));
 #endif
 
-	/* Offsets to select the side that becomes the lower or upper bound. */
-	int near_x, near_y, near_z;
-	int far_x, far_y, far_z;
-	qbvh_near_far_idx_calc(idir,
-	                       &near_x, &near_y, &near_z,
-	                       &far_x, &far_y, &far_z);
+  /* Offsets to select the side that becomes the lower or upper bound. */
+  int near_x, near_y, near_z;
+  int far_x, far_y, far_z;
+  qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
 
-	/* Traversal loop. */
-	do {
-		do {
-			/* Traverse internal nodes. */
-			while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
-				float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
+  /* Traversal loop. */
+  do {
+    do {
+      /* Traverse internal nodes. */
+      while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
+        float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
 
 #ifdef __VISIBILITY_FLAG__
-				if((__float_as_uint(inodes.x) & visibility) == 0) {
-					/* Pop. */
-					node_addr = traversal_stack[stack_ptr].addr;
-					--stack_ptr;
-					continue;
-				}
+        if ((__float_as_uint(inodes.x) & visibility) == 0) {
+          /* Pop. */
+          node_addr = traversal_stack[stack_ptr].addr;
+          --stack_ptr;
+          continue;
+        }
 #endif
 
-				ssef dist;
-				int child_mask = NODE_INTERSECT(kg,
-				                                tnear,
-				                                tfar,
+        ssef dist;
+        int child_mask = NODE_INTERSECT(kg,
+                                        tnear,
+                                        tfar,
 #ifdef __KERNEL_AVX2__
-				                                P_idir4,
+                                        P_idir4,
 #endif
 #if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
-				                                org4,
+                                        org4,
 #endif
 #if BVH_FEATURE(BVH_HAIR)
-				                                dir4,
+                                        dir4,
 #endif
-				                                idir4,
-				                                near_x, near_y, near_z,
-				                                far_x, far_y, far_z,
-				                                node_addr,
-				                                &dist);
-
-				if(child_mask != 0) {
-					float4 cnodes;
+                                        idir4,
+                                        near_x,
+                                        near_y,
+                                        near_z,
+                                        far_x,
+                                        far_y,
+                                        far_z,
+                                        node_addr,
+                                        &dist);
+
+        if (child_mask != 0) {
+          float4 cnodes;
 #if BVH_FEATURE(BVH_HAIR)
-					if(__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
-						cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+13);
-					}
-					else
+          if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
+            cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 13);
+          }
+          else
 #endif
-					{
-						cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+7);
-					}
-
-					/* One child is hit, continue with that child. */
-					int r = __bscf(child_mask);
-					if(child_mask == 0) {
-						node_addr = __float_as_int(cnodes[r]);
-						continue;
-					}
-
-					/* Two children are hit, push far child, and continue with
-					 * closer child.
-					 */
-					int c0 = __float_as_int(cnodes[r]);
-					float d0 = ((float*)&dist)[r];
-					r = __bscf(child_mask);
-					int c1 = __float_as_int(cnodes[r]);
-					float d1 = ((float*)&dist)[r];
-					if(child_mask == 0) {
-						if(d1 < d0) {
-							node_addr = c1;
-							++stack_ptr;
-							kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
-							traversal_stack[stack_ptr].addr = c0;
-							traversal_stack[stack_ptr].dist = d0;
-							continue;
-						}
-						else {
-							node_addr = c0;
-							++stack_ptr;
-							kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
-							traversal_stack[stack_ptr].addr = c1;
-							traversal_stack[stack_ptr].dist = d1;
-							continue;
-						}
-					}
-
-					/* Here starts the slow path for 3 or 4 hit children. We push
-					 * all nodes onto the stack to sort them there.
-					 */
-					++stack_ptr;
-					kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
-					traversal_stack[stack_ptr].addr = c1;
-					traversal_stack[stack_ptr].dist = d1;
-					++stack_ptr;
-					kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
-					traversal_stack[stack_ptr].addr = c0;
-					traversal_stack[stack_ptr].dist = d0;
-
-					/* Three children are hit, push all onto stack and sort 3
-					 * stack items, continue with closest child.
-					 */
-					r = __bscf(child_mask);
-					int c2 = __float_as_int(cnodes[r]);
-					float d2 = ((float*)&dist)[r];
-					if(child_mask == 0) {
-						++stack_ptr;
-						kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
-						traversal_stack[stack_ptr].addr = c2;
-						traversal_stack[stack_ptr].dist = d2;
-						qbvh_stack_sort(&traversal_stack[stack_ptr],
-						                &traversal_stack[stack_ptr - 1],
-						                &traversal_stack[stack_ptr - 2]);
-						node_addr = traversal_stack[stack_ptr].addr;
-						--stack_ptr;
-						continue;
-					}
-
-					/* Four children are hit, push all onto stack and sort 4
-					 * stack items, continue with closest child.
-					 */
-					r = __bscf(child_mask);
-					int c3 = __float_as_int(cnodes[r]);
-					float d3 = ((float*)&dist)[r];
-					++stack_ptr;
-					kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
-					traversal_stack[stack_ptr].addr = c3;
-					traversal_stack[stack_ptr].dist = d3;
-					++stack_ptr;
-					kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
-					traversal_stack[stack_ptr].addr = c2;
-					traversal_stack[stack_ptr].dist = d2;
-					qbvh_stack_sort(&traversal_stack[stack_ptr],
-					                &traversal_stack[stack_ptr - 1],
-					                &traversal_stack[stack_ptr - 2],
-					                &traversal_stack[stack_ptr - 3]);
-				}
-
-				node_addr = traversal_stack[stack_ptr].addr;
-				--stack_ptr;
-			}
-
-			/* If node is leaf, fetch triangle list. */
-			if(node_addr < 0) {
-				float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1));
-
-				if((__float_as_uint(leaf.z) & visibility) == 0) {
-					/* Pop. */
-					node_addr = traversal_stack[stack_ptr].addr;
-					--stack_ptr;
-					continue;
-				}
-
-				int prim_addr = __float_as_int(leaf.x);
+          {
+            cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 7);
+          }
+
+          /* One child is hit, continue with that child. */
+          int r = __bscf(child_mask);
+          if (child_mask == 0) {
+            node_addr = __float_as_int(cnodes[r]);
+            continue;
+          }
+
+          /* Two children are hit, push far child, and continue with
+           * closer child.
+           */
+          int c0 = __float_as_int(cnodes[r]);
+          float d0 = ((float *)&dist)[r];
+          r = __bscf(child_mask);
+          int c1 = __float_as_int(cnodes[r]);
+          float d1 = ((float *)&dist)[r];
+          if (child_mask == 0) {
+            if (d1 < d0) {
+              node_addr = c1;
+              ++stack_ptr;
+              kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+              traversal_stack[stack_ptr].addr = c0;
+              traversal_stack[stack_ptr].dist = d0;
+              continue;
+            }
+            else {
+              node_addr = c0;
+              ++stack_ptr;
+              kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+              traversal_stack[stack_ptr].addr = c1;
+              traversal_stack[stack_ptr].dist = d1;
+              continue;
+            }
+          }
+
+          /* Here starts the slow path for 3 or 4 hit children. We push
+           * all nodes onto the stack to sort them there.
+           */
+          ++stack_ptr;
+          kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+          traversal_stack[stack_ptr].addr = c1;
+          traversal_stack[stack_ptr].dist = d1;
+          ++stack_ptr;
+          kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+          traversal_stack[stack_ptr].addr = c0;
+          traversal_stack[stack_ptr].dist = d0;
+
+          /* Three children are hit, push all onto stack and sort 3
+           * stack items, continue with closest child.
+           */
+          r = __bscf(child_mask);
+          int c2 = __float_as_int(cnodes[r]);
+          float d2 = ((float *)&dist)[r];
+          if (child_mask == 0) {
+            ++stack_ptr;
+            kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+            traversal_stack[stack_ptr].addr = c2;
+            traversal_stack[stack_ptr].dist = d2;
+            qbvh_stack_sort(&traversal_stack[stack_ptr],
+                            &traversal_stack[stack_ptr - 1],
+                            &traversal_stack[stack_ptr - 2]);
+            node_addr = traversal_stack[stack_ptr].addr;
+            --stack_ptr;
+            continue;
+          }
+
+          /* Four children are hit, push all onto stack and sort 4
+           * stack items, continue with closest child.
+           */
+          r = __bscf(child_mask);
+          int c3 = __float_as_int(cnodes[r]);
+          float d3 = ((float *)&dist)[r];
+          ++stack_ptr;
+          kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+          traversal_stack[stack_ptr].addr = c3;
+          traversal_stack[stack_ptr].dist = d3;
+          ++stack_ptr;
+          kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+          traversal_stack[stack_ptr].addr = c2;
+          traversal_stack[stack_ptr].dist = d2;
+          qbvh_stack_sort(&traversal_stack[stack_ptr],
+                          &traversal_stack[stack_ptr - 1],
+                          &traversal_stack[stack_ptr - 2],
+                          &traversal_stack[stack_ptr - 3]);
+        }
+
+        node_addr = traversal_stack[stack_ptr].addr;
+        --stack_ptr;
+      }
+
+      /* If node is leaf, fetch triangle list. */
+      if (node_addr < 0) {
+        float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
+
+        if ((__float_as_uint(leaf.z) & visibility) == 0) {
+          /* Pop. */
+          node_addr = traversal_stack[stack_ptr].addr;
+          --stack_ptr;
+          continue;
+        }
+
+        int prim_addr = __float_as_int(leaf.x);
 
 #if BVH_FEATURE(BVH_INSTANCING)
-				if(prim_addr >= 0) {
+        if (prim_addr >= 0) {
 #endif
-					int prim_addr2 = __float_as_int(leaf.y);
-					const uint type = __float_as_int(leaf.w);
-					const uint p_type = type & PRIMITIVE_ALL;
-					bool hit;
-
-					/* Pop. */
-					node_addr = traversal_stack[stack_ptr].addr;
-					--stack_ptr;
-
-					/* Primitive intersection. */
-					switch(p_type) {
-						case PRIMITIVE_TRIANGLE: {
-							for(; prim_addr < prim_addr2; prim_addr++) {
-								kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
-								/* Only primitives from volume object. */
-								uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, prim_addr): object;
-								int object_flag = kernel_tex_fetch(__object_flag, tri_object);
-								if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
-									continue;
-								}
-								/* Intersect ray against primitive. */
-								hit = triangle_intersect(kg, isect_array, P, dir, visibility, object, prim_addr);
-								if(hit) {
-									/* Move on to next entry in intersections array. */
-									isect_array++;
-									num_hits++;
+          int prim_addr2 = __float_as_int(leaf.y);
+          const uint type = __float_as_int(leaf.w);
+          const uint p_type = type & PRIMITIVE_ALL;
+          bool hit;
+
+          /* Pop. */
+          node_addr = traversal_stack[stack_ptr].addr;
+          --stack_ptr;
+
+          /* Primitive intersection. */
+          switch (p_type) {
+            case PRIMITIVE_TRIANGLE: {
+              for (; prim_addr < prim_addr2; prim_addr++) {
+                kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
+                /* Only primitives from volume object. */
+                uint tri_object = (object == OBJECT_NONE) ?
+                                      kernel_tex_fetch(__prim_object, prim_addr) :
+                                      object;
+                int object_flag = kernel_tex_fetch(__object_flag, tri_object);
+                if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
+                  continue;
+                }
+                /* Intersect ray against primitive. */
+                hit = triangle_intersect(kg, isect_array, P, dir, visibility, object, prim_addr);
+                if (hit) {
+                  /* Move on to next entry in intersections array. */
+                  isect_array++;
+                  num_hits++;
 #if BVH_FEATURE(BVH_INSTANCING)
-									num_hits_in_instance++;
+                  num_hits_in_instance++;
 #endif
-									isect_array->t = isect_t;
-									if(num_hits == max_hits) {
+                  isect_array->t = isect_t;
+                  if (num_hits == max_hits) {
 #if BVH_FEATURE(BVH_INSTANCING)
-										if(object != OBJECT_NONE) {
+                    if (object != OBJECT_NONE) {
 #  if BVH_FEATURE(BVH_MOTION)
-											float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir));
+                      float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir));
 #  else
-											Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
-											float t_fac = 1.0f / len(transform_direction(&itfm, dir));
+                      Transform itfm = object_fetch_transform(
+                          kg, object, OBJECT_INVERSE_TRANSFORM);
+                      float t_fac = 1.0f / len(transform_direction(&itfm, dir));
 #  endif
-											for(int i = 0; i < num_hits_in_instance; i++) {
-												(isect_array-i-1)->t *= t_fac;
-											}
-										}
-#endif  /* BVH_FEATURE(BVH_INSTANCING) */
-										return num_hits;
-									}
-								}
-							}
-							break;
-						}
+                      for (int i = 0; i < num_hits_in_instance; i++) {
+                        (isect_array - i - 1)->t *= t_fac;
+                      }
+                    }
+#endif /* BVH_FEATURE(BVH_INSTANCING) */
+                    return num_hits;
+                  }
+                }
+              }
+              break;
+            }
 #if BVH_FEATURE(BVH_MOTION)
-						case PRIMITIVE_MOTION_TRIANGLE: {
-							for(; prim_addr < prim_addr2; prim_addr++) {
-								kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
-								/* Only primitives from volume object. */
-								uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, prim_addr): object;
-								int object_flag = kernel_tex_fetch(__object_flag, tri_object);
-								if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
-									continue;
-								}
-								/* Intersect ray against primitive. */
-								hit = motion_triangle_intersect(kg, isect_array, P, dir, ray->time, visibility, object, prim_addr);
-								if(hit) {
-									/* Move on to next entry in intersections array. */
-									isect_array++;
-									num_hits++;
+            case PRIMITIVE_MOTION_TRIANGLE: {
+              for (; prim_addr < prim_addr2; prim_addr++) {
+                kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
+                /* Only primitives from volume object. */
+                uint tri_object = (object == OBJECT_NONE) ?
+                                      kernel_tex_fetch(__prim_object, prim_addr) :
+                                      object;
+                int object_flag = kernel_tex_fetch(__object_flag, tri_object);
+                if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
+                  continue;
+                }
+                /* Intersect ray against primitive. */
+                hit = motion_triangle_intersect(
+                    kg, isect_array, P, dir, ray->time, visibility, object, prim_addr);
+                if (hit) {
+                  /* Move on to next entry in intersections array. */
+                  isect_array++;
+                  num_hits++;
 #  if BVH_FEATURE(BVH_INSTANCING)
-									num_hits_in_instance++;
+                  num_hits_in_instance++;
 #  endif
-									isect_array->t = isect_t;
-									if(num_hits == max_hits) {
+                  isect_array->t = isect_t;
+                  if (num_hits == max_hits) {
 #  if BVH_FEATURE(BVH_INSTANCING)
-										if(object != OBJECT_NONE) {
+                    if (object != OBJECT_NONE) {
 #    if BVH_FEATURE(BVH_MOTION)
-											float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir));
+                      float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir));
 #    else
-											Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
-											float t_fac = 1.0f / len(transform_direction(&itfm, dir));
+                      Transform itfm = object_fetch_transform(
+                          kg, object, OBJECT_INVERSE_TRANSFORM);
+                      float t_fac = 1.0f / len(transform_direction(&itfm, dir));
 #    endif
-											for(int i = 0; i < num_hits_in_instance; i++) {
-												(isect_array-i-1)->t *= t_fac;
-											}
-										}
-#  endif  /* BVH_FEATURE(BVH_INSTANCING) */
-										return num_hits;
-									}
-								}
-							}
-							break;
-						}
+                      for (int i = 0; i < num_hits_in_instance; i++) {
+                        (isect_array - i - 1)->t *= t_fac;
+                      }
+                    }
+#  endif /* BVH_FEATURE(BVH_INSTANCING) */
+                    return num_hits;
+                  }
+                }
+              }
+              break;
+            }
 #endif
-					}
-				}
+          }
+        }
 #if BVH_FEATURE(BVH_INSTANCING)
-				else {
-					/* Instance push. */
-					object = kernel_tex_fetch(__prim_object, -prim_addr-1);
-					int object_flag = kernel_tex_fetch(__object_flag, object);
-					if(object_flag & SD_OBJECT_HAS_VOLUME) {
+        else {
+          /* Instance push. */
+          object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
+          int object_flag = kernel_tex_fetch(__object_flag, object);
+          if (object_flag & SD_OBJECT_HAS_VOLUME) {
 #  if BVH_FEATURE(BVH_MOTION)
-						isect_t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
+            isect_t = bvh_instance_motion_push(
+                kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
 #  else
-						isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
+            isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
 #  endif
 
-						qbvh_near_far_idx_calc(idir,
-						                       &near_x, &near_y, &near_z,
-						                       &far_x, &far_y, &far_z);
-						tfar = ssef(isect_t);
-						idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
+            qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
+            tfar = ssef(isect_t);
+            idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
 #  if BVH_FEATURE(BVH_HAIR)
-						dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
+            dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
 #  endif
 #  ifdef __KERNEL_AVX2__
-						P_idir = P*idir;
-						P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
+            P_idir = P * idir;
+            P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
 #  endif
 #  if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
-						org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
+            org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
 #  endif
 
-						num_hits_in_instance = 0;
-						isect_array->t = isect_t;
-
-						++stack_ptr;
-						kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
-						traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
-
-						node_addr = kernel_tex_fetch(__object_node, object);
-					}
-					else {
-						/* Pop. */
-						object = OBJECT_NONE;
-						node_addr = traversal_stack[stack_ptr].addr;
-						--stack_ptr;
-					}
-				}
-			}
-#endif  /* FEATURE(BVH_INSTANCING) */
-		} while(node_addr != ENTRYPOINT_SENTINEL);
+            num_hits_in_instance = 0;
+            isect_array->t = isect_t;
+
+            ++stack_ptr;
+            kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
+            traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
+
+            node_addr = kernel_tex_fetch(__object_node, object);
+          }
+          else {
+            /* Pop. */
+            object = OBJECT_NONE;
+            node_addr = traversal_stack[stack_ptr].addr;
+            --stack_ptr;
+          }
+        }
+      }
+#endif /* FEATURE(BVH_INSTANCING) */
+    } while (node_addr != ENTRYPOINT_SENTINEL);
 
 #if BVH_FEATURE(BVH_INSTANCING)
-		if(stack_ptr >= 0) {
-			kernel_assert(object != OBJECT_NONE);
+    if (stack_ptr >= 0) {
+      kernel_assert(object != OBJECT_NONE);
 
-			/* Instance pop. */
-			if(num_hits_in_instance) {
-				float t_fac;
+      /* Instance pop. */
+      if (num_hits_in_instance) {
+        float t_fac;
 #  if BVH_FEATURE(BVH_MOTION)
-				bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm);
+        bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm);
 #  else
-				bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
+        bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
 #  endif
-				/* Scale isect->t to adjust for instancing. */
-				for(int i = 0; i < num_hits_in_instance; i++) {
-					(isect_array-i-1)->t *= t_fac;
-				}
-			}
-			else {
+        /* Scale isect->t to adjust for instancing. */
+        for (int i = 0; i < num_hits_in_instance; i++) {
+          (isect_array - i - 1)->t *= t_fac;
+        }
+      }
+      else {
 #  if BVH_FEATURE(BVH_MOTION)
-				bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm);
+        bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm);
 #  else
-				bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
+        bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
 #  endif
-			}
+      }
 
-			isect_t = tmax;
-			isect_array->t = isect_t;
+      isect_t = tmax;
+      isect_array->t = isect_t;
 
-			qbvh_near_far_idx_calc(idir,
-			                       &near_x, &near_y, &near_z,
-			                       &far_x, &far_y, &far_z);
-			tfar = ssef(isect_t);
+      qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
+      tfar = ssef(isect_t);
 #  if BVH_FEATURE(BVH_HAIR)
-			dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
+      dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
 #  endif
-			idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
+      idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
 #  ifdef __KERNEL_AVX2__
-			P_idir = P*idir;
-			P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
+      P_idir = P * idir;
+      P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
 #  endif
 #  if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
-			org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
+      org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
 #  endif
 
-			object = OBJECT_NONE;
-			node_addr = traversal_stack[stack_ptr].addr;
-			--stack_ptr;
-		}
-#endif  /* FEATURE(BVH_INSTANCING) */
-	} while(node_addr != ENTRYPOINT_SENTINEL);
+      object = OBJECT_NONE;
+      node_addr = traversal_stack[stack_ptr].addr;
+      --stack_ptr;
+    }
+#endif /* FEATURE(BVH_INSTANCING) */
+  } while (node_addr != ENTRYPOINT_SENTINEL);
 
-	return num_hits;
+  return num_hits;
 }
 
 #undef NODE_INTERSECT
diff --git a/intern/cycles/kernel/closure/alloc.h b/intern/cycles/kernel/closure/alloc.h
index acccba9ecec..341d1e16eb1 100644
--- a/intern/cycles/kernel/closure/alloc.h
+++ b/intern/cycles/kernel/closure/alloc.h
@@ -18,69 +18,72 @@ CCL_NAMESPACE_BEGIN
 
 ccl_device ShaderClosure *closure_alloc(ShaderData *sd, int size, ClosureType type, float3 weight)
 {
-	kernel_assert(size <= sizeof(ShaderClosure));
+  kernel_assert(size <= sizeof(ShaderClosure));
 
-	if(sd->num_closure_left == 0)
-		return NULL;
+  if (sd->num_closure_left == 0)
+    return NULL;
 
-	ShaderClosure *sc = &sd->closure[sd->num_closure];
+  ShaderClosure *sc = &sd->closure[sd->num_closure];
 
-	sc->type = type;
-	sc->weight = weight;
+  sc->type = type;
+  sc->weight = weight;
 
-	sd->num_closure++;
-	sd->num_closure_left--;
+  sd->num_closure++;
+  sd->num_closure_left--;
 
-	return sc;
+  return sc;
 }
 
 ccl_device ccl_addr_space void *closure_alloc_extra(ShaderData *sd, int size)
 {
-	/* Allocate extra space for closure that need more parameters. We allocate
-	 * in chunks of sizeof(ShaderClosure) starting from the end of the closure
-	 * array.
-	 *
-	 * This lets us keep the same fast array iteration over closures, as we
-	 * found linked list iteration and iteration with skipping to be slower. */
-	int num_extra = ((size + sizeof(ShaderClosure) - 1) / sizeof(ShaderClosure));
-
-	if(num_extra > sd->num_closure_left) {
-		/* Remove previous closure if it was allocated. */
-		sd->num_closure--;
-		sd->num_closure_left++;
-		return NULL;
-	}
-
-	sd->num_closure_left -= num_extra;
-	return (ccl_addr_space void*)(sd->closure + sd->num_closure + sd->num_closure_left);
+  /* Allocate extra space for closure that need more parameters. We allocate
+   * in chunks of sizeof(ShaderClosure) starting from the end of the closure
+   * array.
+   *
+   * This lets us keep the same fast array iteration over closures, as we
+   * found linked list iteration and iteration with skipping to be slower. */
+  int num_extra = ((size + sizeof(ShaderClosure) - 1) / sizeof(ShaderClosure));
+
+  if (num_extra > sd->num_closure_left) {
+    /* Remove previous closure if it was allocated. */
+    sd->num_closure--;
+    sd->num_closure_left++;
+    return NULL;
+  }
+
+  sd->num_closure_left -= num_extra;
+  return (ccl_addr_space void *)(sd->closure + sd->num_closure + sd->num_closure_left);
 }
 
 ccl_device_inline ShaderClosure *bsdf_alloc(ShaderData *sd, int size, float3 weight)
 {
-	ShaderClosure *sc = closure_alloc(sd, size, CLOSURE_NONE_ID, weight);
+  ShaderClosure *sc = closure_alloc(sd, size, CLOSURE_NONE_ID, weight);
 
-	if(sc == NULL)
-		return NULL;
+  if (sc == NULL)
+    return NULL;
 
-	float sample_weight = fabsf(average(weight));
-	sc->sample_weight = sample_weight;
-	return (sample_weight >= CLOSURE_WEIGHT_CUTOFF) ? sc : NULL;
+  float sample_weight = fabsf(average(weight));
+  sc->sample_weight = sample_weight;
+  return (sample_weight >= CLOSURE_WEIGHT_CUTOFF) ? sc : NULL;
 }
 
 #ifdef __OSL__
-ccl_device_inline ShaderClosure *bsdf_alloc_osl(ShaderData *sd, int size, float3 weight, void *data)
+ccl_device_inline ShaderClosure *bsdf_alloc_osl(ShaderData *sd,
+                                                int size,
+                                                float3 weight,
+                                                void *data)
 {
-	ShaderClosure *sc = closure_alloc(sd, size, CLOSURE_NONE_ID, weight);
+  ShaderClosure *sc = closure_alloc(sd, size, CLOSURE_NONE_ID, weight);
 
-	if(!sc)
-		return NULL;
+  if (!sc)
+    return NULL;
 
-	memcpy((void *)sc, data, size);
+  memcpy((void *)sc, data, size);
 
-	float sample_weight = fabsf(average(weight));
-	sc->weight = weight;
-	sc->sample_weight = sample_weight;
-	return (sample_weight >= CLOSURE_WEIGHT_CUTOFF) ? sc : NULL;
+  float sample_weight = fabsf(average(weight));
+  sc->weight = weight;
+  sc->sample_weight = sample_weight;
+  return (sample_weight >= CLOSURE_WEIGHT_CUTOFF) ? sc : NULL;
 }
 #endif
 
diff --git a/intern/cycles/kernel/closure/bsdf.h b/intern/cycles/kernel/closure/bsdf.h
index 3a9629ea9d7..5e26f90a878 100644
--- a/intern/cycles/kernel/closure/bsdf.h
+++ b/intern/cycles/kernel/closure/bsdf.h
@@ -39,38 +39,38 @@ CCL_NAMESPACE_BEGIN
  * 0 for singular closures and 1 otherwise. */
 ccl_device_inline float bsdf_get_specular_roughness_squared(const ShaderClosure *sc)
 {
-	if(CLOSURE_IS_BSDF_SINGULAR(sc->type)) {
-		return 0.0f;
-	}
+  if (CLOSURE_IS_BSDF_SINGULAR(sc->type)) {
+    return 0.0f;
+  }
 
-	if(CLOSURE_IS_BSDF_MICROFACET(sc->type)) {
-		MicrofacetBsdf *bsdf = (MicrofacetBsdf*)sc;
-		return bsdf->alpha_x*bsdf->alpha_y;
-	}
+  if (CLOSURE_IS_BSDF_MICROFACET(sc->type)) {
+    MicrofacetBsdf *bsdf = (MicrofacetBsdf *)sc;
+    return bsdf->alpha_x * bsdf->alpha_y;
+  }
 
-	return 1.0f;
+  return 1.0f;
 }
 
 ccl_device_inline float bsdf_get_roughness_squared(const ShaderClosure *sc)
 {
-	/* This version includes diffuse, mainly for baking Principled BSDF
-	 * where specular and metallic zero otherwise does not bake the
-	 * specified roughness parameter. */
-	if(sc->type == CLOSURE_BSDF_OREN_NAYAR_ID) {
-		OrenNayarBsdf *bsdf = (OrenNayarBsdf*)sc;
-		return sqr(sqr(bsdf->roughness));
-	}
+  /* This version includes diffuse, mainly for baking Principled BSDF
+   * where specular and metallic zero otherwise does not bake the
+   * specified roughness parameter. */
+  if (sc->type == CLOSURE_BSDF_OREN_NAYAR_ID) {
+    OrenNayarBsdf *bsdf = (OrenNayarBsdf *)sc;
+    return sqr(sqr(bsdf->roughness));
+  }
 
-	if(sc->type == CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID) {
-		PrincipledDiffuseBsdf *bsdf = (PrincipledDiffuseBsdf*)sc;
-		return sqr(sqr(bsdf->roughness));
-	}
+  if (sc->type == CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID) {
+    PrincipledDiffuseBsdf *bsdf = (PrincipledDiffuseBsdf *)sc;
+    return sqr(sqr(bsdf->roughness));
+  }
 
-	if(CLOSURE_IS_BSDF_DIFFUSE(sc->type)) {
-		return 0.0f;
-	}
+  if (CLOSURE_IS_BSDF_DIFFUSE(sc->type)) {
+    return 0.0f;
+  }
 
-	return bsdf_get_specular_roughness_squared(sc);
+  return bsdf_get_specular_roughness_squared(sc);
 }
 
 ccl_device_inline int bsdf_sample(KernelGlobals *kg,
@@ -83,133 +83,349 @@ ccl_device_inline int bsdf_sample(KernelGlobals *kg,
                                   differential3 *domega_in,
                                   float *pdf)
 {
-	int label;
+  int label;
 
-	switch(sc->type) {
-		case CLOSURE_BSDF_DIFFUSE_ID:
-		case CLOSURE_BSDF_BSSRDF_ID:
-			label = bsdf_diffuse_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
-				eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
-			break;
+  switch (sc->type) {
+    case CLOSURE_BSDF_DIFFUSE_ID:
+    case CLOSURE_BSDF_BSSRDF_ID:
+      label = bsdf_diffuse_sample(sc,
+                                  sd->Ng,
+                                  sd->I,
+                                  sd->dI.dx,
+                                  sd->dI.dy,
+                                  randu,
+                                  randv,
+                                  eval,
+                                  omega_in,
+                                  &domega_in->dx,
+                                  &domega_in->dy,
+                                  pdf);
+      break;
 #ifdef __SVM__
-		case CLOSURE_BSDF_OREN_NAYAR_ID:
-			label = bsdf_oren_nayar_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
-				eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
-			break;
-#ifdef __OSL__
-		case CLOSURE_BSDF_PHONG_RAMP_ID:
-			label = bsdf_phong_ramp_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
-				eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
-			break;
-		case CLOSURE_BSDF_DIFFUSE_RAMP_ID:
-			label = bsdf_diffuse_ramp_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
-				eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
-			break;
-#endif
-		case CLOSURE_BSDF_TRANSLUCENT_ID:
-			label = bsdf_translucent_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
-				eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
-			break;
-		case CLOSURE_BSDF_REFLECTION_ID:
-			label = bsdf_reflection_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
-				eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
-			break;
-		case CLOSURE_BSDF_REFRACTION_ID:
-			label = bsdf_refraction_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
-				eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
-			break;
-		case CLOSURE_BSDF_TRANSPARENT_ID:
-			label = bsdf_transparent_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
-				eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
-			break;
-		case CLOSURE_BSDF_MICROFACET_GGX_ID:
-		case CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID:
-		case CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID:
-		case CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID:
-		case CLOSURE_BSDF_MICROFACET_GGX_ANISO_FRESNEL_ID:
-		case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID:
-			label = bsdf_microfacet_ggx_sample(kg, sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
-				eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
-			break;
-		case CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID:
-		case CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID:
-			label = bsdf_microfacet_multi_ggx_sample(kg, sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
-			        eval, omega_in,  &domega_in->dx, &domega_in->dy, pdf, &sd->lcg_state);
-			break;
-		case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID:
-		case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID:
-			label = bsdf_microfacet_multi_ggx_glass_sample(kg, sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
-			        eval, omega_in,  &domega_in->dx, &domega_in->dy, pdf, &sd->lcg_state);
-			break;
-		case CLOSURE_BSDF_MICROFACET_BECKMANN_ID:
-		case CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID:
-		case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID:
-			label = bsdf_microfacet_beckmann_sample(kg, sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
-				eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
-			break;
-		case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID:
-		case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID:
-			label = bsdf_ashikhmin_shirley_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
-				eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
-			break;
-		case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID:
-			label = bsdf_ashikhmin_velvet_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
-				eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
-			break;
-		case CLOSURE_BSDF_DIFFUSE_TOON_ID:
-			label = bsdf_diffuse_toon_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
-				eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
-			break;
-		case CLOSURE_BSDF_GLOSSY_TOON_ID:
-			label = bsdf_glossy_toon_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
-				eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
-			break;
-		case CLOSURE_BSDF_HAIR_REFLECTION_ID:
-			label = bsdf_hair_reflection_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
-				eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
-			break;
-		case CLOSURE_BSDF_HAIR_TRANSMISSION_ID:
-			label = bsdf_hair_transmission_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
-				eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
-			break;
-		case CLOSURE_BSDF_HAIR_PRINCIPLED_ID:
-			label = bsdf_principled_hair_sample(kg, sc, sd, randu, randv,
-				eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
-			break;
-#ifdef __PRINCIPLED__
-		case CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID:
-		case CLOSURE_BSDF_BSSRDF_PRINCIPLED_ID:
-			label = bsdf_principled_diffuse_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
-				eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
-			break;
-		case CLOSURE_BSDF_PRINCIPLED_SHEEN_ID:
-			label = bsdf_principled_sheen_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
-				eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
-			break;
-#endif  /* __PRINCIPLED__ */
+    case CLOSURE_BSDF_OREN_NAYAR_ID:
+      label = bsdf_oren_nayar_sample(sc,
+                                     sd->Ng,
+                                     sd->I,
+                                     sd->dI.dx,
+                                     sd->dI.dy,
+                                     randu,
+                                     randv,
+                                     eval,
+                                     omega_in,
+                                     &domega_in->dx,
+                                     &domega_in->dy,
+                                     pdf);
+      break;
+#  ifdef __OSL__
+    case CLOSURE_BSDF_PHONG_RAMP_ID:
+      label = bsdf_phong_ramp_sample(sc,
+                                     sd->Ng,
+                                     sd->I,
+                                     sd->dI.dx,
+                                     sd->dI.dy,
+                                     randu,
+                                     randv,
+                                     eval,
+                                     omega_in,
+                                     &domega_in->dx,
+                                     &domega_in->dy,
+                                     pdf);
+      break;
+    case CLOSURE_BSDF_DIFFUSE_RAMP_ID:
+      label = bsdf_diffuse_ramp_sample(sc,
+                                       sd->Ng,
+                                       sd->I,
+                                       sd->dI.dx,
+                                       sd->dI.dy,
+                                       randu,
+                                       randv,
+                                       eval,
+                                       omega_in,
+                                       &domega_in->dx,
+                                       &domega_in->dy,
+                                       pdf);
+      break;
+#  endif
+    case CLOSURE_BSDF_TRANSLUCENT_ID:
+      label = bsdf_translucent_sample(sc,
+                                      sd->Ng,
+                                      sd->I,
+                                      sd->dI.dx,
+                                      sd->dI.dy,
+                                      randu,
+                                      randv,
+                                      eval,
+                                      omega_in,
+                                      &domega_in->dx,
+                                      &domega_in->dy,
+                                      pdf);
+      break;
+    case CLOSURE_BSDF_REFLECTION_ID:
+      label = bsdf_reflection_sample(sc,
+                                     sd->Ng,
+                                     sd->I,
+                                     sd->dI.dx,
+                                     sd->dI.dy,
+                                     randu,
+                                     randv,
+                                     eval,
+                                     omega_in,
+                                     &domega_in->dx,
+                                     &domega_in->dy,
+                                     pdf);
+      break;
+    case CLOSURE_BSDF_REFRACTION_ID:
+      label = bsdf_refraction_sample(sc,
+                                     sd->Ng,
+                                     sd->I,
+                                     sd->dI.dx,
+                                     sd->dI.dy,
+                                     randu,
+                                     randv,
+                                     eval,
+                                     omega_in,
+                                     &domega_in->dx,
+                                     &domega_in->dy,
+                                     pdf);
+      break;
+    case CLOSURE_BSDF_TRANSPARENT_ID:
+      label = bsdf_transparent_sample(sc,
+                                      sd->Ng,
+                                      sd->I,
+                                      sd->dI.dx,
+                                      sd->dI.dy,
+                                      randu,
+                                      randv,
+                                      eval,
+                                      omega_in,
+                                      &domega_in->dx,
+                                      &domega_in->dy,
+                                      pdf);
+      break;
+    case CLOSURE_BSDF_MICROFACET_GGX_ID:
+    case CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID:
+    case CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID:
+    case CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID:
+    case CLOSURE_BSDF_MICROFACET_GGX_ANISO_FRESNEL_ID:
+    case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID:
+      label = bsdf_microfacet_ggx_sample(kg,
+                                         sc,
+                                         sd->Ng,
+                                         sd->I,
+                                         sd->dI.dx,
+                                         sd->dI.dy,
+                                         randu,
+                                         randv,
+                                         eval,
+                                         omega_in,
+                                         &domega_in->dx,
+                                         &domega_in->dy,
+                                         pdf);
+      break;
+    case CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID:
+    case CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID:
+      label = bsdf_microfacet_multi_ggx_sample(kg,
+                                               sc,
+                                               sd->Ng,
+                                               sd->I,
+                                               sd->dI.dx,
+                                               sd->dI.dy,
+                                               randu,
+                                               randv,
+                                               eval,
+                                               omega_in,
+                                               &domega_in->dx,
+                                               &domega_in->dy,
+                                               pdf,
+                                               &sd->lcg_state);
+      break;
+    case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID:
+    case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID:
+      label = bsdf_microfacet_multi_ggx_glass_sample(kg,
+                                                     sc,
+                                                     sd->Ng,
+                                                     sd->I,
+                                                     sd->dI.dx,
+                                                     sd->dI.dy,
+                                                     randu,
+                                                     randv,
+                                                     eval,
+                                                     omega_in,
+                                                     &domega_in->dx,
+                                                     &domega_in->dy,
+                                                     pdf,
+                                                     &sd->lcg_state);
+      break;
+    case CLOSURE_BSDF_MICROFACET_BECKMANN_ID:
+    case CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID:
+    case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID:
+      label = bsdf_microfacet_beckmann_sample(kg,
+                                              sc,
+                                              sd->Ng,
+                                              sd->I,
+                                              sd->dI.dx,
+                                              sd->dI.dy,
+                                              randu,
+                                              randv,
+                                              eval,
+                                              omega_in,
+                                              &domega_in->dx,
+                                              &domega_in->dy,
+                                              pdf);
+      break;
+    case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID:
+    case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID:
+      label = bsdf_ashikhmin_shirley_sample(sc,
+                                            sd->Ng,
+                                            sd->I,
+                                            sd->dI.dx,
+                                            sd->dI.dy,
+                                            randu,
+                                            randv,
+                                            eval,
+                                            omega_in,
+                                            &domega_in->dx,
+                                            &domega_in->dy,
+                                            pdf);
+      break;
+    case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID:
+      label = bsdf_ashikhmin_velvet_sample(sc,
+                                           sd->Ng,
+                                           sd->I,
+                                           sd->dI.dx,
+                                           sd->dI.dy,
+                                           randu,
+                                           randv,
+                                           eval,
+                                           omega_in,
+                                           &domega_in->dx,
+                                           &domega_in->dy,
+                                           pdf);
+      break;
+    case CLOSURE_BSDF_DIFFUSE_TOON_ID:
+      label = bsdf_diffuse_toon_sample(sc,
+                                       sd->Ng,
+                                       sd->I,
+                                       sd->dI.dx,
+                                       sd->dI.dy,
+                                       randu,
+                                       randv,
+                                       eval,
+                                       omega_in,
+                                       &domega_in->dx,
+                                       &domega_in->dy,
+                                       pdf);
+      break;
+    case CLOSURE_BSDF_GLOSSY_TOON_ID:
+      label = bsdf_glossy_toon_sample(sc,
+                                      sd->Ng,
+                                      sd->I,
+                                      sd->dI.dx,
+                                      sd->dI.dy,
+                                      randu,
+                                      randv,
+                                      eval,
+                                      omega_in,
+                                      &domega_in->dx,
+                                      &domega_in->dy,
+                                      pdf);
+      break;
+    case CLOSURE_BSDF_HAIR_REFLECTION_ID:
+      label = bsdf_hair_reflection_sample(sc,
+                                          sd->Ng,
+                                          sd->I,
+                                          sd->dI.dx,
+                                          sd->dI.dy,
+                                          randu,
+                                          randv,
+                                          eval,
+                                          omega_in,
+                                          &domega_in->dx,
+                                          &domega_in->dy,
+                                          pdf);
+      break;
+    case CLOSURE_BSDF_HAIR_TRANSMISSION_ID:
+      label = bsdf_hair_transmission_sample(sc,
+                                            sd->Ng,
+                                            sd->I,
+                                            sd->dI.dx,
+                                            sd->dI.dy,
+                                            randu,
+                                            randv,
+                                            eval,
+                                            omega_in,
+                                            &domega_in->dx,
+                                            &domega_in->dy,
+                                            pdf);
+      break;
+    case CLOSURE_BSDF_HAIR_PRINCIPLED_ID:
+      label = bsdf_principled_hair_sample(
+          kg, sc, sd, randu, randv, eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
+      break;
+#  ifdef __PRINCIPLED__
+    case CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID:
+    case CLOSURE_BSDF_BSSRDF_PRINCIPLED_ID:
+      label = bsdf_principled_diffuse_sample(sc,
+                                             sd->Ng,
+                                             sd->I,
+                                             sd->dI.dx,
+                                             sd->dI.dy,
+                                             randu,
+                                             randv,
+                                             eval,
+                                             omega_in,
+                                             &domega_in->dx,
+                                             &domega_in->dy,
+                                             pdf);
+      break;
+    case CLOSURE_BSDF_PRINCIPLED_SHEEN_ID:
+      label = bsdf_principled_sheen_sample(sc,
+                                           sd->Ng,
+                                           sd->I,
+                                           sd->dI.dx,
+                                           sd->dI.dy,
+                                           randu,
+                                           randv,
+                                           eval,
+                                           omega_in,
+                                           &domega_in->dx,
+                                           &domega_in->dy,
+                                           pdf);
+      break;
+#  endif /* __PRINCIPLED__ */
 #endif
 #ifdef __VOLUME__
-		case CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID:
-			label = volume_henyey_greenstein_sample(sc, sd->I, sd->dI.dx, sd->dI.dy, randu, randv, eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
-			break;
+    case CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID:
+      label = volume_henyey_greenstein_sample(sc,
+                                              sd->I,
+                                              sd->dI.dx,
+                                              sd->dI.dy,
+                                              randu,
+                                              randv,
+                                              eval,
+                                              omega_in,
+                                              &domega_in->dx,
+                                              &domega_in->dy,
+                                              pdf);
+      break;
 #endif
-		default:
-			label = LABEL_NONE;
-			break;
-	}
+    default:
+      label = LABEL_NONE;
+      break;
+  }
 
-	/* Test if BSDF sample should be treated as transparent for background. */
-	if(label & LABEL_TRANSMIT) {
-		float threshold_squared = kernel_data.background.transparent_roughness_squared_threshold;
+  /* Test if BSDF sample should be treated as transparent for background. */
+  if (label & LABEL_TRANSMIT) {
+    float threshold_squared = kernel_data.background.transparent_roughness_squared_threshold;
 
-		if(threshold_squared >= 0.0f) {
-			if(bsdf_get_specular_roughness_squared(sc) <= threshold_squared) {
-				label |= LABEL_TRANSMIT_TRANSPARENT;
-			}
-		}
-	}
+    if (threshold_squared >= 0.0f) {
+      if (bsdf_get_specular_roughness_squared(sc) <= threshold_squared) {
+        label |= LABEL_TRANSMIT_TRANSPARENT;
+      }
+    }
+  }
 
-	return label;
+  return label;
 }
 
 #ifndef __KERNEL_CUDA__
@@ -217,285 +433,288 @@ ccl_device
 #else
 ccl_device_inline
 #endif
-float3 bsdf_eval(KernelGlobals *kg,
-                 ShaderData *sd,
-                 const ShaderClosure *sc,
-                 const float3 omega_in,
-                 float *pdf)
+    float3
+    bsdf_eval(KernelGlobals *kg,
+              ShaderData *sd,
+              const ShaderClosure *sc,
+              const float3 omega_in,
+              float *pdf)
 {
-	float3 eval;
+  float3 eval;
 
-	if(dot(sd->Ng, omega_in) >= 0.0f) {
-		switch(sc->type) {
-			case CLOSURE_BSDF_DIFFUSE_ID:
-			case CLOSURE_BSDF_BSSRDF_ID:
-				eval = bsdf_diffuse_eval_reflect(sc, sd->I, omega_in, pdf);
-				break;
+  if (dot(sd->Ng, omega_in) >= 0.0f) {
+    switch (sc->type) {
+      case CLOSURE_BSDF_DIFFUSE_ID:
+      case CLOSURE_BSDF_BSSRDF_ID:
+        eval = bsdf_diffuse_eval_reflect(sc, sd->I, omega_in, pdf);
+        break;
 #ifdef __SVM__
-			case CLOSURE_BSDF_OREN_NAYAR_ID:
-				eval = bsdf_oren_nayar_eval_reflect(sc, sd->I, omega_in, pdf);
-				break;
-#ifdef __OSL__
-			case CLOSURE_BSDF_PHONG_RAMP_ID:
-				eval = bsdf_phong_ramp_eval_reflect(sc, sd->I, omega_in, pdf);
-				break;
-			case CLOSURE_BSDF_DIFFUSE_RAMP_ID:
-				eval = bsdf_diffuse_ramp_eval_reflect(sc, sd->I, omega_in, pdf);
-				break;
-#endif
-			case CLOSURE_BSDF_TRANSLUCENT_ID:
-				eval = bsdf_translucent_eval_reflect(sc, sd->I, omega_in, pdf);
-				break;
-			case CLOSURE_BSDF_REFLECTION_ID:
-				eval = bsdf_reflection_eval_reflect(sc, sd->I, omega_in, pdf);
-				break;
-			case CLOSURE_BSDF_REFRACTION_ID:
-				eval = bsdf_refraction_eval_reflect(sc, sd->I, omega_in, pdf);
-				break;
-			case CLOSURE_BSDF_TRANSPARENT_ID:
-				eval = bsdf_transparent_eval_reflect(sc, sd->I, omega_in, pdf);
-				break;
-			case CLOSURE_BSDF_MICROFACET_GGX_ID:
-			case CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID:
-			case CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID:
-			case CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID:
-			case CLOSURE_BSDF_MICROFACET_GGX_ANISO_FRESNEL_ID:
-			case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID:
-				eval = bsdf_microfacet_ggx_eval_reflect(sc, sd->I, omega_in, pdf);
-				break;
-			case CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID:
-			case CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID:
-				eval = bsdf_microfacet_multi_ggx_eval_reflect(sc, sd->I, omega_in, pdf, &sd->lcg_state);
-				break;
-			case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID:
-			case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID:
-				eval = bsdf_microfacet_multi_ggx_glass_eval_reflect(sc, sd->I, omega_in, pdf, &sd->lcg_state);
-				break;
-			case CLOSURE_BSDF_MICROFACET_BECKMANN_ID:
-			case CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID:
-			case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID:
-				eval = bsdf_microfacet_beckmann_eval_reflect(sc, sd->I, omega_in, pdf);
-				break;
-			case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID:
-			case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID:
-				eval = bsdf_ashikhmin_shirley_eval_reflect(sc, sd->I, omega_in, pdf);
-				break;
-			case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID:
-				eval = bsdf_ashikhmin_velvet_eval_reflect(sc, sd->I, omega_in, pdf);
-				break;
-			case CLOSURE_BSDF_DIFFUSE_TOON_ID:
-				eval = bsdf_diffuse_toon_eval_reflect(sc, sd->I, omega_in, pdf);
-				break;
-			case CLOSURE_BSDF_GLOSSY_TOON_ID:
-				eval = bsdf_glossy_toon_eval_reflect(sc, sd->I, omega_in, pdf);
-				break;
-			case CLOSURE_BSDF_HAIR_PRINCIPLED_ID:
-				eval = bsdf_principled_hair_eval(kg, sd, sc, omega_in, pdf);
-				break;
-			case CLOSURE_BSDF_HAIR_REFLECTION_ID:
-				eval = bsdf_hair_reflection_eval_reflect(sc, sd->I, omega_in, pdf);
-				break;
-			case CLOSURE_BSDF_HAIR_TRANSMISSION_ID:
-				eval = bsdf_hair_transmission_eval_reflect(sc, sd->I, omega_in, pdf);
-				break;
-#ifdef __PRINCIPLED__
-			case CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID:
-			case CLOSURE_BSDF_BSSRDF_PRINCIPLED_ID:
-				eval = bsdf_principled_diffuse_eval_reflect(sc, sd->I, omega_in, pdf);
-				break;
-			case CLOSURE_BSDF_PRINCIPLED_SHEEN_ID:
-				eval = bsdf_principled_sheen_eval_reflect(sc, sd->I, omega_in, pdf);
-				break;
-#endif  /* __PRINCIPLED__ */
+      case CLOSURE_BSDF_OREN_NAYAR_ID:
+        eval = bsdf_oren_nayar_eval_reflect(sc, sd->I, omega_in, pdf);
+        break;
+#  ifdef __OSL__
+      case CLOSURE_BSDF_PHONG_RAMP_ID:
+        eval = bsdf_phong_ramp_eval_reflect(sc, sd->I, omega_in, pdf);
+        break;
+      case CLOSURE_BSDF_DIFFUSE_RAMP_ID:
+        eval = bsdf_diffuse_ramp_eval_reflect(sc, sd->I, omega_in, pdf);
+        break;
+#  endif
+      case CLOSURE_BSDF_TRANSLUCENT_ID:
+        eval = bsdf_translucent_eval_reflect(sc, sd->I, omega_in, pdf);
+        break;
+      case CLOSURE_BSDF_REFLECTION_ID:
+        eval = bsdf_reflection_eval_reflect(sc, sd->I, omega_in, pdf);
+        break;
+      case CLOSURE_BSDF_REFRACTION_ID:
+        eval = bsdf_refraction_eval_reflect(sc, sd->I, omega_in, pdf);
+        break;
+      case CLOSURE_BSDF_TRANSPARENT_ID:
+        eval = bsdf_transparent_eval_reflect(sc, sd->I, omega_in, pdf);
+        break;
+      case CLOSURE_BSDF_MICROFACET_GGX_ID:
+      case CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID:
+      case CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID:
+      case CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID:
+      case CLOSURE_BSDF_MICROFACET_GGX_ANISO_FRESNEL_ID:
+      case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID:
+        eval = bsdf_microfacet_ggx_eval_reflect(sc, sd->I, omega_in, pdf);
+        break;
+      case CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID:
+      case CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID:
+        eval = bsdf_microfacet_multi_ggx_eval_reflect(sc, sd->I, omega_in, pdf, &sd->lcg_state);
+        break;
+      case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID:
+      case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID:
+        eval = bsdf_microfacet_multi_ggx_glass_eval_reflect(
+            sc, sd->I, omega_in, pdf, &sd->lcg_state);
+        break;
+      case CLOSURE_BSDF_MICROFACET_BECKMANN_ID:
+      case CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID:
+      case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID:
+        eval = bsdf_microfacet_beckmann_eval_reflect(sc, sd->I, omega_in, pdf);
+        break;
+      case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID:
+      case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID:
+        eval = bsdf_ashikhmin_shirley_eval_reflect(sc, sd->I, omega_in, pdf);
+        break;
+      case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID:
+        eval = bsdf_ashikhmin_velvet_eval_reflect(sc, sd->I, omega_in, pdf);
+        break;
+      case CLOSURE_BSDF_DIFFUSE_TOON_ID:
+        eval = bsdf_diffuse_toon_eval_reflect(sc, sd->I, omega_in, pdf);
+        break;
+      case CLOSURE_BSDF_GLOSSY_TOON_ID:
+        eval = bsdf_glossy_toon_eval_reflect(sc, sd->I, omega_in, pdf);
+        break;
+      case CLOSURE_BSDF_HAIR_PRINCIPLED_ID:
+        eval = bsdf_principled_hair_eval(kg, sd, sc, omega_in, pdf);
+        break;
+      case CLOSURE_BSDF_HAIR_REFLECTION_ID:
+        eval = bsdf_hair_reflection_eval_reflect(sc, sd->I, omega_in, pdf);
+        break;
+      case CLOSURE_BSDF_HAIR_TRANSMISSION_ID:
+        eval = bsdf_hair_transmission_eval_reflect(sc, sd->I, omega_in, pdf);
+        break;
+#  ifdef __PRINCIPLED__
+      case CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID:
+      case CLOSURE_BSDF_BSSRDF_PRINCIPLED_ID:
+        eval = bsdf_principled_diffuse_eval_reflect(sc, sd->I, omega_in, pdf);
+        break;
+      case CLOSURE_BSDF_PRINCIPLED_SHEEN_ID:
+        eval = bsdf_principled_sheen_eval_reflect(sc, sd->I, omega_in, pdf);
+        break;
+#  endif /* __PRINCIPLED__ */
 #endif
 #ifdef __VOLUME__
-			case CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID:
-				eval = volume_henyey_greenstein_eval_phase(sc, sd->I, omega_in, pdf);
-				break;
+      case CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID:
+        eval = volume_henyey_greenstein_eval_phase(sc, sd->I, omega_in, pdf);
+        break;
 #endif
-			default:
-				eval = make_float3(0.0f, 0.0f, 0.0f);
-				break;
-		}
-	}
-	else {
-		switch(sc->type) {
-			case CLOSURE_BSDF_DIFFUSE_ID:
-			case CLOSURE_BSDF_BSSRDF_ID:
-				eval = bsdf_diffuse_eval_transmit(sc, sd->I, omega_in, pdf);
-				break;
+      default:
+        eval = make_float3(0.0f, 0.0f, 0.0f);
+        break;
+    }
+  }
+  else {
+    switch (sc->type) {
+      case CLOSURE_BSDF_DIFFUSE_ID:
+      case CLOSURE_BSDF_BSSRDF_ID:
+        eval = bsdf_diffuse_eval_transmit(sc, sd->I, omega_in, pdf);
+        break;
 #ifdef __SVM__
-			case CLOSURE_BSDF_OREN_NAYAR_ID:
-				eval = bsdf_oren_nayar_eval_transmit(sc, sd->I, omega_in, pdf);
-				break;
-			case CLOSURE_BSDF_TRANSLUCENT_ID:
-				eval = bsdf_translucent_eval_transmit(sc, sd->I, omega_in, pdf);
-				break;
-			case CLOSURE_BSDF_REFLECTION_ID:
-				eval = bsdf_reflection_eval_transmit(sc, sd->I, omega_in, pdf);
-				break;
-			case CLOSURE_BSDF_REFRACTION_ID:
-				eval = bsdf_refraction_eval_transmit(sc, sd->I, omega_in, pdf);
-				break;
-			case CLOSURE_BSDF_TRANSPARENT_ID:
-				eval = bsdf_transparent_eval_transmit(sc, sd->I, omega_in, pdf);
-				break;
-			case CLOSURE_BSDF_MICROFACET_GGX_ID:
-			case CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID:
-			case CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID:
-			case CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID:
-			case CLOSURE_BSDF_MICROFACET_GGX_ANISO_FRESNEL_ID:
-			case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID:
-				eval = bsdf_microfacet_ggx_eval_transmit(sc, sd->I, omega_in, pdf);
-				break;
-			case CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID:
-			case CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID:
-				eval = bsdf_microfacet_multi_ggx_eval_transmit(sc, sd->I, omega_in, pdf, &sd->lcg_state);
-				break;
-			case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID:
-			case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID:
-				eval = bsdf_microfacet_multi_ggx_glass_eval_transmit(sc, sd->I, omega_in, pdf, &sd->lcg_state);
-				break;
-			case CLOSURE_BSDF_MICROFACET_BECKMANN_ID:
-			case CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID:
-			case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID:
-				eval = bsdf_microfacet_beckmann_eval_transmit(sc, sd->I, omega_in, pdf);
-				break;
-			case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID:
-			case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID:
-				eval = bsdf_ashikhmin_shirley_eval_transmit(sc, sd->I, omega_in, pdf);
-				break;
-			case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID:
-				eval = bsdf_ashikhmin_velvet_eval_transmit(sc, sd->I, omega_in, pdf);
-				break;
-			case CLOSURE_BSDF_DIFFUSE_TOON_ID:
-				eval = bsdf_diffuse_toon_eval_transmit(sc, sd->I, omega_in, pdf);
-				break;
-			case CLOSURE_BSDF_GLOSSY_TOON_ID:
-				eval = bsdf_glossy_toon_eval_transmit(sc, sd->I, omega_in, pdf);
-				break;
-			case CLOSURE_BSDF_HAIR_PRINCIPLED_ID:
-				eval = bsdf_principled_hair_eval(kg, sd, sc, omega_in, pdf);
-				break;
-			case CLOSURE_BSDF_HAIR_REFLECTION_ID:
-				eval = bsdf_hair_reflection_eval_transmit(sc, sd->I, omega_in, pdf);
-				break;
-			case CLOSURE_BSDF_HAIR_TRANSMISSION_ID:
-				eval = bsdf_hair_transmission_eval_transmit(sc, sd->I, omega_in, pdf);
-				break;
-#ifdef __PRINCIPLED__
-			case CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID:
-			case CLOSURE_BSDF_BSSRDF_PRINCIPLED_ID:
-				eval = bsdf_principled_diffuse_eval_transmit(sc, sd->I, omega_in, pdf);
-				break;
-			case CLOSURE_BSDF_PRINCIPLED_SHEEN_ID:
-				eval = bsdf_principled_sheen_eval_transmit(sc, sd->I, omega_in, pdf);
-				break;
-#endif  /* __PRINCIPLED__ */
+      case CLOSURE_BSDF_OREN_NAYAR_ID:
+        eval = bsdf_oren_nayar_eval_transmit(sc, sd->I, omega_in, pdf);
+        break;
+      case CLOSURE_BSDF_TRANSLUCENT_ID:
+        eval = bsdf_translucent_eval_transmit(sc, sd->I, omega_in, pdf);
+        break;
+      case CLOSURE_BSDF_REFLECTION_ID:
+        eval = bsdf_reflection_eval_transmit(sc, sd->I, omega_in, pdf);
+        break;
+      case CLOSURE_BSDF_REFRACTION_ID:
+        eval = bsdf_refraction_eval_transmit(sc, sd->I, omega_in, pdf);
+        break;
+      case CLOSURE_BSDF_TRANSPARENT_ID:
+        eval = bsdf_transparent_eval_transmit(sc, sd->I, omega_in, pdf);
+        break;
+      case CLOSURE_BSDF_MICROFACET_GGX_ID:
+      case CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID:
+      case CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID:
+      case CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID:
+      case CLOSURE_BSDF_MICROFACET_GGX_ANISO_FRESNEL_ID:
+      case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID:
+        eval = bsdf_microfacet_ggx_eval_transmit(sc, sd->I, omega_in, pdf);
+        break;
+      case CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID:
+      case CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID:
+        eval = bsdf_microfacet_multi_ggx_eval_transmit(sc, sd->I, omega_in, pdf, &sd->lcg_state);
+        break;
+      case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID:
+      case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID:
+        eval = bsdf_microfacet_multi_ggx_glass_eval_transmit(
+            sc, sd->I, omega_in, pdf, &sd->lcg_state);
+        break;
+      case CLOSURE_BSDF_MICROFACET_BECKMANN_ID:
+      case CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID:
+      case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID:
+        eval = bsdf_microfacet_beckmann_eval_transmit(sc, sd->I, omega_in, pdf);
+        break;
+      case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID:
+      case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID:
+        eval = bsdf_ashikhmin_shirley_eval_transmit(sc, sd->I, omega_in, pdf);
+        break;
+      case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID:
+        eval = bsdf_ashikhmin_velvet_eval_transmit(sc, sd->I, omega_in, pdf);
+        break;
+      case CLOSURE_BSDF_DIFFUSE_TOON_ID:
+        eval = bsdf_diffuse_toon_eval_transmit(sc, sd->I, omega_in, pdf);
+        break;
+      case CLOSURE_BSDF_GLOSSY_TOON_ID:
+        eval = bsdf_glossy_toon_eval_transmit(sc, sd->I, omega_in, pdf);
+        break;
+      case CLOSURE_BSDF_HAIR_PRINCIPLED_ID:
+        eval = bsdf_principled_hair_eval(kg, sd, sc, omega_in, pdf);
+        break;
+      case CLOSURE_BSDF_HAIR_REFLECTION_ID:
+        eval = bsdf_hair_reflection_eval_transmit(sc, sd->I, omega_in, pdf);
+        break;
+      case CLOSURE_BSDF_HAIR_TRANSMISSION_ID:
+        eval = bsdf_hair_transmission_eval_transmit(sc, sd->I, omega_in, pdf);
+        break;
+#  ifdef __PRINCIPLED__
+      case CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID:
+      case CLOSURE_BSDF_BSSRDF_PRINCIPLED_ID:
+        eval = bsdf_principled_diffuse_eval_transmit(sc, sd->I, omega_in, pdf);
+        break;
+      case CLOSURE_BSDF_PRINCIPLED_SHEEN_ID:
+        eval = bsdf_principled_sheen_eval_transmit(sc, sd->I, omega_in, pdf);
+        break;
+#  endif /* __PRINCIPLED__ */
 #endif
 #ifdef __VOLUME__
-			case CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID:
-				eval = volume_henyey_greenstein_eval_phase(sc, sd->I, omega_in, pdf);
-				break;
+      case CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID:
+        eval = volume_henyey_greenstein_eval_phase(sc, sd->I, omega_in, pdf);
+        break;
 #endif
-			default:
-				eval = make_float3(0.0f, 0.0f, 0.0f);
-				break;
-		}
-	}
+      default:
+        eval = make_float3(0.0f, 0.0f, 0.0f);
+        break;
+    }
+  }
 
-	return eval;
+  return eval;
 }
 
 ccl_device void bsdf_blur(KernelGlobals *kg, ShaderClosure *sc, float roughness)
 {
-	/* ToDo: do we want to blur volume closures? */
+  /* ToDo: do we want to blur volume closures? */
 #ifdef __SVM__
-	switch(sc->type) {
-		case CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID:
-		case CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID:
-		case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID:
-		case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID:
-			bsdf_microfacet_multi_ggx_blur(sc, roughness);
-			break;
-		case CLOSURE_BSDF_MICROFACET_GGX_ID:
-		case CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID:
-		case CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID:
-		case CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID:
-		case CLOSURE_BSDF_MICROFACET_GGX_ANISO_FRESNEL_ID:
-		case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID:
-			bsdf_microfacet_ggx_blur(sc, roughness);
-			break;
-		case CLOSURE_BSDF_MICROFACET_BECKMANN_ID:
-		case CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID:
-		case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID:
-			bsdf_microfacet_beckmann_blur(sc, roughness);
-			break;
-		case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID:
-		case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID:
-			bsdf_ashikhmin_shirley_blur(sc, roughness);
-			break;
-		case CLOSURE_BSDF_HAIR_PRINCIPLED_ID:
-			bsdf_principled_hair_blur(sc, roughness);
-			break;
-		default:
-			break;
-	}
+  switch (sc->type) {
+    case CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID:
+    case CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID:
+    case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID:
+    case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID:
+      bsdf_microfacet_multi_ggx_blur(sc, roughness);
+      break;
+    case CLOSURE_BSDF_MICROFACET_GGX_ID:
+    case CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID:
+    case CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID:
+    case CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID:
+    case CLOSURE_BSDF_MICROFACET_GGX_ANISO_FRESNEL_ID:
+    case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID:
+      bsdf_microfacet_ggx_blur(sc, roughness);
+      break;
+    case CLOSURE_BSDF_MICROFACET_BECKMANN_ID:
+    case CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID:
+    case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID:
+      bsdf_microfacet_beckmann_blur(sc, roughness);
+      break;
+    case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID:
+    case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID:
+      bsdf_ashikhmin_shirley_blur(sc, roughness);
+      break;
+    case CLOSURE_BSDF_HAIR_PRINCIPLED_ID:
+      bsdf_principled_hair_blur(sc, roughness);
+      break;
+    default:
+      break;
+  }
 #endif
 }
 
 ccl_device bool bsdf_merge(ShaderClosure *a, ShaderClosure *b)
 {
 #ifdef __SVM__
-	switch(a->type) {
-		case CLOSURE_BSDF_TRANSPARENT_ID:
-			return true;
-		case CLOSURE_BSDF_DIFFUSE_ID:
-		case CLOSURE_BSDF_BSSRDF_ID:
-		case CLOSURE_BSDF_TRANSLUCENT_ID:
-			return bsdf_diffuse_merge(a, b);
-		case CLOSURE_BSDF_OREN_NAYAR_ID:
-			return bsdf_oren_nayar_merge(a, b);
-		case CLOSURE_BSDF_REFLECTION_ID:
-		case CLOSURE_BSDF_REFRACTION_ID:
-		case CLOSURE_BSDF_MICROFACET_GGX_ID:
-		case CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID:
-		case CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID:
-		case CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID:
-		case CLOSURE_BSDF_MICROFACET_GGX_ANISO_FRESNEL_ID:
-		case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID:
-		case CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID:
-		case CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID:
-		case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID:
-		case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID:
-		case CLOSURE_BSDF_MICROFACET_BECKMANN_ID:
-		case CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID:
-		case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID:
-		case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID:
-		case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID:
-			return bsdf_microfacet_merge(a, b);
-		case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID:
-			return bsdf_ashikhmin_velvet_merge(a, b);
-		case CLOSURE_BSDF_DIFFUSE_TOON_ID:
-		case CLOSURE_BSDF_GLOSSY_TOON_ID:
-			return bsdf_toon_merge(a, b);
-		case CLOSURE_BSDF_HAIR_REFLECTION_ID:
-		case CLOSURE_BSDF_HAIR_TRANSMISSION_ID:
-			return bsdf_hair_merge(a, b);
-#ifdef __PRINCIPLED__
-		case CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID:
-		case CLOSURE_BSDF_BSSRDF_PRINCIPLED_ID:
-			return bsdf_principled_diffuse_merge(a, b);
-#endif
-#ifdef __VOLUME__
-		case CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID:
-			return volume_henyey_greenstein_merge(a, b);
-#endif
-		default:
-			return false;
-	}
+  switch (a->type) {
+    case CLOSURE_BSDF_TRANSPARENT_ID:
+      return true;
+    case CLOSURE_BSDF_DIFFUSE_ID:
+    case CLOSURE_BSDF_BSSRDF_ID:
+    case CLOSURE_BSDF_TRANSLUCENT_ID:
+      return bsdf_diffuse_merge(a, b);
+    case CLOSURE_BSDF_OREN_NAYAR_ID:
+      return bsdf_oren_nayar_merge(a, b);
+    case CLOSURE_BSDF_REFLECTION_ID:
+    case CLOSURE_BSDF_REFRACTION_ID:
+    case CLOSURE_BSDF_MICROFACET_GGX_ID:
+    case CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID:
+    case CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID:
+    case CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID:
+    case CLOSURE_BSDF_MICROFACET_GGX_ANISO_FRESNEL_ID:
+    case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID:
+    case CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID:
+    case CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID:
+    case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID:
+    case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID:
+    case CLOSURE_BSDF_MICROFACET_BECKMANN_ID:
+    case CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID:
+    case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID:
+    case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID:
+    case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID:
+      return bsdf_microfacet_merge(a, b);
+    case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID:
+      return bsdf_ashikhmin_velvet_merge(a, b);
+    case CLOSURE_BSDF_DIFFUSE_TOON_ID:
+    case CLOSURE_BSDF_GLOSSY_TOON_ID:
+      return bsdf_toon_merge(a, b);
+    case CLOSURE_BSDF_HAIR_REFLECTION_ID:
+    case CLOSURE_BSDF_HAIR_TRANSMISSION_ID:
+      return bsdf_hair_merge(a, b);
+#  ifdef __PRINCIPLED__
+    case CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID:
+    case CLOSURE_BSDF_BSSRDF_PRINCIPLED_ID:
+      return bsdf_principled_diffuse_merge(a, b);
+#  endif
+#  ifdef __VOLUME__
+    case CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID:
+      return volume_henyey_greenstein_merge(a, b);
+#  endif
+    default:
+      return false;
+  }
 #else
-	return false;
+  return false;
 #endif
 }
 
diff --git a/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h b/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h
index 4e7425bd800..b3b1c37748d 100644
--- a/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h
+++ b/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h
@@ -33,203 +33,226 @@ CCL_NAMESPACE_BEGIN
 
 ccl_device int bsdf_ashikhmin_shirley_setup(MicrofacetBsdf *bsdf)
 {
-	bsdf->alpha_x = clamp(bsdf->alpha_x, 1e-4f, 1.0f);
-	bsdf->alpha_y = bsdf->alpha_x;
+  bsdf->alpha_x = clamp(bsdf->alpha_x, 1e-4f, 1.0f);
+  bsdf->alpha_y = bsdf->alpha_x;
 
-	bsdf->type = CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID;
-	return SD_BSDF|SD_BSDF_HAS_EVAL;
+  bsdf->type = CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID;
+  return SD_BSDF | SD_BSDF_HAS_EVAL;
 }
 
 ccl_device int bsdf_ashikhmin_shirley_aniso_setup(MicrofacetBsdf *bsdf)
 {
-	bsdf->alpha_x = clamp(bsdf->alpha_x, 1e-4f, 1.0f);
-	bsdf->alpha_y = clamp(bsdf->alpha_y, 1e-4f, 1.0f);
+  bsdf->alpha_x = clamp(bsdf->alpha_x, 1e-4f, 1.0f);
+  bsdf->alpha_y = clamp(bsdf->alpha_y, 1e-4f, 1.0f);
 
-	bsdf->type = CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID;
-	return SD_BSDF|SD_BSDF_HAS_EVAL;
+  bsdf->type = CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID;
+  return SD_BSDF | SD_BSDF_HAS_EVAL;
 }
 
 ccl_device void bsdf_ashikhmin_shirley_blur(ShaderClosure *sc, float roughness)
 {
-	MicrofacetBsdf *bsdf = (MicrofacetBsdf*)sc;
+  MicrofacetBsdf *bsdf = (MicrofacetBsdf *)sc;
 
-	bsdf->alpha_x = fmaxf(roughness, bsdf->alpha_x);
-	bsdf->alpha_y = fmaxf(roughness, bsdf->alpha_y);
+  bsdf->alpha_x = fmaxf(roughness, bsdf->alpha_x);
+  bsdf->alpha_y = fmaxf(roughness, bsdf->alpha_y);
 }
 
 ccl_device_inline float bsdf_ashikhmin_shirley_roughness_to_exponent(float roughness)
 {
-	return 2.0f / (roughness*roughness) - 2.0f;
+  return 2.0f / (roughness * roughness) - 2.0f;
 }
 
-ccl_device_forceinline float3 bsdf_ashikhmin_shirley_eval_reflect(
-        const ShaderClosure *sc,
-        const float3 I,
-        const float3 omega_in,
-        float *pdf)
+ccl_device_forceinline float3 bsdf_ashikhmin_shirley_eval_reflect(const ShaderClosure *sc,
+                                                                  const float3 I,
+                                                                  const float3 omega_in,
+                                                                  float *pdf)
 {
-	const MicrofacetBsdf *bsdf = (const MicrofacetBsdf*)sc;
-	float3 N = bsdf->N;
-
-	float NdotI = dot(N, I);           /* in Cycles/OSL convention I is omega_out    */
-	float NdotO = dot(N, omega_in);    /* and consequently we use for O omaga_in ;)  */
-
-	float out = 0.0f;
-
-	if(fmaxf(bsdf->alpha_x, bsdf->alpha_y) <= 1e-4f)
-		return make_float3(0.0f, 0.0f, 0.0f);
-
-	if(NdotI > 0.0f && NdotO > 0.0f) {
-		NdotI = fmaxf(NdotI, 1e-6f);
-		NdotO = fmaxf(NdotO, 1e-6f);
-		float3 H = normalize(omega_in + I);
-		float HdotI = fmaxf(fabsf(dot(H, I)), 1e-6f);
-		float HdotN = fmaxf(dot(H, N), 1e-6f);
-
-		float pump = 1.0f / fmaxf(1e-6f, (HdotI*fmaxf(NdotO, NdotI))); /* pump from original paper (first derivative disc., but cancels the HdotI in the pdf nicely) */
-		/*float pump = 1.0f / fmaxf(1e-4f, ((NdotO + NdotI) * (NdotO*NdotI))); */ /* pump from d-brdf paper */
-
-		float n_x = bsdf_ashikhmin_shirley_roughness_to_exponent(bsdf->alpha_x);
-		float n_y = bsdf_ashikhmin_shirley_roughness_to_exponent(bsdf->alpha_y);
-
-		if(n_x == n_y) {
-			/* isotropic */
-			float e = n_x;
-			float lobe = powf(HdotN, e);
-			float norm = (n_x + 1.0f) / (8.0f * M_PI_F);
-
-			out = NdotO * norm * lobe * pump;
-			*pdf = norm * lobe / HdotI; /* this is p_h / 4(H.I)  (conversion from 'wh measure' to 'wi measure', eq. 8 in paper) */
-		}
-		else {
-			/* anisotropic */
-			float3 X, Y;
-			make_orthonormals_tangent(N, bsdf->T, &X, &Y);
-
-			float HdotX = dot(H, X);
-			float HdotY = dot(H, Y);
-			float lobe;
-			if(HdotN < 1.0f) {
-				float e = (n_x * HdotX*HdotX + n_y * HdotY*HdotY) / (1.0f - HdotN*HdotN);
-				lobe = powf(HdotN, e);
-			}
-			else {
-				lobe = 1.0f;
-			}
-			float norm = sqrtf((n_x + 1.0f)*(n_y + 1.0f)) / (8.0f * M_PI_F);
-
-			out = NdotO * norm * lobe * pump;
-			*pdf = norm * lobe / HdotI;
-		}
-	}
-
-	return make_float3(out, out, out);
+  const MicrofacetBsdf *bsdf = (const MicrofacetBsdf *)sc;
+  float3 N = bsdf->N;
+
+  float NdotI = dot(N, I);        /* in Cycles/OSL convention I is omega_out    */
+  float NdotO = dot(N, omega_in); /* and consequently we use for O omega_in ;)  */
+
+  float out = 0.0f;
+
+  if (fmaxf(bsdf->alpha_x, bsdf->alpha_y) <= 1e-4f)
+    return make_float3(0.0f, 0.0f, 0.0f);
+
+  if (NdotI > 0.0f && NdotO > 0.0f) {
+    NdotI = fmaxf(NdotI, 1e-6f);
+    NdotO = fmaxf(NdotO, 1e-6f);
+    float3 H = normalize(omega_in + I);
+    float HdotI = fmaxf(fabsf(dot(H, I)), 1e-6f);
+    float HdotN = fmaxf(dot(H, N), 1e-6f);
+
+    float pump =
+        1.0f /
+        fmaxf(
+            1e-6f,
+            (HdotI *
+             fmaxf(
+                 NdotO,
+                 NdotI))); /* pump from original paper (first derivative disc., but cancels the HdotI in the pdf nicely) */
+    /*float pump = 1.0f / fmaxf(1e-4f, ((NdotO + NdotI) * (NdotO*NdotI))); */ /* pump from d-brdf paper */
+
+    float n_x = bsdf_ashikhmin_shirley_roughness_to_exponent(bsdf->alpha_x);
+    float n_y = bsdf_ashikhmin_shirley_roughness_to_exponent(bsdf->alpha_y);
+
+    if (n_x == n_y) {
+      /* isotropic */
+      float e = n_x;
+      float lobe = powf(HdotN, e);
+      float norm = (n_x + 1.0f) / (8.0f * M_PI_F);
+
+      out = NdotO * norm * lobe * pump;
+      *pdf =
+          norm * lobe /
+          HdotI; /* this is p_h / 4(H.I)  (conversion from 'wh measure' to 'wi measure', eq. 8 in paper) */
+    }
+    else {
+      /* anisotropic */
+      float3 X, Y;
+      make_orthonormals_tangent(N, bsdf->T, &X, &Y);
+
+      float HdotX = dot(H, X);
+      float HdotY = dot(H, Y);
+      float lobe;
+      if (HdotN < 1.0f) {
+        float e = (n_x * HdotX * HdotX + n_y * HdotY * HdotY) / (1.0f - HdotN * HdotN);
+        lobe = powf(HdotN, e);
+      }
+      else {
+        lobe = 1.0f;
+      }
+      float norm = sqrtf((n_x + 1.0f) * (n_y + 1.0f)) / (8.0f * M_PI_F);
+
+      out = NdotO * norm * lobe * pump;
+      *pdf = norm * lobe / HdotI;
+    }
+  }
+
+  return make_float3(out, out, out);
 }
 
-ccl_device float3 bsdf_ashikhmin_shirley_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
+ccl_device float3 bsdf_ashikhmin_shirley_eval_transmit(const ShaderClosure *sc,
+                                                       const float3 I,
+                                                       const float3 omega_in,
+                                                       float *pdf)
 {
-	return make_float3(0.0f, 0.0f, 0.0f);
+  return make_float3(0.0f, 0.0f, 0.0f);
 }
 
-ccl_device_inline void bsdf_ashikhmin_shirley_sample_first_quadrant(float n_x, float n_y, float randu, float randv, float *phi, float *cos_theta)
+ccl_device_inline void bsdf_ashikhmin_shirley_sample_first_quadrant(
+    float n_x, float n_y, float randu, float randv, float *phi, float *cos_theta)
 {
-	*phi = atanf(sqrtf((n_x + 1.0f) / (n_y + 1.0f)) * tanf(M_PI_2_F * randu));
-	float cos_phi = cosf(*phi);
-	float sin_phi = sinf(*phi);
-	*cos_theta = powf(randv, 1.0f / (n_x * cos_phi*cos_phi + n_y * sin_phi*sin_phi + 1.0f));
+  *phi = atanf(sqrtf((n_x + 1.0f) / (n_y + 1.0f)) * tanf(M_PI_2_F * randu));
+  float cos_phi = cosf(*phi);
+  float sin_phi = sinf(*phi);
+  *cos_theta = powf(randv, 1.0f / (n_x * cos_phi * cos_phi + n_y * sin_phi * sin_phi + 1.0f));
 }
 
-ccl_device int bsdf_ashikhmin_shirley_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf)
+ccl_device int bsdf_ashikhmin_shirley_sample(const ShaderClosure *sc,
+                                             float3 Ng,
+                                             float3 I,
+                                             float3 dIdx,
+                                             float3 dIdy,
+                                             float randu,
+                                             float randv,
+                                             float3 *eval,
+                                             float3 *omega_in,
+                                             float3 *domega_in_dx,
+                                             float3 *domega_in_dy,
+                                             float *pdf)
 {
-	const MicrofacetBsdf *bsdf = (const MicrofacetBsdf*)sc;
-	float3 N = bsdf->N;
-	int label = LABEL_REFLECT | LABEL_GLOSSY;
-
-	float NdotI = dot(N, I);
-	if(NdotI > 0.0f) {
-
-		float n_x = bsdf_ashikhmin_shirley_roughness_to_exponent(bsdf->alpha_x);
-		float n_y = bsdf_ashikhmin_shirley_roughness_to_exponent(bsdf->alpha_y);
-
-		/* get x,y basis on the surface for anisotropy */
-		float3 X, Y;
-
-		if(n_x == n_y)
-			make_orthonormals(N, &X, &Y);
-		else
-			make_orthonormals_tangent(N, bsdf->T, &X, &Y);
-
-		/* sample spherical coords for h in tangent space */
-		float phi;
-		float cos_theta;
-		if(n_x == n_y) {
-			/* isotropic sampling */
-			phi = M_2PI_F * randu;
-			cos_theta = powf(randv, 1.0f / (n_x + 1.0f));
-		}
-		else {
-			/* anisotropic sampling */
-			if(randu < 0.25f) {      /* first quadrant */
-				float remapped_randu = 4.0f * randu;
-				bsdf_ashikhmin_shirley_sample_first_quadrant(n_x, n_y, remapped_randu, randv, &phi, &cos_theta);
-			}
-			else if(randu < 0.5f) {  /* second quadrant */
-				float remapped_randu = 4.0f * (.5f - randu);
-				bsdf_ashikhmin_shirley_sample_first_quadrant(n_x, n_y, remapped_randu, randv, &phi, &cos_theta);
-				phi = M_PI_F - phi;
-			}
-			else if(randu < 0.75f) { /* third quadrant */
-				float remapped_randu = 4.0f * (randu - 0.5f);
-				bsdf_ashikhmin_shirley_sample_first_quadrant(n_x, n_y, remapped_randu, randv, &phi, &cos_theta);
-				phi = M_PI_F + phi;
-			}
-			else {                   /* fourth quadrant */
-				float remapped_randu = 4.0f * (1.0f - randu);
-				bsdf_ashikhmin_shirley_sample_first_quadrant(n_x, n_y, remapped_randu, randv, &phi, &cos_theta);
-				phi = 2.0f * M_PI_F - phi;
-			}
-		}
-
-		/* get half vector in tangent space */
-		float sin_theta = sqrtf(fmaxf(0.0f, 1.0f - cos_theta*cos_theta));
-		float cos_phi = cosf(phi);
-		float sin_phi = sinf(phi); /* no sqrt(1-cos^2) here b/c it causes artifacts */
-		float3 h = make_float3(
-			sin_theta * cos_phi,
-			sin_theta * sin_phi,
-			cos_theta
-			);
-
-		/* half vector to world space */
-		float3 H = h.x*X + h.y*Y + h.z*N;
-		float HdotI = dot(H, I);
-		if(HdotI < 0.0f) H = -H;
-
-		/* reflect I on H to get omega_in */
-		*omega_in = -I + (2.0f * HdotI) * H;
-
-		if(fmaxf(bsdf->alpha_x, bsdf->alpha_y) <= 1e-4f) {
-			/* Some high number for MIS. */
-			*pdf = 1e6f;
-			*eval = make_float3(1e6f, 1e6f, 1e6f);
-			label = LABEL_REFLECT | LABEL_SINGULAR;
-		}
-		else {
-			/* leave the rest to eval_reflect */
-			*eval = bsdf_ashikhmin_shirley_eval_reflect(sc, I, *omega_in, pdf);
-		}
+  const MicrofacetBsdf *bsdf = (const MicrofacetBsdf *)sc;
+  float3 N = bsdf->N;
+  int label = LABEL_REFLECT | LABEL_GLOSSY;
+
+  float NdotI = dot(N, I);
+  if (NdotI > 0.0f) {
+
+    float n_x = bsdf_ashikhmin_shirley_roughness_to_exponent(bsdf->alpha_x);
+    float n_y = bsdf_ashikhmin_shirley_roughness_to_exponent(bsdf->alpha_y);
+
+    /* get x,y basis on the surface for anisotropy */
+    float3 X, Y;
+
+    if (n_x == n_y)
+      make_orthonormals(N, &X, &Y);
+    else
+      make_orthonormals_tangent(N, bsdf->T, &X, &Y);
+
+    /* sample spherical coords for h in tangent space */
+    float phi;
+    float cos_theta;
+    if (n_x == n_y) {
+      /* isotropic sampling */
+      phi = M_2PI_F * randu;
+      cos_theta = powf(randv, 1.0f / (n_x + 1.0f));
+    }
+    else {
+      /* anisotropic sampling */
+      if (randu < 0.25f) { /* first quadrant */
+        float remapped_randu = 4.0f * randu;
+        bsdf_ashikhmin_shirley_sample_first_quadrant(
+            n_x, n_y, remapped_randu, randv, &phi, &cos_theta);
+      }
+      else if (randu < 0.5f) { /* second quadrant */
+        float remapped_randu = 4.0f * (.5f - randu);
+        bsdf_ashikhmin_shirley_sample_first_quadrant(
+            n_x, n_y, remapped_randu, randv, &phi, &cos_theta);
+        phi = M_PI_F - phi;
+      }
+      else if (randu < 0.75f) { /* third quadrant */
+        float remapped_randu = 4.0f * (randu - 0.5f);
+        bsdf_ashikhmin_shirley_sample_first_quadrant(
+            n_x, n_y, remapped_randu, randv, &phi, &cos_theta);
+        phi = M_PI_F + phi;
+      }
+      else { /* fourth quadrant */
+        float remapped_randu = 4.0f * (1.0f - randu);
+        bsdf_ashikhmin_shirley_sample_first_quadrant(
+            n_x, n_y, remapped_randu, randv, &phi, &cos_theta);
+        phi = 2.0f * M_PI_F - phi;
+      }
+    }
+
+    /* get half vector in tangent space */
+    float sin_theta = sqrtf(fmaxf(0.0f, 1.0f - cos_theta * cos_theta));
+    float cos_phi = cosf(phi);
+    float sin_phi = sinf(phi); /* no sqrt(1-cos^2) here b/c it causes artifacts */
+    float3 h = make_float3(sin_theta * cos_phi, sin_theta * sin_phi, cos_theta);
+
+    /* half vector to world space */
+    float3 H = h.x * X + h.y * Y + h.z * N;
+    float HdotI = dot(H, I);
+    if (HdotI < 0.0f)
+      H = -H;
+
+    /* reflect I on H to get omega_in */
+    *omega_in = -I + (2.0f * HdotI) * H;
+
+    if (fmaxf(bsdf->alpha_x, bsdf->alpha_y) <= 1e-4f) {
+      /* Some high number for MIS. */
+      *pdf = 1e6f;
+      *eval = make_float3(1e6f, 1e6f, 1e6f);
+      label = LABEL_REFLECT | LABEL_SINGULAR;
+    }
+    else {
+      /* leave the rest to eval_reflect */
+      *eval = bsdf_ashikhmin_shirley_eval_reflect(sc, I, *omega_in, pdf);
+    }
 
 #ifdef __RAY_DIFFERENTIALS__
-		/* just do the reflection thing for now */
-		*domega_in_dx = (2.0f * dot(N, dIdx)) * N - dIdx;
-		*domega_in_dy = (2.0f * dot(N, dIdy)) * N - dIdy;
+    /* just do the reflection thing for now */
+    *domega_in_dx = (2.0f * dot(N, dIdx)) * N - dIdx;
+    *domega_in_dy = (2.0f * dot(N, dIdy)) * N - dIdy;
 #endif
-	}
+  }
 
-	return label;
+  return label;
 }
 
-
 CCL_NAMESPACE_END
 
-#endif  /* __BSDF_ASHIKHMIN_SHIRLEY_H__ */
+#endif /* __BSDF_ASHIKHMIN_SHIRLEY_H__ */
diff --git a/intern/cycles/kernel/closure/bsdf_ashikhmin_velvet.h b/intern/cycles/kernel/closure/bsdf_ashikhmin_velvet.h
index 80fd9ba2b37..8122bcc1424 100644
--- a/intern/cycles/kernel/closure/bsdf_ashikhmin_velvet.h
+++ b/intern/cycles/kernel/closure/bsdf_ashikhmin_velvet.h
@@ -36,126 +36,142 @@
 CCL_NAMESPACE_BEGIN
 
 typedef ccl_addr_space struct VelvetBsdf {
-	SHADER_CLOSURE_BASE;
+  SHADER_CLOSURE_BASE;
 
-	float sigma;
-	float invsigma2;
+  float sigma;
+  float invsigma2;
 } VelvetBsdf;
 
 ccl_device int bsdf_ashikhmin_velvet_setup(VelvetBsdf *bsdf)
 {
-	float sigma = fmaxf(bsdf->sigma, 0.01f);
-	bsdf->invsigma2 = 1.0f/(sigma * sigma);
+  float sigma = fmaxf(bsdf->sigma, 0.01f);
+  bsdf->invsigma2 = 1.0f / (sigma * sigma);
 
-	bsdf->type = CLOSURE_BSDF_ASHIKHMIN_VELVET_ID;
+  bsdf->type = CLOSURE_BSDF_ASHIKHMIN_VELVET_ID;
 
-	return SD_BSDF|SD_BSDF_HAS_EVAL;
+  return SD_BSDF | SD_BSDF_HAS_EVAL;
 }
 
 ccl_device bool bsdf_ashikhmin_velvet_merge(const ShaderClosure *a, const ShaderClosure *b)
 {
-	const VelvetBsdf *bsdf_a = (const VelvetBsdf*)a;
-	const VelvetBsdf *bsdf_b = (const VelvetBsdf*)b;
+  const VelvetBsdf *bsdf_a = (const VelvetBsdf *)a;
+  const VelvetBsdf *bsdf_b = (const VelvetBsdf *)b;
 
-	return (isequal_float3(bsdf_a->N, bsdf_b->N)) &&
-	       (bsdf_a->sigma == bsdf_b->sigma);
+  return (isequal_float3(bsdf_a->N, bsdf_b->N)) && (bsdf_a->sigma == bsdf_b->sigma);
 }
 
-ccl_device float3 bsdf_ashikhmin_velvet_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
+ccl_device float3 bsdf_ashikhmin_velvet_eval_reflect(const ShaderClosure *sc,
+                                                     const float3 I,
+                                                     const float3 omega_in,
+                                                     float *pdf)
 {
-	const VelvetBsdf *bsdf = (const VelvetBsdf*)sc;
-	float m_invsigma2 = bsdf->invsigma2;
-	float3 N = bsdf->N;
+  const VelvetBsdf *bsdf = (const VelvetBsdf *)sc;
+  float m_invsigma2 = bsdf->invsigma2;
+  float3 N = bsdf->N;
 
-	float cosNO = dot(N, I);
-	float cosNI = dot(N, omega_in);
-	if(cosNO > 0 && cosNI > 0) {
-		float3 H = normalize(omega_in + I);
+  float cosNO = dot(N, I);
+  float cosNI = dot(N, omega_in);
+  if (cosNO > 0 && cosNI > 0) {
+    float3 H = normalize(omega_in + I);
 
-		float cosNH = dot(N, H);
-		float cosHO = fabsf(dot(I, H));
+    float cosNH = dot(N, H);
+    float cosHO = fabsf(dot(I, H));
 
-		if(!(fabsf(cosNH) < 1.0f-1e-5f && cosHO > 1e-5f))
-			return make_float3(0.0f, 0.0f, 0.0f);
+    if (!(fabsf(cosNH) < 1.0f - 1e-5f && cosHO > 1e-5f))
+      return make_float3(0.0f, 0.0f, 0.0f);
 
-		float cosNHdivHO = cosNH / cosHO;
-		cosNHdivHO = fmaxf(cosNHdivHO, 1e-5f);
+    float cosNHdivHO = cosNH / cosHO;
+    cosNHdivHO = fmaxf(cosNHdivHO, 1e-5f);
 
-		float fac1 = 2 * fabsf(cosNHdivHO * cosNO);
-		float fac2 = 2 * fabsf(cosNHdivHO * cosNI);
+    float fac1 = 2 * fabsf(cosNHdivHO * cosNO);
+    float fac2 = 2 * fabsf(cosNHdivHO * cosNI);
 
-		float sinNH2 = 1 - cosNH * cosNH;
-		float sinNH4 = sinNH2 * sinNH2;
-		float cotangent2 = (cosNH * cosNH) / sinNH2;
+    float sinNH2 = 1 - cosNH * cosNH;
+    float sinNH4 = sinNH2 * sinNH2;
+    float cotangent2 = (cosNH * cosNH) / sinNH2;
 
-		float D = expf(-cotangent2 * m_invsigma2) * m_invsigma2 * M_1_PI_F / sinNH4;
-		float G = min(1.0f, min(fac1, fac2)); // TODO: derive G from D analytically
+    float D = expf(-cotangent2 * m_invsigma2) * m_invsigma2 * M_1_PI_F / sinNH4;
+    float G = min(1.0f, min(fac1, fac2));  // TODO: derive G from D analytically
 
-		float out = 0.25f * (D * G) / cosNO;
+    float out = 0.25f * (D * G) / cosNO;
 
-		*pdf = 0.5f * M_1_PI_F;
-		return make_float3(out, out, out);
-	}
+    *pdf = 0.5f * M_1_PI_F;
+    return make_float3(out, out, out);
+  }
 
-	return make_float3(0.0f, 0.0f, 0.0f);
+  return make_float3(0.0f, 0.0f, 0.0f);
 }
 
-ccl_device float3 bsdf_ashikhmin_velvet_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
+ccl_device float3 bsdf_ashikhmin_velvet_eval_transmit(const ShaderClosure *sc,
+                                                      const float3 I,
+                                                      const float3 omega_in,
+                                                      float *pdf)
 {
-	return make_float3(0.0f, 0.0f, 0.0f);
+  return make_float3(0.0f, 0.0f, 0.0f);
 }
 
-ccl_device int bsdf_ashikhmin_velvet_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf)
+ccl_device int bsdf_ashikhmin_velvet_sample(const ShaderClosure *sc,
+                                            float3 Ng,
+                                            float3 I,
+                                            float3 dIdx,
+                                            float3 dIdy,
+                                            float randu,
+                                            float randv,
+                                            float3 *eval,
+                                            float3 *omega_in,
+                                            float3 *domega_in_dx,
+                                            float3 *domega_in_dy,
+                                            float *pdf)
 {
-	const VelvetBsdf *bsdf = (const VelvetBsdf*)sc;
-	float m_invsigma2 = bsdf->invsigma2;
-	float3 N = bsdf->N;
+  const VelvetBsdf *bsdf = (const VelvetBsdf *)sc;
+  float m_invsigma2 = bsdf->invsigma2;
+  float3 N = bsdf->N;
 
-	// we are viewing the surface from above - send a ray out with uniform
-	// distribution over the hemisphere
-	sample_uniform_hemisphere(N, randu, randv, omega_in, pdf);
+  // we are viewing the surface from above - send a ray out with uniform
+  // distribution over the hemisphere
+  sample_uniform_hemisphere(N, randu, randv, omega_in, pdf);
 
-	if(dot(Ng, *omega_in) > 0) {
-		float3 H = normalize(*omega_in + I);
+  if (dot(Ng, *omega_in) > 0) {
+    float3 H = normalize(*omega_in + I);
 
-		float cosNI = dot(N, *omega_in);
-		float cosNO = dot(N, I);
-		float cosNH = dot(N, H);
-		float cosHO = fabsf(dot(I, H));
+    float cosNI = dot(N, *omega_in);
+    float cosNO = dot(N, I);
+    float cosNH = dot(N, H);
+    float cosHO = fabsf(dot(I, H));
 
-		if(fabsf(cosNO) > 1e-5f && fabsf(cosNH) < 1.0f-1e-5f && cosHO > 1e-5f) {
-			float cosNHdivHO = cosNH / cosHO;
-			cosNHdivHO = fmaxf(cosNHdivHO, 1e-5f);
+    if (fabsf(cosNO) > 1e-5f && fabsf(cosNH) < 1.0f - 1e-5f && cosHO > 1e-5f) {
+      float cosNHdivHO = cosNH / cosHO;
+      cosNHdivHO = fmaxf(cosNHdivHO, 1e-5f);
 
-			float fac1 = 2 * fabsf(cosNHdivHO * cosNO);
-			float fac2 = 2 * fabsf(cosNHdivHO * cosNI);
+      float fac1 = 2 * fabsf(cosNHdivHO * cosNO);
+      float fac2 = 2 * fabsf(cosNHdivHO * cosNI);
 
-			float sinNH2 = 1 - cosNH * cosNH;
-			float sinNH4 = sinNH2 * sinNH2;
-			float cotangent2 = (cosNH * cosNH) / sinNH2;
+      float sinNH2 = 1 - cosNH * cosNH;
+      float sinNH4 = sinNH2 * sinNH2;
+      float cotangent2 = (cosNH * cosNH) / sinNH2;
 
-			float D = expf(-cotangent2 * m_invsigma2) * m_invsigma2 * M_1_PI_F / sinNH4;
-			float G = min(1.0f, min(fac1, fac2)); // TODO: derive G from D analytically
+      float D = expf(-cotangent2 * m_invsigma2) * m_invsigma2 * M_1_PI_F / sinNH4;
+      float G = min(1.0f, min(fac1, fac2));  // TODO: derive G from D analytically
 
-			float power = 0.25f * (D * G) / cosNO;
+      float power = 0.25f * (D * G) / cosNO;
 
-			*eval = make_float3(power, power, power);
+      *eval = make_float3(power, power, power);
 
 #ifdef __RAY_DIFFERENTIALS__
-			// TODO: find a better approximation for the retroreflective bounce
-			*domega_in_dx = (2 * dot(N, dIdx)) * N - dIdx;
-			*domega_in_dy = (2 * dot(N, dIdy)) * N - dIdy;
+      // TODO: find a better approximation for the retroreflective bounce
+      *domega_in_dx = (2 * dot(N, dIdx)) * N - dIdx;
+      *domega_in_dy = (2 * dot(N, dIdy)) * N - dIdy;
 #endif
-		}
-		else
-			*pdf = 0.0f;
-	}
-	else
-		*pdf = 0.0f;
-
-	return LABEL_REFLECT|LABEL_DIFFUSE;
+    }
+    else
+      *pdf = 0.0f;
+  }
+  else
+    *pdf = 0.0f;
+
+  return LABEL_REFLECT | LABEL_DIFFUSE;
 }
 
 CCL_NAMESPACE_END
 
-#endif  /* __BSDF_ASHIKHMIN_VELVET_H__ */
+#endif /* __BSDF_ASHIKHMIN_VELVET_H__ */
diff --git a/intern/cycles/kernel/closure/bsdf_diffuse.h b/intern/cycles/kernel/closure/bsdf_diffuse.h
index 946c460a70e..76b50548455 100644
--- a/intern/cycles/kernel/closure/bsdf_diffuse.h
+++ b/intern/cycles/kernel/closure/bsdf_diffuse.h
@@ -36,107 +36,141 @@
 CCL_NAMESPACE_BEGIN
 
 typedef ccl_addr_space struct DiffuseBsdf {
-	SHADER_CLOSURE_BASE;
+  SHADER_CLOSURE_BASE;
 } DiffuseBsdf;
 
 /* DIFFUSE */
 
 ccl_device int bsdf_diffuse_setup(DiffuseBsdf *bsdf)
 {
-	bsdf->type = CLOSURE_BSDF_DIFFUSE_ID;
-	return SD_BSDF|SD_BSDF_HAS_EVAL;
+  bsdf->type = CLOSURE_BSDF_DIFFUSE_ID;
+  return SD_BSDF | SD_BSDF_HAS_EVAL;
 }
 
 ccl_device bool bsdf_diffuse_merge(const ShaderClosure *a, const ShaderClosure *b)
 {
-	const DiffuseBsdf *bsdf_a = (const DiffuseBsdf*)a;
-	const DiffuseBsdf *bsdf_b = (const DiffuseBsdf*)b;
+  const DiffuseBsdf *bsdf_a = (const DiffuseBsdf *)a;
+  const DiffuseBsdf *bsdf_b = (const DiffuseBsdf *)b;
 
-	return (isequal_float3(bsdf_a->N, bsdf_b->N));
+  return (isequal_float3(bsdf_a->N, bsdf_b->N));
 }
 
-ccl_device float3 bsdf_diffuse_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
+ccl_device float3 bsdf_diffuse_eval_reflect(const ShaderClosure *sc,
+                                            const float3 I,
+                                            const float3 omega_in,
+                                            float *pdf)
 {
-	const DiffuseBsdf *bsdf = (const DiffuseBsdf*)sc;
-	float3 N = bsdf->N;
+  const DiffuseBsdf *bsdf = (const DiffuseBsdf *)sc;
+  float3 N = bsdf->N;
 
-	float cos_pi = fmaxf(dot(N, omega_in), 0.0f) * M_1_PI_F;
-	*pdf = cos_pi;
-	return make_float3(cos_pi, cos_pi, cos_pi);
+  float cos_pi = fmaxf(dot(N, omega_in), 0.0f) * M_1_PI_F;
+  *pdf = cos_pi;
+  return make_float3(cos_pi, cos_pi, cos_pi);
 }
 
-ccl_device float3 bsdf_diffuse_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
+ccl_device float3 bsdf_diffuse_eval_transmit(const ShaderClosure *sc,
+                                             const float3 I,
+                                             const float3 omega_in,
+                                             float *pdf)
 {
-	return make_float3(0.0f, 0.0f, 0.0f);
+  return make_float3(0.0f, 0.0f, 0.0f);
 }
 
-ccl_device int bsdf_diffuse_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf)
+ccl_device int bsdf_diffuse_sample(const ShaderClosure *sc,
+                                   float3 Ng,
+                                   float3 I,
+                                   float3 dIdx,
+                                   float3 dIdy,
+                                   float randu,
+                                   float randv,
+                                   float3 *eval,
+                                   float3 *omega_in,
+                                   float3 *domega_in_dx,
+                                   float3 *domega_in_dy,
+                                   float *pdf)
 {
-	const DiffuseBsdf *bsdf = (const DiffuseBsdf*)sc;
-	float3 N = bsdf->N;
+  const DiffuseBsdf *bsdf = (const DiffuseBsdf *)sc;
+  float3 N = bsdf->N;
 
-	// distribution over the hemisphere
-	sample_cos_hemisphere(N, randu, randv, omega_in, pdf);
+  // distribution over the hemisphere
+  sample_cos_hemisphere(N, randu, randv, omega_in, pdf);
 
-	if(dot(Ng, *omega_in) > 0.0f) {
-		*eval = make_float3(*pdf, *pdf, *pdf);
+  if (dot(Ng, *omega_in) > 0.0f) {
+    *eval = make_float3(*pdf, *pdf, *pdf);
 #ifdef __RAY_DIFFERENTIALS__
-		// TODO: find a better approximation for the diffuse bounce
-		*domega_in_dx = (2 * dot(N, dIdx)) * N - dIdx;
-		*domega_in_dy = (2 * dot(N, dIdy)) * N - dIdy;
+    // TODO: find a better approximation for the diffuse bounce
+    *domega_in_dx = (2 * dot(N, dIdx)) * N - dIdx;
+    *domega_in_dy = (2 * dot(N, dIdy)) * N - dIdy;
 #endif
-	}
-	else
-		*pdf = 0.0f;
+  }
+  else
+    *pdf = 0.0f;
 
-	return LABEL_REFLECT|LABEL_DIFFUSE;
+  return LABEL_REFLECT | LABEL_DIFFUSE;
 }
 
 /* TRANSLUCENT */
 
 ccl_device int bsdf_translucent_setup(DiffuseBsdf *bsdf)
 {
-	bsdf->type = CLOSURE_BSDF_TRANSLUCENT_ID;
-	return SD_BSDF|SD_BSDF_HAS_EVAL;
+  bsdf->type = CLOSURE_BSDF_TRANSLUCENT_ID;
+  return SD_BSDF | SD_BSDF_HAS_EVAL;
 }
 
-ccl_device float3 bsdf_translucent_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
+ccl_device float3 bsdf_translucent_eval_reflect(const ShaderClosure *sc,
+                                                const float3 I,
+                                                const float3 omega_in,
+                                                float *pdf)
 {
-	return make_float3(0.0f, 0.0f, 0.0f);
+  return make_float3(0.0f, 0.0f, 0.0f);
 }
 
-ccl_device float3 bsdf_translucent_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
+ccl_device float3 bsdf_translucent_eval_transmit(const ShaderClosure *sc,
+                                                 const float3 I,
+                                                 const float3 omega_in,
+                                                 float *pdf)
 {
-	const DiffuseBsdf *bsdf = (const DiffuseBsdf*)sc;
-	float3 N = bsdf->N;
+  const DiffuseBsdf *bsdf = (const DiffuseBsdf *)sc;
+  float3 N = bsdf->N;
 
-	float cos_pi = fmaxf(-dot(N, omega_in), 0.0f) * M_1_PI_F;
-	*pdf = cos_pi;
-	return make_float3 (cos_pi, cos_pi, cos_pi);
+  float cos_pi = fmaxf(-dot(N, omega_in), 0.0f) * M_1_PI_F;
+  *pdf = cos_pi;
+  return make_float3(cos_pi, cos_pi, cos_pi);
 }
 
-ccl_device int bsdf_translucent_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf)
+ccl_device int bsdf_translucent_sample(const ShaderClosure *sc,
+                                       float3 Ng,
+                                       float3 I,
+                                       float3 dIdx,
+                                       float3 dIdy,
+                                       float randu,
+                                       float randv,
+                                       float3 *eval,
+                                       float3 *omega_in,
+                                       float3 *domega_in_dx,
+                                       float3 *domega_in_dy,
+                                       float *pdf)
 {
-	const DiffuseBsdf *bsdf = (const DiffuseBsdf*)sc;
-	float3 N = bsdf->N;
-
-	// we are viewing the surface from the right side - send a ray out with cosine
-	// distribution over the hemisphere
-	sample_cos_hemisphere (-N, randu, randv, omega_in, pdf);
-	if(dot(Ng, *omega_in) < 0) {
-		*eval = make_float3(*pdf, *pdf, *pdf);
+  const DiffuseBsdf *bsdf = (const DiffuseBsdf *)sc;
+  float3 N = bsdf->N;
+
+  // we are viewing the surface from the right side - send a ray out with cosine
+  // distribution over the hemisphere
+  sample_cos_hemisphere(-N, randu, randv, omega_in, pdf);
+  if (dot(Ng, *omega_in) < 0) {
+    *eval = make_float3(*pdf, *pdf, *pdf);
 #ifdef __RAY_DIFFERENTIALS__
-		// TODO: find a better approximation for the diffuse bounce
-		*domega_in_dx = -((2 * dot(N, dIdx)) * N - dIdx);
-		*domega_in_dy = -((2 * dot(N, dIdy)) * N - dIdy);
+    // TODO: find a better approximation for the diffuse bounce
+    *domega_in_dx = -((2 * dot(N, dIdx)) * N - dIdx);
+    *domega_in_dy = -((2 * dot(N, dIdy)) * N - dIdy);
 #endif
-	}
-	else {
-		*pdf = 0;
-	}
-	return LABEL_TRANSMIT|LABEL_DIFFUSE;
+  }
+  else {
+    *pdf = 0;
+  }
+  return LABEL_TRANSMIT | LABEL_DIFFUSE;
 }
 
 CCL_NAMESPACE_END
 
-#endif  /* __BSDF_DIFFUSE_H__ */
+#endif /* __BSDF_DIFFUSE_H__ */
diff --git a/intern/cycles/kernel/closure/bsdf_diffuse_ramp.h b/intern/cycles/kernel/closure/bsdf_diffuse_ramp.h
index ca33a5b275c..9d13eb8d4e0 100644
--- a/intern/cycles/kernel/closure/bsdf_diffuse_ramp.h
+++ b/intern/cycles/kernel/closure/bsdf_diffuse_ramp.h
@@ -38,73 +38,90 @@ CCL_NAMESPACE_BEGIN
 #ifdef __OSL__
 
 typedef ccl_addr_space struct DiffuseRampBsdf {
-	SHADER_CLOSURE_BASE;
+  SHADER_CLOSURE_BASE;
 
-	float3 *colors;
+  float3 *colors;
 } DiffuseRampBsdf;
 
 ccl_device float3 bsdf_diffuse_ramp_get_color(const float3 colors[8], float pos)
 {
-	int MAXCOLORS = 8;
-
-	float npos = pos * (float)(MAXCOLORS - 1);
-	int ipos = float_to_int(npos);
-	if(ipos < 0)
-		return colors[0];
-	if(ipos >= (MAXCOLORS - 1))
-		return colors[MAXCOLORS - 1];
-	float offset = npos - (float)ipos;
-	return colors[ipos] * (1.0f - offset) + colors[ipos+1] * offset;
+  int MAXCOLORS = 8;
+
+  float npos = pos * (float)(MAXCOLORS - 1);
+  int ipos = float_to_int(npos);
+  if (ipos < 0)
+    return colors[0];
+  if (ipos >= (MAXCOLORS - 1))
+    return colors[MAXCOLORS - 1];
+  float offset = npos - (float)ipos;
+  return colors[ipos] * (1.0f - offset) + colors[ipos + 1] * offset;
 }
 
 ccl_device int bsdf_diffuse_ramp_setup(DiffuseRampBsdf *bsdf)
 {
-	bsdf->type = CLOSURE_BSDF_DIFFUSE_RAMP_ID;
-	return SD_BSDF|SD_BSDF_HAS_EVAL;
+  bsdf->type = CLOSURE_BSDF_DIFFUSE_RAMP_ID;
+  return SD_BSDF | SD_BSDF_HAS_EVAL;
 }
 
 ccl_device void bsdf_diffuse_ramp_blur(ShaderClosure *sc, float roughness)
 {
 }
 
-ccl_device float3 bsdf_diffuse_ramp_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
+ccl_device float3 bsdf_diffuse_ramp_eval_reflect(const ShaderClosure *sc,
+                                                 const float3 I,
+                                                 const float3 omega_in,
+                                                 float *pdf)
 {
-	const DiffuseRampBsdf *bsdf = (const DiffuseRampBsdf*)sc;
-	float3 N = bsdf->N;
+  const DiffuseRampBsdf *bsdf = (const DiffuseRampBsdf *)sc;
+  float3 N = bsdf->N;
 
-	float cos_pi = fmaxf(dot(N, omega_in), 0.0f);
-	*pdf = cos_pi * M_1_PI_F;
-	return bsdf_diffuse_ramp_get_color(bsdf->colors, cos_pi) * M_1_PI_F;
+  float cos_pi = fmaxf(dot(N, omega_in), 0.0f);
+  *pdf = cos_pi * M_1_PI_F;
+  return bsdf_diffuse_ramp_get_color(bsdf->colors, cos_pi) * M_1_PI_F;
 }
 
-ccl_device float3 bsdf_diffuse_ramp_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
+ccl_device float3 bsdf_diffuse_ramp_eval_transmit(const ShaderClosure *sc,
+                                                  const float3 I,
+                                                  const float3 omega_in,
+                                                  float *pdf)
 {
-	return make_float3(0.0f, 0.0f, 0.0f);
+  return make_float3(0.0f, 0.0f, 0.0f);
 }
 
-ccl_device int bsdf_diffuse_ramp_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf)
+ccl_device int bsdf_diffuse_ramp_sample(const ShaderClosure *sc,
+                                        float3 Ng,
+                                        float3 I,
+                                        float3 dIdx,
+                                        float3 dIdy,
+                                        float randu,
+                                        float randv,
+                                        float3 *eval,
+                                        float3 *omega_in,
+                                        float3 *domega_in_dx,
+                                        float3 *domega_in_dy,
+                                        float *pdf)
 {
-	const DiffuseRampBsdf *bsdf = (const DiffuseRampBsdf*)sc;
-	float3 N = bsdf->N;
-
-	// distribution over the hemisphere
-	sample_cos_hemisphere(N, randu, randv, omega_in, pdf);
-
-	if(dot(Ng, *omega_in) > 0.0f) {
-		*eval = bsdf_diffuse_ramp_get_color(bsdf->colors, *pdf * M_PI_F) * M_1_PI_F;
-#ifdef __RAY_DIFFERENTIALS__
-		*domega_in_dx = (2 * dot(N, dIdx)) * N - dIdx;
-		*domega_in_dy = (2 * dot(N, dIdy)) * N - dIdy;
-#endif
-	}
-	else
-		*pdf = 0.0f;
-
-	return LABEL_REFLECT|LABEL_DIFFUSE;
+  const DiffuseRampBsdf *bsdf = (const DiffuseRampBsdf *)sc;
+  float3 N = bsdf->N;
+
+  // distribution over the hemisphere
+  sample_cos_hemisphere(N, randu, randv, omega_in, pdf);
+
+  if (dot(Ng, *omega_in) > 0.0f) {
+    *eval = bsdf_diffuse_ramp_get_color(bsdf->colors, *pdf * M_PI_F) * M_1_PI_F;
+#  ifdef __RAY_DIFFERENTIALS__
+    *domega_in_dx = (2 * dot(N, dIdx)) * N - dIdx;
+    *domega_in_dy = (2 * dot(N, dIdy)) * N - dIdy;
+#  endif
+  }
+  else
+    *pdf = 0.0f;
+
+  return LABEL_REFLECT | LABEL_DIFFUSE;
 }
 
-#endif  /* __OSL__ */
+#endif /* __OSL__ */
 
 CCL_NAMESPACE_END
 
-#endif  /* __BSDF_DIFFUSE_RAMP_H__ */
+#endif /* __BSDF_DIFFUSE_RAMP_H__ */
diff --git a/intern/cycles/kernel/closure/bsdf_hair.h b/intern/cycles/kernel/closure/bsdf_hair.h
index e1a0cfaa3f5..6b2a9a97d30 100644
--- a/intern/cycles/kernel/closure/bsdf_hair.h
+++ b/intern/cycles/kernel/closure/bsdf_hair.h
@@ -36,245 +36,276 @@
 CCL_NAMESPACE_BEGIN
 
 typedef ccl_addr_space struct HairBsdf {
-	SHADER_CLOSURE_BASE;
+  SHADER_CLOSURE_BASE;
 
-	float3 T;
-	float roughness1;
-	float roughness2;
-	float offset;
+  float3 T;
+  float roughness1;
+  float roughness2;
+  float offset;
 } HairBsdf;
 
 ccl_device int bsdf_hair_reflection_setup(HairBsdf *bsdf)
 {
-	bsdf->type = CLOSURE_BSDF_HAIR_REFLECTION_ID;
-	bsdf->roughness1 = clamp(bsdf->roughness1, 0.001f, 1.0f);
-	bsdf->roughness2 = clamp(bsdf->roughness2, 0.001f, 1.0f);
-	return SD_BSDF|SD_BSDF_HAS_EVAL;
+  bsdf->type = CLOSURE_BSDF_HAIR_REFLECTION_ID;
+  bsdf->roughness1 = clamp(bsdf->roughness1, 0.001f, 1.0f);
+  bsdf->roughness2 = clamp(bsdf->roughness2, 0.001f, 1.0f);
+  return SD_BSDF | SD_BSDF_HAS_EVAL;
 }
 
 ccl_device int bsdf_hair_transmission_setup(HairBsdf *bsdf)
 {
-	bsdf->type = CLOSURE_BSDF_HAIR_TRANSMISSION_ID;
-	bsdf->roughness1 = clamp(bsdf->roughness1, 0.001f, 1.0f);
-	bsdf->roughness2 = clamp(bsdf->roughness2, 0.001f, 1.0f);
-	return SD_BSDF|SD_BSDF_HAS_EVAL;
+  bsdf->type = CLOSURE_BSDF_HAIR_TRANSMISSION_ID;
+  bsdf->roughness1 = clamp(bsdf->roughness1, 0.001f, 1.0f);
+  bsdf->roughness2 = clamp(bsdf->roughness2, 0.001f, 1.0f);
+  return SD_BSDF | SD_BSDF_HAS_EVAL;
 }
 
 ccl_device bool bsdf_hair_merge(const ShaderClosure *a, const ShaderClosure *b)
 {
-	const HairBsdf *bsdf_a = (const HairBsdf*)a;
-	const HairBsdf *bsdf_b = (const HairBsdf*)b;
+  const HairBsdf *bsdf_a = (const HairBsdf *)a;
+  const HairBsdf *bsdf_b = (const HairBsdf *)b;
 
-	return (isequal_float3(bsdf_a->T, bsdf_b->T)) &&
-	       (bsdf_a->roughness1 == bsdf_b->roughness1) &&
-	       (bsdf_a->roughness2 == bsdf_b->roughness2) &&
-	       (bsdf_a->offset == bsdf_b->offset);
+  return (isequal_float3(bsdf_a->T, bsdf_b->T)) && (bsdf_a->roughness1 == bsdf_b->roughness1) &&
+         (bsdf_a->roughness2 == bsdf_b->roughness2) && (bsdf_a->offset == bsdf_b->offset);
 }
 
-ccl_device float3 bsdf_hair_reflection_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
+ccl_device float3 bsdf_hair_reflection_eval_reflect(const ShaderClosure *sc,
+                                                    const float3 I,
+                                                    const float3 omega_in,
+                                                    float *pdf)
 {
-	const HairBsdf *bsdf = (const HairBsdf*)sc;
-	float offset = bsdf->offset;
-	float3 Tg = bsdf->T;
-	float roughness1 = bsdf->roughness1;
-	float roughness2 = bsdf->roughness2;
+  const HairBsdf *bsdf = (const HairBsdf *)sc;
+  float offset = bsdf->offset;
+  float3 Tg = bsdf->T;
+  float roughness1 = bsdf->roughness1;
+  float roughness2 = bsdf->roughness2;
 
-	float Iz = dot(Tg, I);
-	float3 locy = normalize(I - Tg * Iz);
+  float Iz = dot(Tg, I);
+  float3 locy = normalize(I - Tg * Iz);
 
-	float theta_r = M_PI_2_F - fast_acosf(Iz);
+  float theta_r = M_PI_2_F - fast_acosf(Iz);
 
-	float omega_in_z = dot(Tg, omega_in);
-	float3 omega_in_y = normalize(omega_in - Tg * omega_in_z);
+  float omega_in_z = dot(Tg, omega_in);
+  float3 omega_in_y = normalize(omega_in - Tg * omega_in_z);
 
-	float theta_i = M_PI_2_F - fast_acosf(omega_in_z);
-	float cosphi_i = dot(omega_in_y, locy);
+  float theta_i = M_PI_2_F - fast_acosf(omega_in_z);
+  float cosphi_i = dot(omega_in_y, locy);
 
-	if(M_PI_2_F - fabsf(theta_i) < 0.001f || cosphi_i < 0.0f) {
-		*pdf = 0.0f;
-		return make_float3(*pdf, *pdf, *pdf);
-	}
+  if (M_PI_2_F - fabsf(theta_i) < 0.001f || cosphi_i < 0.0f) {
+    *pdf = 0.0f;
+    return make_float3(*pdf, *pdf, *pdf);
+  }
 
-	float roughness1_inv = 1.0f / roughness1;
-	float roughness2_inv = 1.0f / roughness2;
-	float phi_i = fast_acosf(cosphi_i) * roughness2_inv;
-	phi_i = fabsf(phi_i) < M_PI_F ? phi_i : M_PI_F;
-	float costheta_i = fast_cosf(theta_i);
+  float roughness1_inv = 1.0f / roughness1;
+  float roughness2_inv = 1.0f / roughness2;
+  float phi_i = fast_acosf(cosphi_i) * roughness2_inv;
+  phi_i = fabsf(phi_i) < M_PI_F ? phi_i : M_PI_F;
+  float costheta_i = fast_cosf(theta_i);
 
-	float a_R = fast_atan2f(((M_PI_2_F + theta_r) * 0.5f - offset) * roughness1_inv, 1.0f);
-	float b_R = fast_atan2f(((-M_PI_2_F + theta_r) * 0.5f - offset) * roughness1_inv, 1.0f);
+  float a_R = fast_atan2f(((M_PI_2_F + theta_r) * 0.5f - offset) * roughness1_inv, 1.0f);
+  float b_R = fast_atan2f(((-M_PI_2_F + theta_r) * 0.5f - offset) * roughness1_inv, 1.0f);
 
-	float theta_h = (theta_i + theta_r) * 0.5f;
-	float t = theta_h - offset;
+  float theta_h = (theta_i + theta_r) * 0.5f;
+  float t = theta_h - offset;
 
-	float phi_pdf = fast_cosf(phi_i * 0.5f) * 0.25f * roughness2_inv;
-	float theta_pdf = roughness1 / (2 * (t*t + roughness1*roughness1) * (a_R - b_R)* costheta_i);
-	*pdf = phi_pdf * theta_pdf;
+  float phi_pdf = fast_cosf(phi_i * 0.5f) * 0.25f * roughness2_inv;
+  float theta_pdf = roughness1 /
+                    (2 * (t * t + roughness1 * roughness1) * (a_R - b_R) * costheta_i);
+  *pdf = phi_pdf * theta_pdf;
 
-	return make_float3(*pdf, *pdf, *pdf);
+  return make_float3(*pdf, *pdf, *pdf);
 }
 
-ccl_device float3 bsdf_hair_transmission_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
+ccl_device float3 bsdf_hair_transmission_eval_reflect(const ShaderClosure *sc,
+                                                      const float3 I,
+                                                      const float3 omega_in,
+                                                      float *pdf)
 {
-	return make_float3(0.0f, 0.0f, 0.0f);
+  return make_float3(0.0f, 0.0f, 0.0f);
 }
 
-
-ccl_device float3 bsdf_hair_reflection_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
+ccl_device float3 bsdf_hair_reflection_eval_transmit(const ShaderClosure *sc,
+                                                     const float3 I,
+                                                     const float3 omega_in,
+                                                     float *pdf)
 {
-	return make_float3(0.0f, 0.0f, 0.0f);
+  return make_float3(0.0f, 0.0f, 0.0f);
 }
 
-ccl_device float3 bsdf_hair_transmission_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
+ccl_device float3 bsdf_hair_transmission_eval_transmit(const ShaderClosure *sc,
+                                                       const float3 I,
+                                                       const float3 omega_in,
+                                                       float *pdf)
 {
-	const HairBsdf *bsdf = (const HairBsdf*)sc;
-	float offset = bsdf->offset;
-	float3 Tg = bsdf->T;
-	float roughness1 = bsdf->roughness1;
-	float roughness2 = bsdf->roughness2;
-	float Iz = dot(Tg, I);
-	float3 locy = normalize(I - Tg * Iz);
+  const HairBsdf *bsdf = (const HairBsdf *)sc;
+  float offset = bsdf->offset;
+  float3 Tg = bsdf->T;
+  float roughness1 = bsdf->roughness1;
+  float roughness2 = bsdf->roughness2;
+  float Iz = dot(Tg, I);
+  float3 locy = normalize(I - Tg * Iz);
 
-	float theta_r = M_PI_2_F - fast_acosf(Iz);
+  float theta_r = M_PI_2_F - fast_acosf(Iz);
 
-	float omega_in_z = dot(Tg, omega_in);
-	float3 omega_in_y = normalize(omega_in - Tg * omega_in_z);
+  float omega_in_z = dot(Tg, omega_in);
+  float3 omega_in_y = normalize(omega_in - Tg * omega_in_z);
 
-	float theta_i = M_PI_2_F - fast_acosf(omega_in_z);
-	float phi_i = fast_acosf(dot(omega_in_y, locy));
+  float theta_i = M_PI_2_F - fast_acosf(omega_in_z);
+  float phi_i = fast_acosf(dot(omega_in_y, locy));
 
-	if(M_PI_2_F - fabsf(theta_i) < 0.001f) {
-		*pdf = 0.0f;
-		return make_float3(*pdf, *pdf, *pdf);
-	}
+  if (M_PI_2_F - fabsf(theta_i) < 0.001f) {
+    *pdf = 0.0f;
+    return make_float3(*pdf, *pdf, *pdf);
+  }
 
-	float costheta_i = fast_cosf(theta_i);
+  float costheta_i = fast_cosf(theta_i);
 
-	float roughness1_inv = 1.0f / roughness1;
-	float a_TT = fast_atan2f(((M_PI_2_F + theta_r)/2 - offset) * roughness1_inv, 1.0f);
-	float b_TT = fast_atan2f(((-M_PI_2_F + theta_r)/2 - offset) * roughness1_inv, 1.0f);
-	float c_TT = 2 * fast_atan2f(M_PI_2_F / roughness2, 1.0f);
+  float roughness1_inv = 1.0f / roughness1;
+  float a_TT = fast_atan2f(((M_PI_2_F + theta_r) / 2 - offset) * roughness1_inv, 1.0f);
+  float b_TT = fast_atan2f(((-M_PI_2_F + theta_r) / 2 - offset) * roughness1_inv, 1.0f);
+  float c_TT = 2 * fast_atan2f(M_PI_2_F / roughness2, 1.0f);
 
-	float theta_h = (theta_i + theta_r) / 2;
-	float t = theta_h - offset;
-	float phi = fabsf(phi_i);
+  float theta_h = (theta_i + theta_r) / 2;
+  float t = theta_h - offset;
+  float phi = fabsf(phi_i);
 
-	float p = M_PI_F - phi;
-	float theta_pdf = roughness1 / (2 * (t*t + roughness1 * roughness1) * (a_TT - b_TT)*costheta_i);
-	float phi_pdf = roughness2 / (c_TT * (p * p + roughness2 * roughness2));
+  float p = M_PI_F - phi;
+  float theta_pdf = roughness1 /
+                    (2 * (t * t + roughness1 * roughness1) * (a_TT - b_TT) * costheta_i);
+  float phi_pdf = roughness2 / (c_TT * (p * p + roughness2 * roughness2));
 
-	*pdf = phi_pdf * theta_pdf;
-	return make_float3(*pdf, *pdf, *pdf);
+  *pdf = phi_pdf * theta_pdf;
+  return make_float3(*pdf, *pdf, *pdf);
 }
 
-ccl_device int bsdf_hair_reflection_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf)
+ccl_device int bsdf_hair_reflection_sample(const ShaderClosure *sc,
+                                           float3 Ng,
+                                           float3 I,
+                                           float3 dIdx,
+                                           float3 dIdy,
+                                           float randu,
+                                           float randv,
+                                           float3 *eval,
+                                           float3 *omega_in,
+                                           float3 *domega_in_dx,
+                                           float3 *domega_in_dy,
+                                           float *pdf)
 {
-	const HairBsdf *bsdf = (const HairBsdf*)sc;
-	float offset = bsdf->offset;
-	float3 Tg = bsdf->T;
-	float roughness1 = bsdf->roughness1;
-	float roughness2 = bsdf->roughness2;
-	float Iz = dot(Tg, I);
-	float3 locy = normalize(I - Tg * Iz);
-	float3 locx = cross(locy, Tg);
-	float theta_r = M_PI_2_F - fast_acosf(Iz);
+  const HairBsdf *bsdf = (const HairBsdf *)sc;
+  float offset = bsdf->offset;
+  float3 Tg = bsdf->T;
+  float roughness1 = bsdf->roughness1;
+  float roughness2 = bsdf->roughness2;
+  float Iz = dot(Tg, I);
+  float3 locy = normalize(I - Tg * Iz);
+  float3 locx = cross(locy, Tg);
+  float theta_r = M_PI_2_F - fast_acosf(Iz);
 
-	float roughness1_inv = 1.0f / roughness1;
-	float a_R = fast_atan2f(((M_PI_2_F + theta_r) * 0.5f - offset) * roughness1_inv, 1.0f);
-	float b_R = fast_atan2f(((-M_PI_2_F + theta_r) * 0.5f - offset) * roughness1_inv, 1.0f);
+  float roughness1_inv = 1.0f / roughness1;
+  float a_R = fast_atan2f(((M_PI_2_F + theta_r) * 0.5f - offset) * roughness1_inv, 1.0f);
+  float b_R = fast_atan2f(((-M_PI_2_F + theta_r) * 0.5f - offset) * roughness1_inv, 1.0f);
 
-	float t = roughness1 * tanf(randu * (a_R - b_R) + b_R);
+  float t = roughness1 * tanf(randu * (a_R - b_R) + b_R);
 
-	float theta_h = t + offset;
-	float theta_i = 2 * theta_h - theta_r;
+  float theta_h = t + offset;
+  float theta_i = 2 * theta_h - theta_r;
 
-	float costheta_i, sintheta_i;
-	fast_sincosf(theta_i, &sintheta_i, &costheta_i);
+  float costheta_i, sintheta_i;
+  fast_sincosf(theta_i, &sintheta_i, &costheta_i);
 
-	float phi = 2 * safe_asinf(1 - 2 * randv) * roughness2;
+  float phi = 2 * safe_asinf(1 - 2 * randv) * roughness2;
 
-	float phi_pdf = fast_cosf(phi * 0.5f) * 0.25f / roughness2;
+  float phi_pdf = fast_cosf(phi * 0.5f) * 0.25f / roughness2;
 
-	float theta_pdf = roughness1 / (2 * (t*t + roughness1*roughness1) * (a_R - b_R)*costheta_i);
+  float theta_pdf = roughness1 /
+                    (2 * (t * t + roughness1 * roughness1) * (a_R - b_R) * costheta_i);
 
-	float sinphi, cosphi;
-	fast_sincosf(phi, &sinphi, &cosphi);
-	*omega_in =(cosphi * costheta_i) * locy -
-	           (sinphi * costheta_i) * locx +
-	           (         sintheta_i) * Tg;
+  float sinphi, cosphi;
+  fast_sincosf(phi, &sinphi, &cosphi);
+  *omega_in = (cosphi * costheta_i) * locy - (sinphi * costheta_i) * locx + (sintheta_i)*Tg;
 
-	//differentials - TODO: find a better approximation for the reflective bounce
+  //differentials - TODO: find a better approximation for the reflective bounce
 #ifdef __RAY_DIFFERENTIALS__
-	*domega_in_dx = 2 * dot(locy, dIdx) * locy - dIdx;
-	*domega_in_dy = 2 * dot(locy, dIdy) * locy - dIdy;
+  *domega_in_dx = 2 * dot(locy, dIdx) * locy - dIdx;
+  *domega_in_dy = 2 * dot(locy, dIdy) * locy - dIdy;
 #endif
 
-	*pdf = fabsf(phi_pdf * theta_pdf);
-	if(M_PI_2_F - fabsf(theta_i) < 0.001f)
-		*pdf = 0.0f;
+  *pdf = fabsf(phi_pdf * theta_pdf);
+  if (M_PI_2_F - fabsf(theta_i) < 0.001f)
+    *pdf = 0.0f;
 
-	*eval = make_float3(*pdf, *pdf, *pdf);
+  *eval = make_float3(*pdf, *pdf, *pdf);
 
-	return LABEL_REFLECT|LABEL_GLOSSY;
+  return LABEL_REFLECT | LABEL_GLOSSY;
 }
 
-ccl_device int bsdf_hair_transmission_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf)
+ccl_device int bsdf_hair_transmission_sample(const ShaderClosure *sc,
+                                             float3 Ng,
+                                             float3 I,
+                                             float3 dIdx,
+                                             float3 dIdy,
+                                             float randu,
+                                             float randv,
+                                             float3 *eval,
+                                             float3 *omega_in,
+                                             float3 *domega_in_dx,
+                                             float3 *domega_in_dy,
+                                             float *pdf)
 {
-	const HairBsdf *bsdf = (const HairBsdf*)sc;
-	float offset = bsdf->offset;
-	float3 Tg = bsdf->T;
-	float roughness1 = bsdf->roughness1;
-	float roughness2 = bsdf->roughness2;
-	float Iz = dot(Tg, I);
-	float3 locy = normalize(I - Tg * Iz);
-	float3 locx = cross(locy, Tg);
-	float theta_r = M_PI_2_F - fast_acosf(Iz);
-
-	float roughness1_inv = 1.0f / roughness1;
-	float a_TT = fast_atan2f(((M_PI_2_F + theta_r)/2 - offset) * roughness1_inv, 1.0f);
-	float b_TT = fast_atan2f(((-M_PI_2_F + theta_r)/2 - offset) * roughness1_inv, 1.0f);
-	float c_TT = 2 * fast_atan2f(M_PI_2_F / roughness2, 1.0f);
-
-	float t = roughness1 * tanf(randu * (a_TT - b_TT) + b_TT);
-
-	float theta_h = t + offset;
-	float theta_i = 2 * theta_h - theta_r;
-
-	float costheta_i, sintheta_i;
-	fast_sincosf(theta_i, &sintheta_i, &costheta_i);
-
-	float p = roughness2 * tanf(c_TT * (randv - 0.5f));
-	float phi = p + M_PI_F;
-	float theta_pdf = roughness1 / (2 * (t*t + roughness1*roughness1) * (a_TT - b_TT) * costheta_i);
-	float phi_pdf = roughness2 / (c_TT * (p * p + roughness2 * roughness2));
-
-	float sinphi, cosphi;
-	fast_sincosf(phi, &sinphi, &cosphi);
-	*omega_in =(cosphi * costheta_i) * locy -
-	           (sinphi * costheta_i) * locx +
-	           (         sintheta_i) * Tg;
-
-	//differentials - TODO: find a better approximation for the transmission bounce
+  const HairBsdf *bsdf = (const HairBsdf *)sc;
+  float offset = bsdf->offset;
+  float3 Tg = bsdf->T;
+  float roughness1 = bsdf->roughness1;
+  float roughness2 = bsdf->roughness2;
+  float Iz = dot(Tg, I);
+  float3 locy = normalize(I - Tg * Iz);
+  float3 locx = cross(locy, Tg);
+  float theta_r = M_PI_2_F - fast_acosf(Iz);
+
+  float roughness1_inv = 1.0f / roughness1;
+  float a_TT = fast_atan2f(((M_PI_2_F + theta_r) / 2 - offset) * roughness1_inv, 1.0f);
+  float b_TT = fast_atan2f(((-M_PI_2_F + theta_r) / 2 - offset) * roughness1_inv, 1.0f);
+  float c_TT = 2 * fast_atan2f(M_PI_2_F / roughness2, 1.0f);
+
+  float t = roughness1 * tanf(randu * (a_TT - b_TT) + b_TT);
+
+  float theta_h = t + offset;
+  float theta_i = 2 * theta_h - theta_r;
+
+  float costheta_i, sintheta_i;
+  fast_sincosf(theta_i, &sintheta_i, &costheta_i);
+
+  float p = roughness2 * tanf(c_TT * (randv - 0.5f));
+  float phi = p + M_PI_F;
+  float theta_pdf = roughness1 /
+                    (2 * (t * t + roughness1 * roughness1) * (a_TT - b_TT) * costheta_i);
+  float phi_pdf = roughness2 / (c_TT * (p * p + roughness2 * roughness2));
+
+  float sinphi, cosphi;
+  fast_sincosf(phi, &sinphi, &cosphi);
+  *omega_in = (cosphi * costheta_i) * locy - (sinphi * costheta_i) * locx + (sintheta_i)*Tg;
+
+  //differentials - TODO: find a better approximation for the transmission bounce
 #ifdef __RAY_DIFFERENTIALS__
-	*domega_in_dx = 2 * dot(locy, dIdx) * locy - dIdx;
-	*domega_in_dy = 2 * dot(locy, dIdy) * locy - dIdy;
+  *domega_in_dx = 2 * dot(locy, dIdx) * locy - dIdx;
+  *domega_in_dy = 2 * dot(locy, dIdy) * locy - dIdy;
 #endif
 
-	*pdf = fabsf(phi_pdf * theta_pdf);
-	if(M_PI_2_F - fabsf(theta_i) < 0.001f) {
-		*pdf = 0.0f;
-	}
+  *pdf = fabsf(phi_pdf * theta_pdf);
+  if (M_PI_2_F - fabsf(theta_i) < 0.001f) {
+    *pdf = 0.0f;
+  }
 
-	*eval = make_float3(*pdf, *pdf, *pdf);
+  *eval = make_float3(*pdf, *pdf, *pdf);
 
-	/* TODO(sergey): Should always be negative, but seems some precision issue
-	 * is involved here.
-	 */
-	kernel_assert(dot(locy, *omega_in) < 1e-4f);
+  /* TODO(sergey): Should always be negative, but seems some precision issue
+   * is involved here.
+   */
+  kernel_assert(dot(locy, *omega_in) < 1e-4f);
 
-	return LABEL_TRANSMIT|LABEL_GLOSSY;
+  return LABEL_TRANSMIT | LABEL_GLOSSY;
 }
 
 CCL_NAMESPACE_END
 
-#endif  /* __BSDF_HAIR_H__ */
+#endif /* __BSDF_HAIR_H__ */
diff --git a/intern/cycles/kernel/closure/bsdf_hair_principled.h b/intern/cycles/kernel/closure/bsdf_hair_principled.h
index 68335ee887a..a4bba2fbf6c 100644
--- a/intern/cycles/kernel/closure/bsdf_hair_principled.h
+++ b/intern/cycles/kernel/closure/bsdf_hair_principled.h
@@ -15,251 +15,245 @@
  */
 
 #ifdef __KERNEL_CPU__
-#include <fenv.h>
+#  include <fenv.h>
 #endif
 
 #include "kernel/kernel_color.h"
 
 #ifndef __BSDF_HAIR_PRINCIPLED_H__
-#define __BSDF_HAIR_PRINCIPLED_H__
+#  define __BSDF_HAIR_PRINCIPLED_H__
 
 CCL_NAMESPACE_BEGIN
 
 typedef ccl_addr_space struct PrincipledHairExtra {
-	/* Geometry data. */
-	float4 geom;
+  /* Geometry data. */
+  float4 geom;
 } PrincipledHairExtra;
 
 typedef ccl_addr_space struct PrincipledHairBSDF {
-	SHADER_CLOSURE_BASE;
-
-	/* Absorption coefficient. */
-	float3 sigma;
-	/* Variance of the underlying logistic distribution. */
-	float v;
-	/* Scale factor of the underlying logistic distribution. */
-	float s;
-	/* Cuticle tilt angle. */
-	float alpha;
-	/* IOR. */
-	float eta;
-	/* Effective variance for the diffuse bounce only. */
-	float m0_roughness;
-
-	/* Extra closure. */
-	PrincipledHairExtra *extra;
+  SHADER_CLOSURE_BASE;
+
+  /* Absorption coefficient. */
+  float3 sigma;
+  /* Variance of the underlying logistic distribution. */
+  float v;
+  /* Scale factor of the underlying logistic distribution. */
+  float s;
+  /* Cuticle tilt angle. */
+  float alpha;
+  /* IOR. */
+  float eta;
+  /* Effective variance for the diffuse bounce only. */
+  float m0_roughness;
+
+  /* Extra closure. */
+  PrincipledHairExtra *extra;
 } PrincipledHairBSDF;
 
-static_assert(sizeof(ShaderClosure) >= sizeof(PrincipledHairBSDF), "PrincipledHairBSDF is too large!");
-static_assert(sizeof(ShaderClosure) >= sizeof(PrincipledHairExtra), "PrincipledHairExtra is too large!");
+static_assert(sizeof(ShaderClosure) >= sizeof(PrincipledHairBSDF),
+              "PrincipledHairBSDF is too large!");
+static_assert(sizeof(ShaderClosure) >= sizeof(PrincipledHairExtra),
+              "PrincipledHairExtra is too large!");
 
 ccl_device_inline float cos_from_sin(const float s)
 {
-	return safe_sqrtf(1.0f - s*s);
+  return safe_sqrtf(1.0f - s * s);
 }
 
 /* Gives the change in direction in the normal plane for the given angles and p-th-order scattering. */
 ccl_device_inline float delta_phi(int p, float gamma_o, float gamma_t)
 {
-	return 2.0f * p * gamma_t - 2.0f * gamma_o + p * M_PI_F;
+  return 2.0f * p * gamma_t - 2.0f * gamma_o + p * M_PI_F;
 }
 
 /* Remaps the given angle to [-pi, pi]. */
 ccl_device_inline float wrap_angle(float a)
 {
-	while(a > M_PI_F) {
-		a -= M_2PI_F;
-	}
-	while(a < -M_PI_F) {
-		a += M_2PI_F;
-	}
-	return a;
+  while (a > M_PI_F) {
+    a -= M_2PI_F;
+  }
+  while (a < -M_PI_F) {
+    a += M_2PI_F;
+  }
+  return a;
 }
 
 /* Logistic distribution function. */
 ccl_device_inline float logistic(float x, float s)
 {
-	float v = expf(-fabsf(x)/s);
-	return v / (s * sqr(1.0f + v));
+  float v = expf(-fabsf(x) / s);
+  return v / (s * sqr(1.0f + v));
 }
 
 /* Logistic cumulative density function. */
 ccl_device_inline float logistic_cdf(float x, float s)
 {
-	float arg = -x/s;
-	/* expf() overflows if arg >= 89.0. */
-	if(arg > 88.0f) {
-		return 0.0f;
-	}
-	else {
-		return 1.0f / (1.0f + expf(arg));
-	}
+  float arg = -x / s;
+  /* expf() overflows if arg >= 89.0. */
+  if (arg > 88.0f) {
+    return 0.0f;
+  }
+  else {
+    return 1.0f / (1.0f + expf(arg));
+  }
 }
 
 /* Numerical approximation to the Bessel function of the first kind. */
 ccl_device_inline float bessel_I0(float x)
 {
-	x = sqr(x);
-	float val = 1.0f + 0.25f*x;
-	float pow_x_2i = sqr(x);
-	uint64_t i_fac_2 = 1;
-	int pow_4_i = 16;
-	for(int i = 2; i < 10; i++) {
-		i_fac_2 *= i*i;
-		float newval = val + pow_x_2i / (pow_4_i * i_fac_2);
-		if(val == newval) {
-			return val;
-		}
-		val = newval;
-		pow_x_2i *= x;
-		pow_4_i *= 4;
-	}
-	return val;
+  x = sqr(x);
+  float val = 1.0f + 0.25f * x;
+  float pow_x_2i = sqr(x);
+  uint64_t i_fac_2 = 1;
+  int pow_4_i = 16;
+  for (int i = 2; i < 10; i++) {
+    i_fac_2 *= i * i;
+    float newval = val + pow_x_2i / (pow_4_i * i_fac_2);
+    if (val == newval) {
+      return val;
+    }
+    val = newval;
+    pow_x_2i *= x;
+    pow_4_i *= 4;
+  }
+  return val;
 }
 
 /* Logarithm of the Bessel function of the first kind. */
 ccl_device_inline float log_bessel_I0(float x)
 {
-	if(x > 12.0f) {
-		/* log(1/x) == -log(x) iff x > 0.
-		 * This is only used with positive cosines */
-		return x + 0.5f * (1.f / (8.0f * x) - M_LN_2PI_F - logf(x));
-	}
-	else {
-		return logf(bessel_I0(x));
-	}
+  if (x > 12.0f) {
+    /* log(1/x) == -log(x) iff x > 0.
+     * This is only used with positive cosines */
+    return x + 0.5f * (1.f / (8.0f * x) - M_LN_2PI_F - logf(x));
+  }
+  else {
+    return logf(bessel_I0(x));
+  }
 }
 
 /* Logistic distribution limited to the interval [-pi, pi]. */
 ccl_device_inline float trimmed_logistic(float x, float s)
 {
-	/* The logistic distribution is symmetric and centered around zero,
-	 * so logistic_cdf(x, s) = 1 - logistic_cdf(-x, s).
-	 * Therefore, logistic_cdf(x, s)-logistic_cdf(-x, s) = 1 - 2*logistic_cdf(-x, s) */
-	float scaling_fac = 1.0f - 2.0f*logistic_cdf(-M_PI_F, s);
-	float val = logistic(x, s);
-	return safe_divide(val, scaling_fac);
+  /* The logistic distribution is symmetric and centered around zero,
+   * so logistic_cdf(x, s) = 1 - logistic_cdf(-x, s).
+   * Therefore, logistic_cdf(x, s)-logistic_cdf(-x, s) = 1 - 2*logistic_cdf(-x, s) */
+  float scaling_fac = 1.0f - 2.0f * logistic_cdf(-M_PI_F, s);
+  float val = logistic(x, s);
+  return safe_divide(val, scaling_fac);
 }
 
 /* Sampling function for the trimmed logistic function. */
 ccl_device_inline float sample_trimmed_logistic(float u, float s)
 {
-	float cdf_minuspi = logistic_cdf(-M_PI_F, s);
-	float x = -s*logf(1.0f / (u*(1.0f - 2.0f*cdf_minuspi) + cdf_minuspi) - 1.0f);
-	return clamp(x, -M_PI_F, M_PI_F);
+  float cdf_minuspi = logistic_cdf(-M_PI_F, s);
+  float x = -s * logf(1.0f / (u * (1.0f - 2.0f * cdf_minuspi) + cdf_minuspi) - 1.0f);
+  return clamp(x, -M_PI_F, M_PI_F);
 }
 
 /* Azimuthal scattering function Np. */
-ccl_device_inline float azimuthal_scattering(float phi,
-                                             int p,
-                                             float s,
-                                             float gamma_o,
-                                             float gamma_t)
+ccl_device_inline float azimuthal_scattering(
+    float phi, int p, float s, float gamma_o, float gamma_t)
 {
-	float phi_o = wrap_angle(phi - delta_phi(p, gamma_o, gamma_t));
-	float val = trimmed_logistic(phi_o, s);
-	return val;
+  float phi_o = wrap_angle(phi - delta_phi(p, gamma_o, gamma_t));
+  float val = trimmed_logistic(phi_o, s);
+  return val;
 }
 
 /* Longitudinal scattering function Mp. */
-ccl_device_inline float longitudinal_scattering(float sin_theta_i,
-                                                float cos_theta_i,
-                                                float sin_theta_o,
-                                                float cos_theta_o,
-                                                float v)
+ccl_device_inline float longitudinal_scattering(
+    float sin_theta_i, float cos_theta_i, float sin_theta_o, float cos_theta_o, float v)
 {
-	float inv_v = 1.0f/v;
-	float cos_arg = cos_theta_i * cos_theta_o * inv_v;
-	float sin_arg = sin_theta_i * sin_theta_o * inv_v;
-	if(v <= 0.1f) {
-		float i0 = log_bessel_I0(cos_arg);
-		float val = expf(i0 - sin_arg - inv_v + 0.6931f + logf(0.5f*inv_v));
-		return val;
-	}
-	else {
-		float i0 = bessel_I0(cos_arg);
-		float val = (expf(-sin_arg) * i0) / (sinhf(inv_v) * 2.0f * v);
-		return val;
-	}
+  float inv_v = 1.0f / v;
+  float cos_arg = cos_theta_i * cos_theta_o * inv_v;
+  float sin_arg = sin_theta_i * sin_theta_o * inv_v;
+  if (v <= 0.1f) {
+    float i0 = log_bessel_I0(cos_arg);
+    float val = expf(i0 - sin_arg - inv_v + 0.6931f + logf(0.5f * inv_v));
+    return val;
+  }
+  else {
+    float i0 = bessel_I0(cos_arg);
+    float val = (expf(-sin_arg) * i0) / (sinhf(inv_v) * 2.0f * v);
+    return val;
+  }
 }
 
 /* Combine the three values using their luminances. */
 ccl_device_inline float4 combine_with_energy(KernelGlobals *kg, float3 c)
 {
-	return make_float4(c.x, c.y, c.z, linear_rgb_to_gray(kg, c));
+  return make_float4(c.x, c.y, c.z, linear_rgb_to_gray(kg, c));
 }
 
-#ifdef __HAIR__
+#  ifdef __HAIR__
 /* Set up the hair closure. */
 ccl_device int bsdf_principled_hair_setup(ShaderData *sd, PrincipledHairBSDF *bsdf)
 {
-	bsdf->type = CLOSURE_BSDF_HAIR_PRINCIPLED_ID;
-	bsdf->v = clamp(bsdf->v, 0.001f, 1.0f);
-	bsdf->s = clamp(bsdf->s, 0.001f, 1.0f);
-	/* Apply Primary Reflection Roughness modifier. */
-	bsdf->m0_roughness = clamp(bsdf->m0_roughness*bsdf->v, 0.001f, 1.0f);
-
-	/* Map from roughness_u and roughness_v to variance and scale factor. */
-	bsdf->v = sqr(0.726f*bsdf->v + 0.812f*sqr(bsdf->v) + 3.700f*pow20(bsdf->v));
-	bsdf->s =    (0.265f*bsdf->s + 1.194f*sqr(bsdf->s) + 5.372f*pow22(bsdf->s))*M_SQRT_PI_8_F;
-	bsdf->m0_roughness = sqr(0.726f*bsdf->m0_roughness + 0.812f*sqr(bsdf->m0_roughness) + 3.700f*pow20(bsdf->m0_roughness));
-
-	/* Compute local frame, aligned to curve tangent and ray direction. */
-	float3 X = safe_normalize(sd->dPdu);
-	float3 Y = safe_normalize(cross(X, sd->I));
-	float3 Z = safe_normalize(cross(X, Y));
-	/* TODO: the solution below works where sd->Ng is the normal
-	 * pointing from the center of the curve to the shading point.
-	 * It doesn't work for triangles, see https://developer.blender.org/T43625 */
-
-	/* h -1..0..1 means the rays goes from grazing the hair, to hitting it at
-	 * the center, to grazing the other edge. This is the sine of the angle
-	 * between sd->Ng and Z, as seen from the tangent X. */
-
-	/* TODO: we convert this value to a cosine later and discard the sign, so
-	 * we could probably save some operations. */
-	float h = dot(cross(sd->Ng, X), Z);
-
-	kernel_assert(fabsf(h) < 1.0f + 1e-4f);
-	kernel_assert(isfinite3_safe(Y));
-	kernel_assert(isfinite_safe(h));
-
-	bsdf->extra->geom = make_float4(Y.x, Y.y, Y.z, h);
-
-	return SD_BSDF|SD_BSDF_HAS_EVAL|SD_BSDF_NEEDS_LCG;
+  bsdf->type = CLOSURE_BSDF_HAIR_PRINCIPLED_ID;
+  bsdf->v = clamp(bsdf->v, 0.001f, 1.0f);
+  bsdf->s = clamp(bsdf->s, 0.001f, 1.0f);
+  /* Apply Primary Reflection Roughness modifier. */
+  bsdf->m0_roughness = clamp(bsdf->m0_roughness * bsdf->v, 0.001f, 1.0f);
+
+  /* Map from roughness_u and roughness_v to variance and scale factor. */
+  bsdf->v = sqr(0.726f * bsdf->v + 0.812f * sqr(bsdf->v) + 3.700f * pow20(bsdf->v));
+  bsdf->s = (0.265f * bsdf->s + 1.194f * sqr(bsdf->s) + 5.372f * pow22(bsdf->s)) * M_SQRT_PI_8_F;
+  bsdf->m0_roughness = sqr(0.726f * bsdf->m0_roughness + 0.812f * sqr(bsdf->m0_roughness) +
+                           3.700f * pow20(bsdf->m0_roughness));
+
+  /* Compute local frame, aligned to curve tangent and ray direction. */
+  float3 X = safe_normalize(sd->dPdu);
+  float3 Y = safe_normalize(cross(X, sd->I));
+  float3 Z = safe_normalize(cross(X, Y));
+  /* TODO: the solution below works where sd->Ng is the normal
+   * pointing from the center of the curve to the shading point.
+   * It doesn't work for triangles, see https://developer.blender.org/T43625 */
+
+  /* h -1..0..1 means the ray goes from grazing the hair, to hitting it at
+   * the center, to grazing the other edge. This is the sine of the angle
+   * between sd->Ng and Z, as seen from the tangent X. */
+
+  /* TODO: we convert this value to a cosine later and discard the sign, so
+   * we could probably save some operations. */
+  float h = dot(cross(sd->Ng, X), Z);
+
+  kernel_assert(fabsf(h) < 1.0f + 1e-4f);
+  kernel_assert(isfinite3_safe(Y));
+  kernel_assert(isfinite_safe(h));
+
+  bsdf->extra->geom = make_float4(Y.x, Y.y, Y.z, h);
+
+  return SD_BSDF | SD_BSDF_HAS_EVAL | SD_BSDF_NEEDS_LCG;
 }
 
-#endif  /* __HAIR__ */
+#  endif /* __HAIR__ */
 
 /* Given the Fresnel term and transmittance, generate the attenuation terms for each bounce. */
-ccl_device_inline void hair_attenuation(KernelGlobals *kg,
-                                        float f,
-                                        float3 T,
-                                        float4 *Ap)
+ccl_device_inline void hair_attenuation(KernelGlobals *kg, float f, float3 T, float4 *Ap)
 {
-	/* Primary specular (R). */
-	Ap[0] = make_float4(f, f, f, f);
+  /* Primary specular (R). */
+  Ap[0] = make_float4(f, f, f, f);
 
-	/* Transmission (TT). */
-	float3 col = sqr(1.0f - f) * T;
-	Ap[1] = combine_with_energy(kg, col);
+  /* Transmission (TT). */
+  float3 col = sqr(1.0f - f) * T;
+  Ap[1] = combine_with_energy(kg, col);
 
-	/* Secondary specular (TRT). */
-	col *= T*f;
-	Ap[2] = combine_with_energy(kg, col);
+  /* Secondary specular (TRT). */
+  col *= T * f;
+  Ap[2] = combine_with_energy(kg, col);
 
-	/* Residual component (TRRT+). */
-	col *= safe_divide_color(T*f, make_float3(1.0f, 1.0f, 1.0f) - T*f);
-	Ap[3] = combine_with_energy(kg, col);
+  /* Residual component (TRRT+). */
+  col *= safe_divide_color(T * f, make_float3(1.0f, 1.0f, 1.0f) - T * f);
+  Ap[3] = combine_with_energy(kg, col);
 
-	/* Normalize sampling weights. */
-	float totweight = Ap[0].w + Ap[1].w + Ap[2].w + Ap[3].w;
-	float fac = safe_divide(1.0f, totweight);
+  /* Normalize sampling weights. */
+  float totweight = Ap[0].w + Ap[1].w + Ap[2].w + Ap[3].w;
+  float fac = safe_divide(1.0f, totweight);
 
-	Ap[0].w *= fac;
-	Ap[1].w *= fac;
-	Ap[2].w *= fac;
-	Ap[3].w *= fac;
+  Ap[0].w *= fac;
+  Ap[1].w *= fac;
+  Ap[2].w *= fac;
+  Ap[3].w *= fac;
 }
 
 /* Given the tilt angle, generate the rotated theta_i for the different bounces. */
@@ -268,19 +262,19 @@ ccl_device_inline void hair_alpha_angles(float sin_theta_i,
                                          float alpha,
                                          float *angles)
 {
-	float sin_1alpha = sinf(alpha);
-	float cos_1alpha = cos_from_sin(sin_1alpha);
-	float sin_2alpha = 2.0f*sin_1alpha*cos_1alpha;
-	float cos_2alpha = sqr(cos_1alpha) - sqr(sin_1alpha);
-	float sin_4alpha = 2.0f*sin_2alpha*cos_2alpha;
-	float cos_4alpha = sqr(cos_2alpha) - sqr(sin_2alpha);
-
-	angles[0] = sin_theta_i*cos_2alpha + cos_theta_i*sin_2alpha;
-	angles[1] = fabsf(cos_theta_i*cos_2alpha - sin_theta_i*sin_2alpha);
-	angles[2] = sin_theta_i*cos_1alpha - cos_theta_i*sin_1alpha;
-	angles[3] = fabsf(cos_theta_i*cos_1alpha + sin_theta_i*sin_1alpha);
-	angles[4] = sin_theta_i*cos_4alpha - cos_theta_i*sin_4alpha;
-	angles[5] = fabsf(cos_theta_i*cos_4alpha + sin_theta_i*sin_4alpha);
+  float sin_1alpha = sinf(alpha);
+  float cos_1alpha = cos_from_sin(sin_1alpha);
+  float sin_2alpha = 2.0f * sin_1alpha * cos_1alpha;
+  float cos_2alpha = sqr(cos_1alpha) - sqr(sin_1alpha);
+  float sin_4alpha = 2.0f * sin_2alpha * cos_2alpha;
+  float cos_4alpha = sqr(cos_2alpha) - sqr(sin_2alpha);
+
+  angles[0] = sin_theta_i * cos_2alpha + cos_theta_i * sin_2alpha;
+  angles[1] = fabsf(cos_theta_i * cos_2alpha - sin_theta_i * sin_2alpha);
+  angles[2] = sin_theta_i * cos_1alpha - cos_theta_i * sin_1alpha;
+  angles[3] = fabsf(cos_theta_i * cos_1alpha + sin_theta_i * sin_1alpha);
+  angles[4] = sin_theta_i * cos_4alpha - cos_theta_i * sin_4alpha;
+  angles[5] = fabsf(cos_theta_i * cos_4alpha + sin_theta_i * sin_4alpha);
 }
 
 /* Evaluation function for our shader. */
@@ -290,75 +284,75 @@ ccl_device float3 bsdf_principled_hair_eval(KernelGlobals *kg,
                                             const float3 omega_in,
                                             float *pdf)
 {
-	kernel_assert(isfinite3_safe(sd->P) && isfinite_safe(sd->ray_length));
+  kernel_assert(isfinite3_safe(sd->P) && isfinite_safe(sd->ray_length));
 
-	const PrincipledHairBSDF *bsdf = (const PrincipledHairBSDF*) sc;
-	float3 Y = float4_to_float3(bsdf->extra->geom);
+  const PrincipledHairBSDF *bsdf = (const PrincipledHairBSDF *)sc;
+  float3 Y = float4_to_float3(bsdf->extra->geom);
 
-	float3 X = safe_normalize(sd->dPdu);
-	kernel_assert(fabsf(dot(X, Y)) < 1e-3f);
-	float3 Z = safe_normalize(cross(X, Y));
+  float3 X = safe_normalize(sd->dPdu);
+  kernel_assert(fabsf(dot(X, Y)) < 1e-3f);
+  float3 Z = safe_normalize(cross(X, Y));
 
-	float3 wo = make_float3(dot(sd->I, X), dot(sd->I, Y), dot(sd->I, Z));
-	float3 wi = make_float3(dot(omega_in, X), dot(omega_in, Y), dot(omega_in, Z));
+  float3 wo = make_float3(dot(sd->I, X), dot(sd->I, Y), dot(sd->I, Z));
+  float3 wi = make_float3(dot(omega_in, X), dot(omega_in, Y), dot(omega_in, Z));
 
-	float sin_theta_o = wo.x;
-	float cos_theta_o = cos_from_sin(sin_theta_o);
-	float phi_o = atan2f(wo.z, wo.y);
+  float sin_theta_o = wo.x;
+  float cos_theta_o = cos_from_sin(sin_theta_o);
+  float phi_o = atan2f(wo.z, wo.y);
 
-	float sin_theta_t = sin_theta_o / bsdf->eta;
-	float cos_theta_t = cos_from_sin(sin_theta_t);
+  float sin_theta_t = sin_theta_o / bsdf->eta;
+  float cos_theta_t = cos_from_sin(sin_theta_t);
 
-	float sin_gamma_o = bsdf->extra->geom.w;
-	float cos_gamma_o = cos_from_sin(sin_gamma_o);
-	float gamma_o = safe_asinf(sin_gamma_o);
+  float sin_gamma_o = bsdf->extra->geom.w;
+  float cos_gamma_o = cos_from_sin(sin_gamma_o);
+  float gamma_o = safe_asinf(sin_gamma_o);
 
-	float sin_gamma_t = sin_gamma_o * cos_theta_o / sqrtf(sqr(bsdf->eta) - sqr(sin_theta_o));
-	float cos_gamma_t = cos_from_sin(sin_gamma_t);
-	float gamma_t = safe_asinf(sin_gamma_t);
+  float sin_gamma_t = sin_gamma_o * cos_theta_o / sqrtf(sqr(bsdf->eta) - sqr(sin_theta_o));
+  float cos_gamma_t = cos_from_sin(sin_gamma_t);
+  float gamma_t = safe_asinf(sin_gamma_t);
 
-	float3 T = exp3(-bsdf->sigma * (2.0f * cos_gamma_t / cos_theta_t));
-	float4 Ap[4];
-	hair_attenuation(kg, fresnel_dielectric_cos(cos_theta_o * cos_gamma_o, bsdf->eta), T, Ap);
+  float3 T = exp3(-bsdf->sigma * (2.0f * cos_gamma_t / cos_theta_t));
+  float4 Ap[4];
+  hair_attenuation(kg, fresnel_dielectric_cos(cos_theta_o * cos_gamma_o, bsdf->eta), T, Ap);
 
-	float sin_theta_i = wi.x;
-	float cos_theta_i = cos_from_sin(sin_theta_i);
-	float phi_i = atan2f(wi.z, wi.y);
+  float sin_theta_i = wi.x;
+  float cos_theta_i = cos_from_sin(sin_theta_i);
+  float phi_i = atan2f(wi.z, wi.y);
 
-	float phi = phi_i - phi_o;
+  float phi = phi_i - phi_o;
 
-	float angles[6];
-	hair_alpha_angles(sin_theta_i, cos_theta_i, bsdf->alpha, angles);
+  float angles[6];
+  hair_alpha_angles(sin_theta_i, cos_theta_i, bsdf->alpha, angles);
 
-	float4 F;
-	float Mp, Np;
+  float4 F;
+  float Mp, Np;
 
-	/* Primary specular (R). */
-	Mp = longitudinal_scattering(angles[0], angles[1], sin_theta_o, cos_theta_o, bsdf->m0_roughness);
-	Np = azimuthal_scattering(phi, 0, bsdf->s, gamma_o, gamma_t);
-	F  = Ap[0] * Mp * Np;
-	kernel_assert(isfinite3_safe(float4_to_float3(F)));
+  /* Primary specular (R). */
+  Mp = longitudinal_scattering(angles[0], angles[1], sin_theta_o, cos_theta_o, bsdf->m0_roughness);
+  Np = azimuthal_scattering(phi, 0, bsdf->s, gamma_o, gamma_t);
+  F = Ap[0] * Mp * Np;
+  kernel_assert(isfinite3_safe(float4_to_float3(F)));
 
-	/* Transmission (TT). */
-	Mp = longitudinal_scattering(angles[2], angles[3], sin_theta_o, cos_theta_o, 0.25f*bsdf->v);
-	Np = azimuthal_scattering(phi, 1, bsdf->s, gamma_o, gamma_t);
-	F += Ap[1] * Mp * Np;
-	kernel_assert(isfinite3_safe(float4_to_float3(F)));
+  /* Transmission (TT). */
+  Mp = longitudinal_scattering(angles[2], angles[3], sin_theta_o, cos_theta_o, 0.25f * bsdf->v);
+  Np = azimuthal_scattering(phi, 1, bsdf->s, gamma_o, gamma_t);
+  F += Ap[1] * Mp * Np;
+  kernel_assert(isfinite3_safe(float4_to_float3(F)));
 
-	/* Secondary specular (TRT). */
-	Mp = longitudinal_scattering(angles[4], angles[5], sin_theta_o, cos_theta_o, 4.0f*bsdf->v);
-	Np = azimuthal_scattering(phi, 2, bsdf->s, gamma_o, gamma_t);
-	F += Ap[2] * Mp * Np;
-	kernel_assert(isfinite3_safe(float4_to_float3(F)));
+  /* Secondary specular (TRT). */
+  Mp = longitudinal_scattering(angles[4], angles[5], sin_theta_o, cos_theta_o, 4.0f * bsdf->v);
+  Np = azimuthal_scattering(phi, 2, bsdf->s, gamma_o, gamma_t);
+  F += Ap[2] * Mp * Np;
+  kernel_assert(isfinite3_safe(float4_to_float3(F)));
 
-	/* Residual component (TRRT+). */
-	Mp = longitudinal_scattering(sin_theta_i, cos_theta_i, sin_theta_o, cos_theta_o, 4.0f*bsdf->v);
-	Np = M_1_2PI_F;
-	F += Ap[3] * Mp * Np;
-	kernel_assert(isfinite3_safe(float4_to_float3(F)));
+  /* Residual component (TRRT+). */
+  Mp = longitudinal_scattering(sin_theta_i, cos_theta_i, sin_theta_o, cos_theta_o, 4.0f * bsdf->v);
+  Np = M_1_2PI_F;
+  F += Ap[3] * Mp * Np;
+  kernel_assert(isfinite3_safe(float4_to_float3(F)));
 
-	*pdf = F.w;
-	return float4_to_float3(F);
+  *pdf = F.w;
+  return float4_to_float3(F);
 }
 
 /* Sampling function for the hair shader. */
@@ -373,130 +367,131 @@ ccl_device int bsdf_principled_hair_sample(KernelGlobals *kg,
                                            float3 *domega_in_dy,
                                            float *pdf)
 {
-	PrincipledHairBSDF *bsdf = (PrincipledHairBSDF*) sc;
-
-	float3 Y = float4_to_float3(bsdf->extra->geom);
-
-	float3 X = safe_normalize(sd->dPdu);
-	kernel_assert(fabsf(dot(X, Y)) < 1e-3f);
-	float3 Z = safe_normalize(cross(X, Y));
-
-	float3 wo = make_float3(dot(sd->I, X), dot(sd->I, Y), dot(sd->I, Z));
-
-	float2 u[2];
-	u[0] = make_float2(randu, randv);
-	u[1].x = lcg_step_float_addrspace(&sd->lcg_state);
-	u[1].y = lcg_step_float_addrspace(&sd->lcg_state);
-
-	float sin_theta_o = wo.x;
-	float cos_theta_o = cos_from_sin(sin_theta_o);
-	float phi_o = atan2f(wo.z, wo.y);
-
-	float sin_theta_t = sin_theta_o / bsdf->eta;
-	float cos_theta_t = cos_from_sin(sin_theta_t);
-
-	float sin_gamma_o = bsdf->extra->geom.w;
-	float cos_gamma_o = cos_from_sin(sin_gamma_o);
-	float gamma_o = safe_asinf(sin_gamma_o);
-
-	float sin_gamma_t = sin_gamma_o * cos_theta_o / sqrtf(sqr(bsdf->eta) - sqr(sin_theta_o));
-	float cos_gamma_t = cos_from_sin(sin_gamma_t);
-	float gamma_t = safe_asinf(sin_gamma_t);
-
-	float3 T = exp3(-bsdf->sigma * (2.0f * cos_gamma_t / cos_theta_t));
-	float4 Ap[4];
-	hair_attenuation(kg, fresnel_dielectric_cos(cos_theta_o * cos_gamma_o, bsdf->eta), T, Ap);
-
-	int p = 0;
-	for(; p < 3; p++) {
-		if(u[0].x < Ap[p].w) {
-			break;
-		}
-		u[0].x -= Ap[p].w;
-	}
-
-	float v = bsdf->v;
-	if(p == 1) {
-		v *= 0.25f;
-	}
-	if(p >= 2) {
-		v *= 4.0f;
-	}
-
-	u[1].x = max(u[1].x, 1e-5f);
-	float fac = 1.0f + v*logf(u[1].x + (1.0f - u[1].x)*expf(-2.0f/v));
-	float sin_theta_i = -fac * sin_theta_o + cos_from_sin(fac) * cosf(M_2PI_F * u[1].y) * cos_theta_o;
-	float cos_theta_i = cos_from_sin(sin_theta_i);
-
-	float angles[6];
-	if(p < 3) {
-		hair_alpha_angles(sin_theta_i, cos_theta_i, -bsdf->alpha, angles);
-		sin_theta_i = angles[2*p];
-		cos_theta_i = angles[2*p+1];
-	}
-
-	float phi;
-	if(p < 3) {
-		phi = delta_phi(p, gamma_o, gamma_t) + sample_trimmed_logistic(u[0].y, bsdf->s);
-	}
-	else {
-		phi = M_2PI_F*u[0].y;
-	}
-	float phi_i = phi_o + phi;
-
-	hair_alpha_angles(sin_theta_i, cos_theta_i, bsdf->alpha, angles);
-
-	float4 F;
-	float Mp, Np;
-
-	/* Primary specular (R). */
-	Mp = longitudinal_scattering(angles[0], angles[1], sin_theta_o, cos_theta_o, bsdf->m0_roughness);
-	Np = azimuthal_scattering(phi, 0, bsdf->s, gamma_o, gamma_t);
-	F  = Ap[0] * Mp * Np;
-	kernel_assert(isfinite3_safe(float4_to_float3(F)));
-
-	/* Transmission (TT). */
-	Mp = longitudinal_scattering(angles[2], angles[3], sin_theta_o, cos_theta_o, 0.25f*bsdf->v);
-	Np = azimuthal_scattering(phi, 1, bsdf->s, gamma_o, gamma_t);
-	F += Ap[1] * Mp * Np;
-	kernel_assert(isfinite3_safe(float4_to_float3(F)));
-
-	/* Secondary specular (TRT). */
-	Mp = longitudinal_scattering(angles[4], angles[5], sin_theta_o, cos_theta_o, 4.0f*bsdf->v);
-	Np = azimuthal_scattering(phi, 2, bsdf->s, gamma_o, gamma_t);
-	F += Ap[2] * Mp * Np;
-	kernel_assert(isfinite3_safe(float4_to_float3(F)));
-
-	/* Residual component (TRRT+). */
-	Mp = longitudinal_scattering(sin_theta_i, cos_theta_i, sin_theta_o, cos_theta_o, 4.0f*bsdf->v);
-	Np = M_1_2PI_F;
-	F += Ap[3] * Mp * Np;
-	kernel_assert(isfinite3_safe(float4_to_float3(F)));
-
-	*eval = float4_to_float3(F);
-	*pdf = F.w;
-
-	*omega_in = X*sin_theta_i + Y*cos_theta_i*cosf(phi_i) + Z*cos_theta_i*sinf(phi_i);
-
-#ifdef __RAY_DIFFERENTIALS__
-	float3 N = safe_normalize(sd->I + *omega_in);
-	*domega_in_dx = (2 * dot(N, sd->dI.dx)) * N - sd->dI.dx;
-	*domega_in_dy = (2 * dot(N, sd->dI.dy)) * N - sd->dI.dy;
-#endif
-
-	return LABEL_GLOSSY|((p == 0)? LABEL_REFLECT : LABEL_TRANSMIT);
+  PrincipledHairBSDF *bsdf = (PrincipledHairBSDF *)sc;
+
+  float3 Y = float4_to_float3(bsdf->extra->geom);
+
+  float3 X = safe_normalize(sd->dPdu);
+  kernel_assert(fabsf(dot(X, Y)) < 1e-3f);
+  float3 Z = safe_normalize(cross(X, Y));
+
+  float3 wo = make_float3(dot(sd->I, X), dot(sd->I, Y), dot(sd->I, Z));
+
+  float2 u[2];
+  u[0] = make_float2(randu, randv);
+  u[1].x = lcg_step_float_addrspace(&sd->lcg_state);
+  u[1].y = lcg_step_float_addrspace(&sd->lcg_state);
+
+  float sin_theta_o = wo.x;
+  float cos_theta_o = cos_from_sin(sin_theta_o);
+  float phi_o = atan2f(wo.z, wo.y);
+
+  float sin_theta_t = sin_theta_o / bsdf->eta;
+  float cos_theta_t = cos_from_sin(sin_theta_t);
+
+  float sin_gamma_o = bsdf->extra->geom.w;
+  float cos_gamma_o = cos_from_sin(sin_gamma_o);
+  float gamma_o = safe_asinf(sin_gamma_o);
+
+  float sin_gamma_t = sin_gamma_o * cos_theta_o / sqrtf(sqr(bsdf->eta) - sqr(sin_theta_o));
+  float cos_gamma_t = cos_from_sin(sin_gamma_t);
+  float gamma_t = safe_asinf(sin_gamma_t);
+
+  float3 T = exp3(-bsdf->sigma * (2.0f * cos_gamma_t / cos_theta_t));
+  float4 Ap[4];
+  hair_attenuation(kg, fresnel_dielectric_cos(cos_theta_o * cos_gamma_o, bsdf->eta), T, Ap);
+
+  int p = 0;
+  for (; p < 3; p++) {
+    if (u[0].x < Ap[p].w) {
+      break;
+    }
+    u[0].x -= Ap[p].w;
+  }
+
+  float v = bsdf->v;
+  if (p == 1) {
+    v *= 0.25f;
+  }
+  if (p >= 2) {
+    v *= 4.0f;
+  }
+
+  u[1].x = max(u[1].x, 1e-5f);
+  float fac = 1.0f + v * logf(u[1].x + (1.0f - u[1].x) * expf(-2.0f / v));
+  float sin_theta_i = -fac * sin_theta_o +
+                      cos_from_sin(fac) * cosf(M_2PI_F * u[1].y) * cos_theta_o;
+  float cos_theta_i = cos_from_sin(sin_theta_i);
+
+  float angles[6];
+  if (p < 3) {
+    hair_alpha_angles(sin_theta_i, cos_theta_i, -bsdf->alpha, angles);
+    sin_theta_i = angles[2 * p];
+    cos_theta_i = angles[2 * p + 1];
+  }
+
+  float phi;
+  if (p < 3) {
+    phi = delta_phi(p, gamma_o, gamma_t) + sample_trimmed_logistic(u[0].y, bsdf->s);
+  }
+  else {
+    phi = M_2PI_F * u[0].y;
+  }
+  float phi_i = phi_o + phi;
+
+  hair_alpha_angles(sin_theta_i, cos_theta_i, bsdf->alpha, angles);
+
+  float4 F;
+  float Mp, Np;
+
+  /* Primary specular (R). */
+  Mp = longitudinal_scattering(angles[0], angles[1], sin_theta_o, cos_theta_o, bsdf->m0_roughness);
+  Np = azimuthal_scattering(phi, 0, bsdf->s, gamma_o, gamma_t);
+  F = Ap[0] * Mp * Np;
+  kernel_assert(isfinite3_safe(float4_to_float3(F)));
+
+  /* Transmission (TT). */
+  Mp = longitudinal_scattering(angles[2], angles[3], sin_theta_o, cos_theta_o, 0.25f * bsdf->v);
+  Np = azimuthal_scattering(phi, 1, bsdf->s, gamma_o, gamma_t);
+  F += Ap[1] * Mp * Np;
+  kernel_assert(isfinite3_safe(float4_to_float3(F)));
+
+  /* Secondary specular (TRT). */
+  Mp = longitudinal_scattering(angles[4], angles[5], sin_theta_o, cos_theta_o, 4.0f * bsdf->v);
+  Np = azimuthal_scattering(phi, 2, bsdf->s, gamma_o, gamma_t);
+  F += Ap[2] * Mp * Np;
+  kernel_assert(isfinite3_safe(float4_to_float3(F)));
+
+  /* Residual component (TRRT+). */
+  Mp = longitudinal_scattering(sin_theta_i, cos_theta_i, sin_theta_o, cos_theta_o, 4.0f * bsdf->v);
+  Np = M_1_2PI_F;
+  F += Ap[3] * Mp * Np;
+  kernel_assert(isfinite3_safe(float4_to_float3(F)));
+
+  *eval = float4_to_float3(F);
+  *pdf = F.w;
+
+  *omega_in = X * sin_theta_i + Y * cos_theta_i * cosf(phi_i) + Z * cos_theta_i * sinf(phi_i);
+
+#  ifdef __RAY_DIFFERENTIALS__
+  float3 N = safe_normalize(sd->I + *omega_in);
+  *domega_in_dx = (2 * dot(N, sd->dI.dx)) * N - sd->dI.dx;
+  *domega_in_dy = (2 * dot(N, sd->dI.dy)) * N - sd->dI.dy;
+#  endif
+
+  return LABEL_GLOSSY | ((p == 0) ? LABEL_REFLECT : LABEL_TRANSMIT);
 }
 
 /* Implements Filter Glossy by capping the effective roughness. */
 ccl_device void bsdf_principled_hair_blur(ShaderClosure *sc, float roughness)
 {
-	PrincipledHairBSDF *bsdf = (PrincipledHairBSDF*)sc;
+  PrincipledHairBSDF *bsdf = (PrincipledHairBSDF *)sc;
 
-	bsdf->v = fmaxf(roughness, bsdf->v);
-	bsdf->s = fmaxf(roughness, bsdf->s);
-	bsdf->m0_roughness = fmaxf(roughness, bsdf->m0_roughness);
+  bsdf->v = fmaxf(roughness, bsdf->v);
+  bsdf->s = fmaxf(roughness, bsdf->s);
+  bsdf->m0_roughness = fmaxf(roughness, bsdf->m0_roughness);
 }
 
 CCL_NAMESPACE_END
 
-#endif  /* __BSDF_HAIR_PRINCIPLED_H__ */
+#endif /* __BSDF_HAIR_PRINCIPLED_H__ */
diff --git a/intern/cycles/kernel/closure/bsdf_microfacet.h b/intern/cycles/kernel/closure/bsdf_microfacet.h
index 32b6e50b09a..b4da3123f28 100644
--- a/intern/cycles/kernel/closure/bsdf_microfacet.h
+++ b/intern/cycles/kernel/closure/bsdf_microfacet.h
@@ -36,95 +36,98 @@
 CCL_NAMESPACE_BEGIN
 
 typedef ccl_addr_space struct MicrofacetExtra {
-	float3 color, cspec0;
-	float clearcoat;
+  float3 color, cspec0;
+  float clearcoat;
 } MicrofacetExtra;
 
 typedef ccl_addr_space struct MicrofacetBsdf {
-	SHADER_CLOSURE_BASE;
+  SHADER_CLOSURE_BASE;
 
-	float alpha_x, alpha_y, ior;
-	MicrofacetExtra *extra;
-	float3 T;
+  float alpha_x, alpha_y, ior;
+  MicrofacetExtra *extra;
+  float3 T;
 } MicrofacetBsdf;
 
 /* Beckmann and GGX microfacet importance sampling. */
 
-ccl_device_inline void microfacet_beckmann_sample_slopes(
-	KernelGlobals *kg,
-	const float cos_theta_i, const float sin_theta_i,
-	float randu, float randv, float *slope_x, float *slope_y,
-	float *G1i)
+ccl_device_inline void microfacet_beckmann_sample_slopes(KernelGlobals *kg,
+                                                         const float cos_theta_i,
+                                                         const float sin_theta_i,
+                                                         float randu,
+                                                         float randv,
+                                                         float *slope_x,
+                                                         float *slope_y,
+                                                         float *G1i)
 {
-	/* special case (normal incidence) */
-	if(cos_theta_i >= 0.99999f) {
-		const float r = sqrtf(-logf(randu));
-		const float phi = M_2PI_F * randv;
-		*slope_x = r * cosf(phi);
-		*slope_y = r * sinf(phi);
-		*G1i = 1.0f;
-		return;
-	}
-
-	/* precomputations */
-	const float tan_theta_i = sin_theta_i/cos_theta_i;
-	const float inv_a = tan_theta_i;
-	const float cot_theta_i = 1.0f/tan_theta_i;
-	const float erf_a = fast_erff(cot_theta_i);
-	const float exp_a2 = expf(-cot_theta_i*cot_theta_i);
-	const float SQRT_PI_INV = 0.56418958354f;
-	const float Lambda = 0.5f*(erf_a - 1.0f) + (0.5f*SQRT_PI_INV)*(exp_a2*inv_a);
-	const float G1 = 1.0f/(1.0f + Lambda); /* masking */
-
-	*G1i = G1;
+  /* special case (normal incidence) */
+  if (cos_theta_i >= 0.99999f) {
+    const float r = sqrtf(-logf(randu));
+    const float phi = M_2PI_F * randv;
+    *slope_x = r * cosf(phi);
+    *slope_y = r * sinf(phi);
+    *G1i = 1.0f;
+    return;
+  }
+
+  /* precomputations */
+  const float tan_theta_i = sin_theta_i / cos_theta_i;
+  const float inv_a = tan_theta_i;
+  const float cot_theta_i = 1.0f / tan_theta_i;
+  const float erf_a = fast_erff(cot_theta_i);
+  const float exp_a2 = expf(-cot_theta_i * cot_theta_i);
+  const float SQRT_PI_INV = 0.56418958354f;
+  const float Lambda = 0.5f * (erf_a - 1.0f) + (0.5f * SQRT_PI_INV) * (exp_a2 * inv_a);
+  const float G1 = 1.0f / (1.0f + Lambda); /* masking */
+
+  *G1i = G1;
 
 #if defined(__KERNEL_GPU__)
-	/* Based on paper from Wenzel Jakob
-	 * An Improved Visible Normal Sampling Routine for the Beckmann Distribution
-	 *
-	 * http://www.mitsuba-renderer.org/~wenzel/files/visnormal.pdf
-	 *
-	 * Reformulation from OpenShadingLanguage which avoids using inverse
-	 * trigonometric functions.
-	 */
-
-	/* Sample slope X.
-	 *
-	 * Compute a coarse approximation using the approximation:
-	 *   exp(-ierf(x)^2) ~= 1 - x * x
-	 *   solve y = 1 + b + K * (1 - b * b)
-	 */
-	float K = tan_theta_i * SQRT_PI_INV;
-	float y_approx = randu * (1.0f + erf_a + K * (1 - erf_a * erf_a));
-	float y_exact  = randu * (1.0f + erf_a + K * exp_a2);
-	float b = K > 0 ? (0.5f - sqrtf(K * (K - y_approx + 1.0f) + 0.25f)) / K : y_approx - 1.0f;
-
-	/* Perform newton step to refine toward the true root. */
-	float inv_erf = fast_ierff(b);
-	float value  = 1.0f + b + K * expf(-inv_erf * inv_erf) - y_exact;
-	/* Check if we are close enough already,
-	 * this also avoids NaNs as we get close to the root.
-	 */
-	if(fabsf(value) > 1e-6f) {
-		b -= value / (1.0f - inv_erf * tan_theta_i); /* newton step 1. */
-		inv_erf = fast_ierff(b);
-		value  = 1.0f + b + K * expf(-inv_erf * inv_erf) - y_exact;
-		b -= value / (1.0f - inv_erf * tan_theta_i); /* newton step 2. */
-		/* Compute the slope from the refined value. */
-		*slope_x = fast_ierff(b);
-	}
-	else {
-		/* We are close enough already. */
-		*slope_x = inv_erf;
-	}
-	*slope_y = fast_ierff(2.0f*randv - 1.0f);
+  /* Based on paper from Wenzel Jakob
+   * An Improved Visible Normal Sampling Routine for the Beckmann Distribution
+   *
+   * http://www.mitsuba-renderer.org/~wenzel/files/visnormal.pdf
+   *
+   * Reformulation from OpenShadingLanguage which avoids using inverse
+   * trigonometric functions.
+   */
+
+  /* Sample slope X.
+   *
+   * Compute a coarse approximation using the approximation:
+   *   exp(-ierf(x)^2) ~= 1 - x * x
+   *   solve y = 1 + b + K * (1 - b * b)
+   */
+  float K = tan_theta_i * SQRT_PI_INV;
+  float y_approx = randu * (1.0f + erf_a + K * (1 - erf_a * erf_a));
+  float y_exact = randu * (1.0f + erf_a + K * exp_a2);
+  float b = K > 0 ? (0.5f - sqrtf(K * (K - y_approx + 1.0f) + 0.25f)) / K : y_approx - 1.0f;
+
+  /* Perform newton step to refine toward the true root. */
+  float inv_erf = fast_ierff(b);
+  float value = 1.0f + b + K * expf(-inv_erf * inv_erf) - y_exact;
+  /* Check if we are close enough already,
+   * this also avoids NaNs as we get close to the root.
+   */
+  if (fabsf(value) > 1e-6f) {
+    b -= value / (1.0f - inv_erf * tan_theta_i); /* newton step 1. */
+    inv_erf = fast_ierff(b);
+    value = 1.0f + b + K * expf(-inv_erf * inv_erf) - y_exact;
+    b -= value / (1.0f - inv_erf * tan_theta_i); /* newton step 2. */
+    /* Compute the slope from the refined value. */
+    *slope_x = fast_ierff(b);
+  }
+  else {
+    /* We are close enough already. */
+    *slope_x = inv_erf;
+  }
+  *slope_y = fast_ierff(2.0f * randv - 1.0f);
 #else
-	/* Use precomputed table on CPU, it gives better perfomance. */
-	int beckmann_table_offset = kernel_data.tables.beckmann_offset;
+  /* Use precomputed table on CPU, it gives better performance. */
+  int beckmann_table_offset = kernel_data.tables.beckmann_offset;
 
-	*slope_x = lookup_table_read_2D(kg, randu, cos_theta_i,
-		beckmann_table_offset, BECKMANN_TABLE_SIZE, BECKMANN_TABLE_SIZE);
-	*slope_y = fast_ierff(2.0f*randv - 1.0f);
+  *slope_x = lookup_table_read_2D(
+      kg, randu, cos_theta_i, beckmann_table_offset, BECKMANN_TABLE_SIZE, BECKMANN_TABLE_SIZE);
+  *slope_y = fast_ierff(2.0f * randv - 1.0f);
 #endif
 }
 
@@ -134,103 +137,109 @@ ccl_device_inline void microfacet_beckmann_sample_slopes(
  * E. Heitz and E. d'Eon, EGSR 2014
  */
 
-ccl_device_inline void microfacet_ggx_sample_slopes(
-	const float cos_theta_i, const float sin_theta_i,
-	float randu, float randv, float *slope_x, float *slope_y,
-	float *G1i)
+ccl_device_inline void microfacet_ggx_sample_slopes(const float cos_theta_i,
+                                                    const float sin_theta_i,
+                                                    float randu,
+                                                    float randv,
+                                                    float *slope_x,
+                                                    float *slope_y,
+                                                    float *G1i)
 {
-	/* special case (normal incidence) */
-	if(cos_theta_i >= 0.99999f) {
-		const float r = sqrtf(randu/(1.0f - randu));
-		const float phi = M_2PI_F * randv;
-		*slope_x = r * cosf(phi);
-		*slope_y = r * sinf(phi);
-		*G1i = 1.0f;
-
-		return;
-	}
-
-	/* precomputations */
-	const float tan_theta_i = sin_theta_i/cos_theta_i;
-	const float G1_inv = 0.5f * (1.0f + safe_sqrtf(1.0f + tan_theta_i*tan_theta_i));
-
-	*G1i = 1.0f/G1_inv;
-
-	/* sample slope_x */
-	const float A = 2.0f*randu*G1_inv - 1.0f;
-	const float AA = A*A;
-	const float tmp = 1.0f/(AA - 1.0f);
-	const float B = tan_theta_i;
-	const float BB = B*B;
-	const float D = safe_sqrtf(BB*(tmp*tmp) - (AA - BB)*tmp);
-	const float slope_x_1 = B*tmp - D;
-	const float slope_x_2 = B*tmp + D;
-	*slope_x = (A < 0.0f || slope_x_2*tan_theta_i > 1.0f)? slope_x_1: slope_x_2;
-
-	/* sample slope_y */
-	float S;
-
-	if(randv > 0.5f) {
-		S = 1.0f;
-		randv = 2.0f*(randv - 0.5f);
-	}
-	else {
-		S = -1.0f;
-		randv = 2.0f*(0.5f - randv);
-	}
-
-	const float z = (randv*(randv*(randv*0.27385f - 0.73369f) + 0.46341f)) / (randv*(randv*(randv*0.093073f + 0.309420f) - 1.000000f) + 0.597999f);
-	*slope_y = S * z * safe_sqrtf(1.0f + (*slope_x)*(*slope_x));
+  /* special case (normal incidence) */
+  if (cos_theta_i >= 0.99999f) {
+    const float r = sqrtf(randu / (1.0f - randu));
+    const float phi = M_2PI_F * randv;
+    *slope_x = r * cosf(phi);
+    *slope_y = r * sinf(phi);
+    *G1i = 1.0f;
+
+    return;
+  }
+
+  /* precomputations */
+  const float tan_theta_i = sin_theta_i / cos_theta_i;
+  const float G1_inv = 0.5f * (1.0f + safe_sqrtf(1.0f + tan_theta_i * tan_theta_i));
+
+  *G1i = 1.0f / G1_inv;
+
+  /* sample slope_x */
+  const float A = 2.0f * randu * G1_inv - 1.0f;
+  const float AA = A * A;
+  const float tmp = 1.0f / (AA - 1.0f);
+  const float B = tan_theta_i;
+  const float BB = B * B;
+  const float D = safe_sqrtf(BB * (tmp * tmp) - (AA - BB) * tmp);
+  const float slope_x_1 = B * tmp - D;
+  const float slope_x_2 = B * tmp + D;
+  *slope_x = (A < 0.0f || slope_x_2 * tan_theta_i > 1.0f) ? slope_x_1 : slope_x_2;
+
+  /* sample slope_y */
+  float S;
+
+  if (randv > 0.5f) {
+    S = 1.0f;
+    randv = 2.0f * (randv - 0.5f);
+  }
+  else {
+    S = -1.0f;
+    randv = 2.0f * (0.5f - randv);
+  }
+
+  const float z = (randv * (randv * (randv * 0.27385f - 0.73369f) + 0.46341f)) /
+                  (randv * (randv * (randv * 0.093073f + 0.309420f) - 1.000000f) + 0.597999f);
+  *slope_y = S * z * safe_sqrtf(1.0f + (*slope_x) * (*slope_x));
 }
 
-ccl_device_forceinline float3 microfacet_sample_stretched(
-	KernelGlobals *kg, const float3 omega_i,
-	const float alpha_x, const float alpha_y,
-	const float randu, const float randv,
-	bool beckmann, float *G1i)
+ccl_device_forceinline float3 microfacet_sample_stretched(KernelGlobals *kg,
+                                                          const float3 omega_i,
+                                                          const float alpha_x,
+                                                          const float alpha_y,
+                                                          const float randu,
+                                                          const float randv,
+                                                          bool beckmann,
+                                                          float *G1i)
 {
-	/* 1. stretch omega_i */
-	float3 omega_i_ = make_float3(alpha_x * omega_i.x, alpha_y * omega_i.y, omega_i.z);
-	omega_i_ = normalize(omega_i_);
-
-	/* get polar coordinates of omega_i_ */
-	float costheta_ = 1.0f;
-	float sintheta_ = 0.0f;
-	float cosphi_ = 1.0f;
-	float sinphi_ = 0.0f;
-
-	if(omega_i_.z < 0.99999f) {
-		costheta_ = omega_i_.z;
-		sintheta_ = safe_sqrtf(1.0f - costheta_*costheta_);
-
-		float invlen = 1.0f/sintheta_;
-		cosphi_ = omega_i_.x * invlen;
-		sinphi_ = omega_i_.y * invlen;
-	}
-
-	/* 2. sample P22_{omega_i}(x_slope, y_slope, 1, 1) */
-	float slope_x, slope_y;
-
-	if(beckmann) {
-		microfacet_beckmann_sample_slopes(kg, costheta_, sintheta_,
-			randu, randv, &slope_x, &slope_y, G1i);
-	}
-	else {
-		microfacet_ggx_sample_slopes(costheta_, sintheta_,
-			randu, randv, &slope_x, &slope_y, G1i);
-	}
-
-	/* 3. rotate */
-	float tmp = cosphi_*slope_x - sinphi_*slope_y;
-	slope_y = sinphi_*slope_x + cosphi_*slope_y;
-	slope_x = tmp;
-
-	/* 4. unstretch */
-	slope_x = alpha_x * slope_x;
-	slope_y = alpha_y * slope_y;
-
-	/* 5. compute normal */
-	return normalize(make_float3(-slope_x, -slope_y, 1.0f));
+  /* 1. stretch omega_i */
+  float3 omega_i_ = make_float3(alpha_x * omega_i.x, alpha_y * omega_i.y, omega_i.z);
+  omega_i_ = normalize(omega_i_);
+
+  /* get polar coordinates of omega_i_ */
+  float costheta_ = 1.0f;
+  float sintheta_ = 0.0f;
+  float cosphi_ = 1.0f;
+  float sinphi_ = 0.0f;
+
+  if (omega_i_.z < 0.99999f) {
+    costheta_ = omega_i_.z;
+    sintheta_ = safe_sqrtf(1.0f - costheta_ * costheta_);
+
+    float invlen = 1.0f / sintheta_;
+    cosphi_ = omega_i_.x * invlen;
+    sinphi_ = omega_i_.y * invlen;
+  }
+
+  /* 2. sample P22_{omega_i}(x_slope, y_slope, 1, 1) */
+  float slope_x, slope_y;
+
+  if (beckmann) {
+    microfacet_beckmann_sample_slopes(
+        kg, costheta_, sintheta_, randu, randv, &slope_x, &slope_y, G1i);
+  }
+  else {
+    microfacet_ggx_sample_slopes(costheta_, sintheta_, randu, randv, &slope_x, &slope_y, G1i);
+  }
+
+  /* 3. rotate */
+  float tmp = cosphi_ * slope_x - sinphi_ * slope_y;
+  slope_y = sinphi_ * slope_x + cosphi_ * slope_y;
+  slope_x = tmp;
+
+  /* 4. unstretch */
+  slope_x = alpha_x * slope_x;
+  slope_y = alpha_y * slope_y;
+
+  /* 5. compute normal */
+  return normalize(make_float3(-slope_x, -slope_y, 1.0f));
 }
 
 /* Calculate the reflection color
@@ -240,27 +249,29 @@ ccl_device_forceinline float3 microfacet_sample_stretched(
  *
  * Else it is simply white
  */
-ccl_device_forceinline float3 reflection_color(const MicrofacetBsdf *bsdf, float3 L, float3 H) {
-	float3 F = make_float3(1.0f, 1.0f, 1.0f);
-	bool use_fresnel = (bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID
-	                   || bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID
-	                   || bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_ANISO_FRESNEL_ID);
+ccl_device_forceinline float3 reflection_color(const MicrofacetBsdf *bsdf, float3 L, float3 H)
+{
+  float3 F = make_float3(1.0f, 1.0f, 1.0f);
+  bool use_fresnel = (bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID ||
+                      bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID ||
+                      bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_ANISO_FRESNEL_ID);
 
-	if(use_fresnel) {
-		float F0 = fresnel_dielectric_cos(1.0f, bsdf->ior);
+  if (use_fresnel) {
+    float F0 = fresnel_dielectric_cos(1.0f, bsdf->ior);
 
-		F = interpolate_fresnel_color(L, H, bsdf->ior, F0, bsdf->extra->cspec0);
-	}
+    F = interpolate_fresnel_color(L, H, bsdf->ior, F0, bsdf->extra->cspec0);
+  }
 
-	return F;
+  return F;
 }
 
 ccl_device_forceinline float D_GTR1(float NdotH, float alpha)
 {
-	if(alpha >= 1.0f) return M_1_PI_F;
-	float alpha2 = alpha*alpha;
-	float t = 1.0f + (alpha2 - 1.0f) * NdotH*NdotH;
-	return (alpha2 - 1.0f) / (M_PI_F * logf(alpha2) * t);
+  if (alpha >= 1.0f)
+    return M_1_PI_F;
+  float alpha2 = alpha * alpha;
+  float t = 1.0f + (alpha2 - 1.0f) * NdotH * NdotH;
+  return (alpha2 - 1.0f) / (M_PI_F * logf(alpha2) * t);
 }
 
 /* GGX microfacet with Smith shadow-masking from:
@@ -278,483 +289,511 @@ ccl_device_forceinline float D_GTR1(float NdotH, float alpha)
 
 ccl_device int bsdf_microfacet_ggx_setup(MicrofacetBsdf *bsdf)
 {
-	bsdf->extra = NULL;
+  bsdf->extra = NULL;
 
-	bsdf->alpha_x = saturate(bsdf->alpha_x);
-	bsdf->alpha_y = bsdf->alpha_x;
+  bsdf->alpha_x = saturate(bsdf->alpha_x);
+  bsdf->alpha_y = bsdf->alpha_x;
 
-	bsdf->type = CLOSURE_BSDF_MICROFACET_GGX_ID;
+  bsdf->type = CLOSURE_BSDF_MICROFACET_GGX_ID;
 
-	return SD_BSDF|SD_BSDF_HAS_EVAL;
+  return SD_BSDF | SD_BSDF_HAS_EVAL;
 }
 
 ccl_device int bsdf_microfacet_ggx_fresnel_setup(MicrofacetBsdf *bsdf, const ShaderData *sd)
 {
-	bsdf->extra->cspec0.x = saturate(bsdf->extra->cspec0.x);
-	bsdf->extra->cspec0.y = saturate(bsdf->extra->cspec0.y);
-	bsdf->extra->cspec0.z = saturate(bsdf->extra->cspec0.z);
+  bsdf->extra->cspec0.x = saturate(bsdf->extra->cspec0.x);
+  bsdf->extra->cspec0.y = saturate(bsdf->extra->cspec0.y);
+  bsdf->extra->cspec0.z = saturate(bsdf->extra->cspec0.z);
 
-	float F0 = fresnel_dielectric_cos(1.0f, bsdf->ior);
-	float F = average(interpolate_fresnel_color(sd->I, bsdf->N, bsdf->ior, F0, bsdf->extra->cspec0));
-	bsdf->sample_weight *= F;
+  float F0 = fresnel_dielectric_cos(1.0f, bsdf->ior);
+  float F = average(interpolate_fresnel_color(sd->I, bsdf->N, bsdf->ior, F0, bsdf->extra->cspec0));
+  bsdf->sample_weight *= F;
 
-	bsdf->alpha_x = saturate(bsdf->alpha_x);
-	bsdf->alpha_y = bsdf->alpha_x;
+  bsdf->alpha_x = saturate(bsdf->alpha_x);
+  bsdf->alpha_y = bsdf->alpha_x;
 
-	bsdf->type = CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID;
+  bsdf->type = CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID;
 
-	return SD_BSDF|SD_BSDF_HAS_EVAL;
+  return SD_BSDF | SD_BSDF_HAS_EVAL;
 }
 
 ccl_device int bsdf_microfacet_ggx_clearcoat_setup(MicrofacetBsdf *bsdf, const ShaderData *sd)
 {
-	bsdf->extra->cspec0.x = saturate(bsdf->extra->cspec0.x);
-	bsdf->extra->cspec0.y = saturate(bsdf->extra->cspec0.y);
-	bsdf->extra->cspec0.z = saturate(bsdf->extra->cspec0.z);
+  bsdf->extra->cspec0.x = saturate(bsdf->extra->cspec0.x);
+  bsdf->extra->cspec0.y = saturate(bsdf->extra->cspec0.y);
+  bsdf->extra->cspec0.z = saturate(bsdf->extra->cspec0.z);
 
-	float F0 = fresnel_dielectric_cos(1.0f, bsdf->ior);
-	float F = average(interpolate_fresnel_color(sd->I, bsdf->N, bsdf->ior, F0, bsdf->extra->cspec0));
-	bsdf->sample_weight *= 0.25f * bsdf->extra->clearcoat * F;
+  float F0 = fresnel_dielectric_cos(1.0f, bsdf->ior);
+  float F = average(interpolate_fresnel_color(sd->I, bsdf->N, bsdf->ior, F0, bsdf->extra->cspec0));
+  bsdf->sample_weight *= 0.25f * bsdf->extra->clearcoat * F;
 
-	bsdf->alpha_x = saturate(bsdf->alpha_x);
-	bsdf->alpha_y = bsdf->alpha_x;
+  bsdf->alpha_x = saturate(bsdf->alpha_x);
+  bsdf->alpha_y = bsdf->alpha_x;
 
-	bsdf->type = CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID;
+  bsdf->type = CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID;
 
-	return SD_BSDF|SD_BSDF_HAS_EVAL;
+  return SD_BSDF | SD_BSDF_HAS_EVAL;
 }
 
 ccl_device bool bsdf_microfacet_merge(const ShaderClosure *a, const ShaderClosure *b)
 {
-	const MicrofacetBsdf *bsdf_a = (const MicrofacetBsdf*)a;
-	const MicrofacetBsdf *bsdf_b = (const MicrofacetBsdf*)b;
-
-	return (isequal_float3(bsdf_a->N, bsdf_b->N)) &&
-	       (bsdf_a->alpha_x == bsdf_b->alpha_x) &&
-	       (bsdf_a->alpha_y == bsdf_b->alpha_y) &&
-	       (isequal_float3(bsdf_a->T, bsdf_b->T)) &&
-	       (bsdf_a->ior == bsdf_b->ior) &&
-	       ((bsdf_a->extra == NULL && bsdf_b->extra == NULL) ||
-	        ((bsdf_a->extra && bsdf_b->extra) &&
-	         (isequal_float3(bsdf_a->extra->color, bsdf_b->extra->color)) &&
-	         (isequal_float3(bsdf_a->extra->cspec0, bsdf_b->extra->cspec0)) &&
-	         (bsdf_a->extra->clearcoat == bsdf_b->extra->clearcoat)));
+  const MicrofacetBsdf *bsdf_a = (const MicrofacetBsdf *)a;
+  const MicrofacetBsdf *bsdf_b = (const MicrofacetBsdf *)b;
+
+  return (isequal_float3(bsdf_a->N, bsdf_b->N)) && (bsdf_a->alpha_x == bsdf_b->alpha_x) &&
+         (bsdf_a->alpha_y == bsdf_b->alpha_y) && (isequal_float3(bsdf_a->T, bsdf_b->T)) &&
+         (bsdf_a->ior == bsdf_b->ior) &&
+         ((bsdf_a->extra == NULL && bsdf_b->extra == NULL) ||
+          ((bsdf_a->extra && bsdf_b->extra) &&
+           (isequal_float3(bsdf_a->extra->color, bsdf_b->extra->color)) &&
+           (isequal_float3(bsdf_a->extra->cspec0, bsdf_b->extra->cspec0)) &&
+           (bsdf_a->extra->clearcoat == bsdf_b->extra->clearcoat)));
 }
 
 ccl_device int bsdf_microfacet_ggx_aniso_setup(MicrofacetBsdf *bsdf)
 {
-	bsdf->extra = NULL;
+  bsdf->extra = NULL;
 
-	bsdf->alpha_x = saturate(bsdf->alpha_x);
-	bsdf->alpha_y = saturate(bsdf->alpha_y);
+  bsdf->alpha_x = saturate(bsdf->alpha_x);
+  bsdf->alpha_y = saturate(bsdf->alpha_y);
 
-	bsdf->type = CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID;
+  bsdf->type = CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID;
 
-	return SD_BSDF|SD_BSDF_HAS_EVAL;
+  return SD_BSDF | SD_BSDF_HAS_EVAL;
 }
 
 ccl_device int bsdf_microfacet_ggx_aniso_fresnel_setup(MicrofacetBsdf *bsdf, const ShaderData *sd)
 {
-	bsdf->extra->cspec0.x = saturate(bsdf->extra->cspec0.x);
-	bsdf->extra->cspec0.y = saturate(bsdf->extra->cspec0.y);
-	bsdf->extra->cspec0.z = saturate(bsdf->extra->cspec0.z);
+  bsdf->extra->cspec0.x = saturate(bsdf->extra->cspec0.x);
+  bsdf->extra->cspec0.y = saturate(bsdf->extra->cspec0.y);
+  bsdf->extra->cspec0.z = saturate(bsdf->extra->cspec0.z);
 
-	float F0 = fresnel_dielectric_cos(1.0f, bsdf->ior);
-	float F = average(interpolate_fresnel_color(sd->I, bsdf->N, bsdf->ior, F0, bsdf->extra->cspec0));
-	bsdf->sample_weight *= F;
+  float F0 = fresnel_dielectric_cos(1.0f, bsdf->ior);
+  float F = average(interpolate_fresnel_color(sd->I, bsdf->N, bsdf->ior, F0, bsdf->extra->cspec0));
+  bsdf->sample_weight *= F;
 
-	bsdf->alpha_x = saturate(bsdf->alpha_x);
-	bsdf->alpha_y = saturate(bsdf->alpha_y);
+  bsdf->alpha_x = saturate(bsdf->alpha_x);
+  bsdf->alpha_y = saturate(bsdf->alpha_y);
 
-	bsdf->type = CLOSURE_BSDF_MICROFACET_GGX_ANISO_FRESNEL_ID;
+  bsdf->type = CLOSURE_BSDF_MICROFACET_GGX_ANISO_FRESNEL_ID;
 
-	return SD_BSDF|SD_BSDF_HAS_EVAL;
+  return SD_BSDF | SD_BSDF_HAS_EVAL;
 }
 
 ccl_device int bsdf_microfacet_ggx_refraction_setup(MicrofacetBsdf *bsdf)
 {
-	bsdf->extra = NULL;
+  bsdf->extra = NULL;
 
-	bsdf->alpha_x = saturate(bsdf->alpha_x);
-	bsdf->alpha_y = bsdf->alpha_x;
+  bsdf->alpha_x = saturate(bsdf->alpha_x);
+  bsdf->alpha_y = bsdf->alpha_x;
 
-	bsdf->type = CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID;
+  bsdf->type = CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID;
 
-	return SD_BSDF|SD_BSDF_HAS_EVAL;
+  return SD_BSDF | SD_BSDF_HAS_EVAL;
 }
 
 ccl_device void bsdf_microfacet_ggx_blur(ShaderClosure *sc, float roughness)
 {
-	MicrofacetBsdf *bsdf = (MicrofacetBsdf*)sc;
+  MicrofacetBsdf *bsdf = (MicrofacetBsdf *)sc;
 
-	bsdf->alpha_x = fmaxf(roughness, bsdf->alpha_x);
-	bsdf->alpha_y = fmaxf(roughness, bsdf->alpha_y);
+  bsdf->alpha_x = fmaxf(roughness, bsdf->alpha_x);
+  bsdf->alpha_y = fmaxf(roughness, bsdf->alpha_y);
 }
 
-ccl_device float3 bsdf_microfacet_ggx_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
+ccl_device float3 bsdf_microfacet_ggx_eval_reflect(const ShaderClosure *sc,
+                                                   const float3 I,
+                                                   const float3 omega_in,
+                                                   float *pdf)
 {
-	const MicrofacetBsdf *bsdf = (const MicrofacetBsdf*)sc;
-	float alpha_x = bsdf->alpha_x;
-	float alpha_y = bsdf->alpha_y;
-	bool m_refractive = bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID;
-	float3 N = bsdf->N;
-
-	if(m_refractive || alpha_x*alpha_y <= 1e-7f)
-		return make_float3(0.0f, 0.0f, 0.0f);
-
-	float cosNO = dot(N, I);
-	float cosNI = dot(N, omega_in);
-
-	if(cosNI > 0 && cosNO > 0) {
-		/* get half vector */
-		float3 m = normalize(omega_in + I);
-		float alpha2 = alpha_x * alpha_y;
-		float D, G1o, G1i;
-
-		if(alpha_x == alpha_y) {
-			/* isotropic
-			 * eq. 20: (F*G*D)/(4*in*on)
-			 * eq. 33: first we calculate D(m) */
-			float cosThetaM = dot(N, m);
-			float cosThetaM2 = cosThetaM * cosThetaM;
-			float cosThetaM4 = cosThetaM2 * cosThetaM2;
-			float tanThetaM2 = (1 - cosThetaM2) / cosThetaM2;
-
-			if(bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID) {
-				/* use GTR1 for clearcoat */
-				D = D_GTR1(cosThetaM, bsdf->alpha_x);
-
-				/* the alpha value for clearcoat is a fixed 0.25 => alpha2 = 0.25 * 0.25 */
-				alpha2 = 0.0625f;
-			}
-			else {
-				/* use GTR2 otherwise */
-				D = alpha2 / (M_PI_F * cosThetaM4 * (alpha2 + tanThetaM2) * (alpha2 + tanThetaM2));
-			}
-
-			/* eq. 34: now calculate G1(i,m) and G1(o,m) */
-			G1o = 2 / (1 + safe_sqrtf(1 + alpha2 * (1 - cosNO * cosNO) / (cosNO * cosNO)));
-			G1i = 2 / (1 + safe_sqrtf(1 + alpha2 * (1 - cosNI * cosNI) / (cosNI * cosNI)));
-		}
-		else {
-			/* anisotropic */
-			float3 X, Y, Z = N;
-			make_orthonormals_tangent(Z, bsdf->T, &X, &Y);
-
-			/* distribution */
-			float3 local_m = make_float3(dot(X, m), dot(Y, m), dot(Z, m));
-			float slope_x = -local_m.x/(local_m.z*alpha_x);
-			float slope_y = -local_m.y/(local_m.z*alpha_y);
-			float slope_len = 1 + slope_x*slope_x + slope_y*slope_y;
-
-			float cosThetaM = local_m.z;
-			float cosThetaM2 = cosThetaM * cosThetaM;
-			float cosThetaM4 = cosThetaM2 * cosThetaM2;
-
-			D = 1 / ((slope_len * slope_len) * M_PI_F * alpha2 * cosThetaM4);
-
-			/* G1(i,m) and G1(o,m) */
-			float tanThetaO2 = (1 - cosNO * cosNO) / (cosNO * cosNO);
-			float cosPhiO = dot(I, X);
-			float sinPhiO = dot(I, Y);
-
-			float alphaO2 = (cosPhiO*cosPhiO)*(alpha_x*alpha_x) + (sinPhiO*sinPhiO)*(alpha_y*alpha_y);
-			alphaO2 /= cosPhiO*cosPhiO + sinPhiO*sinPhiO;
-
-			G1o = 2 / (1 + safe_sqrtf(1 + alphaO2 * tanThetaO2));
-
-			float tanThetaI2 = (1 - cosNI * cosNI) / (cosNI * cosNI);
-			float cosPhiI = dot(omega_in, X);
-			float sinPhiI = dot(omega_in, Y);
-
-			float alphaI2 = (cosPhiI*cosPhiI)*(alpha_x*alpha_x) + (sinPhiI*sinPhiI)*(alpha_y*alpha_y);
-			alphaI2 /= cosPhiI*cosPhiI + sinPhiI*sinPhiI;
-
-			G1i = 2 / (1 + safe_sqrtf(1 + alphaI2 * tanThetaI2));
-		}
-
-		float G = G1o * G1i;
-
-		/* eq. 20 */
-		float common = D * 0.25f / cosNO;
-
-		float3 F = reflection_color(bsdf, omega_in, m);
-		if(bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID) {
-			F *= 0.25f * bsdf->extra->clearcoat;
-		}
-
-		float3 out = F * G * common;
-
-		/* eq. 2 in distribution of visible normals sampling
-		 * pm = Dw = G1o * dot(m, I) * D / dot(N, I); */
-
-		/* eq. 38 - but see also:
-		 * eq. 17 in http://www.graphics.cornell.edu/~bjw/wardnotes.pdf
-		 * pdf = pm * 0.25 / dot(m, I); */
-		*pdf = G1o * common;
-
-		return out;
-	}
-
-	return make_float3(0.0f, 0.0f, 0.0f);
+  const MicrofacetBsdf *bsdf = (const MicrofacetBsdf *)sc;
+  float alpha_x = bsdf->alpha_x;
+  float alpha_y = bsdf->alpha_y;
+  bool m_refractive = bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID;
+  float3 N = bsdf->N;
+
+  if (m_refractive || alpha_x * alpha_y <= 1e-7f)
+    return make_float3(0.0f, 0.0f, 0.0f);
+
+  float cosNO = dot(N, I);
+  float cosNI = dot(N, omega_in);
+
+  if (cosNI > 0 && cosNO > 0) {
+    /* get half vector */
+    float3 m = normalize(omega_in + I);
+    float alpha2 = alpha_x * alpha_y;
+    float D, G1o, G1i;
+
+    if (alpha_x == alpha_y) {
+      /* isotropic
+       * eq. 20: (F*G*D)/(4*in*on)
+       * eq. 33: first we calculate D(m) */
+      float cosThetaM = dot(N, m);
+      float cosThetaM2 = cosThetaM * cosThetaM;
+      float cosThetaM4 = cosThetaM2 * cosThetaM2;
+      float tanThetaM2 = (1 - cosThetaM2) / cosThetaM2;
+
+      if (bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID) {
+        /* use GTR1 for clearcoat */
+        D = D_GTR1(cosThetaM, bsdf->alpha_x);
+
+        /* the alpha value for clearcoat is a fixed 0.25 => alpha2 = 0.25 * 0.25 */
+        alpha2 = 0.0625f;
+      }
+      else {
+        /* use GTR2 otherwise */
+        D = alpha2 / (M_PI_F * cosThetaM4 * (alpha2 + tanThetaM2) * (alpha2 + tanThetaM2));
+      }
+
+      /* eq. 34: now calculate G1(i,m) and G1(o,m) */
+      G1o = 2 / (1 + safe_sqrtf(1 + alpha2 * (1 - cosNO * cosNO) / (cosNO * cosNO)));
+      G1i = 2 / (1 + safe_sqrtf(1 + alpha2 * (1 - cosNI * cosNI) / (cosNI * cosNI)));
+    }
+    else {
+      /* anisotropic */
+      float3 X, Y, Z = N;
+      make_orthonormals_tangent(Z, bsdf->T, &X, &Y);
+
+      /* distribution */
+      float3 local_m = make_float3(dot(X, m), dot(Y, m), dot(Z, m));
+      float slope_x = -local_m.x / (local_m.z * alpha_x);
+      float slope_y = -local_m.y / (local_m.z * alpha_y);
+      float slope_len = 1 + slope_x * slope_x + slope_y * slope_y;
+
+      float cosThetaM = local_m.z;
+      float cosThetaM2 = cosThetaM * cosThetaM;
+      float cosThetaM4 = cosThetaM2 * cosThetaM2;
+
+      D = 1 / ((slope_len * slope_len) * M_PI_F * alpha2 * cosThetaM4);
+
+      /* G1(i,m) and G1(o,m) */
+      float tanThetaO2 = (1 - cosNO * cosNO) / (cosNO * cosNO);
+      float cosPhiO = dot(I, X);
+      float sinPhiO = dot(I, Y);
+
+      float alphaO2 = (cosPhiO * cosPhiO) * (alpha_x * alpha_x) +
+                      (sinPhiO * sinPhiO) * (alpha_y * alpha_y);
+      alphaO2 /= cosPhiO * cosPhiO + sinPhiO * sinPhiO;
+
+      G1o = 2 / (1 + safe_sqrtf(1 + alphaO2 * tanThetaO2));
+
+      float tanThetaI2 = (1 - cosNI * cosNI) / (cosNI * cosNI);
+      float cosPhiI = dot(omega_in, X);
+      float sinPhiI = dot(omega_in, Y);
+
+      float alphaI2 = (cosPhiI * cosPhiI) * (alpha_x * alpha_x) +
+                      (sinPhiI * sinPhiI) * (alpha_y * alpha_y);
+      alphaI2 /= cosPhiI * cosPhiI + sinPhiI * sinPhiI;
+
+      G1i = 2 / (1 + safe_sqrtf(1 + alphaI2 * tanThetaI2));
+    }
+
+    float G = G1o * G1i;
+
+    /* eq. 20 */
+    float common = D * 0.25f / cosNO;
+
+    float3 F = reflection_color(bsdf, omega_in, m);
+    if (bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID) {
+      F *= 0.25f * bsdf->extra->clearcoat;
+    }
+
+    float3 out = F * G * common;
+
+    /* eq. 2 in distribution of visible normals sampling
+     * pm = Dw = G1o * dot(m, I) * D / dot(N, I); */
+
+    /* eq. 38 - but see also:
+     * eq. 17 in http://www.graphics.cornell.edu/~bjw/wardnotes.pdf
+     * pdf = pm * 0.25 / dot(m, I); */
+    *pdf = G1o * common;
+
+    return out;
+  }
+
+  return make_float3(0.0f, 0.0f, 0.0f);
 }
 
-ccl_device float3 bsdf_microfacet_ggx_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
+ccl_device float3 bsdf_microfacet_ggx_eval_transmit(const ShaderClosure *sc,
+                                                    const float3 I,
+                                                    const float3 omega_in,
+                                                    float *pdf)
 {
-	const MicrofacetBsdf *bsdf = (const MicrofacetBsdf*)sc;
-	float alpha_x = bsdf->alpha_x;
-	float alpha_y = bsdf->alpha_y;
-	float m_eta = bsdf->ior;
-	bool m_refractive = bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID;
-	float3 N = bsdf->N;
+  const MicrofacetBsdf *bsdf = (const MicrofacetBsdf *)sc;
+  float alpha_x = bsdf->alpha_x;
+  float alpha_y = bsdf->alpha_y;
+  float m_eta = bsdf->ior;
+  bool m_refractive = bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID;
+  float3 N = bsdf->N;
 
-	if(!m_refractive || alpha_x*alpha_y <= 1e-7f)
-		return make_float3(0.0f, 0.0f, 0.0f);
+  if (!m_refractive || alpha_x * alpha_y <= 1e-7f)
+    return make_float3(0.0f, 0.0f, 0.0f);
 
-	float cosNO = dot(N, I);
-	float cosNI = dot(N, omega_in);
+  float cosNO = dot(N, I);
+  float cosNI = dot(N, omega_in);
 
-	if(cosNO <= 0 || cosNI >= 0)
-		return make_float3(0.0f, 0.0f, 0.0f); /* vectors on same side -- not possible */
+  if (cosNO <= 0 || cosNI >= 0)
+    return make_float3(0.0f, 0.0f, 0.0f); /* vectors on same side -- not possible */
 
-	/* compute half-vector of the refraction (eq. 16) */
-	float3 ht = -(m_eta * omega_in + I);
-	float3 Ht = normalize(ht);
-	float cosHO = dot(Ht, I);
-	float cosHI = dot(Ht, omega_in);
+  /* compute half-vector of the refraction (eq. 16) */
+  float3 ht = -(m_eta * omega_in + I);
+  float3 Ht = normalize(ht);
+  float cosHO = dot(Ht, I);
+  float cosHI = dot(Ht, omega_in);
 
-	float D, G1o, G1i;
+  float D, G1o, G1i;
 
-	/* eq. 33: first we calculate D(m) with m=Ht: */
-	float alpha2 = alpha_x * alpha_y;
-	float cosThetaM = dot(N, Ht);
-	float cosThetaM2 = cosThetaM * cosThetaM;
-	float tanThetaM2 = (1 - cosThetaM2) / cosThetaM2;
-	float cosThetaM4 = cosThetaM2 * cosThetaM2;
-	D = alpha2 / (M_PI_F * cosThetaM4 * (alpha2 + tanThetaM2) * (alpha2 + tanThetaM2));
+  /* eq. 33: first we calculate D(m) with m=Ht: */
+  float alpha2 = alpha_x * alpha_y;
+  float cosThetaM = dot(N, Ht);
+  float cosThetaM2 = cosThetaM * cosThetaM;
+  float tanThetaM2 = (1 - cosThetaM2) / cosThetaM2;
+  float cosThetaM4 = cosThetaM2 * cosThetaM2;
+  D = alpha2 / (M_PI_F * cosThetaM4 * (alpha2 + tanThetaM2) * (alpha2 + tanThetaM2));
 
-	/* eq. 34: now calculate G1(i,m) and G1(o,m) */
-	G1o = 2 / (1 + safe_sqrtf(1 + alpha2 * (1 - cosNO * cosNO) / (cosNO * cosNO)));
-	G1i = 2 / (1 + safe_sqrtf(1 + alpha2 * (1 - cosNI * cosNI) / (cosNI * cosNI)));
+  /* eq. 34: now calculate G1(i,m) and G1(o,m) */
+  G1o = 2 / (1 + safe_sqrtf(1 + alpha2 * (1 - cosNO * cosNO) / (cosNO * cosNO)));
+  G1i = 2 / (1 + safe_sqrtf(1 + alpha2 * (1 - cosNI * cosNI) / (cosNI * cosNI)));
 
-	float G = G1o * G1i;
+  float G = G1o * G1i;
 
-	/* probability */
-	float Ht2 = dot(ht, ht);
+  /* probability */
+  float Ht2 = dot(ht, ht);
 
-	/* eq. 2 in distribution of visible normals sampling
-	 * pm = Dw = G1o * dot(m, I) * D / dot(N, I); */
+  /* eq. 2 in distribution of visible normals sampling
+   * pm = Dw = G1o * dot(m, I) * D / dot(N, I); */
 
-	/* out = fabsf(cosHI * cosHO) * (m_eta * m_eta) * G * D / (cosNO * Ht2)
-	 * pdf = pm * (m_eta * m_eta) * fabsf(cosHI) / Ht2 */
-	float common = D * (m_eta * m_eta) / (cosNO * Ht2);
-	float out = G * fabsf(cosHI * cosHO) * common;
-	*pdf = G1o * fabsf(cosHO * cosHI) * common;
+  /* out = fabsf(cosHI * cosHO) * (m_eta * m_eta) * G * D / (cosNO * Ht2)
+   * pdf = pm * (m_eta * m_eta) * fabsf(cosHI) / Ht2 */
+  float common = D * (m_eta * m_eta) / (cosNO * Ht2);
+  float out = G * fabsf(cosHI * cosHO) * common;
+  *pdf = G1o * fabsf(cosHO * cosHI) * common;
 
-	return make_float3(out, out, out);
+  return make_float3(out, out, out);
 }
 
-ccl_device int bsdf_microfacet_ggx_sample(KernelGlobals *kg, const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf)
+ccl_device int bsdf_microfacet_ggx_sample(KernelGlobals *kg,
+                                          const ShaderClosure *sc,
+                                          float3 Ng,
+                                          float3 I,
+                                          float3 dIdx,
+                                          float3 dIdy,
+                                          float randu,
+                                          float randv,
+                                          float3 *eval,
+                                          float3 *omega_in,
+                                          float3 *domega_in_dx,
+                                          float3 *domega_in_dy,
+                                          float *pdf)
 {
-	const MicrofacetBsdf *bsdf = (const MicrofacetBsdf*)sc;
-	float alpha_x = bsdf->alpha_x;
-	float alpha_y = bsdf->alpha_y;
-	bool m_refractive = bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID;
-	float3 N = bsdf->N;
-	int label;
-
-	float cosNO = dot(N, I);
-	if(cosNO > 0) {
-		float3 X, Y, Z = N;
-
-		if(alpha_x == alpha_y)
-			make_orthonormals(Z, &X, &Y);
-		else
-			make_orthonormals_tangent(Z, bsdf->T, &X, &Y);
-
-		/* importance sampling with distribution of visible normals. vectors are
-		 * transformed to local space before and after */
-		float3 local_I = make_float3(dot(X, I), dot(Y, I), cosNO);
-		float3 local_m;
-		float G1o;
-
-		local_m = microfacet_sample_stretched(kg, local_I, alpha_x, alpha_y,
-			randu, randv, false, &G1o);
-
-		float3 m = X*local_m.x + Y*local_m.y + Z*local_m.z;
-		float cosThetaM = local_m.z;
-
-		/* reflection or refraction? */
-		if(!m_refractive) {
-			float cosMO = dot(m, I);
-			label = LABEL_REFLECT | LABEL_GLOSSY;
-
-			if(cosMO > 0) {
-				/* eq. 39 - compute actual reflected direction */
-				*omega_in = 2 * cosMO * m - I;
-
-				if(dot(Ng, *omega_in) > 0) {
-					if(alpha_x*alpha_y <= 1e-7f) {
-						/* some high number for MIS */
-						*pdf = 1e6f;
-						*eval = make_float3(1e6f, 1e6f, 1e6f);
-
-						bool use_fresnel = (bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID
-						                   || bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID
-						                   || bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_ANISO_FRESNEL_ID);
-
-						/* if fresnel is used, calculate the color with reflection_color(...) */
-						if(use_fresnel) {
-							*eval *= reflection_color(bsdf, *omega_in, m);
-						}
-
-						label = LABEL_REFLECT | LABEL_SINGULAR;
-					}
-					else {
-						/* microfacet normal is visible to this ray */
-						/* eq. 33 */
-						float alpha2 = alpha_x * alpha_y;
-						float D, G1i;
-
-						if(alpha_x == alpha_y) {
-							/* isotropic */
-							float cosThetaM2 = cosThetaM * cosThetaM;
-							float cosThetaM4 = cosThetaM2 * cosThetaM2;
-							float tanThetaM2 = 1/(cosThetaM2) - 1;
-
-							/* eval BRDF*cosNI */
-							float cosNI = dot(N, *omega_in);
-
-							if(bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID) {
-								/* use GTR1 for clearcoat */
-								D = D_GTR1(cosThetaM, bsdf->alpha_x);
-
-								/* the alpha value for clearcoat is a fixed 0.25 => alpha2 = 0.25 * 0.25 */
-								alpha2 = 0.0625f;
-
-								/* recalculate G1o */
-								G1o = 2 / (1 + safe_sqrtf(1 + alpha2 * (1 - cosNO * cosNO) / (cosNO * cosNO)));
-							}
-							else {
-								/* use GTR2 otherwise */
-								D = alpha2 / (M_PI_F * cosThetaM4 * (alpha2 + tanThetaM2) * (alpha2 + tanThetaM2));
-							}
-
-							/* eq. 34: now calculate G1(i,m) */
-							G1i = 2 / (1 + safe_sqrtf(1 + alpha2 * (1 - cosNI * cosNI) / (cosNI * cosNI)));
-						}
-						else {
-							/* anisotropic distribution */
-							float3 local_m = make_float3(dot(X, m), dot(Y, m), dot(Z, m));
-							float slope_x = -local_m.x/(local_m.z*alpha_x);
-							float slope_y = -local_m.y/(local_m.z*alpha_y);
-							float slope_len = 1 + slope_x*slope_x + slope_y*slope_y;
-
-							float cosThetaM = local_m.z;
-							float cosThetaM2 = cosThetaM * cosThetaM;
-							float cosThetaM4 = cosThetaM2 * cosThetaM2;
-
-							D = 1 / ((slope_len * slope_len) * M_PI_F * alpha2 * cosThetaM4);
-
-							/* calculate G1(i,m) */
-							float cosNI = dot(N, *omega_in);
-
-							float tanThetaI2 = (1 - cosNI * cosNI) / (cosNI * cosNI);
-							float cosPhiI = dot(*omega_in, X);
-							float sinPhiI = dot(*omega_in, Y);
-
-							float alphaI2 = (cosPhiI*cosPhiI)*(alpha_x*alpha_x) + (sinPhiI*sinPhiI)*(alpha_y*alpha_y);
-							alphaI2 /= cosPhiI*cosPhiI + sinPhiI*sinPhiI;
-
-							G1i = 2 / (1 + safe_sqrtf(1 + alphaI2 * tanThetaI2));
-						}
-
-						/* see eval function for derivation */
-						float common = (G1o * D) * 0.25f / cosNO;
-						*pdf = common;
-
-						float3 F = reflection_color(bsdf, *omega_in, m);
-
-						*eval = G1i * common * F;
-					}
-
-					if(bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID) {
-						*eval *= 0.25f * bsdf->extra->clearcoat;
-					}
+  const MicrofacetBsdf *bsdf = (const MicrofacetBsdf *)sc;
+  float alpha_x = bsdf->alpha_x;
+  float alpha_y = bsdf->alpha_y;
+  bool m_refractive = bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID;
+  float3 N = bsdf->N;
+  int label;
+
+  float cosNO = dot(N, I);
+  if (cosNO > 0) {
+    float3 X, Y, Z = N;
+
+    if (alpha_x == alpha_y)
+      make_orthonormals(Z, &X, &Y);
+    else
+      make_orthonormals_tangent(Z, bsdf->T, &X, &Y);
+
+    /* importance sampling with distribution of visible normals. vectors are
+     * transformed to local space before and after */
+    float3 local_I = make_float3(dot(X, I), dot(Y, I), cosNO);
+    float3 local_m;
+    float G1o;
+
+    local_m = microfacet_sample_stretched(
+        kg, local_I, alpha_x, alpha_y, randu, randv, false, &G1o);
+
+    float3 m = X * local_m.x + Y * local_m.y + Z * local_m.z;
+    float cosThetaM = local_m.z;
+
+    /* reflection or refraction? */
+    if (!m_refractive) {
+      float cosMO = dot(m, I);
+      label = LABEL_REFLECT | LABEL_GLOSSY;
+
+      if (cosMO > 0) {
+        /* eq. 39 - compute actual reflected direction */
+        *omega_in = 2 * cosMO * m - I;
+
+        if (dot(Ng, *omega_in) > 0) {
+          if (alpha_x * alpha_y <= 1e-7f) {
+            /* some high number for MIS */
+            *pdf = 1e6f;
+            *eval = make_float3(1e6f, 1e6f, 1e6f);
+
+            bool use_fresnel = (bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID ||
+                                bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID ||
+                                bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_ANISO_FRESNEL_ID);
+
+            /* if fresnel is used, calculate the color with reflection_color(...) */
+            if (use_fresnel) {
+              *eval *= reflection_color(bsdf, *omega_in, m);
+            }
+
+            label = LABEL_REFLECT | LABEL_SINGULAR;
+          }
+          else {
+            /* microfacet normal is visible to this ray */
+            /* eq. 33 */
+            float alpha2 = alpha_x * alpha_y;
+            float D, G1i;
+
+            if (alpha_x == alpha_y) {
+              /* isotropic */
+              float cosThetaM2 = cosThetaM * cosThetaM;
+              float cosThetaM4 = cosThetaM2 * cosThetaM2;
+              float tanThetaM2 = 1 / (cosThetaM2)-1;
+
+              /* eval BRDF*cosNI */
+              float cosNI = dot(N, *omega_in);
+
+              if (bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID) {
+                /* use GTR1 for clearcoat */
+                D = D_GTR1(cosThetaM, bsdf->alpha_x);
+
+                /* the alpha value for clearcoat is a fixed 0.25 => alpha2 = 0.25 * 0.25 */
+                alpha2 = 0.0625f;
+
+                /* recalculate G1o */
+                G1o = 2 / (1 + safe_sqrtf(1 + alpha2 * (1 - cosNO * cosNO) / (cosNO * cosNO)));
+              }
+              else {
+                /* use GTR2 otherwise */
+                D = alpha2 / (M_PI_F * cosThetaM4 * (alpha2 + tanThetaM2) * (alpha2 + tanThetaM2));
+              }
+
+              /* eq. 34: now calculate G1(i,m) */
+              G1i = 2 / (1 + safe_sqrtf(1 + alpha2 * (1 - cosNI * cosNI) / (cosNI * cosNI)));
+            }
+            else {
+              /* anisotropic distribution */
+              float3 local_m = make_float3(dot(X, m), dot(Y, m), dot(Z, m));
+              float slope_x = -local_m.x / (local_m.z * alpha_x);
+              float slope_y = -local_m.y / (local_m.z * alpha_y);
+              float slope_len = 1 + slope_x * slope_x + slope_y * slope_y;
+
+              float cosThetaM = local_m.z;
+              float cosThetaM2 = cosThetaM * cosThetaM;
+              float cosThetaM4 = cosThetaM2 * cosThetaM2;
+
+              D = 1 / ((slope_len * slope_len) * M_PI_F * alpha2 * cosThetaM4);
+
+              /* calculate G1(i,m) */
+              float cosNI = dot(N, *omega_in);
+
+              float tanThetaI2 = (1 - cosNI * cosNI) / (cosNI * cosNI);
+              float cosPhiI = dot(*omega_in, X);
+              float sinPhiI = dot(*omega_in, Y);
+
+              float alphaI2 = (cosPhiI * cosPhiI) * (alpha_x * alpha_x) +
+                              (sinPhiI * sinPhiI) * (alpha_y * alpha_y);
+              alphaI2 /= cosPhiI * cosPhiI + sinPhiI * sinPhiI;
+
+              G1i = 2 / (1 + safe_sqrtf(1 + alphaI2 * tanThetaI2));
+            }
+
+            /* see eval function for derivation */
+            float common = (G1o * D) * 0.25f / cosNO;
+            *pdf = common;
+
+            float3 F = reflection_color(bsdf, *omega_in, m);
+
+            *eval = G1i * common * F;
+          }
+
+          if (bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID) {
+            *eval *= 0.25f * bsdf->extra->clearcoat;
+          }
 
 #ifdef __RAY_DIFFERENTIALS__
-					*domega_in_dx = (2 * dot(m, dIdx)) * m - dIdx;
-					*domega_in_dy = (2 * dot(m, dIdy)) * m - dIdy;
+          *domega_in_dx = (2 * dot(m, dIdx)) * m - dIdx;
+          *domega_in_dy = (2 * dot(m, dIdy)) * m - dIdy;
 #endif
-				}
-			}
-		}
-		else {
-			label = LABEL_TRANSMIT | LABEL_GLOSSY;
-
-			/* CAUTION: the i and o variables are inverted relative to the paper
-			 * eq. 39 - compute actual refractive direction */
-			float3 R, T;
+        }
+      }
+    }
+    else {
+      label = LABEL_TRANSMIT | LABEL_GLOSSY;
+
+      /* CAUTION: the i and o variables are inverted relative to the paper
+       * eq. 39 - compute actual refractive direction */
+      float3 R, T;
 #ifdef __RAY_DIFFERENTIALS__
-			float3 dRdx, dRdy, dTdx, dTdy;
+      float3 dRdx, dRdy, dTdx, dTdy;
 #endif
-			float m_eta = bsdf->ior, fresnel;
-			bool inside;
-
-			fresnel = fresnel_dielectric(m_eta, m, I, &R, &T,
+      float m_eta = bsdf->ior, fresnel;
+      bool inside;
+
+      fresnel = fresnel_dielectric(m_eta,
+                                   m,
+                                   I,
+                                   &R,
+                                   &T,
 #ifdef __RAY_DIFFERENTIALS__
-				dIdx, dIdy, &dRdx, &dRdy, &dTdx, &dTdy,
+                                   dIdx,
+                                   dIdy,
+                                   &dRdx,
+                                   &dRdy,
+                                   &dTdx,
+                                   &dTdy,
 #endif
-				&inside);
+                                   &inside);
 
-			if(!inside && fresnel != 1.0f) {
+      if (!inside && fresnel != 1.0f) {
 
-				*omega_in = T;
+        *omega_in = T;
 #ifdef __RAY_DIFFERENTIALS__
-				*domega_in_dx = dTdx;
-				*domega_in_dy = dTdy;
+        *domega_in_dx = dTdx;
+        *domega_in_dy = dTdy;
 #endif
 
-				if(alpha_x*alpha_y <= 1e-7f || fabsf(m_eta - 1.0f) < 1e-4f) {
-					/* some high number for MIS */
-					*pdf = 1e6f;
-					*eval = make_float3(1e6f, 1e6f, 1e6f);
-					label = LABEL_TRANSMIT | LABEL_SINGULAR;
-				}
-				else {
-					/* eq. 33 */
-					float alpha2 = alpha_x * alpha_y;
-					float cosThetaM2 = cosThetaM * cosThetaM;
-					float cosThetaM4 = cosThetaM2 * cosThetaM2;
-					float tanThetaM2 = 1/(cosThetaM2) - 1;
-					float D = alpha2 / (M_PI_F * cosThetaM4 * (alpha2 + tanThetaM2) * (alpha2 + tanThetaM2));
-
-					/* eval BRDF*cosNI */
-					float cosNI = dot(N, *omega_in);
-
-					/* eq. 34: now calculate G1(i,m) */
-					float G1i = 2 / (1 + safe_sqrtf(1 + alpha2 * (1 - cosNI * cosNI) / (cosNI * cosNI)));
-
-					/* eq. 21 */
-					float cosHI = dot(m, *omega_in);
-					float cosHO = dot(m, I);
-					float Ht2 = m_eta * cosHI + cosHO;
-					Ht2 *= Ht2;
-
-					/* see eval function for derivation */
-					float common = (G1o * D) * (m_eta * m_eta) / (cosNO * Ht2);
-					float out = G1i * fabsf(cosHI * cosHO) * common;
-					*pdf = cosHO * fabsf(cosHI) * common;
-
-					*eval = make_float3(out, out, out);
-				}
-			}
-		}
-	}
-	else {
-		label = (m_refractive) ? LABEL_TRANSMIT|LABEL_GLOSSY : LABEL_REFLECT|LABEL_GLOSSY;
-	}
-	return label;
+        if (alpha_x * alpha_y <= 1e-7f || fabsf(m_eta - 1.0f) < 1e-4f) {
+          /* some high number for MIS */
+          *pdf = 1e6f;
+          *eval = make_float3(1e6f, 1e6f, 1e6f);
+          label = LABEL_TRANSMIT | LABEL_SINGULAR;
+        }
+        else {
+          /* eq. 33 */
+          float alpha2 = alpha_x * alpha_y;
+          float cosThetaM2 = cosThetaM * cosThetaM;
+          float cosThetaM4 = cosThetaM2 * cosThetaM2;
+          float tanThetaM2 = 1 / (cosThetaM2)-1;
+          float D = alpha2 / (M_PI_F * cosThetaM4 * (alpha2 + tanThetaM2) * (alpha2 + tanThetaM2));
+
+          /* eval BRDF*cosNI */
+          float cosNI = dot(N, *omega_in);
+
+          /* eq. 34: now calculate G1(i,m) */
+          float G1i = 2 / (1 + safe_sqrtf(1 + alpha2 * (1 - cosNI * cosNI) / (cosNI * cosNI)));
+
+          /* eq. 21 */
+          float cosHI = dot(m, *omega_in);
+          float cosHO = dot(m, I);
+          float Ht2 = m_eta * cosHI + cosHO;
+          Ht2 *= Ht2;
+
+          /* see eval function for derivation */
+          float common = (G1o * D) * (m_eta * m_eta) / (cosNO * Ht2);
+          float out = G1i * fabsf(cosHI * cosHO) * common;
+          *pdf = cosHO * fabsf(cosHI) * common;
+
+          *eval = make_float3(out, out, out);
+        }
+      }
+    }
+  }
+  else {
+    label = (m_refractive) ? LABEL_TRANSMIT | LABEL_GLOSSY : LABEL_REFLECT | LABEL_GLOSSY;
+  }
+  return label;
 }
 
 /* Beckmann microfacet with Smith shadow-masking from:
@@ -764,364 +803,392 @@ ccl_device int bsdf_microfacet_ggx_sample(KernelGlobals *kg, const ShaderClosure
 
 ccl_device int bsdf_microfacet_beckmann_setup(MicrofacetBsdf *bsdf)
 {
-	bsdf->alpha_x = saturate(bsdf->alpha_x);
-	bsdf->alpha_y = bsdf->alpha_x;
+  bsdf->alpha_x = saturate(bsdf->alpha_x);
+  bsdf->alpha_y = bsdf->alpha_x;
 
-	bsdf->type = CLOSURE_BSDF_MICROFACET_BECKMANN_ID;
-	return SD_BSDF|SD_BSDF_HAS_EVAL;
+  bsdf->type = CLOSURE_BSDF_MICROFACET_BECKMANN_ID;
+  return SD_BSDF | SD_BSDF_HAS_EVAL;
 }
 
 ccl_device int bsdf_microfacet_beckmann_aniso_setup(MicrofacetBsdf *bsdf)
 {
-	bsdf->alpha_x = saturate(bsdf->alpha_x);
-	bsdf->alpha_y = saturate(bsdf->alpha_y);
+  bsdf->alpha_x = saturate(bsdf->alpha_x);
+  bsdf->alpha_y = saturate(bsdf->alpha_y);
 
-	bsdf->type = CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID;
-	return SD_BSDF|SD_BSDF_HAS_EVAL;
+  bsdf->type = CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID;
+  return SD_BSDF | SD_BSDF_HAS_EVAL;
 }
 
 ccl_device int bsdf_microfacet_beckmann_refraction_setup(MicrofacetBsdf *bsdf)
 {
-	bsdf->alpha_x = saturate(bsdf->alpha_x);
-	bsdf->alpha_y = bsdf->alpha_x;
+  bsdf->alpha_x = saturate(bsdf->alpha_x);
+  bsdf->alpha_y = bsdf->alpha_x;
 
-	bsdf->type = CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID;
-	return SD_BSDF|SD_BSDF_HAS_EVAL;
+  bsdf->type = CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID;
+  return SD_BSDF | SD_BSDF_HAS_EVAL;
 }
 
 ccl_device void bsdf_microfacet_beckmann_blur(ShaderClosure *sc, float roughness)
 {
-	MicrofacetBsdf *bsdf = (MicrofacetBsdf*)sc;
+  MicrofacetBsdf *bsdf = (MicrofacetBsdf *)sc;
 
-	bsdf->alpha_x = fmaxf(roughness, bsdf->alpha_x);
-	bsdf->alpha_y = fmaxf(roughness, bsdf->alpha_y);
+  bsdf->alpha_x = fmaxf(roughness, bsdf->alpha_x);
+  bsdf->alpha_y = fmaxf(roughness, bsdf->alpha_y);
 }
 
 ccl_device_inline float bsdf_beckmann_G1(float alpha, float cos_n)
 {
-	cos_n *= cos_n;
-	float invA = alpha * safe_sqrtf((1.0f - cos_n) / cos_n);
-	if(invA < 0.625f) {
-		return 1.0f;
-	}
-
-	float a = 1.0f / invA;
-	return ((2.181f*a + 3.535f)*a) / ((2.577f*a + 2.276f)*a + 1.0f);
+  cos_n *= cos_n;
+  float invA = alpha * safe_sqrtf((1.0f - cos_n) / cos_n);
+  if (invA < 0.625f) {
+    return 1.0f;
+  }
+
+  float a = 1.0f / invA;
+  return ((2.181f * a + 3.535f) * a) / ((2.577f * a + 2.276f) * a + 1.0f);
 }
 
-ccl_device_inline float bsdf_beckmann_aniso_G1(float alpha_x, float alpha_y, float cos_n, float cos_phi, float sin_phi)
+ccl_device_inline float bsdf_beckmann_aniso_G1(
+    float alpha_x, float alpha_y, float cos_n, float cos_phi, float sin_phi)
 {
-	cos_n *= cos_n;
-	sin_phi *= sin_phi;
-	cos_phi *= cos_phi;
-	alpha_x *= alpha_x;
-	alpha_y *= alpha_y;
-
-	float alphaO2 = (cos_phi*alpha_x + sin_phi*alpha_y) / (cos_phi + sin_phi);
-	float invA = safe_sqrtf(alphaO2 * (1 - cos_n) / cos_n);
-	if(invA < 0.625f) {
-		return 1.0f;
-	}
-
-	float a = 1.0f / invA;
-	return ((2.181f*a + 3.535f)*a) / ((2.577f*a + 2.276f)*a + 1.0f);
+  cos_n *= cos_n;
+  sin_phi *= sin_phi;
+  cos_phi *= cos_phi;
+  alpha_x *= alpha_x;
+  alpha_y *= alpha_y;
+
+  float alphaO2 = (cos_phi * alpha_x + sin_phi * alpha_y) / (cos_phi + sin_phi);
+  float invA = safe_sqrtf(alphaO2 * (1 - cos_n) / cos_n);
+  if (invA < 0.625f) {
+    return 1.0f;
+  }
+
+  float a = 1.0f / invA;
+  return ((2.181f * a + 3.535f) * a) / ((2.577f * a + 2.276f) * a + 1.0f);
 }
 
-ccl_device float3 bsdf_microfacet_beckmann_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
+ccl_device float3 bsdf_microfacet_beckmann_eval_reflect(const ShaderClosure *sc,
+                                                        const float3 I,
+                                                        const float3 omega_in,
+                                                        float *pdf)
 {
-	const MicrofacetBsdf *bsdf = (const MicrofacetBsdf*)sc;
-	float alpha_x = bsdf->alpha_x;
-	float alpha_y = bsdf->alpha_y;
-	bool m_refractive = bsdf->type == CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID;
-	float3 N = bsdf->N;
-
-	if(m_refractive || alpha_x*alpha_y <= 1e-7f)
-		return make_float3(0.0f, 0.0f, 0.0f);
-
-	float cosNO = dot(N, I);
-	float cosNI = dot(N, omega_in);
-
-	if(cosNO > 0 && cosNI > 0) {
-		/* get half vector */
-		float3 m = normalize(omega_in + I);
-
-		float alpha2 = alpha_x * alpha_y;
-		float D, G1o, G1i;
-
-		if(alpha_x == alpha_y) {
-			/* isotropic
-			 * eq. 20: (F*G*D)/(4*in*on)
-			 * eq. 25: first we calculate D(m) */
-			float cosThetaM = dot(N, m);
-			float cosThetaM2 = cosThetaM * cosThetaM;
-			float tanThetaM2 = (1 - cosThetaM2) / cosThetaM2;
-			float cosThetaM4 = cosThetaM2 * cosThetaM2;
-			D = expf(-tanThetaM2 / alpha2) / (M_PI_F * alpha2 * cosThetaM4);
-
-			/* eq. 26, 27: now calculate G1(i,m) and G1(o,m) */
-			G1o = bsdf_beckmann_G1(alpha_x, cosNO);
-			G1i = bsdf_beckmann_G1(alpha_x, cosNI);
-		}
-		else {
-			/* anisotropic */
-			float3 X, Y, Z = N;
-			make_orthonormals_tangent(Z, bsdf->T, &X, &Y);
-
-			/* distribution */
-			float3 local_m = make_float3(dot(X, m), dot(Y, m), dot(Z, m));
-			float slope_x = -local_m.x/(local_m.z*alpha_x);
-			float slope_y = -local_m.y/(local_m.z*alpha_y);
-
-			float cosThetaM = local_m.z;
-			float cosThetaM2 = cosThetaM * cosThetaM;
-			float cosThetaM4 = cosThetaM2 * cosThetaM2;
-
-			D = expf(-slope_x*slope_x - slope_y*slope_y) / (M_PI_F * alpha2 * cosThetaM4);
-
-			/* G1(i,m) and G1(o,m) */
-			G1o = bsdf_beckmann_aniso_G1(alpha_x, alpha_y, cosNO, dot(I, X), dot(I, Y));
-			G1i = bsdf_beckmann_aniso_G1(alpha_x, alpha_y, cosNI, dot(omega_in, X), dot(omega_in, Y));
-		}
-
-		float G = G1o * G1i;
-
-		/* eq. 20 */
-		float common = D * 0.25f / cosNO;
-		float out = G * common;
-
-		/* eq. 2 in distribution of visible normals sampling
-		 * pm = Dw = G1o * dot(m, I) * D / dot(N, I); */
-
-		/* eq. 38 - but see also:
-		 * eq. 17 in http://www.graphics.cornell.edu/~bjw/wardnotes.pdf
-		 * pdf = pm * 0.25 / dot(m, I); */
-		*pdf = G1o * common;
-
-		return make_float3(out, out, out);
-	}
-
-	return make_float3(0.0f, 0.0f, 0.0f);
+  const MicrofacetBsdf *bsdf = (const MicrofacetBsdf *)sc;
+  float alpha_x = bsdf->alpha_x;
+  float alpha_y = bsdf->alpha_y;
+  bool m_refractive = bsdf->type == CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID;
+  float3 N = bsdf->N;
+
+  if (m_refractive || alpha_x * alpha_y <= 1e-7f)
+    return make_float3(0.0f, 0.0f, 0.0f);
+
+  float cosNO = dot(N, I);
+  float cosNI = dot(N, omega_in);
+
+  if (cosNO > 0 && cosNI > 0) {
+    /* get half vector */
+    float3 m = normalize(omega_in + I);
+
+    float alpha2 = alpha_x * alpha_y;
+    float D, G1o, G1i;
+
+    if (alpha_x == alpha_y) {
+      /* isotropic
+       * eq. 20: (F*G*D)/(4*in*on)
+       * eq. 25: first we calculate D(m) */
+      float cosThetaM = dot(N, m);
+      float cosThetaM2 = cosThetaM * cosThetaM;
+      float tanThetaM2 = (1 - cosThetaM2) / cosThetaM2;
+      float cosThetaM4 = cosThetaM2 * cosThetaM2;
+      D = expf(-tanThetaM2 / alpha2) / (M_PI_F * alpha2 * cosThetaM4);
+
+      /* eq. 26, 27: now calculate G1(i,m) and G1(o,m) */
+      G1o = bsdf_beckmann_G1(alpha_x, cosNO);
+      G1i = bsdf_beckmann_G1(alpha_x, cosNI);
+    }
+    else {
+      /* anisotropic */
+      float3 X, Y, Z = N;
+      make_orthonormals_tangent(Z, bsdf->T, &X, &Y);
+
+      /* distribution */
+      float3 local_m = make_float3(dot(X, m), dot(Y, m), dot(Z, m));
+      float slope_x = -local_m.x / (local_m.z * alpha_x);
+      float slope_y = -local_m.y / (local_m.z * alpha_y);
+
+      float cosThetaM = local_m.z;
+      float cosThetaM2 = cosThetaM * cosThetaM;
+      float cosThetaM4 = cosThetaM2 * cosThetaM2;
+
+      D = expf(-slope_x * slope_x - slope_y * slope_y) / (M_PI_F * alpha2 * cosThetaM4);
+
+      /* G1(i,m) and G1(o,m) */
+      G1o = bsdf_beckmann_aniso_G1(alpha_x, alpha_y, cosNO, dot(I, X), dot(I, Y));
+      G1i = bsdf_beckmann_aniso_G1(alpha_x, alpha_y, cosNI, dot(omega_in, X), dot(omega_in, Y));
+    }
+
+    float G = G1o * G1i;
+
+    /* eq. 20 */
+    float common = D * 0.25f / cosNO;
+    float out = G * common;
+
+    /* eq. 2 in distribution of visible normals sampling
+     * pm = Dw = G1o * dot(m, I) * D / dot(N, I); */
+
+    /* eq. 38 - but see also:
+     * eq. 17 in http://www.graphics.cornell.edu/~bjw/wardnotes.pdf
+     * pdf = pm * 0.25 / dot(m, I); */
+    *pdf = G1o * common;
+
+    return make_float3(out, out, out);
+  }
+
+  return make_float3(0.0f, 0.0f, 0.0f);
 }
 
-ccl_device float3 bsdf_microfacet_beckmann_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
+ccl_device float3 bsdf_microfacet_beckmann_eval_transmit(const ShaderClosure *sc,
+                                                         const float3 I,
+                                                         const float3 omega_in,
+                                                         float *pdf)
 {
-	const MicrofacetBsdf *bsdf = (const MicrofacetBsdf*)sc;
-	float alpha_x = bsdf->alpha_x;
-	float alpha_y = bsdf->alpha_y;
-	float m_eta = bsdf->ior;
-	bool m_refractive = bsdf->type == CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID;
-	float3 N = bsdf->N;
-
-	if(!m_refractive || alpha_x*alpha_y <= 1e-7f)
-		return make_float3(0.0f, 0.0f, 0.0f);
-
-	float cosNO = dot(N, I);
-	float cosNI = dot(N, omega_in);
-
-	if(cosNO <= 0 || cosNI >= 0)
-		return make_float3(0.0f, 0.0f, 0.0f);
-
-	/* compute half-vector of the refraction (eq. 16) */
-	float3 ht = -(m_eta * omega_in + I);
-	float3 Ht = normalize(ht);
-	float cosHO = dot(Ht, I);
-	float cosHI = dot(Ht, omega_in);
-
-	/* eq. 25: first we calculate D(m) with m=Ht: */
-	float alpha2 = alpha_x * alpha_y;
-	float cosThetaM = min(dot(N, Ht), 1.0f);
-	float cosThetaM2 = cosThetaM * cosThetaM;
-	float tanThetaM2 = (1 - cosThetaM2) / cosThetaM2;
-	float cosThetaM4 = cosThetaM2 * cosThetaM2;
-	float D = expf(-tanThetaM2 / alpha2) / (M_PI_F * alpha2 *  cosThetaM4);
-
-	/* eq. 26, 27: now calculate G1(i,m) and G1(o,m) */
-	float G1o = bsdf_beckmann_G1(alpha_x, cosNO);
-	float G1i = bsdf_beckmann_G1(alpha_x, cosNI);
-	float G = G1o * G1i;
-
-	/* probability */
-	float Ht2 = dot(ht, ht);
-
-	/* eq. 2 in distribution of visible normals sampling
-	 * pm = Dw = G1o * dot(m, I) * D / dot(N, I); */
-
-	/* out = fabsf(cosHI * cosHO) * (m_eta * m_eta) * G * D / (cosNO * Ht2)
-	 * pdf = pm * (m_eta * m_eta) * fabsf(cosHI) / Ht2 */
-	float common = D * (m_eta * m_eta) / (cosNO * Ht2);
-	float out = G * fabsf(cosHI * cosHO) * common;
-	*pdf = G1o * fabsf(cosHO * cosHI) * common;
-
-	return make_float3(out, out, out);
+  const MicrofacetBsdf *bsdf = (const MicrofacetBsdf *)sc;
+  float alpha_x = bsdf->alpha_x;
+  float alpha_y = bsdf->alpha_y;
+  float m_eta = bsdf->ior;
+  bool m_refractive = bsdf->type == CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID;
+  float3 N = bsdf->N;
+
+  if (!m_refractive || alpha_x * alpha_y <= 1e-7f)
+    return make_float3(0.0f, 0.0f, 0.0f);
+
+  float cosNO = dot(N, I);
+  float cosNI = dot(N, omega_in);
+
+  if (cosNO <= 0 || cosNI >= 0)
+    return make_float3(0.0f, 0.0f, 0.0f);
+
+  /* compute half-vector of the refraction (eq. 16) */
+  float3 ht = -(m_eta * omega_in + I);
+  float3 Ht = normalize(ht);
+  float cosHO = dot(Ht, I);
+  float cosHI = dot(Ht, omega_in);
+
+  /* eq. 25: first we calculate D(m) with m=Ht: */
+  float alpha2 = alpha_x * alpha_y;
+  float cosThetaM = min(dot(N, Ht), 1.0f);
+  float cosThetaM2 = cosThetaM * cosThetaM;
+  float tanThetaM2 = (1 - cosThetaM2) / cosThetaM2;
+  float cosThetaM4 = cosThetaM2 * cosThetaM2;
+  float D = expf(-tanThetaM2 / alpha2) / (M_PI_F * alpha2 * cosThetaM4);
+
+  /* eq. 26, 27: now calculate G1(i,m) and G1(o,m) */
+  float G1o = bsdf_beckmann_G1(alpha_x, cosNO);
+  float G1i = bsdf_beckmann_G1(alpha_x, cosNI);
+  float G = G1o * G1i;
+
+  /* probability */
+  float Ht2 = dot(ht, ht);
+
+  /* eq. 2 in distribution of visible normals sampling
+   * pm = Dw = G1o * dot(m, I) * D / dot(N, I); */
+
+  /* out = fabsf(cosHI * cosHO) * (m_eta * m_eta) * G * D / (cosNO * Ht2)
+   * pdf = pm * (m_eta * m_eta) * fabsf(cosHI) / Ht2 */
+  float common = D * (m_eta * m_eta) / (cosNO * Ht2);
+  float out = G * fabsf(cosHI * cosHO) * common;
+  *pdf = G1o * fabsf(cosHO * cosHI) * common;
+
+  return make_float3(out, out, out);
 }
 
-ccl_device int bsdf_microfacet_beckmann_sample(KernelGlobals *kg, const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf)
+ccl_device int bsdf_microfacet_beckmann_sample(KernelGlobals *kg,
+                                               const ShaderClosure *sc,
+                                               float3 Ng,
+                                               float3 I,
+                                               float3 dIdx,
+                                               float3 dIdy,
+                                               float randu,
+                                               float randv,
+                                               float3 *eval,
+                                               float3 *omega_in,
+                                               float3 *domega_in_dx,
+                                               float3 *domega_in_dy,
+                                               float *pdf)
 {
-	const MicrofacetBsdf *bsdf = (const MicrofacetBsdf*)sc;
-	float alpha_x = bsdf->alpha_x;
-	float alpha_y = bsdf->alpha_y;
-	bool m_refractive = bsdf->type == CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID;
-	float3 N = bsdf->N;
-	int label;
-
-	float cosNO = dot(N, I);
-	if(cosNO > 0) {
-		float3 X, Y, Z = N;
-
-		if(alpha_x == alpha_y)
-			make_orthonormals(Z, &X, &Y);
-		else
-			make_orthonormals_tangent(Z, bsdf->T, &X, &Y);
-
-		/* importance sampling with distribution of visible normals. vectors are
-		 * transformed to local space before and after */
-		float3 local_I = make_float3(dot(X, I), dot(Y, I), cosNO);
-		float3 local_m;
-		float G1o;
-
-		local_m = microfacet_sample_stretched(kg, local_I, alpha_x, alpha_x,
-			randu, randv, true, &G1o);
-
-		float3 m = X*local_m.x + Y*local_m.y + Z*local_m.z;
-		float cosThetaM = local_m.z;
-
-		/* reflection or refraction? */
-		if(!m_refractive) {
-			label = LABEL_REFLECT | LABEL_GLOSSY;
-			float cosMO = dot(m, I);
-
-			if(cosMO > 0) {
-				/* eq. 39 - compute actual reflected direction */
-				*omega_in = 2 * cosMO * m - I;
-
-				if(dot(Ng, *omega_in) > 0) {
-					if(alpha_x*alpha_y <= 1e-7f) {
-						/* some high number for MIS */
-						*pdf = 1e6f;
-						*eval = make_float3(1e6f, 1e6f, 1e6f);
-						label = LABEL_REFLECT | LABEL_SINGULAR;
-					}
-					else {
-						/* microfacet normal is visible to this ray
-						 * eq. 25 */
-						float alpha2 = alpha_x * alpha_y;
-						float D, G1i;
-
-						if(alpha_x == alpha_y) {
-							/* istropic distribution */
-							float cosThetaM2 = cosThetaM * cosThetaM;
-							float cosThetaM4 = cosThetaM2 * cosThetaM2;
-							float tanThetaM2 = 1/(cosThetaM2) - 1;
-							D = expf(-tanThetaM2 / alpha2) / (M_PI_F * alpha2 *  cosThetaM4);
-
-							/* eval BRDF*cosNI */
-							float cosNI = dot(N, *omega_in);
-
-							/* eq. 26, 27: now calculate G1(i,m) */
-							G1i = bsdf_beckmann_G1(alpha_x, cosNI);
-						}
-						else {
-							/* anisotropic distribution */
-							float3 local_m = make_float3(dot(X, m), dot(Y, m), dot(Z, m));
-							float slope_x = -local_m.x/(local_m.z*alpha_x);
-							float slope_y = -local_m.y/(local_m.z*alpha_y);
-
-							float cosThetaM = local_m.z;
-							float cosThetaM2 = cosThetaM * cosThetaM;
-							float cosThetaM4 = cosThetaM2 * cosThetaM2;
-
-							D = expf(-slope_x*slope_x - slope_y*slope_y) / (M_PI_F * alpha2 * cosThetaM4);
-
-							/* G1(i,m) */
-							G1i = bsdf_beckmann_aniso_G1(alpha_x, alpha_y, dot(*omega_in, N), dot(*omega_in, X), dot(*omega_in, Y));
-						}
-
-						float G = G1o * G1i;
-
-						/* see eval function for derivation */
-						float common = D * 0.25f / cosNO;
-						float out = G * common;
-						*pdf = G1o * common;
-
-						*eval = make_float3(out, out, out);
-					}
+  const MicrofacetBsdf *bsdf = (const MicrofacetBsdf *)sc;
+  float alpha_x = bsdf->alpha_x;
+  float alpha_y = bsdf->alpha_y;
+  bool m_refractive = bsdf->type == CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID;
+  float3 N = bsdf->N;
+  int label;
+
+  float cosNO = dot(N, I);
+  if (cosNO > 0) {
+    float3 X, Y, Z = N;
+
+    if (alpha_x == alpha_y)
+      make_orthonormals(Z, &X, &Y);
+    else
+      make_orthonormals_tangent(Z, bsdf->T, &X, &Y);
+
+    /* importance sampling with distribution of visible normals. vectors are
+     * transformed to local space before and after */
+    float3 local_I = make_float3(dot(X, I), dot(Y, I), cosNO);
+    float3 local_m;
+    float G1o;
+
+    local_m = microfacet_sample_stretched(kg, local_I, alpha_x, alpha_x, randu, randv, true, &G1o);
+
+    float3 m = X * local_m.x + Y * local_m.y + Z * local_m.z;
+    float cosThetaM = local_m.z;
+
+    /* reflection or refraction? */
+    if (!m_refractive) {
+      label = LABEL_REFLECT | LABEL_GLOSSY;
+      float cosMO = dot(m, I);
+
+      if (cosMO > 0) {
+        /* eq. 39 - compute actual reflected direction */
+        *omega_in = 2 * cosMO * m - I;
+
+        if (dot(Ng, *omega_in) > 0) {
+          if (alpha_x * alpha_y <= 1e-7f) {
+            /* some high number for MIS */
+            *pdf = 1e6f;
+            *eval = make_float3(1e6f, 1e6f, 1e6f);
+            label = LABEL_REFLECT | LABEL_SINGULAR;
+          }
+          else {
+            /* microfacet normal is visible to this ray
+             * eq. 25 */
+            float alpha2 = alpha_x * alpha_y;
+            float D, G1i;
+
+            if (alpha_x == alpha_y) {
+              /* istropic distribution */
+              float cosThetaM2 = cosThetaM * cosThetaM;
+              float cosThetaM4 = cosThetaM2 * cosThetaM2;
+              float tanThetaM2 = 1 / (cosThetaM2)-1;
+              D = expf(-tanThetaM2 / alpha2) / (M_PI_F * alpha2 * cosThetaM4);
+
+              /* eval BRDF*cosNI */
+              float cosNI = dot(N, *omega_in);
+
+              /* eq. 26, 27: now calculate G1(i,m) */
+              G1i = bsdf_beckmann_G1(alpha_x, cosNI);
+            }
+            else {
+              /* anisotropic distribution */
+              float3 local_m = make_float3(dot(X, m), dot(Y, m), dot(Z, m));
+              float slope_x = -local_m.x / (local_m.z * alpha_x);
+              float slope_y = -local_m.y / (local_m.z * alpha_y);
+
+              float cosThetaM = local_m.z;
+              float cosThetaM2 = cosThetaM * cosThetaM;
+              float cosThetaM4 = cosThetaM2 * cosThetaM2;
+
+              D = expf(-slope_x * slope_x - slope_y * slope_y) / (M_PI_F * alpha2 * cosThetaM4);
+
+              /* G1(i,m) */
+              G1i = bsdf_beckmann_aniso_G1(
+                  alpha_x, alpha_y, dot(*omega_in, N), dot(*omega_in, X), dot(*omega_in, Y));
+            }
+
+            float G = G1o * G1i;
+
+            /* see eval function for derivation */
+            float common = D * 0.25f / cosNO;
+            float out = G * common;
+            *pdf = G1o * common;
+
+            *eval = make_float3(out, out, out);
+          }
 
 #ifdef __RAY_DIFFERENTIALS__
-					*domega_in_dx = (2 * dot(m, dIdx)) * m - dIdx;
-					*domega_in_dy = (2 * dot(m, dIdy)) * m - dIdy;
+          *domega_in_dx = (2 * dot(m, dIdx)) * m - dIdx;
+          *domega_in_dy = (2 * dot(m, dIdy)) * m - dIdy;
 #endif
-				}
-			}
-		}
-		else {
-			label = LABEL_TRANSMIT | LABEL_GLOSSY;
-
-			/* CAUTION: the i and o variables are inverted relative to the paper
-			 * eq. 39 - compute actual refractive direction */
-			float3 R, T;
+        }
+      }
+    }
+    else {
+      label = LABEL_TRANSMIT | LABEL_GLOSSY;
+
+      /* CAUTION: the i and o variables are inverted relative to the paper
+       * eq. 39 - compute actual refractive direction */
+      float3 R, T;
 #ifdef __RAY_DIFFERENTIALS__
-			float3 dRdx, dRdy, dTdx, dTdy;
+      float3 dRdx, dRdy, dTdx, dTdy;
 #endif
-			float m_eta = bsdf->ior, fresnel;
-			bool inside;
-
-			fresnel = fresnel_dielectric(m_eta, m, I, &R, &T,
+      float m_eta = bsdf->ior, fresnel;
+      bool inside;
+
+      fresnel = fresnel_dielectric(m_eta,
+                                   m,
+                                   I,
+                                   &R,
+                                   &T,
 #ifdef __RAY_DIFFERENTIALS__
-				dIdx, dIdy, &dRdx, &dRdy, &dTdx, &dTdy,
+                                   dIdx,
+                                   dIdy,
+                                   &dRdx,
+                                   &dRdy,
+                                   &dTdx,
+                                   &dTdy,
 #endif
-				&inside);
+                                   &inside);
 
-			if(!inside && fresnel != 1.0f) {
-				*omega_in = T;
+      if (!inside && fresnel != 1.0f) {
+        *omega_in = T;
 
 #ifdef __RAY_DIFFERENTIALS__
-				*domega_in_dx = dTdx;
-				*domega_in_dy = dTdy;
+        *domega_in_dx = dTdx;
+        *domega_in_dy = dTdy;
 #endif
 
-				if(alpha_x*alpha_y <= 1e-7f || fabsf(m_eta - 1.0f) < 1e-4f) {
-					/* some high number for MIS */
-					*pdf = 1e6f;
-					*eval = make_float3(1e6f, 1e6f, 1e6f);
-					label = LABEL_TRANSMIT | LABEL_SINGULAR;
-				}
-				else {
-					/* eq. 33 */
-					float alpha2 = alpha_x * alpha_y;
-					float cosThetaM2 = cosThetaM * cosThetaM;
-					float cosThetaM4 = cosThetaM2 * cosThetaM2;
-					float tanThetaM2 = 1/(cosThetaM2) - 1;
-					float D = expf(-tanThetaM2 / alpha2) / (M_PI_F * alpha2 *  cosThetaM4);
-
-					/* eval BRDF*cosNI */
-					float cosNI = dot(N, *omega_in);
-
-					/* eq. 26, 27: now calculate G1(i,m) */
-					float G1i = bsdf_beckmann_G1(alpha_x, cosNI);
-					float G = G1o * G1i;
-
-					/* eq. 21 */
-					float cosHI = dot(m, *omega_in);
-					float cosHO = dot(m, I);
-					float Ht2 = m_eta * cosHI + cosHO;
-					Ht2 *= Ht2;
-
-					/* see eval function for derivation */
-					float common = D * (m_eta * m_eta) / (cosNO * Ht2);
-					float out = G * fabsf(cosHI * cosHO) * common;
-					*pdf = G1o * cosHO * fabsf(cosHI) * common;
-
-					*eval = make_float3(out, out, out);
-				}
-			}
-		}
-	}
-	else {
-		label = (m_refractive) ? LABEL_TRANSMIT|LABEL_GLOSSY : LABEL_REFLECT|LABEL_GLOSSY;
-	}
-	return label;
+        if (alpha_x * alpha_y <= 1e-7f || fabsf(m_eta - 1.0f) < 1e-4f) {
+          /* some high number for MIS */
+          *pdf = 1e6f;
+          *eval = make_float3(1e6f, 1e6f, 1e6f);
+          label = LABEL_TRANSMIT | LABEL_SINGULAR;
+        }
+        else {
+          /* eq. 33 */
+          float alpha2 = alpha_x * alpha_y;
+          float cosThetaM2 = cosThetaM * cosThetaM;
+          float cosThetaM4 = cosThetaM2 * cosThetaM2;
+          float tanThetaM2 = 1 / (cosThetaM2)-1;
+          float D = expf(-tanThetaM2 / alpha2) / (M_PI_F * alpha2 * cosThetaM4);
+
+          /* eval BRDF*cosNI */
+          float cosNI = dot(N, *omega_in);
+
+          /* eq. 26, 27: now calculate G1(i,m) */
+          float G1i = bsdf_beckmann_G1(alpha_x, cosNI);
+          float G = G1o * G1i;
+
+          /* eq. 21 */
+          float cosHI = dot(m, *omega_in);
+          float cosHO = dot(m, I);
+          float Ht2 = m_eta * cosHI + cosHO;
+          Ht2 *= Ht2;
+
+          /* see eval function for derivation */
+          float common = D * (m_eta * m_eta) / (cosNO * Ht2);
+          float out = G * fabsf(cosHI * cosHO) * common;
+          *pdf = G1o * cosHO * fabsf(cosHI) * common;
+
+          *eval = make_float3(out, out, out);
+        }
+      }
+    }
+  }
+  else {
+    label = (m_refractive) ? LABEL_TRANSMIT | LABEL_GLOSSY : LABEL_REFLECT | LABEL_GLOSSY;
+  }
+  return label;
 }
 
 CCL_NAMESPACE_END
 
-#endif  /* __BSDF_MICROFACET_H__ */
+#endif /* __BSDF_MICROFACET_H__ */
diff --git a/intern/cycles/kernel/closure/bsdf_microfacet_multi.h b/intern/cycles/kernel/closure/bsdf_microfacet_multi.h
index 2f2c35d5d1f..2cc1a9c5299 100644
--- a/intern/cycles/kernel/closure/bsdf_microfacet_multi.h
+++ b/intern/cycles/kernel/closure/bsdf_microfacet_multi.h
@@ -23,149 +23,168 @@ CCL_NAMESPACE_BEGIN
 /* Isotropic GGX microfacet distribution */
 ccl_device_forceinline float D_ggx(float3 wm, float alpha)
 {
-	wm.z *= wm.z;
-	alpha *= alpha;
-	float tmp = (1.0f - wm.z) + alpha * wm.z;
-	return alpha / max(M_PI_F * tmp*tmp, 1e-7f);
+  wm.z *= wm.z;
+  alpha *= alpha;
+  float tmp = (1.0f - wm.z) + alpha * wm.z;
+  return alpha / max(M_PI_F * tmp * tmp, 1e-7f);
 }
 
 /* Anisotropic GGX microfacet distribution */
 ccl_device_forceinline float D_ggx_aniso(const float3 wm, const float2 alpha)
 {
-	float slope_x = -wm.x/alpha.x;
-	float slope_y = -wm.y/alpha.y;
-	float tmp = wm.z*wm.z + slope_x*slope_x + slope_y*slope_y;
+  float slope_x = -wm.x / alpha.x;
+  float slope_y = -wm.y / alpha.y;
+  float tmp = wm.z * wm.z + slope_x * slope_x + slope_y * slope_y;
 
-	return 1.0f / max(M_PI_F * tmp*tmp * alpha.x*alpha.y, 1e-7f);
+  return 1.0f / max(M_PI_F * tmp * tmp * alpha.x * alpha.y, 1e-7f);
 }
 
 /* Sample slope distribution (based on page 14 of the supplemental implementation). */
-ccl_device_forceinline float2 mf_sampleP22_11(const float cosI, const float randx, const float randy)
-{
-	if(cosI > 0.9999f || fabsf(cosI) < 1e-6f) {
-		const float r = sqrtf(randx / max(1.0f - randx, 1e-7f));
-		const float phi = M_2PI_F * randy;
-		return make_float2(r*cosf(phi), r*sinf(phi));
-	}
-
-	const float sinI = safe_sqrtf(1.0f - cosI*cosI);
-	const float tanI = sinI/cosI;
-	const float projA = 0.5f * (cosI + 1.0f);
-	if(projA < 0.0001f)
-		return make_float2(0.0f, 0.0f);
-	const float A = 2.0f*randx*projA / cosI - 1.0f;
-	float tmp = A*A-1.0f;
-	if(fabsf(tmp) < 1e-7f)
-		return make_float2(0.0f, 0.0f);
-	tmp = 1.0f / tmp;
-	const float D = safe_sqrtf(tanI*tanI*tmp*tmp - (A*A-tanI*tanI)*tmp);
-
-	const float slopeX2 = tanI*tmp + D;
-	const float slopeX = (A < 0.0f || slopeX2 > 1.0f/tanI)? (tanI*tmp - D) : slopeX2;
-
-	float U2;
-	if(randy >= 0.5f)
-		U2 = 2.0f*(randy - 0.5f);
-	else
-		U2 = 2.0f*(0.5f - randy);
-	const float z = (U2*(U2*(U2*0.27385f-0.73369f)+0.46341f)) / (U2*(U2*(U2*0.093073f+0.309420f)-1.0f)+0.597999f);
-	const float slopeY = z * sqrtf(1.0f + slopeX*slopeX);
-
-	if(randy >= 0.5f)
-		return make_float2(slopeX, slopeY);
-	else
-		return make_float2(slopeX, -slopeY);
+ccl_device_forceinline float2 mf_sampleP22_11(const float cosI,
+                                              const float randx,
+                                              const float randy)
+{
+  if (cosI > 0.9999f || fabsf(cosI) < 1e-6f) {
+    const float r = sqrtf(randx / max(1.0f - randx, 1e-7f));
+    const float phi = M_2PI_F * randy;
+    return make_float2(r * cosf(phi), r * sinf(phi));
+  }
+
+  const float sinI = safe_sqrtf(1.0f - cosI * cosI);
+  const float tanI = sinI / cosI;
+  const float projA = 0.5f * (cosI + 1.0f);
+  if (projA < 0.0001f)
+    return make_float2(0.0f, 0.0f);
+  const float A = 2.0f * randx * projA / cosI - 1.0f;
+  float tmp = A * A - 1.0f;
+  if (fabsf(tmp) < 1e-7f)
+    return make_float2(0.0f, 0.0f);
+  tmp = 1.0f / tmp;
+  const float D = safe_sqrtf(tanI * tanI * tmp * tmp - (A * A - tanI * tanI) * tmp);
+
+  const float slopeX2 = tanI * tmp + D;
+  const float slopeX = (A < 0.0f || slopeX2 > 1.0f / tanI) ? (tanI * tmp - D) : slopeX2;
+
+  float U2;
+  if (randy >= 0.5f)
+    U2 = 2.0f * (randy - 0.5f);
+  else
+    U2 = 2.0f * (0.5f - randy);
+  const float z = (U2 * (U2 * (U2 * 0.27385f - 0.73369f) + 0.46341f)) /
+                  (U2 * (U2 * (U2 * 0.093073f + 0.309420f) - 1.0f) + 0.597999f);
+  const float slopeY = z * sqrtf(1.0f + slopeX * slopeX);
+
+  if (randy >= 0.5f)
+    return make_float2(slopeX, slopeY);
+  else
+    return make_float2(slopeX, -slopeY);
 }
 
 /* Visible normal sampling for the GGX distribution (based on page 7 of the supplemental implementation). */
-ccl_device_forceinline float3 mf_sample_vndf(const float3 wi, const float2 alpha, const float randx, const float randy)
+ccl_device_forceinline float3 mf_sample_vndf(const float3 wi,
+                                             const float2 alpha,
+                                             const float randx,
+                                             const float randy)
 {
-	const float3 wi_11 = normalize(make_float3(alpha.x*wi.x, alpha.y*wi.y, wi.z));
-	const float2 slope_11 = mf_sampleP22_11(wi_11.z, randx, randy);
+  const float3 wi_11 = normalize(make_float3(alpha.x * wi.x, alpha.y * wi.y, wi.z));
+  const float2 slope_11 = mf_sampleP22_11(wi_11.z, randx, randy);
 
-	const float3 cossin_phi = safe_normalize(make_float3(wi_11.x, wi_11.y, 0.0f));
-	const float slope_x = alpha.x*(cossin_phi.x * slope_11.x - cossin_phi.y * slope_11.y);
-	const float slope_y = alpha.y*(cossin_phi.y * slope_11.x + cossin_phi.x * slope_11.y);
+  const float3 cossin_phi = safe_normalize(make_float3(wi_11.x, wi_11.y, 0.0f));
+  const float slope_x = alpha.x * (cossin_phi.x * slope_11.x - cossin_phi.y * slope_11.y);
+  const float slope_y = alpha.y * (cossin_phi.y * slope_11.x + cossin_phi.x * slope_11.y);
 
-	kernel_assert(isfinite(slope_x));
-	return normalize(make_float3(-slope_x, -slope_y, 1.0f));
+  kernel_assert(isfinite(slope_x));
+  return normalize(make_float3(-slope_x, -slope_y, 1.0f));
 }
 
 /* === Phase functions: Glossy and Glass === */
 
 /* Phase function for reflective materials. */
-ccl_device_forceinline float3 mf_sample_phase_glossy(const float3 wi, float3 *weight, const float3 wm)
+ccl_device_forceinline float3 mf_sample_phase_glossy(const float3 wi,
+                                                     float3 *weight,
+                                                     const float3 wm)
 {
-	return -wi + 2.0f * wm * dot(wi, wm);
+  return -wi + 2.0f * wm * dot(wi, wm);
 }
 
-ccl_device_forceinline float3 mf_eval_phase_glossy(const float3 w, const float lambda, const float3 wo, const float2 alpha)
+ccl_device_forceinline float3 mf_eval_phase_glossy(const float3 w,
+                                                   const float lambda,
+                                                   const float3 wo,
+                                                   const float2 alpha)
 {
-	if(w.z > 0.9999f)
-		return make_float3(0.0f, 0.0f, 0.0f);
+  if (w.z > 0.9999f)
+    return make_float3(0.0f, 0.0f, 0.0f);
 
-	const float3 wh = normalize(wo - w);
-	if(wh.z < 0.0f)
-		return make_float3(0.0f, 0.0f, 0.0f);
+  const float3 wh = normalize(wo - w);
+  if (wh.z < 0.0f)
+    return make_float3(0.0f, 0.0f, 0.0f);
 
-	float pArea = (w.z < -0.9999f)? 1.0f: lambda*w.z;
+  float pArea = (w.z < -0.9999f) ? 1.0f : lambda * w.z;
 
-	const float dotW_WH = dot(-w, wh);
-	if(dotW_WH < 0.0f)
-		return make_float3(0.0f, 0.0f, 0.0f);
+  const float dotW_WH = dot(-w, wh);
+  if (dotW_WH < 0.0f)
+    return make_float3(0.0f, 0.0f, 0.0f);
 
-	float phase = max(0.0f, dotW_WH) * 0.25f / max(pArea * dotW_WH, 1e-7f);
-	if(alpha.x == alpha.y)
-		phase *= D_ggx(wh, alpha.x);
-	else
-		phase *= D_ggx_aniso(wh, alpha);
+  float phase = max(0.0f, dotW_WH) * 0.25f / max(pArea * dotW_WH, 1e-7f);
+  if (alpha.x == alpha.y)
+    phase *= D_ggx(wh, alpha.x);
+  else
+    phase *= D_ggx_aniso(wh, alpha);
 
-	return make_float3(phase, phase, phase);
+  return make_float3(phase, phase, phase);
 }
 
 /* Phase function for dielectric transmissive materials, including both reflection and refraction according to the dielectric fresnel term. */
-ccl_device_forceinline float3 mf_sample_phase_glass(const float3 wi, const float eta, const float3 wm, const float randV, bool *outside)
-{
-	float cosI = dot(wi, wm);
-	float f = fresnel_dielectric_cos(cosI, eta);
-	if(randV < f) {
-		*outside = true;
-		return -wi + 2.0f * wm * cosI;
-	}
-	*outside = false;
-	float inv_eta = 1.0f/eta;
-	float cosT = -safe_sqrtf(1.0f - (1.0f - cosI*cosI) * inv_eta*inv_eta);
-	return normalize(wm*(cosI*inv_eta + cosT) - wi*inv_eta);
-}
-
-ccl_device_forceinline float3 mf_eval_phase_glass(const float3 w, const float lambda, const float3 wo, const bool wo_outside, const float2 alpha, const float eta)
-{
-	if(w.z > 0.9999f)
-		return make_float3(0.0f, 0.0f, 0.0f);
-
-	float pArea = (w.z < -0.9999f)? 1.0f: lambda*w.z;
-	float v;
-	if(wo_outside) {
-		const float3 wh = normalize(wo - w);
-		if(wh.z < 0.0f)
-			return make_float3(0.0f, 0.0f, 0.0f);
-
-		const float dotW_WH = dot(-w, wh);
-		v = fresnel_dielectric_cos(dotW_WH, eta) * max(0.0f, dotW_WH) * D_ggx(wh, alpha.x) * 0.25f / (pArea * dotW_WH);
-	}
-	else {
-		float3 wh = normalize(wo*eta - w);
-		if(wh.z < 0.0f)
-			wh = -wh;
-		const float dotW_WH = dot(-w, wh), dotWO_WH = dot(wo, wh);
-		if(dotW_WH < 0.0f)
-			return make_float3(0.0f, 0.0f, 0.0f);
-
-		float temp = dotW_WH + eta*dotWO_WH;
-		v = (1.0f - fresnel_dielectric_cos(dotW_WH, eta)) * max(0.0f, dotW_WH) * max(0.0f, -dotWO_WH) * D_ggx(wh, alpha.x) / (pArea * temp * temp);
-	}
-
-	return make_float3(v, v, v);
+ccl_device_forceinline float3 mf_sample_phase_glass(
+    const float3 wi, const float eta, const float3 wm, const float randV, bool *outside)
+{
+  float cosI = dot(wi, wm);
+  float f = fresnel_dielectric_cos(cosI, eta);
+  if (randV < f) {
+    *outside = true;
+    return -wi + 2.0f * wm * cosI;
+  }
+  *outside = false;
+  float inv_eta = 1.0f / eta;
+  float cosT = -safe_sqrtf(1.0f - (1.0f - cosI * cosI) * inv_eta * inv_eta);
+  return normalize(wm * (cosI * inv_eta + cosT) - wi * inv_eta);
+}
+
+ccl_device_forceinline float3 mf_eval_phase_glass(const float3 w,
+                                                  const float lambda,
+                                                  const float3 wo,
+                                                  const bool wo_outside,
+                                                  const float2 alpha,
+                                                  const float eta)
+{
+  if (w.z > 0.9999f)
+    return make_float3(0.0f, 0.0f, 0.0f);
+
+  float pArea = (w.z < -0.9999f) ? 1.0f : lambda * w.z;
+  float v;
+  if (wo_outside) {
+    const float3 wh = normalize(wo - w);
+    if (wh.z < 0.0f)
+      return make_float3(0.0f, 0.0f, 0.0f);
+
+    const float dotW_WH = dot(-w, wh);
+    v = fresnel_dielectric_cos(dotW_WH, eta) * max(0.0f, dotW_WH) * D_ggx(wh, alpha.x) * 0.25f /
+        (pArea * dotW_WH);
+  }
+  else {
+    float3 wh = normalize(wo * eta - w);
+    if (wh.z < 0.0f)
+      wh = -wh;
+    const float dotW_WH = dot(-w, wh), dotWO_WH = dot(wo, wh);
+    if (dotW_WH < 0.0f)
+      return make_float3(0.0f, 0.0f, 0.0f);
+
+    float temp = dotW_WH + eta * dotWO_WH;
+    v = (1.0f - fresnel_dielectric_cos(dotW_WH, eta)) * max(0.0f, dotW_WH) * max(0.0f, -dotWO_WH) *
+        D_ggx(wh, alpha.x) / (pArea * temp * temp);
+  }
+
+  return make_float3(v, v, v);
 }
 
 /* === Utility functions for the random walks === */
@@ -173,64 +192,65 @@ ccl_device_forceinline float3 mf_eval_phase_glass(const float3 w, const float la
 /* Smith Lambda function for GGX (based on page 12 of the supplemental implementation). */
 ccl_device_forceinline float mf_lambda(const float3 w, const float2 alpha)
 {
-	if(w.z > 0.9999f)
-		return 0.0f;
-	else if(w.z < -0.9999f)
-		return -0.9999f;
+  if (w.z > 0.9999f)
+    return 0.0f;
+  else if (w.z < -0.9999f)
+    return -0.9999f;
 
-	const float inv_wz2 = 1.0f / max(w.z*w.z, 1e-7f);
-	const float2 wa = make_float2(w.x, w.y)*alpha;
-	float v = sqrtf(1.0f + dot(wa, wa) * inv_wz2);
-	if(w.z <= 0.0f)
-		v = -v;
+  const float inv_wz2 = 1.0f / max(w.z * w.z, 1e-7f);
+  const float2 wa = make_float2(w.x, w.y) * alpha;
+  float v = sqrtf(1.0f + dot(wa, wa) * inv_wz2);
+  if (w.z <= 0.0f)
+    v = -v;
 
-	return 0.5f*(v - 1.0f);
+  return 0.5f * (v - 1.0f);
 }
 
 /* Height distribution CDF (based on page 4 of the supplemental implementation). */
 ccl_device_forceinline float mf_invC1(const float h)
 {
-	return 2.0f * saturate(h) - 1.0f;
+  return 2.0f * saturate(h) - 1.0f;
 }
 
 ccl_device_forceinline float mf_C1(const float h)
 {
-	return saturate(0.5f * (h + 1.0f));
+  return saturate(0.5f * (h + 1.0f));
 }
 
 /* Masking function (based on page 16 of the supplemental implementation). */
 ccl_device_forceinline float mf_G1(const float3 w, const float C1, const float lambda)
 {
-	if(w.z > 0.9999f)
-		return 1.0f;
-	if(w.z < 1e-5f)
-		return 0.0f;
-	return powf(C1, lambda);
+  if (w.z > 0.9999f)
+    return 1.0f;
+  if (w.z < 1e-5f)
+    return 0.0f;
+  return powf(C1, lambda);
 }
 
 /* Sampling from the visible height distribution (based on page 17 of the supplemental implementation). */
-ccl_device_forceinline bool mf_sample_height(const float3 w, float *h, float *C1, float *G1, float *lambda, const float U)
-{
-	if(w.z > 0.9999f)
-		return false;
-	if(w.z < -0.9999f) {
-		*C1 *= U;
-		*h = mf_invC1(*C1);
-		*G1 = mf_G1(w, *C1, *lambda);
-	}
-	else if(fabsf(w.z) >= 0.0001f) {
-		if(U > 1.0f - *G1)
-			return false;
-		if(*lambda >= 0.0f) {
-			*C1 = 1.0f;
-		}
-		else {
-			*C1 *= powf(1.0f-U, -1.0f / *lambda);
-		}
-		*h = mf_invC1(*C1);
-		*G1 = mf_G1(w, *C1, *lambda);
-	}
-	return true;
+ccl_device_forceinline bool mf_sample_height(
+    const float3 w, float *h, float *C1, float *G1, float *lambda, const float U)
+{
+  if (w.z > 0.9999f)
+    return false;
+  if (w.z < -0.9999f) {
+    *C1 *= U;
+    *h = mf_invC1(*C1);
+    *G1 = mf_G1(w, *C1, *lambda);
+  }
+  else if (fabsf(w.z) >= 0.0001f) {
+    if (U > 1.0f - *G1)
+      return false;
+    if (*lambda >= 0.0f) {
+      *C1 = 1.0f;
+    }
+    else {
+      *C1 *= powf(1.0f - U, -1.0f / *lambda);
+    }
+    *h = mf_invC1(*C1);
+    *G1 = mf_G1(w, *C1, *lambda);
+  }
+  return true;
 }
 
 /* === PDF approximations for the different phase functions. ===
@@ -240,80 +260,92 @@ ccl_device_forceinline bool mf_sample_height(const float3 w, float *h, float *C1
  * the missing energy is then approximated as a diffuse reflection for the PDF. */
 ccl_device_forceinline float mf_ggx_albedo(float r)
 {
-	float albedo = 0.806495f*expf(-1.98712f*r*r) + 0.199531f;
-	albedo -= ((((((1.76741f*r - 8.43891f)*r + 15.784f)*r - 14.398f)*r + 6.45221f)*r - 1.19722f)*r + 0.027803f)*r + 0.00568739f;
-	return saturate(albedo);
+  float albedo = 0.806495f * expf(-1.98712f * r * r) + 0.199531f;
+  albedo -= ((((((1.76741f * r - 8.43891f) * r + 15.784f) * r - 14.398f) * r + 6.45221f) * r -
+              1.19722f) *
+                 r +
+             0.027803f) *
+                r +
+            0.00568739f;
+  return saturate(albedo);
 }
 
 ccl_device_inline float mf_ggx_transmission_albedo(float a, float ior)
 {
-	if(ior < 1.0f) {
-		ior = 1.0f/ior;
-	}
-	a = saturate(a);
-	ior = clamp(ior, 1.0f, 3.0f);
-	float I_1 = 0.0476898f*expf(-0.978352f*(ior-0.65657f)*(ior-0.65657f)) - 0.033756f*ior + 0.993261f;
-	float R_1 = (((0.116991f*a - 0.270369f)*a + 0.0501366f)*a - 0.00411511f)*a + 1.00008f;
-	float I_2 = (((-2.08704f*ior + 26.3298f)*ior - 127.906f)*ior + 292.958f)*ior - 287.946f + 199.803f/(ior*ior) - 101.668f/(ior*ior*ior);
-	float R_2 = ((((5.3725f*a -24.9307f)*a + 22.7437f)*a - 3.40751f)*a + 0.0986325f)*a + 0.00493504f;
-
-	return saturate(1.0f + I_2*R_2*0.0019127f - (1.0f - I_1)*(1.0f - R_1)*9.3205f);
+  if (ior < 1.0f) {
+    ior = 1.0f / ior;
+  }
+  a = saturate(a);
+  ior = clamp(ior, 1.0f, 3.0f);
+  float I_1 = 0.0476898f * expf(-0.978352f * (ior - 0.65657f) * (ior - 0.65657f)) -
+              0.033756f * ior + 0.993261f;
+  float R_1 = (((0.116991f * a - 0.270369f) * a + 0.0501366f) * a - 0.00411511f) * a + 1.00008f;
+  float I_2 = (((-2.08704f * ior + 26.3298f) * ior - 127.906f) * ior + 292.958f) * ior - 287.946f +
+              199.803f / (ior * ior) - 101.668f / (ior * ior * ior);
+  float R_2 = ((((5.3725f * a - 24.9307f) * a + 22.7437f) * a - 3.40751f) * a + 0.0986325f) * a +
+              0.00493504f;
+
+  return saturate(1.0f + I_2 * R_2 * 0.0019127f - (1.0f - I_1) * (1.0f - R_1) * 9.3205f);
 }
 
 ccl_device_forceinline float mf_ggx_pdf(const float3 wi, const float3 wo, const float alpha)
 {
-	float D = D_ggx(normalize(wi+wo), alpha);
-	float lambda = mf_lambda(wi, make_float2(alpha, alpha));
-	float singlescatter = 0.25f * D / max((1.0f + lambda) * wi.z, 1e-7f);
+  float D = D_ggx(normalize(wi + wo), alpha);
+  float lambda = mf_lambda(wi, make_float2(alpha, alpha));
+  float singlescatter = 0.25f * D / max((1.0f + lambda) * wi.z, 1e-7f);
 
-	float multiscatter = wo.z * M_1_PI_F;
+  float multiscatter = wo.z * M_1_PI_F;
 
-	float albedo = mf_ggx_albedo(alpha);
-	return albedo*singlescatter + (1.0f - albedo)*multiscatter;
+  float albedo = mf_ggx_albedo(alpha);
+  return albedo * singlescatter + (1.0f - albedo) * multiscatter;
 }
 
 ccl_device_forceinline float mf_ggx_aniso_pdf(const float3 wi, const float3 wo, const float2 alpha)
 {
-	float D = D_ggx_aniso(normalize(wi+wo), alpha);
-	float lambda = mf_lambda(wi, alpha);
-	float singlescatter = 0.25f * D / max((1.0f + lambda) * wi.z, 1e-7f);
+  float D = D_ggx_aniso(normalize(wi + wo), alpha);
+  float lambda = mf_lambda(wi, alpha);
+  float singlescatter = 0.25f * D / max((1.0f + lambda) * wi.z, 1e-7f);
 
-	float multiscatter = wo.z * M_1_PI_F;
+  float multiscatter = wo.z * M_1_PI_F;
 
-	float albedo = mf_ggx_albedo(sqrtf(alpha.x*alpha.y));
-	return albedo*singlescatter + (1.0f - albedo)*multiscatter;
+  float albedo = mf_ggx_albedo(sqrtf(alpha.x * alpha.y));
+  return albedo * singlescatter + (1.0f - albedo) * multiscatter;
 }
 
-ccl_device_forceinline float mf_glass_pdf(const float3 wi, const float3 wo, const float alpha, const float eta)
+ccl_device_forceinline float mf_glass_pdf(const float3 wi,
+                                          const float3 wo,
+                                          const float alpha,
+                                          const float eta)
 {
-	bool reflective = (wi.z*wo.z > 0.0f);
-
-	float wh_len;
-	float3 wh = normalize_len(wi + (reflective? wo : (wo*eta)), &wh_len);
-	if(wh.z < 0.0f)
-		wh = -wh;
-	float3 r_wi = (wi.z < 0.0f)? -wi: wi;
-	float lambda = mf_lambda(r_wi, make_float2(alpha, alpha));
-	float D = D_ggx(wh, alpha);
-	float fresnel = fresnel_dielectric_cos(dot(r_wi, wh), eta);
-
-	float multiscatter = fabsf(wo.z * M_1_PI_F);
-	if(reflective) {
-		float singlescatter = 0.25f * D / max((1.0f + lambda) * r_wi.z, 1e-7f);
-		float albedo = mf_ggx_albedo(alpha);
-		return fresnel * (albedo*singlescatter + (1.0f - albedo)*multiscatter);
-	}
-	else {
-		float singlescatter = fabsf(dot(r_wi, wh)*dot(wo, wh) * D * eta*eta / max((1.0f + lambda) * r_wi.z * wh_len*wh_len, 1e-7f));
-		float albedo = mf_ggx_transmission_albedo(alpha, eta);
-		return (1.0f - fresnel) * (albedo*singlescatter + (1.0f - albedo)*multiscatter);
-	}
+  bool reflective = (wi.z * wo.z > 0.0f);
+
+  float wh_len;
+  float3 wh = normalize_len(wi + (reflective ? wo : (wo * eta)), &wh_len);
+  if (wh.z < 0.0f)
+    wh = -wh;
+  float3 r_wi = (wi.z < 0.0f) ? -wi : wi;
+  float lambda = mf_lambda(r_wi, make_float2(alpha, alpha));
+  float D = D_ggx(wh, alpha);
+  float fresnel = fresnel_dielectric_cos(dot(r_wi, wh), eta);
+
+  float multiscatter = fabsf(wo.z * M_1_PI_F);
+  if (reflective) {
+    float singlescatter = 0.25f * D / max((1.0f + lambda) * r_wi.z, 1e-7f);
+    float albedo = mf_ggx_albedo(alpha);
+    return fresnel * (albedo * singlescatter + (1.0f - albedo) * multiscatter);
+  }
+  else {
+    float singlescatter = fabsf(dot(r_wi, wh) * dot(wo, wh) * D * eta * eta /
+                                max((1.0f + lambda) * r_wi.z * wh_len * wh_len, 1e-7f));
+    float albedo = mf_ggx_transmission_albedo(alpha, eta);
+    return (1.0f - fresnel) * (albedo * singlescatter + (1.0f - albedo) * multiscatter);
+  }
 }
 
 /* === Actual random walk implementations, one version of mf_eval and mf_sample per phase function. === */
 
-#define MF_NAME_JOIN(x,y) x ## _ ## y
-#define MF_NAME_EVAL(x,y) MF_NAME_JOIN(x,y)
+#define MF_NAME_JOIN(x, y) x##_##y
+#define MF_NAME_EVAL(x, y) MF_NAME_JOIN(x, y)
 #define MF_FUNCTION_FULL_NAME(prefix) MF_NAME_EVAL(prefix, MF_PHASE_FUNCTION)
 
 #define MF_PHASE_FUNCTION glass
@@ -326,10 +358,10 @@ ccl_device_forceinline float mf_glass_pdf(const float3 wi, const float3 wo, cons
 
 ccl_device void bsdf_microfacet_multi_ggx_blur(ShaderClosure *sc, float roughness)
 {
-	MicrofacetBsdf *bsdf = (MicrofacetBsdf*)sc;
+  MicrofacetBsdf *bsdf = (MicrofacetBsdf *)sc;
 
-	bsdf->alpha_x = fmaxf(roughness, bsdf->alpha_x);
-	bsdf->alpha_y = fmaxf(roughness, bsdf->alpha_y);
+  bsdf->alpha_x = fmaxf(roughness, bsdf->alpha_x);
+  bsdf->alpha_y = fmaxf(roughness, bsdf->alpha_y);
 }
 
 /* === Closure implementations === */
@@ -338,293 +370,395 @@ ccl_device void bsdf_microfacet_multi_ggx_blur(ShaderClosure *sc, float roughnes
 
 ccl_device int bsdf_microfacet_multi_ggx_common_setup(MicrofacetBsdf *bsdf)
 {
-	bsdf->alpha_x = clamp(bsdf->alpha_x, 1e-4f, 1.0f);
-	bsdf->alpha_y = clamp(bsdf->alpha_y, 1e-4f, 1.0f);
-	bsdf->extra->color.x = saturate(bsdf->extra->color.x);
-	bsdf->extra->color.y = saturate(bsdf->extra->color.y);
-	bsdf->extra->color.z = saturate(bsdf->extra->color.z);
-	bsdf->extra->cspec0.x = saturate(bsdf->extra->cspec0.x);
-	bsdf->extra->cspec0.y = saturate(bsdf->extra->cspec0.y);
-	bsdf->extra->cspec0.z = saturate(bsdf->extra->cspec0.z);
+  bsdf->alpha_x = clamp(bsdf->alpha_x, 1e-4f, 1.0f);
+  bsdf->alpha_y = clamp(bsdf->alpha_y, 1e-4f, 1.0f);
+  bsdf->extra->color.x = saturate(bsdf->extra->color.x);
+  bsdf->extra->color.y = saturate(bsdf->extra->color.y);
+  bsdf->extra->color.z = saturate(bsdf->extra->color.z);
+  bsdf->extra->cspec0.x = saturate(bsdf->extra->cspec0.x);
+  bsdf->extra->cspec0.y = saturate(bsdf->extra->cspec0.y);
+  bsdf->extra->cspec0.z = saturate(bsdf->extra->cspec0.z);
 
-	return SD_BSDF|SD_BSDF_HAS_EVAL|SD_BSDF_NEEDS_LCG;
+  return SD_BSDF | SD_BSDF_HAS_EVAL | SD_BSDF_NEEDS_LCG;
 }
 
 ccl_device int bsdf_microfacet_multi_ggx_aniso_setup(MicrofacetBsdf *bsdf)
 {
-	if(is_zero(bsdf->T))
-		bsdf->T = make_float3(1.0f, 0.0f, 0.0f);
+  if (is_zero(bsdf->T))
+    bsdf->T = make_float3(1.0f, 0.0f, 0.0f);
 
-	bsdf->type = CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID;
+  bsdf->type = CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID;
 
-	return bsdf_microfacet_multi_ggx_common_setup(bsdf);
+  return bsdf_microfacet_multi_ggx_common_setup(bsdf);
 }
 
-ccl_device int bsdf_microfacet_multi_ggx_aniso_fresnel_setup(MicrofacetBsdf *bsdf, const ShaderData *sd)
+ccl_device int bsdf_microfacet_multi_ggx_aniso_fresnel_setup(MicrofacetBsdf *bsdf,
+                                                             const ShaderData *sd)
 {
-	if(is_zero(bsdf->T))
-		bsdf->T = make_float3(1.0f, 0.0f, 0.0f);
+  if (is_zero(bsdf->T))
+    bsdf->T = make_float3(1.0f, 0.0f, 0.0f);
 
-	bsdf->type = CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID;
+  bsdf->type = CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID;
 
-	float F0 = fresnel_dielectric_cos(1.0f, bsdf->ior);
-	float F = average(interpolate_fresnel_color(sd->I, bsdf->N, bsdf->ior, F0, bsdf->extra->cspec0));
-	bsdf->sample_weight *= F;
+  float F0 = fresnel_dielectric_cos(1.0f, bsdf->ior);
+  float F = average(interpolate_fresnel_color(sd->I, bsdf->N, bsdf->ior, F0, bsdf->extra->cspec0));
+  bsdf->sample_weight *= F;
 
-	return bsdf_microfacet_multi_ggx_common_setup(bsdf);
+  return bsdf_microfacet_multi_ggx_common_setup(bsdf);
 }
 
 ccl_device int bsdf_microfacet_multi_ggx_setup(MicrofacetBsdf *bsdf)
 {
-	bsdf->alpha_y = bsdf->alpha_x;
+  bsdf->alpha_y = bsdf->alpha_x;
 
-	bsdf->type = CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID;
+  bsdf->type = CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID;
 
-	return bsdf_microfacet_multi_ggx_common_setup(bsdf);
+  return bsdf_microfacet_multi_ggx_common_setup(bsdf);
 }
 
 ccl_device int bsdf_microfacet_multi_ggx_fresnel_setup(MicrofacetBsdf *bsdf, const ShaderData *sd)
 {
-	bsdf->alpha_y = bsdf->alpha_x;
+  bsdf->alpha_y = bsdf->alpha_x;
 
-	bsdf->type = CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID;
+  bsdf->type = CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID;
 
-	float F0 = fresnel_dielectric_cos(1.0f, bsdf->ior);
-	float F = average(interpolate_fresnel_color(sd->I, bsdf->N, bsdf->ior, F0, bsdf->extra->cspec0));
-	bsdf->sample_weight *= F;
+  float F0 = fresnel_dielectric_cos(1.0f, bsdf->ior);
+  float F = average(interpolate_fresnel_color(sd->I, bsdf->N, bsdf->ior, F0, bsdf->extra->cspec0));
+  bsdf->sample_weight *= F;
 
-	return bsdf_microfacet_multi_ggx_common_setup(bsdf);
+  return bsdf_microfacet_multi_ggx_common_setup(bsdf);
 }
 
 ccl_device int bsdf_microfacet_multi_ggx_refraction_setup(MicrofacetBsdf *bsdf)
 {
-	bsdf->alpha_y = bsdf->alpha_x;
-
-	bsdf->type = CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID;
+  bsdf->alpha_y = bsdf->alpha_x;
 
-	return bsdf_microfacet_multi_ggx_common_setup(bsdf);
-}
+  bsdf->type = CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID;
 
-ccl_device float3 bsdf_microfacet_multi_ggx_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf, ccl_addr_space uint *lcg_state) {
-	*pdf = 0.0f;
-	return make_float3(0.0f, 0.0f, 0.0f);
+  return bsdf_microfacet_multi_ggx_common_setup(bsdf);
 }
 
-ccl_device float3 bsdf_microfacet_multi_ggx_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf, ccl_addr_space uint *lcg_state) {
-	const MicrofacetBsdf *bsdf = (const MicrofacetBsdf*)sc;
-
-	if(bsdf->alpha_x*bsdf->alpha_y < 1e-7f) {
-		return make_float3(0.0f, 0.0f, 0.0f);
-	}
-
-	bool use_fresnel = (bsdf->type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID);
-
-	bool is_aniso = (bsdf->alpha_x != bsdf->alpha_y);
-	float3 X, Y, Z;
-	Z = bsdf->N;
-	if(is_aniso)
-		make_orthonormals_tangent(Z, bsdf->T, &X, &Y);
-	else
-		make_orthonormals(Z, &X, &Y);
-
-	float3 localI = make_float3(dot(I, X), dot(I, Y), dot(I, Z));
-	float3 localO = make_float3(dot(omega_in, X), dot(omega_in, Y), dot(omega_in, Z));
-
-	if(is_aniso)
-		*pdf = mf_ggx_aniso_pdf(localI, localO, make_float2(bsdf->alpha_x, bsdf->alpha_y));
-	else
-		*pdf = mf_ggx_pdf(localI, localO, bsdf->alpha_x);
-	return mf_eval_glossy(localI, localO, true, bsdf->extra->color, bsdf->alpha_x, bsdf->alpha_y, lcg_state, bsdf->ior, use_fresnel, bsdf->extra->cspec0);
+ccl_device float3 bsdf_microfacet_multi_ggx_eval_transmit(const ShaderClosure *sc,
+                                                          const float3 I,
+                                                          const float3 omega_in,
+                                                          float *pdf,
+                                                          ccl_addr_space uint *lcg_state)
+{
+  *pdf = 0.0f;
+  return make_float3(0.0f, 0.0f, 0.0f);
 }
 
-ccl_device int bsdf_microfacet_multi_ggx_sample(KernelGlobals *kg, const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf, ccl_addr_space uint *lcg_state)
+ccl_device float3 bsdf_microfacet_multi_ggx_eval_reflect(const ShaderClosure *sc,
+                                                         const float3 I,
+                                                         const float3 omega_in,
+                                                         float *pdf,
+                                                         ccl_addr_space uint *lcg_state)
 {
-	const MicrofacetBsdf *bsdf = (const MicrofacetBsdf*)sc;
+  const MicrofacetBsdf *bsdf = (const MicrofacetBsdf *)sc;
+
+  if (bsdf->alpha_x * bsdf->alpha_y < 1e-7f) {
+    return make_float3(0.0f, 0.0f, 0.0f);
+  }
+
+  bool use_fresnel = (bsdf->type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID);
+
+  bool is_aniso = (bsdf->alpha_x != bsdf->alpha_y);
+  float3 X, Y, Z;
+  Z = bsdf->N;
+  if (is_aniso)
+    make_orthonormals_tangent(Z, bsdf->T, &X, &Y);
+  else
+    make_orthonormals(Z, &X, &Y);
+
+  float3 localI = make_float3(dot(I, X), dot(I, Y), dot(I, Z));
+  float3 localO = make_float3(dot(omega_in, X), dot(omega_in, Y), dot(omega_in, Z));
+
+  if (is_aniso)
+    *pdf = mf_ggx_aniso_pdf(localI, localO, make_float2(bsdf->alpha_x, bsdf->alpha_y));
+  else
+    *pdf = mf_ggx_pdf(localI, localO, bsdf->alpha_x);
+  return mf_eval_glossy(localI,
+                        localO,
+                        true,
+                        bsdf->extra->color,
+                        bsdf->alpha_x,
+                        bsdf->alpha_y,
+                        lcg_state,
+                        bsdf->ior,
+                        use_fresnel,
+                        bsdf->extra->cspec0);
+}
+
+ccl_device int bsdf_microfacet_multi_ggx_sample(KernelGlobals *kg,
+                                                const ShaderClosure *sc,
+                                                float3 Ng,
+                                                float3 I,
+                                                float3 dIdx,
+                                                float3 dIdy,
+                                                float randu,
+                                                float randv,
+                                                float3 *eval,
+                                                float3 *omega_in,
+                                                float3 *domega_in_dx,
+                                                float3 *domega_in_dy,
+                                                float *pdf,
+                                                ccl_addr_space uint *lcg_state)
+{
+  const MicrofacetBsdf *bsdf = (const MicrofacetBsdf *)sc;
 
-	float3 X, Y, Z;
-	Z = bsdf->N;
+  float3 X, Y, Z;
+  Z = bsdf->N;
 
-	if(bsdf->alpha_x*bsdf->alpha_y < 1e-7f) {
-		*omega_in = 2*dot(Z, I)*Z - I;
-		*pdf = 1e6f;
-		*eval = make_float3(1e6f, 1e6f, 1e6f);
+  if (bsdf->alpha_x * bsdf->alpha_y < 1e-7f) {
+    *omega_in = 2 * dot(Z, I) * Z - I;
+    *pdf = 1e6f;
+    *eval = make_float3(1e6f, 1e6f, 1e6f);
 #ifdef __RAY_DIFFERENTIALS__
-		*domega_in_dx = (2 * dot(Z, dIdx)) * Z - dIdx;
-		*domega_in_dy = (2 * dot(Z, dIdy)) * Z - dIdy;
+    *domega_in_dx = (2 * dot(Z, dIdx)) * Z - dIdx;
+    *domega_in_dy = (2 * dot(Z, dIdy)) * Z - dIdy;
 #endif
-		return LABEL_REFLECT|LABEL_SINGULAR;
-	}
-
-	bool use_fresnel = (bsdf->type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID);
-
-	bool is_aniso = (bsdf->alpha_x != bsdf->alpha_y);
-	if(is_aniso)
-		make_orthonormals_tangent(Z, bsdf->T, &X, &Y);
-	else
-		make_orthonormals(Z, &X, &Y);
-
-	float3 localI = make_float3(dot(I, X), dot(I, Y), dot(I, Z));
-	float3 localO;
-
-	*eval = mf_sample_glossy(localI, &localO, bsdf->extra->color, bsdf->alpha_x, bsdf->alpha_y, lcg_state, bsdf->ior, use_fresnel, bsdf->extra->cspec0);
-	if(is_aniso)
-		*pdf = mf_ggx_aniso_pdf(localI, localO, make_float2(bsdf->alpha_x, bsdf->alpha_y));
-	else
-		*pdf = mf_ggx_pdf(localI, localO, bsdf->alpha_x);
-	*eval *= *pdf;
-
-	*omega_in = X*localO.x + Y*localO.y + Z*localO.z;
+    return LABEL_REFLECT | LABEL_SINGULAR;
+  }
+
+  bool use_fresnel = (bsdf->type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID);
+
+  bool is_aniso = (bsdf->alpha_x != bsdf->alpha_y);
+  if (is_aniso)
+    make_orthonormals_tangent(Z, bsdf->T, &X, &Y);
+  else
+    make_orthonormals(Z, &X, &Y);
+
+  float3 localI = make_float3(dot(I, X), dot(I, Y), dot(I, Z));
+  float3 localO;
+
+  *eval = mf_sample_glossy(localI,
+                           &localO,
+                           bsdf->extra->color,
+                           bsdf->alpha_x,
+                           bsdf->alpha_y,
+                           lcg_state,
+                           bsdf->ior,
+                           use_fresnel,
+                           bsdf->extra->cspec0);
+  if (is_aniso)
+    *pdf = mf_ggx_aniso_pdf(localI, localO, make_float2(bsdf->alpha_x, bsdf->alpha_y));
+  else
+    *pdf = mf_ggx_pdf(localI, localO, bsdf->alpha_x);
+  *eval *= *pdf;
+
+  *omega_in = X * localO.x + Y * localO.y + Z * localO.z;
 
 #ifdef __RAY_DIFFERENTIALS__
-	*domega_in_dx = (2 * dot(Z, dIdx)) * Z - dIdx;
-	*domega_in_dy = (2 * dot(Z, dIdy)) * Z - dIdy;
+  *domega_in_dx = (2 * dot(Z, dIdx)) * Z - dIdx;
+  *domega_in_dy = (2 * dot(Z, dIdy)) * Z - dIdy;
 #endif
-	return LABEL_REFLECT|LABEL_GLOSSY;
+  return LABEL_REFLECT | LABEL_GLOSSY;
 }
 
 /* Multiscattering GGX Glass closure */
 
 ccl_device int bsdf_microfacet_multi_ggx_glass_setup(MicrofacetBsdf *bsdf)
 {
-	bsdf->alpha_x = clamp(bsdf->alpha_x, 1e-4f, 1.0f);
-	bsdf->alpha_y = bsdf->alpha_x;
-	bsdf->ior = max(0.0f, bsdf->ior);
-	bsdf->extra->color.x = saturate(bsdf->extra->color.x);
-	bsdf->extra->color.y = saturate(bsdf->extra->color.y);
-	bsdf->extra->color.z = saturate(bsdf->extra->color.z);
+  bsdf->alpha_x = clamp(bsdf->alpha_x, 1e-4f, 1.0f);
+  bsdf->alpha_y = bsdf->alpha_x;
+  bsdf->ior = max(0.0f, bsdf->ior);
+  bsdf->extra->color.x = saturate(bsdf->extra->color.x);
+  bsdf->extra->color.y = saturate(bsdf->extra->color.y);
+  bsdf->extra->color.z = saturate(bsdf->extra->color.z);
 
-	bsdf->type = CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID;
+  bsdf->type = CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID;
 
-	return SD_BSDF|SD_BSDF_HAS_EVAL|SD_BSDF_NEEDS_LCG;
+  return SD_BSDF | SD_BSDF_HAS_EVAL | SD_BSDF_NEEDS_LCG;
 }
 
-ccl_device int bsdf_microfacet_multi_ggx_glass_fresnel_setup(MicrofacetBsdf *bsdf, const ShaderData *sd)
+ccl_device int bsdf_microfacet_multi_ggx_glass_fresnel_setup(MicrofacetBsdf *bsdf,
+                                                             const ShaderData *sd)
 {
-	bsdf->alpha_x = clamp(bsdf->alpha_x, 1e-4f, 1.0f);
-	bsdf->alpha_y = bsdf->alpha_x;
-	bsdf->ior = max(0.0f, bsdf->ior);
-	bsdf->extra->color.x = saturate(bsdf->extra->color.x);
-	bsdf->extra->color.y = saturate(bsdf->extra->color.y);
-	bsdf->extra->color.z = saturate(bsdf->extra->color.z);
-	bsdf->extra->cspec0.x = saturate(bsdf->extra->cspec0.x);
-	bsdf->extra->cspec0.y = saturate(bsdf->extra->cspec0.y);
-	bsdf->extra->cspec0.z = saturate(bsdf->extra->cspec0.z);
-
-	bsdf->type = CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID;
-
-	float F0 = fresnel_dielectric_cos(1.0f, bsdf->ior);
-	float F = average(interpolate_fresnel_color(sd->I, bsdf->N, bsdf->ior, F0, bsdf->extra->cspec0));
-	bsdf->sample_weight *= F;
-
-	return SD_BSDF|SD_BSDF_HAS_EVAL|SD_BSDF_NEEDS_LCG;
-}
-
-ccl_device float3 bsdf_microfacet_multi_ggx_glass_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf, ccl_addr_space uint *lcg_state) {
-	const MicrofacetBsdf *bsdf = (const MicrofacetBsdf*)sc;
-
-	if(bsdf->alpha_x*bsdf->alpha_y < 1e-7f) {
-		return make_float3(0.0f, 0.0f, 0.0f);
-	}
-
-	float3 X, Y, Z;
-	Z = bsdf->N;
-	make_orthonormals(Z, &X, &Y);
-
-	float3 localI = make_float3(dot(I, X), dot(I, Y), dot(I, Z));
-	float3 localO = make_float3(dot(omega_in, X), dot(omega_in, Y), dot(omega_in, Z));
-
-	*pdf = mf_glass_pdf(localI, localO, bsdf->alpha_x, bsdf->ior);
-	return mf_eval_glass(localI, localO, false, bsdf->extra->color, bsdf->alpha_x, bsdf->alpha_y, lcg_state, bsdf->ior, false, bsdf->extra->color);
-}
-
-ccl_device float3 bsdf_microfacet_multi_ggx_glass_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf, ccl_addr_space uint *lcg_state) {
-	const MicrofacetBsdf *bsdf = (const MicrofacetBsdf*)sc;
-
-	if(bsdf->alpha_x*bsdf->alpha_y < 1e-7f) {
-		return make_float3(0.0f, 0.0f, 0.0f);
-	}
-
-	bool use_fresnel = (bsdf->type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID);
-
-	float3 X, Y, Z;
-	Z = bsdf->N;
-	make_orthonormals(Z, &X, &Y);
-
-	float3 localI = make_float3(dot(I, X), dot(I, Y), dot(I, Z));
-	float3 localO = make_float3(dot(omega_in, X), dot(omega_in, Y), dot(omega_in, Z));
-
-	*pdf = mf_glass_pdf(localI, localO, bsdf->alpha_x, bsdf->ior);
-	return mf_eval_glass(localI, localO, true, bsdf->extra->color, bsdf->alpha_x, bsdf->alpha_y, lcg_state, bsdf->ior, use_fresnel, bsdf->extra->cspec0);
-}
-
-ccl_device int bsdf_microfacet_multi_ggx_glass_sample(KernelGlobals *kg, const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf, ccl_addr_space uint *lcg_state)
+  bsdf->alpha_x = clamp(bsdf->alpha_x, 1e-4f, 1.0f);
+  bsdf->alpha_y = bsdf->alpha_x;
+  bsdf->ior = max(0.0f, bsdf->ior);
+  bsdf->extra->color.x = saturate(bsdf->extra->color.x);
+  bsdf->extra->color.y = saturate(bsdf->extra->color.y);
+  bsdf->extra->color.z = saturate(bsdf->extra->color.z);
+  bsdf->extra->cspec0.x = saturate(bsdf->extra->cspec0.x);
+  bsdf->extra->cspec0.y = saturate(bsdf->extra->cspec0.y);
+  bsdf->extra->cspec0.z = saturate(bsdf->extra->cspec0.z);
+
+  bsdf->type = CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID;
+
+  float F0 = fresnel_dielectric_cos(1.0f, bsdf->ior);
+  float F = average(interpolate_fresnel_color(sd->I, bsdf->N, bsdf->ior, F0, bsdf->extra->cspec0));
+  bsdf->sample_weight *= F;
+
+  return SD_BSDF | SD_BSDF_HAS_EVAL | SD_BSDF_NEEDS_LCG;
+}
+
+ccl_device float3 bsdf_microfacet_multi_ggx_glass_eval_transmit(const ShaderClosure *sc,
+                                                                const float3 I,
+                                                                const float3 omega_in,
+                                                                float *pdf,
+                                                                ccl_addr_space uint *lcg_state)
+{
+  const MicrofacetBsdf *bsdf = (const MicrofacetBsdf *)sc;
+
+  if (bsdf->alpha_x * bsdf->alpha_y < 1e-7f) {
+    return make_float3(0.0f, 0.0f, 0.0f);
+  }
+
+  float3 X, Y, Z;
+  Z = bsdf->N;
+  make_orthonormals(Z, &X, &Y);
+
+  float3 localI = make_float3(dot(I, X), dot(I, Y), dot(I, Z));
+  float3 localO = make_float3(dot(omega_in, X), dot(omega_in, Y), dot(omega_in, Z));
+
+  *pdf = mf_glass_pdf(localI, localO, bsdf->alpha_x, bsdf->ior);
+  return mf_eval_glass(localI,
+                       localO,
+                       false,
+                       bsdf->extra->color,
+                       bsdf->alpha_x,
+                       bsdf->alpha_y,
+                       lcg_state,
+                       bsdf->ior,
+                       false,
+                       bsdf->extra->color);
+}
+
+ccl_device float3 bsdf_microfacet_multi_ggx_glass_eval_reflect(const ShaderClosure *sc,
+                                                               const float3 I,
+                                                               const float3 omega_in,
+                                                               float *pdf,
+                                                               ccl_addr_space uint *lcg_state)
+{
+  const MicrofacetBsdf *bsdf = (const MicrofacetBsdf *)sc;
+
+  if (bsdf->alpha_x * bsdf->alpha_y < 1e-7f) {
+    return make_float3(0.0f, 0.0f, 0.0f);
+  }
+
+  bool use_fresnel = (bsdf->type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID);
+
+  float3 X, Y, Z;
+  Z = bsdf->N;
+  make_orthonormals(Z, &X, &Y);
+
+  float3 localI = make_float3(dot(I, X), dot(I, Y), dot(I, Z));
+  float3 localO = make_float3(dot(omega_in, X), dot(omega_in, Y), dot(omega_in, Z));
+
+  *pdf = mf_glass_pdf(localI, localO, bsdf->alpha_x, bsdf->ior);
+  return mf_eval_glass(localI,
+                       localO,
+                       true,
+                       bsdf->extra->color,
+                       bsdf->alpha_x,
+                       bsdf->alpha_y,
+                       lcg_state,
+                       bsdf->ior,
+                       use_fresnel,
+                       bsdf->extra->cspec0);
+}
+
+ccl_device int bsdf_microfacet_multi_ggx_glass_sample(KernelGlobals *kg,
+                                                      const ShaderClosure *sc,
+                                                      float3 Ng,
+                                                      float3 I,
+                                                      float3 dIdx,
+                                                      float3 dIdy,
+                                                      float randu,
+                                                      float randv,
+                                                      float3 *eval,
+                                                      float3 *omega_in,
+                                                      float3 *domega_in_dx,
+                                                      float3 *domega_in_dy,
+                                                      float *pdf,
+                                                      ccl_addr_space uint *lcg_state)
 {
-	const MicrofacetBsdf *bsdf = (const MicrofacetBsdf*)sc;
+  const MicrofacetBsdf *bsdf = (const MicrofacetBsdf *)sc;
 
-	float3 X, Y, Z;
-	Z = bsdf->N;
+  float3 X, Y, Z;
+  Z = bsdf->N;
 
-	if(bsdf->alpha_x*bsdf->alpha_y < 1e-7f) {
-		float3 R, T;
+  if (bsdf->alpha_x * bsdf->alpha_y < 1e-7f) {
+    float3 R, T;
 #ifdef __RAY_DIFFERENTIALS__
-		float3 dRdx, dRdy, dTdx, dTdy;
+    float3 dRdx, dRdy, dTdx, dTdy;
 #endif
-		bool inside;
-		float fresnel = fresnel_dielectric(bsdf->ior, Z, I, &R, &T,
+    bool inside;
+    float fresnel = fresnel_dielectric(bsdf->ior,
+                                       Z,
+                                       I,
+                                       &R,
+                                       &T,
 #ifdef __RAY_DIFFERENTIALS__
-		                dIdx, dIdy, &dRdx, &dRdy, &dTdx, &dTdy,
+                                       dIdx,
+                                       dIdy,
+                                       &dRdx,
+                                       &dRdy,
+                                       &dTdx,
+                                       &dTdy,
 #endif
-		                &inside);
+                                       &inside);
 
-		*pdf = 1e6f;
-		*eval = make_float3(1e6f, 1e6f, 1e6f);
-		if(randu < fresnel) {
-			*omega_in = R;
+    *pdf = 1e6f;
+    *eval = make_float3(1e6f, 1e6f, 1e6f);
+    if (randu < fresnel) {
+      *omega_in = R;
 #ifdef __RAY_DIFFERENTIALS__
-			*domega_in_dx = dRdx;
-			*domega_in_dy = dRdy;
+      *domega_in_dx = dRdx;
+      *domega_in_dy = dRdy;
 #endif
-			return LABEL_REFLECT|LABEL_SINGULAR;
-		}
-		else {
-			*omega_in = T;
+      return LABEL_REFLECT | LABEL_SINGULAR;
+    }
+    else {
+      *omega_in = T;
 #ifdef __RAY_DIFFERENTIALS__
-			*domega_in_dx = dTdx;
-			*domega_in_dy = dTdy;
+      *domega_in_dx = dTdx;
+      *domega_in_dy = dTdy;
 #endif
-			return LABEL_TRANSMIT|LABEL_SINGULAR;
-		}
-	}
-
-	bool use_fresnel = (bsdf->type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID);
-
-	make_orthonormals(Z, &X, &Y);
-
-	float3 localI = make_float3(dot(I, X), dot(I, Y), dot(I, Z));
-	float3 localO;
-
-	*eval = mf_sample_glass(localI, &localO, bsdf->extra->color, bsdf->alpha_x, bsdf->alpha_y, lcg_state, bsdf->ior, use_fresnel, bsdf->extra->cspec0);
-	*pdf = mf_glass_pdf(localI, localO, bsdf->alpha_x, bsdf->ior);
-	*eval *= *pdf;
-
-	*omega_in = X*localO.x + Y*localO.y + Z*localO.z;
-	if(localO.z*localI.z > 0.0f) {
+      return LABEL_TRANSMIT | LABEL_SINGULAR;
+    }
+  }
+
+  bool use_fresnel = (bsdf->type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID);
+
+  make_orthonormals(Z, &X, &Y);
+
+  float3 localI = make_float3(dot(I, X), dot(I, Y), dot(I, Z));
+  float3 localO;
+
+  *eval = mf_sample_glass(localI,
+                          &localO,
+                          bsdf->extra->color,
+                          bsdf->alpha_x,
+                          bsdf->alpha_y,
+                          lcg_state,
+                          bsdf->ior,
+                          use_fresnel,
+                          bsdf->extra->cspec0);
+  *pdf = mf_glass_pdf(localI, localO, bsdf->alpha_x, bsdf->ior);
+  *eval *= *pdf;
+
+  *omega_in = X * localO.x + Y * localO.y + Z * localO.z;
+  if (localO.z * localI.z > 0.0f) {
 #ifdef __RAY_DIFFERENTIALS__
-		*domega_in_dx = (2 * dot(Z, dIdx)) * Z - dIdx;
-		*domega_in_dy = (2 * dot(Z, dIdy)) * Z - dIdy;
+    *domega_in_dx = (2 * dot(Z, dIdx)) * Z - dIdx;
+    *domega_in_dy = (2 * dot(Z, dIdy)) * Z - dIdy;
 #endif
-		return LABEL_REFLECT|LABEL_GLOSSY;
-	}
-	else {
+    return LABEL_REFLECT | LABEL_GLOSSY;
+  }
+  else {
 #ifdef __RAY_DIFFERENTIALS__
-		float cosI = dot(Z, I);
-		float dnp = max(sqrtf(1.0f - (bsdf->ior * bsdf->ior * (1.0f - cosI*cosI))), 1e-7f);
-		*domega_in_dx = -(bsdf->ior * dIdx) + ((bsdf->ior - bsdf->ior * bsdf->ior * cosI / dnp) * dot(dIdx, Z)) * Z;
-		*domega_in_dy = -(bsdf->ior * dIdy) + ((bsdf->ior - bsdf->ior * bsdf->ior * cosI / dnp) * dot(dIdy, Z)) * Z;
+    float cosI = dot(Z, I);
+    float dnp = max(sqrtf(1.0f - (bsdf->ior * bsdf->ior * (1.0f - cosI * cosI))), 1e-7f);
+    *domega_in_dx = -(bsdf->ior * dIdx) +
+                    ((bsdf->ior - bsdf->ior * bsdf->ior * cosI / dnp) * dot(dIdx, Z)) * Z;
+    *domega_in_dy = -(bsdf->ior * dIdy) +
+                    ((bsdf->ior - bsdf->ior * bsdf->ior * cosI / dnp) * dot(dIdy, Z)) * Z;
 #endif
 
-		return LABEL_TRANSMIT|LABEL_GLOSSY;
-	}
+    return LABEL_TRANSMIT | LABEL_GLOSSY;
+  }
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/closure/bsdf_microfacet_multi_impl.h b/intern/cycles/kernel/closure/bsdf_microfacet_multi_impl.h
index 5d300ef6db5..79247ee8057 100644
--- a/intern/cycles/kernel/closure/bsdf_microfacet_multi_impl.h
+++ b/intern/cycles/kernel/closure/bsdf_microfacet_multi_impl.h
@@ -25,247 +25,251 @@
  * energy is used. In combination with MIS, that is enough to produce an unbiased result, although
  * the balance heuristic isn't necessarily optimal anymore.
  */
-ccl_device_forceinline float3 MF_FUNCTION_FULL_NAME(mf_eval)(
-	float3 wi,
-	float3 wo,
-	const bool wo_outside,
-	const float3 color,
-	const float alpha_x,
-	const float alpha_y,
-	ccl_addr_space uint *lcg_state,
-	const float eta,
-	bool use_fresnel,
-	const float3 cspec0)
+ccl_device_forceinline float3 MF_FUNCTION_FULL_NAME(mf_eval)(float3 wi,
+                                                             float3 wo,
+                                                             const bool wo_outside,
+                                                             const float3 color,
+                                                             const float alpha_x,
+                                                             const float alpha_y,
+                                                             ccl_addr_space uint *lcg_state,
+                                                             const float eta,
+                                                             bool use_fresnel,
+                                                             const float3 cspec0)
 {
-	/* Evaluating for a shallower incoming direction produces less noise, and the properties of the BSDF guarantee reciprocity. */
-	bool swapped = false;
+  /* Evaluating for a shallower incoming direction produces less noise, and the properties of the BSDF guarantee reciprocity. */
+  bool swapped = false;
 #ifdef MF_MULTI_GLASS
-	if(wi.z*wo.z < 0.0f) {
-		/* Glass transmission is a special case and requires the directions to change hemisphere. */
-		if(-wo.z < wi.z) {
-			swapped = true;
-			float3 tmp = -wo;
-			wo = -wi;
-			wi = tmp;
-		}
-	}
-	else
+  if (wi.z * wo.z < 0.0f) {
+    /* Glass transmission is a special case and requires the directions to change hemisphere. */
+    if (-wo.z < wi.z) {
+      swapped = true;
+      float3 tmp = -wo;
+      wo = -wi;
+      wi = tmp;
+    }
+  }
+  else
 #endif
-	if(wo.z < wi.z) {
-		swapped = true;
-		float3 tmp = wo;
-		wo = wi;
-		wi = tmp;
-	}
+      if (wo.z < wi.z) {
+    swapped = true;
+    float3 tmp = wo;
+    wo = wi;
+    wi = tmp;
+  }
 
-	if(wi.z < 1e-5f || (wo.z < 1e-5f && wo_outside) || (wo.z > -1e-5f && !wo_outside))
-		return make_float3(0.0f, 0.0f, 0.0f);
+  if (wi.z < 1e-5f || (wo.z < 1e-5f && wo_outside) || (wo.z > -1e-5f && !wo_outside))
+    return make_float3(0.0f, 0.0f, 0.0f);
 
-	const float2 alpha = make_float2(alpha_x, alpha_y);
+  const float2 alpha = make_float2(alpha_x, alpha_y);
 
-	float lambda_r = mf_lambda(-wi, alpha);
-	float shadowing_lambda = mf_lambda(wo_outside? wo: -wo, alpha);
+  float lambda_r = mf_lambda(-wi, alpha);
+  float shadowing_lambda = mf_lambda(wo_outside ? wo : -wo, alpha);
 
-	/* Analytically compute single scattering for lower noise. */
-	float3 eval;
-	float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
-	const float3 wh = normalize(wi+wo);
+  /* Analytically compute single scattering for lower noise. */
+  float3 eval;
+  float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
+  const float3 wh = normalize(wi + wo);
 #ifdef MF_MULTI_GLASS
-	eval = mf_eval_phase_glass(-wi, lambda_r, wo, wo_outside, alpha, eta);
-	if(wo_outside)
-		eval *= -lambda_r / (shadowing_lambda - lambda_r);
-	else
-		eval *= -lambda_r * beta(-lambda_r, shadowing_lambda+1.0f);
-#else  /* MF_MULTI_GLOSSY */
-	const float G2 = 1.0f / (1.0f - (lambda_r + 1.0f) + shadowing_lambda);
-	float val = G2 * 0.25f / wi.z;
-	if(alpha.x == alpha.y)
-		val *= D_ggx(wh, alpha.x);
-	else
-		val *= D_ggx_aniso(wh, alpha);
-	eval = make_float3(val, val, val);
+  eval = mf_eval_phase_glass(-wi, lambda_r, wo, wo_outside, alpha, eta);
+  if (wo_outside)
+    eval *= -lambda_r / (shadowing_lambda - lambda_r);
+  else
+    eval *= -lambda_r * beta(-lambda_r, shadowing_lambda + 1.0f);
+#else /* MF_MULTI_GLOSSY */
+  const float G2 = 1.0f / (1.0f - (lambda_r + 1.0f) + shadowing_lambda);
+  float val = G2 * 0.25f / wi.z;
+  if (alpha.x == alpha.y)
+    val *= D_ggx(wh, alpha.x);
+  else
+    val *= D_ggx_aniso(wh, alpha);
+  eval = make_float3(val, val, val);
 #endif
 
-	float F0 = fresnel_dielectric_cos(1.0f, eta);
-	if(use_fresnel) {
-		throughput = interpolate_fresnel_color(wi, wh, eta, F0, cspec0);
+  float F0 = fresnel_dielectric_cos(1.0f, eta);
+  if (use_fresnel) {
+    throughput = interpolate_fresnel_color(wi, wh, eta, F0, cspec0);
 
-		eval *= throughput;
-	}
+    eval *= throughput;
+  }
 
-	float3 wr = -wi;
-	float hr = 1.0f;
-	float C1_r = 1.0f;
-	float G1_r = 0.0f;
-	bool outside = true;
+  float3 wr = -wi;
+  float hr = 1.0f;
+  float C1_r = 1.0f;
+  float G1_r = 0.0f;
+  bool outside = true;
 
-	for(int order = 0; order < 10; order++) {
-		/* Sample microfacet height. */
-		float height_rand = lcg_step_float_addrspace(lcg_state);
-		if(!mf_sample_height(wr, &hr, &C1_r, &G1_r, &lambda_r, height_rand))
-			break;
-		/* Sample microfacet normal. */
-		float vndf_rand_y = lcg_step_float_addrspace(lcg_state);
-		float vndf_rand_x = lcg_step_float_addrspace(lcg_state);
-		float3 wm = mf_sample_vndf(-wr, alpha, vndf_rand_x, vndf_rand_y);
+  for (int order = 0; order < 10; order++) {
+    /* Sample microfacet height. */
+    float height_rand = lcg_step_float_addrspace(lcg_state);
+    if (!mf_sample_height(wr, &hr, &C1_r, &G1_r, &lambda_r, height_rand))
+      break;
+    /* Sample microfacet normal. */
+    float vndf_rand_y = lcg_step_float_addrspace(lcg_state);
+    float vndf_rand_x = lcg_step_float_addrspace(lcg_state);
+    float3 wm = mf_sample_vndf(-wr, alpha, vndf_rand_x, vndf_rand_y);
 
 #ifdef MF_MULTI_GLASS
-		if(order == 0 && use_fresnel) {
-			/* Evaluate amount of scattering towards wo on this microfacet. */
-			float3 phase;
-			if(outside)
-				phase = mf_eval_phase_glass(wr, lambda_r, wo, wo_outside, alpha, eta);
-			else
-				phase = mf_eval_phase_glass(wr, lambda_r, -wo, !wo_outside, alpha, 1.0f / eta);
+    if (order == 0 && use_fresnel) {
+      /* Evaluate amount of scattering towards wo on this microfacet. */
+      float3 phase;
+      if (outside)
+        phase = mf_eval_phase_glass(wr, lambda_r, wo, wo_outside, alpha, eta);
+      else
+        phase = mf_eval_phase_glass(wr, lambda_r, -wo, !wo_outside, alpha, 1.0f / eta);
 
-			eval = throughput * phase * mf_G1(wo_outside ? wo : -wo, mf_C1((outside == wo_outside) ? hr : -hr), shadowing_lambda);
-		}
+      eval = throughput * phase *
+             mf_G1(wo_outside ? wo : -wo,
+                   mf_C1((outside == wo_outside) ? hr : -hr),
+                   shadowing_lambda);
+    }
 #endif
-		if(order > 0) {
-			/* Evaluate amount of scattering towards wo on this microfacet. */
-			float3 phase;
+    if (order > 0) {
+      /* Evaluate amount of scattering towards wo on this microfacet. */
+      float3 phase;
 #ifdef MF_MULTI_GLASS
-			if(outside)
-				phase = mf_eval_phase_glass(wr, lambda_r,  wo,  wo_outside, alpha, eta);
-			else
-				phase = mf_eval_phase_glass(wr, lambda_r, -wo, !wo_outside, alpha, 1.0f/eta);
-#else  /* MF_MULTI_GLOSSY */
-			phase = mf_eval_phase_glossy(wr, lambda_r, wo, alpha) * throughput;
+      if (outside)
+        phase = mf_eval_phase_glass(wr, lambda_r, wo, wo_outside, alpha, eta);
+      else
+        phase = mf_eval_phase_glass(wr, lambda_r, -wo, !wo_outside, alpha, 1.0f / eta);
+#else /* MF_MULTI_GLOSSY */
+      phase = mf_eval_phase_glossy(wr, lambda_r, wo, alpha) * throughput;
 #endif
-			eval += throughput * phase * mf_G1(wo_outside? wo: -wo, mf_C1((outside == wo_outside)? hr: -hr), shadowing_lambda);
-		}
-		if(order+1 < 10) {
-			/* Bounce from the microfacet. */
+      eval += throughput * phase *
+              mf_G1(wo_outside ? wo : -wo,
+                    mf_C1((outside == wo_outside) ? hr : -hr),
+                    shadowing_lambda);
+    }
+    if (order + 1 < 10) {
+      /* Bounce from the microfacet. */
 #ifdef MF_MULTI_GLASS
-			bool next_outside;
-			float3 wi_prev = -wr;
-			float phase_rand = lcg_step_float_addrspace(lcg_state);
-			wr = mf_sample_phase_glass(-wr, outside? eta: 1.0f/eta, wm, phase_rand, &next_outside);
-			if(!next_outside) {
-				outside = !outside;
-				wr = -wr;
-				hr = -hr;
-			}
+      bool next_outside;
+      float3 wi_prev = -wr;
+      float phase_rand = lcg_step_float_addrspace(lcg_state);
+      wr = mf_sample_phase_glass(-wr, outside ? eta : 1.0f / eta, wm, phase_rand, &next_outside);
+      if (!next_outside) {
+        outside = !outside;
+        wr = -wr;
+        hr = -hr;
+      }
 
-			if(use_fresnel && !next_outside) {
-				throughput *= color;
-			}
-			else if(use_fresnel && order > 0) {
-				throughput *= interpolate_fresnel_color(wi_prev, wm, eta, F0, cspec0);
-			}
-#else  /* MF_MULTI_GLOSSY */
-			if(use_fresnel && order > 0) {
-				throughput *= interpolate_fresnel_color(-wr, wm, eta, F0, cspec0);
-			}
-			wr = mf_sample_phase_glossy(-wr, &throughput, wm);
+      if (use_fresnel && !next_outside) {
+        throughput *= color;
+      }
+      else if (use_fresnel && order > 0) {
+        throughput *= interpolate_fresnel_color(wi_prev, wm, eta, F0, cspec0);
+      }
+#else /* MF_MULTI_GLOSSY */
+      if (use_fresnel && order > 0) {
+        throughput *= interpolate_fresnel_color(-wr, wm, eta, F0, cspec0);
+      }
+      wr = mf_sample_phase_glossy(-wr, &throughput, wm);
 #endif
 
-			lambda_r = mf_lambda(wr, alpha);
+      lambda_r = mf_lambda(wr, alpha);
 
-			if(!use_fresnel)
-				throughput *= color;
+      if (!use_fresnel)
+        throughput *= color;
 
-			C1_r = mf_C1(hr);
-			G1_r = mf_G1(wr, C1_r, lambda_r);
-		}
-	}
+      C1_r = mf_C1(hr);
+      G1_r = mf_G1(wr, C1_r, lambda_r);
+    }
+  }
 
-	if(swapped)
-		eval *= fabsf(wi.z / wo.z);
-	return eval;
+  if (swapped)
+    eval *= fabsf(wi.z / wo.z);
+  return eval;
 }
 
 /* Perform a random walk on the microsurface starting from wi, returning the direction in which the walk
  * escaped the surface in wo. The function returns the throughput between wi and wo.
  * Without reflection losses due to coloring or fresnel absorption in conductors, the sampling is optimal.
  */
-ccl_device_forceinline float3 MF_FUNCTION_FULL_NAME(mf_sample)(
-	float3 wi,
-	float3 *wo,
-	const float3 color,
-	const float alpha_x,
-	const float alpha_y,
-	ccl_addr_space uint *lcg_state,
-	const float eta,
-	bool use_fresnel,
-	const float3 cspec0)
+ccl_device_forceinline float3 MF_FUNCTION_FULL_NAME(mf_sample)(float3 wi,
+                                                               float3 *wo,
+                                                               const float3 color,
+                                                               const float alpha_x,
+                                                               const float alpha_y,
+                                                               ccl_addr_space uint *lcg_state,
+                                                               const float eta,
+                                                               bool use_fresnel,
+                                                               const float3 cspec0)
 {
-	const float2 alpha = make_float2(alpha_x, alpha_y);
+  const float2 alpha = make_float2(alpha_x, alpha_y);
 
-	float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
-	float3 wr = -wi;
-	float lambda_r = mf_lambda(wr, alpha);
-	float hr = 1.0f;
-	float C1_r = 1.0f;
-	float G1_r = 0.0f;
-	bool outside = true;
+  float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
+  float3 wr = -wi;
+  float lambda_r = mf_lambda(wr, alpha);
+  float hr = 1.0f;
+  float C1_r = 1.0f;
+  float G1_r = 0.0f;
+  bool outside = true;
 
-	float F0 = fresnel_dielectric_cos(1.0f, eta);
-	if(use_fresnel) {
-		throughput = interpolate_fresnel_color(wi, normalize(wi + wr), eta, F0, cspec0);
-	}
+  float F0 = fresnel_dielectric_cos(1.0f, eta);
+  if (use_fresnel) {
+    throughput = interpolate_fresnel_color(wi, normalize(wi + wr), eta, F0, cspec0);
+  }
 
-	int order;
-	for(order = 0; order < 10; order++) {
-		/* Sample microfacet height. */
-		float height_rand = lcg_step_float_addrspace(lcg_state);
-		if(!mf_sample_height(wr, &hr, &C1_r, &G1_r, &lambda_r, height_rand)) {
-			/* The random walk has left the surface. */
-			*wo = outside? wr: -wr;
-			return throughput;
-		}
-		/* Sample microfacet normal. */
-		float vndf_rand_y = lcg_step_float_addrspace(lcg_state);
-		float vndf_rand_x = lcg_step_float_addrspace(lcg_state);
-		float3 wm = mf_sample_vndf(-wr, alpha, vndf_rand_x, vndf_rand_y);
+  int order;
+  for (order = 0; order < 10; order++) {
+    /* Sample microfacet height. */
+    float height_rand = lcg_step_float_addrspace(lcg_state);
+    if (!mf_sample_height(wr, &hr, &C1_r, &G1_r, &lambda_r, height_rand)) {
+      /* The random walk has left the surface. */
+      *wo = outside ? wr : -wr;
+      return throughput;
+    }
+    /* Sample microfacet normal. */
+    float vndf_rand_y = lcg_step_float_addrspace(lcg_state);
+    float vndf_rand_x = lcg_step_float_addrspace(lcg_state);
+    float3 wm = mf_sample_vndf(-wr, alpha, vndf_rand_x, vndf_rand_y);
 
-		/* First-bounce color is already accounted for in mix weight. */
-		if(!use_fresnel && order > 0)
-			throughput *= color;
+    /* First-bounce color is already accounted for in mix weight. */
+    if (!use_fresnel && order > 0)
+      throughput *= color;
 
-		/* Bounce from the microfacet. */
+      /* Bounce from the microfacet. */
 #ifdef MF_MULTI_GLASS
-		bool next_outside;
-		float3 wi_prev = -wr;
-		float phase_rand = lcg_step_float_addrspace(lcg_state);
-		wr = mf_sample_phase_glass(-wr, outside? eta: 1.0f/eta, wm, phase_rand, &next_outside);
-		if(!next_outside) {
-			hr = -hr;
-			wr = -wr;
-			outside = !outside;
-		}
+    bool next_outside;
+    float3 wi_prev = -wr;
+    float phase_rand = lcg_step_float_addrspace(lcg_state);
+    wr = mf_sample_phase_glass(-wr, outside ? eta : 1.0f / eta, wm, phase_rand, &next_outside);
+    if (!next_outside) {
+      hr = -hr;
+      wr = -wr;
+      outside = !outside;
+    }
 
-		if(use_fresnel) {
-			if(!next_outside) {
-				throughput *= color;
-			}
-			else {
-				float3 t_color = interpolate_fresnel_color(wi_prev, wm, eta, F0, cspec0);
+    if (use_fresnel) {
+      if (!next_outside) {
+        throughput *= color;
+      }
+      else {
+        float3 t_color = interpolate_fresnel_color(wi_prev, wm, eta, F0, cspec0);
 
-				if(order == 0)
-					throughput = t_color;
-				else
-					throughput *= t_color;
-			}
-		}
-#else  /* MF_MULTI_GLOSSY */
-		if(use_fresnel) {
-			float3 t_color = interpolate_fresnel_color(-wr, wm, eta, F0, cspec0);
+        if (order == 0)
+          throughput = t_color;
+        else
+          throughput *= t_color;
+      }
+    }
+#else /* MF_MULTI_GLOSSY */
+    if (use_fresnel) {
+      float3 t_color = interpolate_fresnel_color(-wr, wm, eta, F0, cspec0);
 
-			if(order == 0)
-				throughput = t_color;
-			else
-				throughput *= t_color;
-		}
-		wr = mf_sample_phase_glossy(-wr, &throughput, wm);
+      if (order == 0)
+        throughput = t_color;
+      else
+        throughput *= t_color;
+    }
+    wr = mf_sample_phase_glossy(-wr, &throughput, wm);
 #endif
 
-		/* Update random walk parameters. */
-		lambda_r = mf_lambda(wr, alpha);
-		G1_r = mf_G1(wr, C1_r, lambda_r);
-	}
-	*wo = make_float3(0.0f, 0.0f, 1.0f);
-	return make_float3(0.0f, 0.0f, 0.0f);
+    /* Update random walk parameters. */
+    lambda_r = mf_lambda(wr, alpha);
+    G1_r = mf_G1(wr, C1_r, lambda_r);
+  }
+  *wo = make_float3(0.0f, 0.0f, 1.0f);
+  return make_float3(0.0f, 0.0f, 0.0f);
 }
 
 #undef MF_MULTI_GLASS
diff --git a/intern/cycles/kernel/closure/bsdf_oren_nayar.h b/intern/cycles/kernel/closure/bsdf_oren_nayar.h
index 3446d1609d9..104ed5b2818 100644
--- a/intern/cycles/kernel/closure/bsdf_oren_nayar.h
+++ b/intern/cycles/kernel/closure/bsdf_oren_nayar.h
@@ -20,92 +20,110 @@
 CCL_NAMESPACE_BEGIN
 
 typedef ccl_addr_space struct OrenNayarBsdf {
-	SHADER_CLOSURE_BASE;
+  SHADER_CLOSURE_BASE;
 
-	float roughness;
-	float a;
-	float b;
+  float roughness;
+  float a;
+  float b;
 } OrenNayarBsdf;
 
-ccl_device float3 bsdf_oren_nayar_get_intensity(const ShaderClosure *sc, float3 n, float3 v, float3 l)
+ccl_device float3 bsdf_oren_nayar_get_intensity(const ShaderClosure *sc,
+                                                float3 n,
+                                                float3 v,
+                                                float3 l)
 {
-	const OrenNayarBsdf *bsdf = (const OrenNayarBsdf*)sc;
-	float nl = max(dot(n, l), 0.0f);
-	float nv = max(dot(n, v), 0.0f);
-	float t = dot(l, v) - nl * nv;
-
-	if(t > 0.0f)
-		t /= max(nl, nv) + FLT_MIN;
-	float is = nl * (bsdf->a + bsdf->b * t);
-	return make_float3(is, is, is);
+  const OrenNayarBsdf *bsdf = (const OrenNayarBsdf *)sc;
+  float nl = max(dot(n, l), 0.0f);
+  float nv = max(dot(n, v), 0.0f);
+  float t = dot(l, v) - nl * nv;
+
+  if (t > 0.0f)
+    t /= max(nl, nv) + FLT_MIN;
+  float is = nl * (bsdf->a + bsdf->b * t);
+  return make_float3(is, is, is);
 }
 
 ccl_device int bsdf_oren_nayar_setup(OrenNayarBsdf *bsdf)
 {
-	float sigma = bsdf->roughness;
+  float sigma = bsdf->roughness;
 
-	bsdf->type = CLOSURE_BSDF_OREN_NAYAR_ID;
+  bsdf->type = CLOSURE_BSDF_OREN_NAYAR_ID;
 
-	sigma = saturate(sigma);
+  sigma = saturate(sigma);
 
-	float div = 1.0f / (M_PI_F + ((3.0f * M_PI_F - 4.0f) / 6.0f) * sigma);
+  float div = 1.0f / (M_PI_F + ((3.0f * M_PI_F - 4.0f) / 6.0f) * sigma);
 
-	bsdf->a = 1.0f * div;
-	bsdf->b = sigma * div;
+  bsdf->a = 1.0f * div;
+  bsdf->b = sigma * div;
 
-	return SD_BSDF|SD_BSDF_HAS_EVAL;
+  return SD_BSDF | SD_BSDF_HAS_EVAL;
 }
 
 ccl_device bool bsdf_oren_nayar_merge(const ShaderClosure *a, const ShaderClosure *b)
 {
-	const OrenNayarBsdf *bsdf_a = (const OrenNayarBsdf*)a;
-	const OrenNayarBsdf *bsdf_b = (const OrenNayarBsdf*)b;
+  const OrenNayarBsdf *bsdf_a = (const OrenNayarBsdf *)a;
+  const OrenNayarBsdf *bsdf_b = (const OrenNayarBsdf *)b;
 
-	return (isequal_float3(bsdf_a->N, bsdf_b->N)) &&
-	       (bsdf_a->roughness == bsdf_b->roughness);
+  return (isequal_float3(bsdf_a->N, bsdf_b->N)) && (bsdf_a->roughness == bsdf_b->roughness);
 }
 
-ccl_device float3 bsdf_oren_nayar_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
+ccl_device float3 bsdf_oren_nayar_eval_reflect(const ShaderClosure *sc,
+                                               const float3 I,
+                                               const float3 omega_in,
+                                               float *pdf)
 {
-	const OrenNayarBsdf *bsdf = (const OrenNayarBsdf*)sc;
-	if(dot(bsdf->N, omega_in) > 0.0f) {
-		*pdf = 0.5f * M_1_PI_F;
-		return bsdf_oren_nayar_get_intensity(sc, bsdf->N, I, omega_in);
-	}
-	else {
-		*pdf = 0.0f;
-		return make_float3(0.0f, 0.0f, 0.0f);
-	}
+  const OrenNayarBsdf *bsdf = (const OrenNayarBsdf *)sc;
+  if (dot(bsdf->N, omega_in) > 0.0f) {
+    *pdf = 0.5f * M_1_PI_F;
+    return bsdf_oren_nayar_get_intensity(sc, bsdf->N, I, omega_in);
+  }
+  else {
+    *pdf = 0.0f;
+    return make_float3(0.0f, 0.0f, 0.0f);
+  }
 }
 
-ccl_device float3 bsdf_oren_nayar_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
+ccl_device float3 bsdf_oren_nayar_eval_transmit(const ShaderClosure *sc,
+                                                const float3 I,
+                                                const float3 omega_in,
+                                                float *pdf)
 {
-	return make_float3(0.0f, 0.0f, 0.0f);
+  return make_float3(0.0f, 0.0f, 0.0f);
 }
 
-ccl_device int bsdf_oren_nayar_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf)
+ccl_device int bsdf_oren_nayar_sample(const ShaderClosure *sc,
+                                      float3 Ng,
+                                      float3 I,
+                                      float3 dIdx,
+                                      float3 dIdy,
+                                      float randu,
+                                      float randv,
+                                      float3 *eval,
+                                      float3 *omega_in,
+                                      float3 *domega_in_dx,
+                                      float3 *domega_in_dy,
+                                      float *pdf)
 {
-	const OrenNayarBsdf *bsdf = (const OrenNayarBsdf*)sc;
-	sample_uniform_hemisphere(bsdf->N, randu, randv, omega_in, pdf);
+  const OrenNayarBsdf *bsdf = (const OrenNayarBsdf *)sc;
+  sample_uniform_hemisphere(bsdf->N, randu, randv, omega_in, pdf);
 
-	if(dot(Ng, *omega_in) > 0.0f) {
-		*eval = bsdf_oren_nayar_get_intensity(sc, bsdf->N, I, *omega_in);
+  if (dot(Ng, *omega_in) > 0.0f) {
+    *eval = bsdf_oren_nayar_get_intensity(sc, bsdf->N, I, *omega_in);
 
 #ifdef __RAY_DIFFERENTIALS__
-		// TODO: find a better approximation for the bounce
-		*domega_in_dx = (2.0f * dot(bsdf->N, dIdx)) * bsdf->N - dIdx;
-		*domega_in_dy = (2.0f * dot(bsdf->N, dIdy)) * bsdf->N - dIdy;
+    // TODO: find a better approximation for the bounce
+    *domega_in_dx = (2.0f * dot(bsdf->N, dIdx)) * bsdf->N - dIdx;
+    *domega_in_dy = (2.0f * dot(bsdf->N, dIdy)) * bsdf->N - dIdy;
 #endif
-	}
-	else {
-		*pdf = 0.0f;
-		*eval = make_float3(0.0f, 0.0f, 0.0f);
-	}
+  }
+  else {
+    *pdf = 0.0f;
+    *eval = make_float3(0.0f, 0.0f, 0.0f);
+  }
 
-	return LABEL_REFLECT|LABEL_DIFFUSE;
+  return LABEL_REFLECT | LABEL_DIFFUSE;
 }
 
-
 CCL_NAMESPACE_END
 
-#endif  /* __BSDF_OREN_NAYAR_H__ */
+#endif /* __BSDF_OREN_NAYAR_H__ */
diff --git a/intern/cycles/kernel/closure/bsdf_phong_ramp.h b/intern/cycles/kernel/closure/bsdf_phong_ramp.h
index 83da05ac435..b6fd0e68681 100644
--- a/intern/cycles/kernel/closure/bsdf_phong_ramp.h
+++ b/intern/cycles/kernel/closure/bsdf_phong_ramp.h
@@ -38,105 +38,118 @@ CCL_NAMESPACE_BEGIN
 #ifdef __OSL__
 
 typedef ccl_addr_space struct PhongRampBsdf {
-	SHADER_CLOSURE_BASE;
+  SHADER_CLOSURE_BASE;
 
-	float exponent;
-	float3 *colors;
+  float exponent;
+  float3 *colors;
 } PhongRampBsdf;
 
 ccl_device float3 bsdf_phong_ramp_get_color(const float3 colors[8], float pos)
 {
-	int MAXCOLORS = 8;
-
-	float npos = pos * (float)(MAXCOLORS - 1);
-	int ipos = float_to_int(npos);
-	if(ipos < 0)
-		return colors[0];
-	if(ipos >= (MAXCOLORS - 1))
-		return colors[MAXCOLORS - 1];
-	float offset = npos - (float)ipos;
-	return colors[ipos] * (1.0f - offset) + colors[ipos+1] * offset;
+  int MAXCOLORS = 8;
+
+  float npos = pos * (float)(MAXCOLORS - 1);
+  int ipos = float_to_int(npos);
+  if (ipos < 0)
+    return colors[0];
+  if (ipos >= (MAXCOLORS - 1))
+    return colors[MAXCOLORS - 1];
+  float offset = npos - (float)ipos;
+  return colors[ipos] * (1.0f - offset) + colors[ipos + 1] * offset;
 }
 
 ccl_device int bsdf_phong_ramp_setup(PhongRampBsdf *bsdf)
 {
-	bsdf->type = CLOSURE_BSDF_PHONG_RAMP_ID;
-	bsdf->exponent = max(bsdf->exponent, 0.0f);
-	return SD_BSDF|SD_BSDF_HAS_EVAL;
+  bsdf->type = CLOSURE_BSDF_PHONG_RAMP_ID;
+  bsdf->exponent = max(bsdf->exponent, 0.0f);
+  return SD_BSDF | SD_BSDF_HAS_EVAL;
 }
 
-ccl_device float3 bsdf_phong_ramp_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
+ccl_device float3 bsdf_phong_ramp_eval_reflect(const ShaderClosure *sc,
+                                               const float3 I,
+                                               const float3 omega_in,
+                                               float *pdf)
 {
-	const PhongRampBsdf *bsdf = (const PhongRampBsdf*)sc;
-	float m_exponent = bsdf->exponent;
-	float cosNI = dot(bsdf->N, omega_in);
-	float cosNO = dot(bsdf->N, I);
-
-	if(cosNI > 0 && cosNO > 0) {
-		// reflect the view vector
-		float3 R = (2 * cosNO) * bsdf->N - I;
-		float cosRI = dot(R, omega_in);
-		if(cosRI > 0) {
-			float cosp = powf(cosRI, m_exponent);
-			float common = 0.5f * M_1_PI_F * cosp;
-			float out = cosNI * (m_exponent + 2) * common;
-			*pdf = (m_exponent + 1) * common;
-			return bsdf_phong_ramp_get_color(bsdf->colors, cosp) * out;
-		}
-	}
-
-	return make_float3(0.0f, 0.0f, 0.0f);
+  const PhongRampBsdf *bsdf = (const PhongRampBsdf *)sc;
+  float m_exponent = bsdf->exponent;
+  float cosNI = dot(bsdf->N, omega_in);
+  float cosNO = dot(bsdf->N, I);
+
+  if (cosNI > 0 && cosNO > 0) {
+    // reflect the view vector
+    float3 R = (2 * cosNO) * bsdf->N - I;
+    float cosRI = dot(R, omega_in);
+    if (cosRI > 0) {
+      float cosp = powf(cosRI, m_exponent);
+      float common = 0.5f * M_1_PI_F * cosp;
+      float out = cosNI * (m_exponent + 2) * common;
+      *pdf = (m_exponent + 1) * common;
+      return bsdf_phong_ramp_get_color(bsdf->colors, cosp) * out;
+    }
+  }
+
+  return make_float3(0.0f, 0.0f, 0.0f);
 }
 
-ccl_device float3 bsdf_phong_ramp_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
+ccl_device float3 bsdf_phong_ramp_eval_transmit(const ShaderClosure *sc,
+                                                const float3 I,
+                                                const float3 omega_in,
+                                                float *pdf)
 {
-	return make_float3(0.0f, 0.0f, 0.0f);
+  return make_float3(0.0f, 0.0f, 0.0f);
 }
 
-ccl_device int bsdf_phong_ramp_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf)
+ccl_device int bsdf_phong_ramp_sample(const ShaderClosure *sc,
+                                      float3 Ng,
+                                      float3 I,
+                                      float3 dIdx,
+                                      float3 dIdy,
+                                      float randu,
+                                      float randv,
+                                      float3 *eval,
+                                      float3 *omega_in,
+                                      float3 *domega_in_dx,
+                                      float3 *domega_in_dy,
+                                      float *pdf)
 {
-	const PhongRampBsdf *bsdf = (const PhongRampBsdf*)sc;
-	float cosNO = dot(bsdf->N, I);
-	float m_exponent = bsdf->exponent;
-
-	if(cosNO > 0) {
-		// reflect the view vector
-		float3 R = (2 * cosNO) * bsdf->N - I;
-
-#ifdef __RAY_DIFFERENTIALS__
-		*domega_in_dx = (2 * dot(bsdf->N, dIdx)) * bsdf->N - dIdx;
-		*domega_in_dy = (2 * dot(bsdf->N, dIdy)) * bsdf->N - dIdy;
-#endif
-
-		float3 T, B;
-		make_orthonormals (R, &T, &B);
-		float phi = M_2PI_F * randu;
-		float cosTheta = powf(randv, 1 / (m_exponent + 1));
-		float sinTheta2 = 1 - cosTheta * cosTheta;
-		float sinTheta = sinTheta2 > 0 ? sqrtf(sinTheta2) : 0;
-		*omega_in = (cosf(phi) * sinTheta) * T +
-		            (sinf(phi) * sinTheta) * B +
-		            (            cosTheta) * R;
-		if(dot(Ng, *omega_in) > 0.0f)
-		{
-			// common terms for pdf and eval
-			float cosNI = dot(bsdf->N, *omega_in);
-			// make sure the direction we chose is still in the right hemisphere
-			if(cosNI > 0)
-			{
-				float cosp = powf(cosTheta, m_exponent);
-				float common = 0.5f * M_1_PI_F * cosp;
-				*pdf = (m_exponent + 1) * common;
-				float out = cosNI * (m_exponent + 2) * common;
-				*eval = bsdf_phong_ramp_get_color(bsdf->colors, cosp) * out;
-			}
-		}
-	}
-	return LABEL_REFLECT|LABEL_GLOSSY;
+  const PhongRampBsdf *bsdf = (const PhongRampBsdf *)sc;
+  float cosNO = dot(bsdf->N, I);
+  float m_exponent = bsdf->exponent;
+
+  if (cosNO > 0) {
+    // reflect the view vector
+    float3 R = (2 * cosNO) * bsdf->N - I;
+
+#  ifdef __RAY_DIFFERENTIALS__
+    *domega_in_dx = (2 * dot(bsdf->N, dIdx)) * bsdf->N - dIdx;
+    *domega_in_dy = (2 * dot(bsdf->N, dIdy)) * bsdf->N - dIdy;
+#  endif
+
+    float3 T, B;
+    make_orthonormals(R, &T, &B);
+    float phi = M_2PI_F * randu;
+    float cosTheta = powf(randv, 1 / (m_exponent + 1));
+    float sinTheta2 = 1 - cosTheta * cosTheta;
+    float sinTheta = sinTheta2 > 0 ? sqrtf(sinTheta2) : 0;
+    *omega_in = (cosf(phi) * sinTheta) * T + (sinf(phi) * sinTheta) * B + (cosTheta)*R;
+    if (dot(Ng, *omega_in) > 0.0f) {
+      // common terms for pdf and eval
+      float cosNI = dot(bsdf->N, *omega_in);
+      // make sure the direction we chose is still in the right hemisphere
+      if (cosNI > 0) {
+        float cosp = powf(cosTheta, m_exponent);
+        float common = 0.5f * M_1_PI_F * cosp;
+        *pdf = (m_exponent + 1) * common;
+        float out = cosNI * (m_exponent + 2) * common;
+        *eval = bsdf_phong_ramp_get_color(bsdf->colors, cosp) * out;
+      }
+    }
+  }
+  return LABEL_REFLECT | LABEL_GLOSSY;
 }
 
-#endif  /* __OSL__ */
+#endif /* __OSL__ */
 
 CCL_NAMESPACE_END
 
-#endif  /* __BSDF_PHONG_RAMP_H__ */
+#endif /* __BSDF_PHONG_RAMP_H__ */
diff --git a/intern/cycles/kernel/closure/bsdf_principled_diffuse.h b/intern/cycles/kernel/closure/bsdf_principled_diffuse.h
index 2f65fd54be2..d7795974ef5 100644
--- a/intern/cycles/kernel/closure/bsdf_principled_diffuse.h
+++ b/intern/cycles/kernel/closure/bsdf_principled_diffuse.h
@@ -25,101 +25,113 @@
 CCL_NAMESPACE_BEGIN
 
 typedef ccl_addr_space struct PrincipledDiffuseBsdf {
-	SHADER_CLOSURE_BASE;
+  SHADER_CLOSURE_BASE;
 
-	float roughness;
+  float roughness;
 } PrincipledDiffuseBsdf;
 
-ccl_device float3 calculate_principled_diffuse_brdf(const PrincipledDiffuseBsdf *bsdf,
-	float3 N, float3 V, float3 L, float3 H, float *pdf)
+ccl_device float3 calculate_principled_diffuse_brdf(
+    const PrincipledDiffuseBsdf *bsdf, float3 N, float3 V, float3 L, float3 H, float *pdf)
 {
-	float NdotL = max(dot(N, L), 0.0f);
-	float NdotV = max(dot(N, V), 0.0f);
+  float NdotL = max(dot(N, L), 0.0f);
+  float NdotV = max(dot(N, V), 0.0f);
 
-	if(NdotL < 0 || NdotV < 0) {
-		*pdf = 0.0f;
-		return make_float3(0.0f, 0.0f, 0.0f);
-	}
+  if (NdotL < 0 || NdotV < 0) {
+    *pdf = 0.0f;
+    return make_float3(0.0f, 0.0f, 0.0f);
+  }
 
-	float LdotH = dot(L, H);
+  float LdotH = dot(L, H);
 
-	float FL = schlick_fresnel(NdotL), FV = schlick_fresnel(NdotV);
-	const float Fd90 = 0.5f + 2.0f * LdotH*LdotH * bsdf->roughness;
-	float Fd = (1.0f * (1.0f - FL) + Fd90 * FL) * (1.0f * (1.0f - FV) + Fd90 * FV);
+  float FL = schlick_fresnel(NdotL), FV = schlick_fresnel(NdotV);
+  const float Fd90 = 0.5f + 2.0f * LdotH * LdotH * bsdf->roughness;
+  float Fd = (1.0f * (1.0f - FL) + Fd90 * FL) * (1.0f * (1.0f - FV) + Fd90 * FV);
 
-	float value = M_1_PI_F * NdotL * Fd;
+  float value = M_1_PI_F * NdotL * Fd;
 
-	return make_float3(value, value, value);
+  return make_float3(value, value, value);
 }
 
 ccl_device int bsdf_principled_diffuse_setup(PrincipledDiffuseBsdf *bsdf)
 {
-	bsdf->type = CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID;
-	return SD_BSDF|SD_BSDF_HAS_EVAL;
+  bsdf->type = CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID;
+  return SD_BSDF | SD_BSDF_HAS_EVAL;
 }
 
 ccl_device bool bsdf_principled_diffuse_merge(const ShaderClosure *a, const ShaderClosure *b)
 {
-	const PrincipledDiffuseBsdf *bsdf_a = (const PrincipledDiffuseBsdf*)a;
-	const PrincipledDiffuseBsdf *bsdf_b = (const PrincipledDiffuseBsdf*)b;
+  const PrincipledDiffuseBsdf *bsdf_a = (const PrincipledDiffuseBsdf *)a;
+  const PrincipledDiffuseBsdf *bsdf_b = (const PrincipledDiffuseBsdf *)b;
 
-	return (isequal_float3(bsdf_a->N, bsdf_b->N) && bsdf_a->roughness == bsdf_b->roughness);
+  return (isequal_float3(bsdf_a->N, bsdf_b->N) && bsdf_a->roughness == bsdf_b->roughness);
 }
 
-ccl_device float3 bsdf_principled_diffuse_eval_reflect(const ShaderClosure *sc, const float3 I,
-	const float3 omega_in, float *pdf)
+ccl_device float3 bsdf_principled_diffuse_eval_reflect(const ShaderClosure *sc,
+                                                       const float3 I,
+                                                       const float3 omega_in,
+                                                       float *pdf)
 {
-	const PrincipledDiffuseBsdf *bsdf = (const PrincipledDiffuseBsdf *)sc;
-
-	float3 N = bsdf->N;
-	float3 V = I; // outgoing
-	float3 L = omega_in; // incoming
-	float3 H = normalize(L + V);
-
-	if(dot(N, omega_in) > 0.0f) {
-		*pdf = fmaxf(dot(N, omega_in), 0.0f) * M_1_PI_F;
-		return calculate_principled_diffuse_brdf(bsdf, N, V, L, H, pdf);
-	}
-	else {
-		*pdf = 0.0f;
-		return make_float3(0.0f, 0.0f, 0.0f);
-	}
+  const PrincipledDiffuseBsdf *bsdf = (const PrincipledDiffuseBsdf *)sc;
+
+  float3 N = bsdf->N;
+  float3 V = I;         // outgoing
+  float3 L = omega_in;  // incoming
+  float3 H = normalize(L + V);
+
+  if (dot(N, omega_in) > 0.0f) {
+    *pdf = fmaxf(dot(N, omega_in), 0.0f) * M_1_PI_F;
+    return calculate_principled_diffuse_brdf(bsdf, N, V, L, H, pdf);
+  }
+  else {
+    *pdf = 0.0f;
+    return make_float3(0.0f, 0.0f, 0.0f);
+  }
 }
 
-ccl_device float3 bsdf_principled_diffuse_eval_transmit(const ShaderClosure *sc, const float3 I,
-	const float3 omega_in, float *pdf)
+ccl_device float3 bsdf_principled_diffuse_eval_transmit(const ShaderClosure *sc,
+                                                        const float3 I,
+                                                        const float3 omega_in,
+                                                        float *pdf)
 {
-	return make_float3(0.0f, 0.0f, 0.0f);
+  return make_float3(0.0f, 0.0f, 0.0f);
 }
 
 ccl_device int bsdf_principled_diffuse_sample(const ShaderClosure *sc,
-	float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv,
-	float3 *eval, float3 *omega_in, float3 *domega_in_dx,
-	float3 *domega_in_dy, float *pdf)
+                                              float3 Ng,
+                                              float3 I,
+                                              float3 dIdx,
+                                              float3 dIdy,
+                                              float randu,
+                                              float randv,
+                                              float3 *eval,
+                                              float3 *omega_in,
+                                              float3 *domega_in_dx,
+                                              float3 *domega_in_dy,
+                                              float *pdf)
 {
-	const PrincipledDiffuseBsdf *bsdf = (const PrincipledDiffuseBsdf *)sc;
+  const PrincipledDiffuseBsdf *bsdf = (const PrincipledDiffuseBsdf *)sc;
 
-	float3 N = bsdf->N;
+  float3 N = bsdf->N;
 
-	sample_cos_hemisphere(N, randu, randv, omega_in, pdf);
+  sample_cos_hemisphere(N, randu, randv, omega_in, pdf);
 
-	if(dot(Ng, *omega_in) > 0) {
-		float3 H = normalize(I + *omega_in);
+  if (dot(Ng, *omega_in) > 0) {
+    float3 H = normalize(I + *omega_in);
 
-		*eval = calculate_principled_diffuse_brdf(bsdf, N, I, *omega_in, H, pdf);
+    *eval = calculate_principled_diffuse_brdf(bsdf, N, I, *omega_in, H, pdf);
 
 #ifdef __RAY_DIFFERENTIALS__
-		// TODO: find a better approximation for the diffuse bounce
-		*domega_in_dx = -((2 * dot(N, dIdx)) * N - dIdx);
-		*domega_in_dy = -((2 * dot(N, dIdy)) * N - dIdy);
+    // TODO: find a better approximation for the diffuse bounce
+    *domega_in_dx = -((2 * dot(N, dIdx)) * N - dIdx);
+    *domega_in_dy = -((2 * dot(N, dIdy)) * N - dIdy);
 #endif
-	}
-	else {
-		*pdf = 0.0f;
-	}
-	return LABEL_REFLECT|LABEL_DIFFUSE;
+  }
+  else {
+    *pdf = 0.0f;
+  }
+  return LABEL_REFLECT | LABEL_DIFFUSE;
 }
 
 CCL_NAMESPACE_END
 
-#endif  /* __BSDF_PRINCIPLED_DIFFUSE_H__ */
+#endif /* __BSDF_PRINCIPLED_DIFFUSE_H__ */
diff --git a/intern/cycles/kernel/closure/bsdf_principled_sheen.h b/intern/cycles/kernel/closure/bsdf_principled_sheen.h
index ccdcb1babd2..bc522095b3b 100644
--- a/intern/cycles/kernel/closure/bsdf_principled_sheen.h
+++ b/intern/cycles/kernel/closure/bsdf_principled_sheen.h
@@ -25,87 +25,99 @@
 CCL_NAMESPACE_BEGIN
 
 typedef ccl_addr_space struct PrincipledSheenBsdf {
-	SHADER_CLOSURE_BASE;
+  SHADER_CLOSURE_BASE;
 } PrincipledSheenBsdf;
 
-ccl_device float3 calculate_principled_sheen_brdf(const PrincipledSheenBsdf *bsdf,
-	float3 N, float3 V, float3 L, float3 H, float *pdf)
+ccl_device float3 calculate_principled_sheen_brdf(
+    const PrincipledSheenBsdf *bsdf, float3 N, float3 V, float3 L, float3 H, float *pdf)
 {
-	float NdotL = dot(N, L);
-	float NdotV = dot(N, V);
+  float NdotL = dot(N, L);
+  float NdotV = dot(N, V);
 
-	if(NdotL < 0 || NdotV < 0) {
-		*pdf = 0.0f;
-		return make_float3(0.0f, 0.0f, 0.0f);
-	}
+  if (NdotL < 0 || NdotV < 0) {
+    *pdf = 0.0f;
+    return make_float3(0.0f, 0.0f, 0.0f);
+  }
 
-	float LdotH = dot(L, H);
+  float LdotH = dot(L, H);
 
-	float value = schlick_fresnel(LdotH) * NdotL;
+  float value = schlick_fresnel(LdotH) * NdotL;
 
-	return make_float3(value, value, value);
+  return make_float3(value, value, value);
 }
 
 ccl_device int bsdf_principled_sheen_setup(PrincipledSheenBsdf *bsdf)
 {
-	bsdf->type = CLOSURE_BSDF_PRINCIPLED_SHEEN_ID;
-	return SD_BSDF|SD_BSDF_HAS_EVAL;
+  bsdf->type = CLOSURE_BSDF_PRINCIPLED_SHEEN_ID;
+  return SD_BSDF | SD_BSDF_HAS_EVAL;
 }
 
-ccl_device float3 bsdf_principled_sheen_eval_reflect(const ShaderClosure *sc, const float3 I,
-	const float3 omega_in, float *pdf)
+ccl_device float3 bsdf_principled_sheen_eval_reflect(const ShaderClosure *sc,
+                                                     const float3 I,
+                                                     const float3 omega_in,
+                                                     float *pdf)
 {
-	const PrincipledSheenBsdf *bsdf = (const PrincipledSheenBsdf *)sc;
-
-	float3 N = bsdf->N;
-	float3 V = I; // outgoing
-	float3 L = omega_in; // incoming
-	float3 H = normalize(L + V);
-
-	if(dot(N, omega_in) > 0.0f) {
-		*pdf = fmaxf(dot(N, omega_in), 0.0f) * M_1_PI_F;
-		return calculate_principled_sheen_brdf(bsdf, N, V, L, H, pdf);
-	}
-	else {
-		*pdf = 0.0f;
-		return make_float3(0.0f, 0.0f, 0.0f);
-	}
+  const PrincipledSheenBsdf *bsdf = (const PrincipledSheenBsdf *)sc;
+
+  float3 N = bsdf->N;
+  float3 V = I;         // outgoing
+  float3 L = omega_in;  // incoming
+  float3 H = normalize(L + V);
+
+  if (dot(N, omega_in) > 0.0f) {
+    *pdf = fmaxf(dot(N, omega_in), 0.0f) * M_1_PI_F;
+    return calculate_principled_sheen_brdf(bsdf, N, V, L, H, pdf);
+  }
+  else {
+    *pdf = 0.0f;
+    return make_float3(0.0f, 0.0f, 0.0f);
+  }
 }
 
-ccl_device float3 bsdf_principled_sheen_eval_transmit(const ShaderClosure *sc, const float3 I,
-	const float3 omega_in, float *pdf)
+ccl_device float3 bsdf_principled_sheen_eval_transmit(const ShaderClosure *sc,
+                                                      const float3 I,
+                                                      const float3 omega_in,
+                                                      float *pdf)
 {
-	return make_float3(0.0f, 0.0f, 0.0f);
+  return make_float3(0.0f, 0.0f, 0.0f);
 }
 
 ccl_device int bsdf_principled_sheen_sample(const ShaderClosure *sc,
-	float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv,
-	float3 *eval, float3 *omega_in, float3 *domega_in_dx,
-	float3 *domega_in_dy, float *pdf)
+                                            float3 Ng,
+                                            float3 I,
+                                            float3 dIdx,
+                                            float3 dIdy,
+                                            float randu,
+                                            float randv,
+                                            float3 *eval,
+                                            float3 *omega_in,
+                                            float3 *domega_in_dx,
+                                            float3 *domega_in_dy,
+                                            float *pdf)
 {
-	const PrincipledSheenBsdf *bsdf = (const PrincipledSheenBsdf *)sc;
+  const PrincipledSheenBsdf *bsdf = (const PrincipledSheenBsdf *)sc;
 
-	float3 N = bsdf->N;
+  float3 N = bsdf->N;
 
-	sample_cos_hemisphere(N, randu, randv, omega_in, pdf);
+  sample_cos_hemisphere(N, randu, randv, omega_in, pdf);
 
-	if(dot(Ng, *omega_in) > 0) {
-		float3 H = normalize(I + *omega_in);
+  if (dot(Ng, *omega_in) > 0) {
+    float3 H = normalize(I + *omega_in);
 
-		*eval = calculate_principled_sheen_brdf(bsdf, N, I, *omega_in, H, pdf);
+    *eval = calculate_principled_sheen_brdf(bsdf, N, I, *omega_in, H, pdf);
 
 #ifdef __RAY_DIFFERENTIALS__
-		// TODO: find a better approximation for the diffuse bounce
-		*domega_in_dx = -((2 * dot(N, dIdx)) * N - dIdx);
-		*domega_in_dy = -((2 * dot(N, dIdy)) * N - dIdy);
+    // TODO: find a better approximation for the diffuse bounce
+    *domega_in_dx = -((2 * dot(N, dIdx)) * N - dIdx);
+    *domega_in_dy = -((2 * dot(N, dIdy)) * N - dIdy);
 #endif
-	}
-	else {
-		*pdf = 0.0f;
-	}
-	return LABEL_REFLECT|LABEL_DIFFUSE;
+  }
+  else {
+    *pdf = 0.0f;
+  }
+  return LABEL_REFLECT | LABEL_DIFFUSE;
 }
 
 CCL_NAMESPACE_END
 
-#endif  /* __BSDF_PRINCIPLED_SHEEN_H__ */
+#endif /* __BSDF_PRINCIPLED_SHEEN_H__ */
diff --git a/intern/cycles/kernel/closure/bsdf_reflection.h b/intern/cycles/kernel/closure/bsdf_reflection.h
index 94f1c283af7..c24ba170915 100644
--- a/intern/cycles/kernel/closure/bsdf_reflection.h
+++ b/intern/cycles/kernel/closure/bsdf_reflection.h
@@ -39,42 +39,59 @@ CCL_NAMESPACE_BEGIN
 
 ccl_device int bsdf_reflection_setup(MicrofacetBsdf *bsdf)
 {
-	bsdf->type = CLOSURE_BSDF_REFLECTION_ID;
-	return SD_BSDF;
+  bsdf->type = CLOSURE_BSDF_REFLECTION_ID;
+  return SD_BSDF;
 }
 
-ccl_device float3 bsdf_reflection_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
+ccl_device float3 bsdf_reflection_eval_reflect(const ShaderClosure *sc,
+                                               const float3 I,
+                                               const float3 omega_in,
+                                               float *pdf)
 {
-	return make_float3(0.0f, 0.0f, 0.0f);
+  return make_float3(0.0f, 0.0f, 0.0f);
 }
 
-ccl_device float3 bsdf_reflection_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
+ccl_device float3 bsdf_reflection_eval_transmit(const ShaderClosure *sc,
+                                                const float3 I,
+                                                const float3 omega_in,
+                                                float *pdf)
 {
-	return make_float3(0.0f, 0.0f, 0.0f);
+  return make_float3(0.0f, 0.0f, 0.0f);
 }
 
-ccl_device int bsdf_reflection_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf)
+ccl_device int bsdf_reflection_sample(const ShaderClosure *sc,
+                                      float3 Ng,
+                                      float3 I,
+                                      float3 dIdx,
+                                      float3 dIdy,
+                                      float randu,
+                                      float randv,
+                                      float3 *eval,
+                                      float3 *omega_in,
+                                      float3 *domega_in_dx,
+                                      float3 *domega_in_dy,
+                                      float *pdf)
 {
-	const MicrofacetBsdf *bsdf = (const MicrofacetBsdf*)sc;
-	float3 N = bsdf->N;
+  const MicrofacetBsdf *bsdf = (const MicrofacetBsdf *)sc;
+  float3 N = bsdf->N;
 
-	// only one direction is possible
-	float cosNO = dot(N, I);
-	if(cosNO > 0) {
-		*omega_in = (2 * cosNO) * N - I;
-		if(dot(Ng, *omega_in) > 0) {
+  // only one direction is possible
+  float cosNO = dot(N, I);
+  if (cosNO > 0) {
+    *omega_in = (2 * cosNO) * N - I;
+    if (dot(Ng, *omega_in) > 0) {
 #ifdef __RAY_DIFFERENTIALS__
-			*domega_in_dx = 2 * dot(N, dIdx) * N - dIdx;
-			*domega_in_dy = 2 * dot(N, dIdy) * N - dIdy;
+      *domega_in_dx = 2 * dot(N, dIdx) * N - dIdx;
+      *domega_in_dy = 2 * dot(N, dIdy) * N - dIdy;
 #endif
-			/* Some high number for MIS. */
-			*pdf = 1e6f;
-			*eval = make_float3(1e6f, 1e6f, 1e6f);
-		}
-	}
-	return LABEL_REFLECT|LABEL_SINGULAR;
+      /* Some high number for MIS. */
+      *pdf = 1e6f;
+      *eval = make_float3(1e6f, 1e6f, 1e6f);
+    }
+  }
+  return LABEL_REFLECT | LABEL_SINGULAR;
 }
 
 CCL_NAMESPACE_END
 
-#endif  /* __BSDF_REFLECTION_H__ */
+#endif /* __BSDF_REFLECTION_H__ */
diff --git a/intern/cycles/kernel/closure/bsdf_refraction.h b/intern/cycles/kernel/closure/bsdf_refraction.h
index abdd01c7a1d..d4fbe86dac0 100644
--- a/intern/cycles/kernel/closure/bsdf_refraction.h
+++ b/intern/cycles/kernel/closure/bsdf_refraction.h
@@ -39,51 +39,77 @@ CCL_NAMESPACE_BEGIN
 
 ccl_device int bsdf_refraction_setup(MicrofacetBsdf *bsdf)
 {
-	bsdf->type = CLOSURE_BSDF_REFRACTION_ID;
-	return SD_BSDF;
+  bsdf->type = CLOSURE_BSDF_REFRACTION_ID;
+  return SD_BSDF;
 }
 
-ccl_device float3 bsdf_refraction_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
+ccl_device float3 bsdf_refraction_eval_reflect(const ShaderClosure *sc,
+                                               const float3 I,
+                                               const float3 omega_in,
+                                               float *pdf)
 {
-	return make_float3(0.0f, 0.0f, 0.0f);
+  return make_float3(0.0f, 0.0f, 0.0f);
 }
 
-ccl_device float3 bsdf_refraction_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
+ccl_device float3 bsdf_refraction_eval_transmit(const ShaderClosure *sc,
+                                                const float3 I,
+                                                const float3 omega_in,
+                                                float *pdf)
 {
-	return make_float3(0.0f, 0.0f, 0.0f);
+  return make_float3(0.0f, 0.0f, 0.0f);
 }
 
-ccl_device int bsdf_refraction_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf)
+ccl_device int bsdf_refraction_sample(const ShaderClosure *sc,
+                                      float3 Ng,
+                                      float3 I,
+                                      float3 dIdx,
+                                      float3 dIdy,
+                                      float randu,
+                                      float randv,
+                                      float3 *eval,
+                                      float3 *omega_in,
+                                      float3 *domega_in_dx,
+                                      float3 *domega_in_dy,
+                                      float *pdf)
 {
-	const MicrofacetBsdf *bsdf = (const MicrofacetBsdf*)sc;
-	float m_eta = bsdf->ior;
-	float3 N = bsdf->N;
+  const MicrofacetBsdf *bsdf = (const MicrofacetBsdf *)sc;
+  float m_eta = bsdf->ior;
+  float3 N = bsdf->N;
 
-	float3 R, T;
+  float3 R, T;
 #ifdef __RAY_DIFFERENTIALS__
-	float3 dRdx, dRdy, dTdx, dTdy;
+  float3 dRdx, dRdy, dTdx, dTdy;
 #endif
-	bool inside;
-	float fresnel;
-	fresnel = fresnel_dielectric(m_eta, N, I, &R, &T,
+  bool inside;
+  float fresnel;
+  fresnel = fresnel_dielectric(m_eta,
+                               N,
+                               I,
+                               &R,
+                               &T,
 #ifdef __RAY_DIFFERENTIALS__
-		dIdx, dIdy, &dRdx, &dRdy, &dTdx, &dTdy,
+                               dIdx,
+                               dIdy,
+                               &dRdx,
+                               &dRdy,
+                               &dTdx,
+                               &dTdy,
 #endif
-		&inside);
+                               &inside);
 
-	if(!inside && fresnel != 1.0f) {
-		/* Some high number for MIS. */
-		*pdf = 1e6f;
-		*eval = make_float3(1e6f, 1e6f, 1e6f);
-		*omega_in = T;
+  if (!inside && fresnel != 1.0f) {
+    /* Some high number for MIS. */
+    *pdf = 1e6f;
+    *eval = make_float3(1e6f, 1e6f, 1e6f);
+    *omega_in = T;
 #ifdef __RAY_DIFFERENTIALS__
-		*domega_in_dx = dTdx;
-		*domega_in_dy = dTdy;
+    *domega_in_dx = dTdx;
+    *domega_in_dy = dTdy;
 #endif
-	}
-	return LABEL_TRANSMIT|LABEL_SINGULAR;
+  }
+  return LABEL_TRANSMIT | LABEL_SINGULAR;
 }
 
 CCL_NAMESPACE_END
 
-#endif  /* __BSDF_REFRACTION_H__ */
+#endif /* __BSDF_REFRACTION_H__ */
diff --git a/intern/cycles/kernel/closure/bsdf_toon.h b/intern/cycles/kernel/closure/bsdf_toon.h
index 097a56f22eb..f37fd228087 100644
--- a/intern/cycles/kernel/closure/bsdf_toon.h
+++ b/intern/cycles/kernel/closure/bsdf_toon.h
@@ -36,183 +36,215 @@
 CCL_NAMESPACE_BEGIN
 
 typedef ccl_addr_space struct ToonBsdf {
-	SHADER_CLOSURE_BASE;
+  SHADER_CLOSURE_BASE;
 
-	float size;
-	float smooth;
+  float size;
+  float smooth;
 } ToonBsdf;
 
 /* DIFFUSE TOON */
 
 ccl_device int bsdf_diffuse_toon_setup(ToonBsdf *bsdf)
 {
-	bsdf->type = CLOSURE_BSDF_DIFFUSE_TOON_ID;
-	bsdf->size = saturate(bsdf->size);
-	bsdf->smooth = saturate(bsdf->smooth);
+  bsdf->type = CLOSURE_BSDF_DIFFUSE_TOON_ID;
+  bsdf->size = saturate(bsdf->size);
+  bsdf->smooth = saturate(bsdf->smooth);
 
-	return SD_BSDF|SD_BSDF_HAS_EVAL;
+  return SD_BSDF | SD_BSDF_HAS_EVAL;
 }
 
 ccl_device bool bsdf_toon_merge(const ShaderClosure *a, const ShaderClosure *b)
 {
-	const ToonBsdf *bsdf_a = (const ToonBsdf*)a;
-	const ToonBsdf *bsdf_b = (const ToonBsdf*)b;
+  const ToonBsdf *bsdf_a = (const ToonBsdf *)a;
+  const ToonBsdf *bsdf_b = (const ToonBsdf *)b;
 
-	return (isequal_float3(bsdf_a->N, bsdf_b->N)) &&
-	       (bsdf_a->size == bsdf_b->size) &&
-		   (bsdf_a->smooth == bsdf_b->smooth);
+  return (isequal_float3(bsdf_a->N, bsdf_b->N)) && (bsdf_a->size == bsdf_b->size) &&
+         (bsdf_a->smooth == bsdf_b->smooth);
 }
 
 ccl_device float3 bsdf_toon_get_intensity(float max_angle, float smooth, float angle)
 {
-	float is;
+  float is;
 
-	if(angle < max_angle)
-		is = 1.0f;
-	else if(angle < (max_angle + smooth) && smooth != 0.0f)
-		is = (1.0f - (angle - max_angle)/smooth);
-	else
-		is = 0.0f;
+  if (angle < max_angle)
+    is = 1.0f;
+  else if (angle < (max_angle + smooth) && smooth != 0.0f)
+    is = (1.0f - (angle - max_angle) / smooth);
+  else
+    is = 0.0f;
 
-	return make_float3(is, is, is);
+  return make_float3(is, is, is);
 }
 
 ccl_device float bsdf_toon_get_sample_angle(float max_angle, float smooth)
 {
-	return fminf(max_angle + smooth, M_PI_2_F);
+  return fminf(max_angle + smooth, M_PI_2_F);
 }
 
-ccl_device float3 bsdf_diffuse_toon_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
+ccl_device float3 bsdf_diffuse_toon_eval_reflect(const ShaderClosure *sc,
+                                                 const float3 I,
+                                                 const float3 omega_in,
+                                                 float *pdf)
 {
-	const ToonBsdf *bsdf = (const ToonBsdf*)sc;
-	float max_angle = bsdf->size*M_PI_2_F;
-	float smooth = bsdf->smooth*M_PI_2_F;
-	float angle = safe_acosf(fmaxf(dot(bsdf->N, omega_in), 0.0f));
+  const ToonBsdf *bsdf = (const ToonBsdf *)sc;
+  float max_angle = bsdf->size * M_PI_2_F;
+  float smooth = bsdf->smooth * M_PI_2_F;
+  float angle = safe_acosf(fmaxf(dot(bsdf->N, omega_in), 0.0f));
 
-	float3 eval = bsdf_toon_get_intensity(max_angle, smooth, angle);
+  float3 eval = bsdf_toon_get_intensity(max_angle, smooth, angle);
 
-	if(eval.x > 0.0f) {
-		float sample_angle = bsdf_toon_get_sample_angle(max_angle, smooth);
+  if (eval.x > 0.0f) {
+    float sample_angle = bsdf_toon_get_sample_angle(max_angle, smooth);
 
-		*pdf = 0.5f * M_1_PI_F / (1.0f - cosf(sample_angle));
-		return *pdf * eval;
-	}
+    *pdf = 0.5f * M_1_PI_F / (1.0f - cosf(sample_angle));
+    return *pdf * eval;
+  }
 
-	return make_float3(0.0f, 0.0f, 0.0f);
+  return make_float3(0.0f, 0.0f, 0.0f);
 }
 
-ccl_device float3 bsdf_diffuse_toon_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
+ccl_device float3 bsdf_diffuse_toon_eval_transmit(const ShaderClosure *sc,
+                                                  const float3 I,
+                                                  const float3 omega_in,
+                                                  float *pdf)
 {
-	return make_float3(0.0f, 0.0f, 0.0f);
+  return make_float3(0.0f, 0.0f, 0.0f);
 }
 
-ccl_device int bsdf_diffuse_toon_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf)
+ccl_device int bsdf_diffuse_toon_sample(const ShaderClosure *sc,
+                                        float3 Ng,
+                                        float3 I,
+                                        float3 dIdx,
+                                        float3 dIdy,
+                                        float randu,
+                                        float randv,
+                                        float3 *eval,
+                                        float3 *omega_in,
+                                        float3 *domega_in_dx,
+                                        float3 *domega_in_dy,
+                                        float *pdf)
 {
-	const ToonBsdf *bsdf = (const ToonBsdf*)sc;
-	float max_angle = bsdf->size*M_PI_2_F;
-	float smooth = bsdf->smooth*M_PI_2_F;
-	float sample_angle = bsdf_toon_get_sample_angle(max_angle, smooth);
-	float angle = sample_angle*randu;
+  const ToonBsdf *bsdf = (const ToonBsdf *)sc;
+  float max_angle = bsdf->size * M_PI_2_F;
+  float smooth = bsdf->smooth * M_PI_2_F;
+  float sample_angle = bsdf_toon_get_sample_angle(max_angle, smooth);
+  float angle = sample_angle * randu;
 
-	if(sample_angle > 0.0f) {
-		sample_uniform_cone(bsdf->N, sample_angle, randu, randv, omega_in, pdf);
+  if (sample_angle > 0.0f) {
+    sample_uniform_cone(bsdf->N, sample_angle, randu, randv, omega_in, pdf);
 
-		if(dot(Ng, *omega_in) > 0.0f) {
-			*eval = *pdf * bsdf_toon_get_intensity(max_angle, smooth, angle);
+    if (dot(Ng, *omega_in) > 0.0f) {
+      *eval = *pdf * bsdf_toon_get_intensity(max_angle, smooth, angle);
 
 #ifdef __RAY_DIFFERENTIALS__
-			// TODO: find a better approximation for the bounce
-			*domega_in_dx = (2.0f * dot(bsdf->N, dIdx)) * bsdf->N - dIdx;
-			*domega_in_dy = (2.0f * dot(bsdf->N, dIdy)) * bsdf->N - dIdy;
+      // TODO: find a better approximation for the bounce
+      *domega_in_dx = (2.0f * dot(bsdf->N, dIdx)) * bsdf->N - dIdx;
+      *domega_in_dy = (2.0f * dot(bsdf->N, dIdy)) * bsdf->N - dIdy;
 #endif
-		}
-		else
-			*pdf = 0.0f;
-	}
-
-	return LABEL_REFLECT | LABEL_DIFFUSE;
+    }
+    else
+      *pdf = 0.0f;
+  }
 
+  return LABEL_REFLECT | LABEL_DIFFUSE;
 }
 
 /* GLOSSY TOON */
 
 ccl_device int bsdf_glossy_toon_setup(ToonBsdf *bsdf)
 {
-	bsdf->type = CLOSURE_BSDF_GLOSSY_TOON_ID;
-	bsdf->size = saturate(bsdf->size);
-	bsdf->smooth = saturate(bsdf->smooth);
+  bsdf->type = CLOSURE_BSDF_GLOSSY_TOON_ID;
+  bsdf->size = saturate(bsdf->size);
+  bsdf->smooth = saturate(bsdf->smooth);
 
-	return SD_BSDF|SD_BSDF_HAS_EVAL;
+  return SD_BSDF | SD_BSDF_HAS_EVAL;
 }
 
-ccl_device float3 bsdf_glossy_toon_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
+ccl_device float3 bsdf_glossy_toon_eval_reflect(const ShaderClosure *sc,
+                                                const float3 I,
+                                                const float3 omega_in,
+                                                float *pdf)
 {
-	const ToonBsdf *bsdf = (const ToonBsdf*)sc;
-	float max_angle = bsdf->size*M_PI_2_F;
-	float smooth = bsdf->smooth*M_PI_2_F;
-	float cosNI = dot(bsdf->N, omega_in);
-	float cosNO = dot(bsdf->N, I);
+  const ToonBsdf *bsdf = (const ToonBsdf *)sc;
+  float max_angle = bsdf->size * M_PI_2_F;
+  float smooth = bsdf->smooth * M_PI_2_F;
+  float cosNI = dot(bsdf->N, omega_in);
+  float cosNO = dot(bsdf->N, I);
 
-	if(cosNI > 0 && cosNO > 0) {
-		/* reflect the view vector */
-		float3 R = (2 * cosNO) * bsdf->N - I;
-		float cosRI = dot(R, omega_in);
+  if (cosNI > 0 && cosNO > 0) {
+    /* reflect the view vector */
+    float3 R = (2 * cosNO) * bsdf->N - I;
+    float cosRI = dot(R, omega_in);
 
-		float angle = safe_acosf(fmaxf(cosRI, 0.0f));
+    float angle = safe_acosf(fmaxf(cosRI, 0.0f));
 
-		float3 eval = bsdf_toon_get_intensity(max_angle, smooth, angle);
-		float sample_angle = bsdf_toon_get_sample_angle(max_angle, smooth);
+    float3 eval = bsdf_toon_get_intensity(max_angle, smooth, angle);
+    float sample_angle = bsdf_toon_get_sample_angle(max_angle, smooth);
 
-		*pdf = 0.5f * M_1_PI_F / (1.0f - cosf(sample_angle));
-		return *pdf * eval;
-	}
+    *pdf = 0.5f * M_1_PI_F / (1.0f - cosf(sample_angle));
+    return *pdf * eval;
+  }
 
-	return make_float3(0.0f, 0.0f, 0.0f);
+  return make_float3(0.0f, 0.0f, 0.0f);
 }
 
-ccl_device float3 bsdf_glossy_toon_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
+ccl_device float3 bsdf_glossy_toon_eval_transmit(const ShaderClosure *sc,
+                                                 const float3 I,
+                                                 const float3 omega_in,
+                                                 float *pdf)
 {
-	return make_float3(0.0f, 0.0f, 0.0f);
+  return make_float3(0.0f, 0.0f, 0.0f);
 }
 
-ccl_device int bsdf_glossy_toon_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf)
+ccl_device int bsdf_glossy_toon_sample(const ShaderClosure *sc,
+                                       float3 Ng,
+                                       float3 I,
+                                       float3 dIdx,
+                                       float3 dIdy,
+                                       float randu,
+                                       float randv,
+                                       float3 *eval,
+                                       float3 *omega_in,
+                                       float3 *domega_in_dx,
+                                       float3 *domega_in_dy,
+                                       float *pdf)
 {
-	const ToonBsdf *bsdf = (const ToonBsdf*)sc;
-	float max_angle = bsdf->size*M_PI_2_F;
-	float smooth = bsdf->smooth*M_PI_2_F;
-	float cosNO = dot(bsdf->N, I);
+  const ToonBsdf *bsdf = (const ToonBsdf *)sc;
+  float max_angle = bsdf->size * M_PI_2_F;
+  float smooth = bsdf->smooth * M_PI_2_F;
+  float cosNO = dot(bsdf->N, I);
 
-	if(cosNO > 0) {
-		/* reflect the view vector */
-		float3 R = (2 * cosNO) * bsdf->N - I;
+  if (cosNO > 0) {
+    /* reflect the view vector */
+    float3 R = (2 * cosNO) * bsdf->N - I;
 
-		float sample_angle = bsdf_toon_get_sample_angle(max_angle, smooth);
-		float angle = sample_angle*randu;
+    float sample_angle = bsdf_toon_get_sample_angle(max_angle, smooth);
+    float angle = sample_angle * randu;
 
-		sample_uniform_cone(R, sample_angle, randu, randv, omega_in, pdf);
+    sample_uniform_cone(R, sample_angle, randu, randv, omega_in, pdf);
 
-		if(dot(Ng, *omega_in) > 0.0f) {
-			float cosNI = dot(bsdf->N, *omega_in);
+    if (dot(Ng, *omega_in) > 0.0f) {
+      float cosNI = dot(bsdf->N, *omega_in);
 
-			/* make sure the direction we chose is still in the right hemisphere */
-			if(cosNI > 0) {
-				*eval = *pdf * bsdf_toon_get_intensity(max_angle, smooth, angle);
+      /* make sure the direction we chose is still in the right hemisphere */
+      if (cosNI > 0) {
+        *eval = *pdf * bsdf_toon_get_intensity(max_angle, smooth, angle);
 
 #ifdef __RAY_DIFFERENTIALS__
-				*domega_in_dx = (2 * dot(bsdf->N, dIdx)) * bsdf->N - dIdx;
-				*domega_in_dy = (2 * dot(bsdf->N, dIdy)) * bsdf->N - dIdy;
+        *domega_in_dx = (2 * dot(bsdf->N, dIdx)) * bsdf->N - dIdx;
+        *domega_in_dy = (2 * dot(bsdf->N, dIdy)) * bsdf->N - dIdy;
 #endif
-			}
-			else
-				*pdf = 0.0f;
-		}
-		else
-			*pdf = 0.0f;
-	}
-
-	return LABEL_GLOSSY | LABEL_REFLECT;
+      }
+      else
+        *pdf = 0.0f;
+    }
+    else
+      *pdf = 0.0f;
+  }
+
+  return LABEL_GLOSSY | LABEL_REFLECT;
 }
 
 CCL_NAMESPACE_END
 
-#endif  /* __BSDF_TOON_H__ */
+#endif /* __BSDF_TOON_H__ */
diff --git a/intern/cycles/kernel/closure/bsdf_transparent.h b/intern/cycles/kernel/closure/bsdf_transparent.h
index 060dff69f52..4e5513499e8 100644
--- a/intern/cycles/kernel/closure/bsdf_transparent.h
+++ b/intern/cycles/kernel/closure/bsdf_transparent.h
@@ -37,73 +37,91 @@ CCL_NAMESPACE_BEGIN
 
 ccl_device void bsdf_transparent_setup(ShaderData *sd, const float3 weight, int path_flag)
 {
-	/* Check cutoff weight. */
-	float sample_weight = fabsf(average(weight));
-	if(!(sample_weight >= CLOSURE_WEIGHT_CUTOFF)) {
-		return;
-	}
+  /* Check cutoff weight. */
+  float sample_weight = fabsf(average(weight));
+  if (!(sample_weight >= CLOSURE_WEIGHT_CUTOFF)) {
+    return;
+  }
 
-	if(sd->flag & SD_TRANSPARENT) {
-		sd->closure_transparent_extinction += weight;
+  if (sd->flag & SD_TRANSPARENT) {
+    sd->closure_transparent_extinction += weight;
 
-		/* Add weight to existing transparent BSDF. */
-		for(int i = 0; i < sd->num_closure; i++) {
-			ShaderClosure *sc = &sd->closure[i];
+    /* Add weight to existing transparent BSDF. */
+    for (int i = 0; i < sd->num_closure; i++) {
+      ShaderClosure *sc = &sd->closure[i];
 
-			if(sc->type == CLOSURE_BSDF_TRANSPARENT_ID) {
-				sc->weight += weight;
-				sc->sample_weight += sample_weight;
-				break;
-			}
-		}
-	}
-	else {
-		sd->flag |= SD_BSDF|SD_TRANSPARENT;
-		sd->closure_transparent_extinction = weight;
+      if (sc->type == CLOSURE_BSDF_TRANSPARENT_ID) {
+        sc->weight += weight;
+        sc->sample_weight += sample_weight;
+        break;
+      }
+    }
+  }
+  else {
+    sd->flag |= SD_BSDF | SD_TRANSPARENT;
+    sd->closure_transparent_extinction = weight;
 
-		if(path_flag & PATH_RAY_TERMINATE) {
-			/* In this case the number of closures is set to zero to disable
-			 * all others, but we still want to get transparency so increase
-			 * the number just for this. */
-			sd->num_closure_left = 1;
-		}
+    if (path_flag & PATH_RAY_TERMINATE) {
+      /* In this case the number of closures is set to zero to disable
+       * all others, but we still want to get transparency so increase
+       * the number just for this. */
+      sd->num_closure_left = 1;
+    }
 
-		/* Create new transparent BSDF. */
-		ShaderClosure *bsdf = closure_alloc(sd, sizeof(ShaderClosure), CLOSURE_BSDF_TRANSPARENT_ID, weight);
+    /* Create new transparent BSDF. */
+    ShaderClosure *bsdf = closure_alloc(
+        sd, sizeof(ShaderClosure), CLOSURE_BSDF_TRANSPARENT_ID, weight);
 
-		if(bsdf) {
-			bsdf->sample_weight = sample_weight;
-			bsdf->N = sd->N;
-		}
-		else if(path_flag & PATH_RAY_TERMINATE) {
-			sd->num_closure_left = 0;
-		}
-	}
+    if (bsdf) {
+      bsdf->sample_weight = sample_weight;
+      bsdf->N = sd->N;
+    }
+    else if (path_flag & PATH_RAY_TERMINATE) {
+      sd->num_closure_left = 0;
+    }
+  }
 }
 
-ccl_device float3 bsdf_transparent_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
+ccl_device float3 bsdf_transparent_eval_reflect(const ShaderClosure *sc,
+                                                const float3 I,
+                                                const float3 omega_in,
+                                                float *pdf)
 {
-	return make_float3(0.0f, 0.0f, 0.0f);
+  return make_float3(0.0f, 0.0f, 0.0f);
 }
 
-ccl_device float3 bsdf_transparent_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
+ccl_device float3 bsdf_transparent_eval_transmit(const ShaderClosure *sc,
+                                                 const float3 I,
+                                                 const float3 omega_in,
+                                                 float *pdf)
 {
-	return make_float3(0.0f, 0.0f, 0.0f);
+  return make_float3(0.0f, 0.0f, 0.0f);
 }
 
-ccl_device int bsdf_transparent_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf)
+ccl_device int bsdf_transparent_sample(const ShaderClosure *sc,
+                                       float3 Ng,
+                                       float3 I,
+                                       float3 dIdx,
+                                       float3 dIdy,
+                                       float randu,
+                                       float randv,
+                                       float3 *eval,
+                                       float3 *omega_in,
+                                       float3 *domega_in_dx,
+                                       float3 *domega_in_dy,
+                                       float *pdf)
 {
-	// only one direction is possible
-	*omega_in = -I;
+  // only one direction is possible
+  *omega_in = -I;
 #ifdef __RAY_DIFFERENTIALS__
-	*domega_in_dx = -dIdx;
-	*domega_in_dy = -dIdy;
+  *domega_in_dx = -dIdx;
+  *domega_in_dy = -dIdy;
 #endif
-	*pdf = 1;
-	*eval = make_float3(1, 1, 1);
-	return LABEL_TRANSMIT|LABEL_TRANSPARENT;
+  *pdf = 1;
+  *eval = make_float3(1, 1, 1);
+  return LABEL_TRANSMIT | LABEL_TRANSPARENT;
 }
 
 CCL_NAMESPACE_END
 
-#endif  /* __BSDF_TRANSPARENT_H__ */
+#endif /* __BSDF_TRANSPARENT_H__ */
diff --git a/intern/cycles/kernel/closure/bsdf_util.h b/intern/cycles/kernel/closure/bsdf_util.h
index 4f3453675c7..a9a27edd7de 100644
--- a/intern/cycles/kernel/closure/bsdf_util.h
+++ b/intern/cycles/kernel/closure/bsdf_util.h
@@ -35,127 +35,134 @@
 
 CCL_NAMESPACE_BEGIN
 
-ccl_device float fresnel_dielectric(
-        float eta, const float3 N,
-        const float3 I, float3 *R, float3 *T,
+ccl_device float fresnel_dielectric(float eta,
+                                    const float3 N,
+                                    const float3 I,
+                                    float3 *R,
+                                    float3 *T,
 #ifdef __RAY_DIFFERENTIALS__
-        const float3 dIdx, const float3 dIdy,
-        float3 *dRdx, float3 *dRdy,
-        float3 *dTdx, float3 *dTdy,
+                                    const float3 dIdx,
+                                    const float3 dIdy,
+                                    float3 *dRdx,
+                                    float3 *dRdy,
+                                    float3 *dTdx,
+                                    float3 *dTdy,
 #endif
-        bool *is_inside)
+                                    bool *is_inside)
 {
-	float cos = dot(N, I), neta;
-	float3 Nn;
+  float cos = dot(N, I), neta;
+  float3 Nn;
 
-	// check which side of the surface we are on
-	if(cos > 0) {
-		// we are on the outside of the surface, going in
-		neta = 1 / eta;
-		Nn   = N;
-		*is_inside = false;
-	}
-	else {
-		// we are inside the surface
-		cos  = -cos;
-		neta = eta;
-		Nn   = -N;
-		*is_inside = true;
-	}
+  // check which side of the surface we are on
+  if (cos > 0) {
+    // we are on the outside of the surface, going in
+    neta = 1 / eta;
+    Nn = N;
+    *is_inside = false;
+  }
+  else {
+    // we are inside the surface
+    cos = -cos;
+    neta = eta;
+    Nn = -N;
+    *is_inside = true;
+  }
 
-	// compute reflection
-	*R = (2 * cos)* Nn - I;
+  // compute reflection
+  *R = (2 * cos) * Nn - I;
 #ifdef __RAY_DIFFERENTIALS__
-	*dRdx = (2 * dot(Nn, dIdx)) * Nn - dIdx;
-	*dRdy = (2 * dot(Nn, dIdy)) * Nn - dIdy;
+  *dRdx = (2 * dot(Nn, dIdx)) * Nn - dIdx;
+  *dRdy = (2 * dot(Nn, dIdy)) * Nn - dIdy;
 #endif
 
-	float arg = 1 -(neta * neta *(1 -(cos * cos)));
-	if(arg < 0) {
-		*T = make_float3(0.0f, 0.0f, 0.0f);
+  float arg = 1 - (neta * neta * (1 - (cos * cos)));
+  if (arg < 0) {
+    *T = make_float3(0.0f, 0.0f, 0.0f);
 #ifdef __RAY_DIFFERENTIALS__
-		*dTdx = make_float3(0.0f, 0.0f, 0.0f);
-		*dTdy = make_float3(0.0f, 0.0f, 0.0f);
+    *dTdx = make_float3(0.0f, 0.0f, 0.0f);
+    *dTdy = make_float3(0.0f, 0.0f, 0.0f);
 #endif
-		return 1; // total internal reflection
-	}
-	else {
-		float dnp = max(sqrtf(arg), 1e-7f);
-		float nK = (neta * cos)- dnp;
-		*T = -(neta * I)+(nK * Nn);
+    return 1;  // total internal reflection
+  }
+  else {
+    float dnp = max(sqrtf(arg), 1e-7f);
+    float nK = (neta * cos) - dnp;
+    *T = -(neta * I) + (nK * Nn);
 #ifdef __RAY_DIFFERENTIALS__
-		*dTdx = -(neta * dIdx) + ((neta - neta * neta * cos / dnp) * dot(dIdx, Nn)) * Nn;
-		*dTdy = -(neta * dIdy) + ((neta - neta * neta * cos / dnp) * dot(dIdy, Nn)) * Nn;
+    *dTdx = -(neta * dIdx) + ((neta - neta * neta * cos / dnp) * dot(dIdx, Nn)) * Nn;
+    *dTdy = -(neta * dIdy) + ((neta - neta * neta * cos / dnp) * dot(dIdy, Nn)) * Nn;
 #endif
-		// compute Fresnel terms
-		float cosTheta1 = cos; // N.R
-		float cosTheta2 = -dot(Nn, *T);
-		float pPara = (cosTheta1 - eta * cosTheta2)/(cosTheta1 + eta * cosTheta2);
-		float pPerp = (eta * cosTheta1 - cosTheta2)/(eta * cosTheta1 + cosTheta2);
-		return 0.5f * (pPara * pPara + pPerp * pPerp);
-	}
+    // compute Fresnel terms
+    float cosTheta1 = cos;  // N.R
+    float cosTheta2 = -dot(Nn, *T);
+    float pPara = (cosTheta1 - eta * cosTheta2) / (cosTheta1 + eta * cosTheta2);
+    float pPerp = (eta * cosTheta1 - cosTheta2) / (eta * cosTheta1 + cosTheta2);
+    return 0.5f * (pPara * pPara + pPerp * pPerp);
+  }
 }
 
 ccl_device float fresnel_dielectric_cos(float cosi, float eta)
 {
-	// compute fresnel reflectance without explicitly computing
-	// the refracted direction
-	float c = fabsf(cosi);
-	float g = eta * eta - 1 + c * c;
-	if(g > 0) {
-		g = sqrtf(g);
-		float A = (g - c)/(g + c);
-		float B = (c *(g + c)- 1)/(c *(g - c)+ 1);
-		return 0.5f * A * A *(1 + B * B);
-	}
-	return 1.0f; // TIR(no refracted component)
+  // compute fresnel reflectance without explicitly computing
+  // the refracted direction
+  float c = fabsf(cosi);
+  float g = eta * eta - 1 + c * c;
+  if (g > 0) {
+    g = sqrtf(g);
+    float A = (g - c) / (g + c);
+    float B = (c * (g + c) - 1) / (c * (g - c) + 1);
+    return 0.5f * A * A * (1 + B * B);
+  }
+  return 1.0f;  // TIR(no refracted component)
 }
 
 ccl_device float3 fresnel_conductor(float cosi, const float3 eta, const float3 k)
 {
-	float3 cosi2 = make_float3(cosi*cosi, cosi*cosi, cosi*cosi);
-	float3 one = make_float3(1.0f, 1.0f, 1.0f);
-	float3 tmp_f = eta * eta + k * k;
-	float3 tmp = tmp_f * cosi2;
-	float3 Rparl2 = (tmp - (2.0f * eta * cosi) + one) /
-					(tmp + (2.0f * eta * cosi) + one);
-	float3 Rperp2 = (tmp_f - (2.0f * eta * cosi) + cosi2) /
-					(tmp_f + (2.0f * eta * cosi) + cosi2);
-	return(Rparl2 + Rperp2) * 0.5f;
+  float3 cosi2 = make_float3(cosi * cosi, cosi * cosi, cosi * cosi);
+  float3 one = make_float3(1.0f, 1.0f, 1.0f);
+  float3 tmp_f = eta * eta + k * k;
+  float3 tmp = tmp_f * cosi2;
+  float3 Rparl2 = (tmp - (2.0f * eta * cosi) + one) / (tmp + (2.0f * eta * cosi) + one);
+  float3 Rperp2 = (tmp_f - (2.0f * eta * cosi) + cosi2) / (tmp_f + (2.0f * eta * cosi) + cosi2);
+  return (Rparl2 + Rperp2) * 0.5f;
 }
 
 ccl_device float schlick_fresnel(float u)
 {
-	float m = clamp(1.0f - u, 0.0f, 1.0f);
-	float m2 = m * m;
-	return m2 * m2 * m; // pow(m, 5)
+  float m = clamp(1.0f - u, 0.0f, 1.0f);
+  float m2 = m * m;
+  return m2 * m2 * m;  // pow(m, 5)
 }
 
 ccl_device float smooth_step(float edge0, float edge1, float x)
 {
-	float result;
-	if(x < edge0) result = 0.0f;
-	else if(x >= edge1) result = 1.0f;
-	else {
-		float t = (x - edge0)/(edge1 - edge0);
-		result = (3.0f-2.0f*t)*(t*t);
-	}
-	return result;
+  float result;
+  if (x < edge0)
+    result = 0.0f;
+  else if (x >= edge1)
+    result = 1.0f;
+  else {
+    float t = (x - edge0) / (edge1 - edge0);
+    result = (3.0f - 2.0f * t) * (t * t);
+  }
+  return result;
 }
 
 /* Calculate the fresnel color which is a blend between white and the F0 color (cspec0) */
-ccl_device_forceinline float3 interpolate_fresnel_color(float3 L, float3 H, float ior, float F0, float3 cspec0) {
-	/* Calculate the fresnel interpolation factor
-	 * The value from fresnel_dielectric_cos(...) has to be normalized because
-	 * the cspec0 keeps the F0 color
-	*/
-	float F0_norm = 1.0f / (1.0f - F0);
-	float FH = (fresnel_dielectric_cos(dot(L, H), ior) - F0) * F0_norm;
+ccl_device_forceinline float3
+interpolate_fresnel_color(float3 L, float3 H, float ior, float F0, float3 cspec0)
+{
+  /* Calculate the fresnel interpolation factor
+   * The value from fresnel_dielectric_cos(...) has to be normalized because
+   * the cspec0 keeps the F0 color
+  */
+  float F0_norm = 1.0f / (1.0f - F0);
+  float FH = (fresnel_dielectric_cos(dot(L, H), ior) - F0) * F0_norm;
 
-	/* Blend between white and a specular color with respect to the fresnel */
-	return cspec0 * (1.0f - FH) + make_float3(1.0f, 1.0f, 1.0f) * FH;
+  /* Blend between white and a specular color with respect to the fresnel */
+  return cspec0 * (1.0f - FH) + make_float3(1.0f, 1.0f, 1.0f) * FH;
 }
 
 CCL_NAMESPACE_END
 
-#endif  /* __BSDF_UTIL_H__ */
+#endif /* __BSDF_UTIL_H__ */
diff --git a/intern/cycles/kernel/closure/bssrdf.h b/intern/cycles/kernel/closure/bssrdf.h
index 98c7f23c288..57804eca269 100644
--- a/intern/cycles/kernel/closure/bssrdf.h
+++ b/intern/cycles/kernel/closure/bssrdf.h
@@ -20,14 +20,14 @@
 CCL_NAMESPACE_BEGIN
 
 typedef ccl_addr_space struct Bssrdf {
-	SHADER_CLOSURE_BASE;
-
-	float3 radius;
-	float3 albedo;
-	float sharpness;
-	float texture_blur;
-	float roughness;
-	float channels;
+  SHADER_CLOSURE_BASE;
+
+  float3 radius;
+  float3 albedo;
+  float sharpness;
+  float texture_blur;
+  float roughness;
+  float channels;
 } Bssrdf;
 
 /* Planar Truncated Gaussian
@@ -41,41 +41,41 @@ typedef ccl_addr_space struct Bssrdf {
 
 ccl_device float bssrdf_gaussian_eval(const float radius, float r)
 {
-	/* integrate (2*pi*r * exp(-r*r/(2*v)))/(2*pi*v)) from 0 to Rm
-	 * = 1 - exp(-Rm*Rm/(2*v)) */
-	const float v = radius*radius*(0.25f*0.25f);
-	const float Rm = sqrtf(v*GAUSS_TRUNCATE);
+  /* integrate (2*pi*r * exp(-r*r/(2*v)))/(2*pi*v)) from 0 to Rm
+   * = 1 - exp(-Rm*Rm/(2*v)) */
+  const float v = radius * radius * (0.25f * 0.25f);
+  const float Rm = sqrtf(v * GAUSS_TRUNCATE);
 
-	if(r >= Rm)
-		return 0.0f;
+  if (r >= Rm)
+    return 0.0f;
 
-	return expf(-r*r/(2.0f*v))/(2.0f*M_PI_F*v);
+  return expf(-r * r / (2.0f * v)) / (2.0f * M_PI_F * v);
 }
 
 ccl_device float bssrdf_gaussian_pdf(const float radius, float r)
 {
-	/* 1.0 - expf(-Rm*Rm/(2*v)) simplified */
-	const float area_truncated = 1.0f - expf(-0.5f*GAUSS_TRUNCATE);
+  /* 1.0 - expf(-Rm*Rm/(2*v)) simplified */
+  const float area_truncated = 1.0f - expf(-0.5f * GAUSS_TRUNCATE);
 
-	return bssrdf_gaussian_eval(radius, r) * (1.0f/(area_truncated));
+  return bssrdf_gaussian_eval(radius, r) * (1.0f / (area_truncated));
 }
 
 ccl_device void bssrdf_gaussian_sample(const float radius, float xi, float *r, float *h)
 {
-	/* xi = integrate (2*pi*r * exp(-r*r/(2*v)))/(2*pi*v)) = -exp(-r^2/(2*v))
-	 * r = sqrt(-2*v*logf(xi)) */
-	const float v = radius*radius*(0.25f*0.25f);
-	const float Rm = sqrtf(v*GAUSS_TRUNCATE);
+  /* xi = integrate (2*pi*r * exp(-r*r/(2*v)))/(2*pi*v)) = -exp(-r^2/(2*v))
+   * r = sqrt(-2*v*logf(xi)) */
+  const float v = radius * radius * (0.25f * 0.25f);
+  const float Rm = sqrtf(v * GAUSS_TRUNCATE);
 
-	/* 1.0 - expf(-Rm*Rm/(2*v)) simplified */
-	const float area_truncated = 1.0f - expf(-0.5f*GAUSS_TRUNCATE);
+  /* 1.0 - expf(-Rm*Rm/(2*v)) simplified */
+  const float area_truncated = 1.0f - expf(-0.5f * GAUSS_TRUNCATE);
 
-	/* r(xi) */
-	const float r_squared = -2.0f*v*logf(1.0f - xi*area_truncated);
-	*r = sqrtf(r_squared);
+  /* r(xi) */
+  const float r_squared = -2.0f * v * logf(1.0f - xi * area_truncated);
+  *r = sqrtf(r_squared);
 
-	/* h^2 + r^2 = Rm^2 */
-	*h = safe_sqrtf(Rm*Rm - r_squared);
+  /* h^2 + r^2 = Rm^2 */
+  *h = safe_sqrtf(Rm * Rm - r_squared);
 }
 
 /* Planar Cubic BSSRDF falloff
@@ -87,97 +87,97 @@ ccl_device void bssrdf_gaussian_sample(const float radius, float xi, float *r, f
 
 ccl_device float bssrdf_cubic_eval(const float radius, const float sharpness, float r)
 {
-	if(sharpness == 0.0f) {
-		const float Rm = radius;
-
-		if(r >= Rm)
-			return 0.0f;
-
-		/* integrate (2*pi*r * 10*(R - r)^3)/(pi * R^5) from 0 to R = 1 */
-		const float Rm5 = (Rm*Rm) * (Rm*Rm) * Rm;
-		const float f = Rm - r;
-		const float num = f*f*f;
-
-		return (10.0f * num) / (Rm5 * M_PI_F);
-
-	}
-	else {
-		float Rm = radius*(1.0f + sharpness);
-
-		if(r >= Rm)
-			return 0.0f;
-
-		/* custom variation with extra sharpness, to match the previous code */
-		const float y = 1.0f/(1.0f + sharpness);
-		float Rmy, ry, ryinv;
-
-		if(sharpness == 1.0f) {
-			Rmy = sqrtf(Rm);
-			ry = sqrtf(r);
-			ryinv = (ry > 0.0f)? 1.0f/ry: 0.0f;
-		}
-		else {
-			Rmy = powf(Rm, y);
-			ry = powf(r, y);
-			ryinv = (r > 0.0f)? powf(r, y - 1.0f): 0.0f;
-		}
-
-		const float Rmy5 = (Rmy*Rmy) * (Rmy*Rmy) * Rmy;
-		const float f = Rmy - ry;
-		const float num = f*(f*f)*(y*ryinv);
-
-		return (10.0f * num) / (Rmy5 * M_PI_F);
-	}
+  if (sharpness == 0.0f) {
+    const float Rm = radius;
+
+    if (r >= Rm)
+      return 0.0f;
+
+    /* integrate (2*pi*r * 10*(R - r)^3)/(pi * R^5) from 0 to R = 1 */
+    const float Rm5 = (Rm * Rm) * (Rm * Rm) * Rm;
+    const float f = Rm - r;
+    const float num = f * f * f;
+
+    return (10.0f * num) / (Rm5 * M_PI_F);
+  }
+  else {
+    float Rm = radius * (1.0f + sharpness);
+
+    if (r >= Rm)
+      return 0.0f;
+
+    /* custom variation with extra sharpness, to match the previous code */
+    const float y = 1.0f / (1.0f + sharpness);
+    float Rmy, ry, ryinv;
+
+    if (sharpness == 1.0f) {
+      Rmy = sqrtf(Rm);
+      ry = sqrtf(r);
+      ryinv = (ry > 0.0f) ? 1.0f / ry : 0.0f;
+    }
+    else {
+      Rmy = powf(Rm, y);
+      ry = powf(r, y);
+      ryinv = (r > 0.0f) ? powf(r, y - 1.0f) : 0.0f;
+    }
+
+    const float Rmy5 = (Rmy * Rmy) * (Rmy * Rmy) * Rmy;
+    const float f = Rmy - ry;
+    const float num = f * (f * f) * (y * ryinv);
+
+    return (10.0f * num) / (Rmy5 * M_PI_F);
+  }
 }
 
 ccl_device float bssrdf_cubic_pdf(const float radius, const float sharpness, float r)
 {
-	return bssrdf_cubic_eval(radius, sharpness, r);
+  return bssrdf_cubic_eval(radius, sharpness, r);
 }
 
 /* solve 10x^2 - 20x^3 + 15x^4 - 4x^5 - xi == 0 */
 ccl_device_forceinline float bssrdf_cubic_quintic_root_find(float xi)
 {
-	/* newton-raphson iteration, usually succeeds in 2-4 iterations, except
-	 * outside 0.02 ... 0.98 where it can go up to 10, so overall performance
-	 * should not be too bad */
-	const float tolerance = 1e-6f;
-	const int max_iteration_count = 10;
-	float x = 0.25f;
-	int i;
+  /* newton-raphson iteration, usually succeeds in 2-4 iterations, except
+   * outside 0.02 ... 0.98 where it can go up to 10, so overall performance
+   * should not be too bad */
+  const float tolerance = 1e-6f;
+  const int max_iteration_count = 10;
+  float x = 0.25f;
+  int i;
 
-	for(i = 0; i < max_iteration_count; i++) {
-		float x2 = x*x;
-		float x3 = x2*x;
-		float nx = (1.0f - x);
+  for (i = 0; i < max_iteration_count; i++) {
+    float x2 = x * x;
+    float x3 = x2 * x;
+    float nx = (1.0f - x);
 
-		float f = 10.0f*x2 - 20.0f*x3 + 15.0f*x2*x2 - 4.0f*x2*x3 - xi;
-		float f_ = 20.0f*(x*nx)*(nx*nx);
+    float f = 10.0f * x2 - 20.0f * x3 + 15.0f * x2 * x2 - 4.0f * x2 * x3 - xi;
+    float f_ = 20.0f * (x * nx) * (nx * nx);
 
-		if(fabsf(f) < tolerance || f_ == 0.0f)
-			break;
+    if (fabsf(f) < tolerance || f_ == 0.0f)
+      break;
 
-		x = saturate(x - f/f_);
-	}
+    x = saturate(x - f / f_);
+  }
 
-	return x;
+  return x;
 }
 
-ccl_device void bssrdf_cubic_sample(const float radius, const float sharpness, float xi, float *r, float *h)
+ccl_device void bssrdf_cubic_sample(
+    const float radius, const float sharpness, float xi, float *r, float *h)
 {
-	float Rm = radius;
-	float r_ = bssrdf_cubic_quintic_root_find(xi);
+  float Rm = radius;
+  float r_ = bssrdf_cubic_quintic_root_find(xi);
 
-	if(sharpness != 0.0f) {
-		r_ = powf(r_, 1.0f + sharpness);
-		Rm *= (1.0f + sharpness);
-	}
+  if (sharpness != 0.0f) {
+    r_ = powf(r_, 1.0f + sharpness);
+    Rm *= (1.0f + sharpness);
+  }
 
-	r_ *= Rm;
-	*r = r_;
+  r_ *= Rm;
+  *r = r_;
 
-	/* h^2 + r^2 = Rm^2 */
-	*h = safe_sqrtf(Rm*Rm - r_*r_);
+  /* h^2 + r^2 = Rm^2 */
+  *h = safe_sqrtf(Rm * Rm - r_ * r_);
 }
 
 /* Approximate Reflectance Profiles
@@ -188,13 +188,13 @@ ccl_device void bssrdf_cubic_sample(const float radius, const float sharpness, f
  * the mean free length, but still not too big so sampling is still
  * effective. Might need some further tweaks.
  */
-#define BURLEY_TRUNCATE     16.0f
-#define BURLEY_TRUNCATE_CDF 0.9963790093708328f // cdf(BURLEY_TRUNCATE)
+#define BURLEY_TRUNCATE 16.0f
+#define BURLEY_TRUNCATE_CDF 0.9963790093708328f  // cdf(BURLEY_TRUNCATE)
 
 ccl_device_inline float bssrdf_burley_fitting(float A)
 {
-	/* Diffuse surface transmission, equation (6). */
-	return 1.9f - A + 3.5f * (A - 0.8f) * (A - 0.8f);
+  /* Diffuse surface transmission, equation (6). */
+  return 1.9f - A + 3.5f * (A - 0.8f) * (A - 0.8f);
 }
 
 /* Scale mean free path length so it gives similar looking result
@@ -202,45 +202,44 @@ ccl_device_inline float bssrdf_burley_fitting(float A)
  */
 ccl_device_inline float3 bssrdf_burley_compatible_mfp(float3 r)
 {
-	return 0.25f * M_1_PI_F * r;
+  return 0.25f * M_1_PI_F * r;
 }
 
 ccl_device void bssrdf_burley_setup(Bssrdf *bssrdf)
 {
-	/* Mean free path length. */
-	const float3 l = bssrdf_burley_compatible_mfp(bssrdf->radius);
-	/* Surface albedo. */
-	const float3 A = bssrdf->albedo;
-	const float3 s = make_float3(bssrdf_burley_fitting(A.x),
-                                 bssrdf_burley_fitting(A.y),
-                                 bssrdf_burley_fitting(A.z));
-
-	bssrdf->radius = l / s;
+  /* Mean free path length. */
+  const float3 l = bssrdf_burley_compatible_mfp(bssrdf->radius);
+  /* Surface albedo. */
+  const float3 A = bssrdf->albedo;
+  const float3 s = make_float3(
+      bssrdf_burley_fitting(A.x), bssrdf_burley_fitting(A.y), bssrdf_burley_fitting(A.z));
+
+  bssrdf->radius = l / s;
 }
 
 ccl_device float bssrdf_burley_eval(const float d, float r)
 {
-	const float Rm = BURLEY_TRUNCATE * d;
-
-	if(r >= Rm)
-		return 0.0f;
-
-	/* Burley refletance profile, equation (3).
-	 *
-	 * NOTES:
-	 * - Surface albedo is already included into sc->weight, no need to
-	 *   multiply by this term here.
-	 * - This is normalized diffuse model, so the equation is mutliplied
-	 *   by 2*pi, which also matches cdf().
-	 */
-	float exp_r_3_d = expf(-r / (3.0f * d));
-	float exp_r_d = exp_r_3_d * exp_r_3_d * exp_r_3_d;
-	return (exp_r_d + exp_r_3_d) / (4.0f*d);
+  const float Rm = BURLEY_TRUNCATE * d;
+
+  if (r >= Rm)
+    return 0.0f;
+
+  /* Burley refletance profile, equation (3).
+   *
+   * NOTES:
+   * - Surface albedo is already included into sc->weight, no need to
+   *   multiply by this term here.
+   * - This is normalized diffuse model, so the equation is mutliplied
+   *   by 2*pi, which also matches cdf().
+   */
+  float exp_r_3_d = expf(-r / (3.0f * d));
+  float exp_r_d = exp_r_3_d * exp_r_3_d * exp_r_3_d;
+  return (exp_r_d + exp_r_3_d) / (4.0f * d);
 }
 
 ccl_device float bssrdf_burley_pdf(const float d, float r)
 {
-	return bssrdf_burley_eval(d, r) * (1.0f/BURLEY_TRUNCATE_CDF);
+  return bssrdf_burley_eval(d, r) * (1.0f / BURLEY_TRUNCATE_CDF);
 }
 
 /* Find the radius for desired CDF value.
@@ -249,52 +248,49 @@ ccl_device float bssrdf_burley_pdf(const float d, float r)
  */
 ccl_device_forceinline float bssrdf_burley_root_find(float xi)
 {
-	const float tolerance = 1e-6f;
-	const int max_iteration_count = 10;
-	/* Do initial guess based on manual curve fitting, this allows us to reduce
-	 * number of iterations to maximum 4 across the [0..1] range. We keep maximum
-	 * number of iteration higher just to be sure we didn't miss root in some
-	 * corner case.
-	 */
-	float r;
-	if(xi <= 0.9f) {
-		r = expf(xi * xi * 2.4f) - 1.0f;
-	}
-	else {
-		/* TODO(sergey): Some nicer curve fit is possible here. */
-		r = 15.0f;
-	}
-	/* Solve against scaled radius. */
-	for(int i = 0; i < max_iteration_count; i++) {
-		float exp_r_3 = expf(-r / 3.0f);
-		float exp_r = exp_r_3 * exp_r_3 * exp_r_3;
-		float f = 1.0f - 0.25f * exp_r - 0.75f * exp_r_3 - xi;
-		float f_ = 0.25f * exp_r + 0.25f * exp_r_3;
-
-		if(fabsf(f) < tolerance || f_ == 0.0f) {
-			break;
-		}
-
-		r = r - f/f_;
-		if(r < 0.0f) {
-			r = 0.0f;
-		}
-	}
-	return r;
+  const float tolerance = 1e-6f;
+  const int max_iteration_count = 10;
+  /* Do initial guess based on manual curve fitting, this allows us to reduce
+   * number of iterations to maximum 4 across the [0..1] range. We keep maximum
+   * number of iteration higher just to be sure we didn't miss root in some
+   * corner case.
+   */
+  float r;
+  if (xi <= 0.9f) {
+    r = expf(xi * xi * 2.4f) - 1.0f;
+  }
+  else {
+    /* TODO(sergey): Some nicer curve fit is possible here. */
+    r = 15.0f;
+  }
+  /* Solve against scaled radius. */
+  for (int i = 0; i < max_iteration_count; i++) {
+    float exp_r_3 = expf(-r / 3.0f);
+    float exp_r = exp_r_3 * exp_r_3 * exp_r_3;
+    float f = 1.0f - 0.25f * exp_r - 0.75f * exp_r_3 - xi;
+    float f_ = 0.25f * exp_r + 0.25f * exp_r_3;
+
+    if (fabsf(f) < tolerance || f_ == 0.0f) {
+      break;
+    }
+
+    r = r - f / f_;
+    if (r < 0.0f) {
+      r = 0.0f;
+    }
+  }
+  return r;
 }
 
-ccl_device void bssrdf_burley_sample(const float d,
-                                     float xi,
-                                     float *r,
-                                     float *h)
+ccl_device void bssrdf_burley_sample(const float d, float xi, float *r, float *h)
 {
-	const float Rm = BURLEY_TRUNCATE * d;
-	const float r_ = bssrdf_burley_root_find(xi * BURLEY_TRUNCATE_CDF) * d;
+  const float Rm = BURLEY_TRUNCATE * d;
+  const float r_ = bssrdf_burley_root_find(xi * BURLEY_TRUNCATE_CDF) * d;
 
-	*r = r_;
+  *r = r_;
 
-	/* h^2 + r^2 = Rm^2 */
-	*h = safe_sqrtf(Rm*Rm - r_*r_);
+  /* h^2 + r^2 = Rm^2 */
+  *h = safe_sqrtf(Rm * Rm - r_ * r_);
 }
 
 /* None BSSRDF falloff
@@ -303,200 +299,195 @@ ccl_device void bssrdf_burley_sample(const float d,
 
 ccl_device float bssrdf_none_eval(const float radius, float r)
 {
-	const float Rm = radius;
-	return (r < Rm)? 1.0f: 0.0f;
+  const float Rm = radius;
+  return (r < Rm) ? 1.0f : 0.0f;
 }
 
 ccl_device float bssrdf_none_pdf(const float radius, float r)
 {
-	/* integrate (2*pi*r)/(pi*Rm*Rm) from 0 to Rm = 1 */
-	const float Rm = radius;
-	const float area = (M_PI_F*Rm*Rm);
+  /* integrate (2*pi*r)/(pi*Rm*Rm) from 0 to Rm = 1 */
+  const float Rm = radius;
+  const float area = (M_PI_F * Rm * Rm);
 
-	return bssrdf_none_eval(radius, r) / area;
+  return bssrdf_none_eval(radius, r) / area;
 }
 
 ccl_device void bssrdf_none_sample(const float radius, float xi, float *r, float *h)
 {
-	/* xi = integrate (2*pi*r)/(pi*Rm*Rm) = r^2/Rm^2
-	 * r = sqrt(xi)*Rm */
-	const float Rm = radius;
-	const float r_ = sqrtf(xi)*Rm;
+  /* xi = integrate (2*pi*r)/(pi*Rm*Rm) = r^2/Rm^2
+   * r = sqrt(xi)*Rm */
+  const float Rm = radius;
+  const float r_ = sqrtf(xi) * Rm;
 
-	*r = r_;
+  *r = r_;
 
-	/* h^2 + r^2 = Rm^2 */
-	*h = safe_sqrtf(Rm*Rm - r_*r_);
+  /* h^2 + r^2 = Rm^2 */
+  *h = safe_sqrtf(Rm * Rm - r_ * r_);
 }
 
 /* Generic */
 
 ccl_device_inline Bssrdf *bssrdf_alloc(ShaderData *sd, float3 weight)
 {
-	Bssrdf *bssrdf = (Bssrdf*)closure_alloc(sd, sizeof(Bssrdf), CLOSURE_NONE_ID, weight);
+  Bssrdf *bssrdf = (Bssrdf *)closure_alloc(sd, sizeof(Bssrdf), CLOSURE_NONE_ID, weight);
 
-	if(bssrdf == NULL) {
-		return NULL;
-	}
+  if (bssrdf == NULL) {
+    return NULL;
+  }
 
-	float sample_weight = fabsf(average(weight));
-	bssrdf->sample_weight = sample_weight;
-	return (sample_weight >= CLOSURE_WEIGHT_CUTOFF) ? bssrdf : NULL;
+  float sample_weight = fabsf(average(weight));
+  bssrdf->sample_weight = sample_weight;
+  return (sample_weight >= CLOSURE_WEIGHT_CUTOFF) ? bssrdf : NULL;
 }
 
 ccl_device int bssrdf_setup(ShaderData *sd, Bssrdf *bssrdf, ClosureType type)
 {
-	int flag = 0;
-	int bssrdf_channels = 3;
-	float3 diffuse_weight = make_float3(0.0f, 0.0f, 0.0f);
-
-	/* Verify if the radii are large enough to sample without precision issues. */
-	if(bssrdf->radius.x < BSSRDF_MIN_RADIUS) {
-		diffuse_weight.x = bssrdf->weight.x;
-		bssrdf->weight.x = 0.0f;
-		bssrdf->radius.x = 0.0f;
-		bssrdf_channels--;
-	}
-	if(bssrdf->radius.y < BSSRDF_MIN_RADIUS) {
-		diffuse_weight.y = bssrdf->weight.y;
-		bssrdf->weight.y = 0.0f;
-		bssrdf->radius.y = 0.0f;
-		bssrdf_channels--;
-	}
-	if(bssrdf->radius.z < BSSRDF_MIN_RADIUS) {
-		diffuse_weight.z = bssrdf->weight.z;
-		bssrdf->weight.z = 0.0f;
-		bssrdf->radius.z = 0.0f;
-		bssrdf_channels--;
-	}
-
-	if(bssrdf_channels < 3) {
-		/* Add diffuse BSDF if any radius too small. */
+  int flag = 0;
+  int bssrdf_channels = 3;
+  float3 diffuse_weight = make_float3(0.0f, 0.0f, 0.0f);
+
+  /* Verify if the radii are large enough to sample without precision issues. */
+  if (bssrdf->radius.x < BSSRDF_MIN_RADIUS) {
+    diffuse_weight.x = bssrdf->weight.x;
+    bssrdf->weight.x = 0.0f;
+    bssrdf->radius.x = 0.0f;
+    bssrdf_channels--;
+  }
+  if (bssrdf->radius.y < BSSRDF_MIN_RADIUS) {
+    diffuse_weight.y = bssrdf->weight.y;
+    bssrdf->weight.y = 0.0f;
+    bssrdf->radius.y = 0.0f;
+    bssrdf_channels--;
+  }
+  if (bssrdf->radius.z < BSSRDF_MIN_RADIUS) {
+    diffuse_weight.z = bssrdf->weight.z;
+    bssrdf->weight.z = 0.0f;
+    bssrdf->radius.z = 0.0f;
+    bssrdf_channels--;
+  }
+
+  if (bssrdf_channels < 3) {
+    /* Add diffuse BSDF if any radius too small. */
 #ifdef __PRINCIPLED__
-		if(type == CLOSURE_BSSRDF_PRINCIPLED_ID ||
-		   type == CLOSURE_BSSRDF_PRINCIPLED_RANDOM_WALK_ID)
-		{
-			float roughness = bssrdf->roughness;
-			float3 N = bssrdf->N;
-
-			PrincipledDiffuseBsdf *bsdf = (PrincipledDiffuseBsdf*)bsdf_alloc(sd, sizeof(PrincipledDiffuseBsdf), diffuse_weight);
-
-			if(bsdf) {
-				bsdf->type = CLOSURE_BSDF_BSSRDF_PRINCIPLED_ID;
-				bsdf->N = N;
-				bsdf->roughness = roughness;
-				flag |= bsdf_principled_diffuse_setup(bsdf);
-			}
-		}
-		else
-#endif  /* __PRINCIPLED__ */
-		{
-			DiffuseBsdf *bsdf = (DiffuseBsdf*)bsdf_alloc(sd, sizeof(DiffuseBsdf), diffuse_weight);
-
-			if(bsdf) {
-				bsdf->type = CLOSURE_BSDF_BSSRDF_ID;
-				bsdf->N = bssrdf->N;
-				flag |= bsdf_diffuse_setup(bsdf);
-			}
-		}
-	}
-
-	/* Setup BSSRDF if radius is large enough. */
-	if(bssrdf_channels > 0) {
-		bssrdf->type = type;
-		bssrdf->channels = bssrdf_channels;
-		bssrdf->sample_weight = fabsf(average(bssrdf->weight)) * bssrdf->channels;
-		bssrdf->texture_blur = saturate(bssrdf->texture_blur);
-		bssrdf->sharpness = saturate(bssrdf->sharpness);
-
-		if(type == CLOSURE_BSSRDF_BURLEY_ID ||
-		   type == CLOSURE_BSSRDF_PRINCIPLED_ID ||
-		   type == CLOSURE_BSSRDF_RANDOM_WALK_ID ||
-		   type == CLOSURE_BSSRDF_PRINCIPLED_RANDOM_WALK_ID)
-		{
-			bssrdf_burley_setup(bssrdf);
-		}
-
-		flag |= SD_BSSRDF;
-	}
-	else {
-		bssrdf->type = type;
-		bssrdf->sample_weight = 0.0f;
-	}
-
-	return flag;
+    if (type == CLOSURE_BSSRDF_PRINCIPLED_ID || type == CLOSURE_BSSRDF_PRINCIPLED_RANDOM_WALK_ID) {
+      float roughness = bssrdf->roughness;
+      float3 N = bssrdf->N;
+
+      PrincipledDiffuseBsdf *bsdf = (PrincipledDiffuseBsdf *)bsdf_alloc(
+          sd, sizeof(PrincipledDiffuseBsdf), diffuse_weight);
+
+      if (bsdf) {
+        bsdf->type = CLOSURE_BSDF_BSSRDF_PRINCIPLED_ID;
+        bsdf->N = N;
+        bsdf->roughness = roughness;
+        flag |= bsdf_principled_diffuse_setup(bsdf);
+      }
+    }
+    else
+#endif /* __PRINCIPLED__ */
+    {
+      DiffuseBsdf *bsdf = (DiffuseBsdf *)bsdf_alloc(sd, sizeof(DiffuseBsdf), diffuse_weight);
+
+      if (bsdf) {
+        bsdf->type = CLOSURE_BSDF_BSSRDF_ID;
+        bsdf->N = bssrdf->N;
+        flag |= bsdf_diffuse_setup(bsdf);
+      }
+    }
+  }
+
+  /* Setup BSSRDF if radius is large enough. */
+  if (bssrdf_channels > 0) {
+    bssrdf->type = type;
+    bssrdf->channels = bssrdf_channels;
+    bssrdf->sample_weight = fabsf(average(bssrdf->weight)) * bssrdf->channels;
+    bssrdf->texture_blur = saturate(bssrdf->texture_blur);
+    bssrdf->sharpness = saturate(bssrdf->sharpness);
+
+    if (type == CLOSURE_BSSRDF_BURLEY_ID || type == CLOSURE_BSSRDF_PRINCIPLED_ID ||
+        type == CLOSURE_BSSRDF_RANDOM_WALK_ID ||
+        type == CLOSURE_BSSRDF_PRINCIPLED_RANDOM_WALK_ID) {
+      bssrdf_burley_setup(bssrdf);
+    }
+
+    flag |= SD_BSSRDF;
+  }
+  else {
+    bssrdf->type = type;
+    bssrdf->sample_weight = 0.0f;
+  }
+
+  return flag;
 }
 
 ccl_device void bssrdf_sample(const ShaderClosure *sc, float xi, float *r, float *h)
 {
-	const Bssrdf *bssrdf = (const Bssrdf*)sc;
-	float radius;
-
-	/* Sample color channel and reuse random number. Only a subset of channels
-	 * may be used if their radius was too small to handle as BSSRDF. */
-	xi *= bssrdf->channels;
-
-	if(xi < 1.0f) {
-		radius = (bssrdf->radius.x > 0.0f)? bssrdf->radius.x:
-		         (bssrdf->radius.y > 0.0f)? bssrdf->radius.y:
-		                                    bssrdf->radius.z;
-	}
-	else if(xi < 2.0f) {
-		xi -= 1.0f;
-		radius = (bssrdf->radius.x > 0.0f)? bssrdf->radius.y:
-		                                    bssrdf->radius.z;
-	}
-	else {
-		xi -= 2.0f;
-		radius = bssrdf->radius.z;
-	}
-
-	/* Sample BSSRDF. */
-	if(bssrdf->type == CLOSURE_BSSRDF_CUBIC_ID) {
-		bssrdf_cubic_sample(radius, bssrdf->sharpness, xi, r, h);
-	}
-	else if(bssrdf->type == CLOSURE_BSSRDF_GAUSSIAN_ID){
-		bssrdf_gaussian_sample(radius, xi, r, h);
-	}
-	else { /*if(bssrdf->type == CLOSURE_BSSRDF_BURLEY_ID || bssrdf->type == CLOSURE_BSSRDF_PRINCIPLED_ID)*/
-		bssrdf_burley_sample(radius, xi, r, h);
-	}
+  const Bssrdf *bssrdf = (const Bssrdf *)sc;
+  float radius;
+
+  /* Sample color channel and reuse random number. Only a subset of channels
+   * may be used if their radius was too small to handle as BSSRDF. */
+  xi *= bssrdf->channels;
+
+  if (xi < 1.0f) {
+    radius = (bssrdf->radius.x > 0.0f) ?
+                 bssrdf->radius.x :
+                 (bssrdf->radius.y > 0.0f) ? bssrdf->radius.y : bssrdf->radius.z;
+  }
+  else if (xi < 2.0f) {
+    xi -= 1.0f;
+    radius = (bssrdf->radius.x > 0.0f) ? bssrdf->radius.y : bssrdf->radius.z;
+  }
+  else {
+    xi -= 2.0f;
+    radius = bssrdf->radius.z;
+  }
+
+  /* Sample BSSRDF. */
+  if (bssrdf->type == CLOSURE_BSSRDF_CUBIC_ID) {
+    bssrdf_cubic_sample(radius, bssrdf->sharpness, xi, r, h);
+  }
+  else if (bssrdf->type == CLOSURE_BSSRDF_GAUSSIAN_ID) {
+    bssrdf_gaussian_sample(radius, xi, r, h);
+  }
+  else { /*if(bssrdf->type == CLOSURE_BSSRDF_BURLEY_ID || bssrdf->type == CLOSURE_BSSRDF_PRINCIPLED_ID)*/
+    bssrdf_burley_sample(radius, xi, r, h);
+  }
 }
 
 ccl_device float bssrdf_channel_pdf(const Bssrdf *bssrdf, float radius, float r)
 {
-	if(radius == 0.0f) {
-		return 0.0f;
-	}
-	else if(bssrdf->type == CLOSURE_BSSRDF_CUBIC_ID) {
-		return bssrdf_cubic_pdf(radius, bssrdf->sharpness, r);
-	}
-	else if(bssrdf->type == CLOSURE_BSSRDF_GAUSSIAN_ID) {
-		return bssrdf_gaussian_pdf(radius, r);
-	}
-	else { /*if(bssrdf->type == CLOSURE_BSSRDF_BURLEY_ID || bssrdf->type == CLOSURE_BSSRDF_PRINCIPLED_ID)*/
-		return bssrdf_burley_pdf(radius, r);
-	}
+  if (radius == 0.0f) {
+    return 0.0f;
+  }
+  else if (bssrdf->type == CLOSURE_BSSRDF_CUBIC_ID) {
+    return bssrdf_cubic_pdf(radius, bssrdf->sharpness, r);
+  }
+  else if (bssrdf->type == CLOSURE_BSSRDF_GAUSSIAN_ID) {
+    return bssrdf_gaussian_pdf(radius, r);
+  }
+  else { /*if(bssrdf->type == CLOSURE_BSSRDF_BURLEY_ID || bssrdf->type == CLOSURE_BSSRDF_PRINCIPLED_ID)*/
+    return bssrdf_burley_pdf(radius, r);
+  }
 }
 
 ccl_device_forceinline float3 bssrdf_eval(const ShaderClosure *sc, float r)
 {
-	const Bssrdf *bssrdf = (const Bssrdf*)sc;
+  const Bssrdf *bssrdf = (const Bssrdf *)sc;
 
-	return make_float3(
-		bssrdf_channel_pdf(bssrdf, bssrdf->radius.x, r),
-		bssrdf_channel_pdf(bssrdf, bssrdf->radius.y, r),
-		bssrdf_channel_pdf(bssrdf, bssrdf->radius.z, r));
+  return make_float3(bssrdf_channel_pdf(bssrdf, bssrdf->radius.x, r),
+                     bssrdf_channel_pdf(bssrdf, bssrdf->radius.y, r),
+                     bssrdf_channel_pdf(bssrdf, bssrdf->radius.z, r));
 }
 
 ccl_device_forceinline float bssrdf_pdf(const ShaderClosure *sc, float r)
 {
-	const Bssrdf *bssrdf = (const Bssrdf*)sc;
-	float3 pdf = bssrdf_eval(sc, r);
+  const Bssrdf *bssrdf = (const Bssrdf *)sc;
+  float3 pdf = bssrdf_eval(sc, r);
 
-	return (pdf.x + pdf.y + pdf.z) / bssrdf->channels;
+  return (pdf.x + pdf.y + pdf.z) / bssrdf->channels;
 }
 
 CCL_NAMESPACE_END
 
-#endif  /* __KERNEL_BSSRDF_H__ */
+#endif /* __KERNEL_BSSRDF_H__ */
diff --git a/intern/cycles/kernel/closure/emissive.h b/intern/cycles/kernel/closure/emissive.h
index a7f4a2a7327..911382e6865 100644
--- a/intern/cycles/kernel/closure/emissive.h
+++ b/intern/cycles/kernel/closure/emissive.h
@@ -36,26 +36,26 @@ CCL_NAMESPACE_BEGIN
 
 ccl_device void background_setup(ShaderData *sd, const float3 weight)
 {
-	if(sd->flag & SD_EMISSION) {
-		sd->closure_emission_background += weight;
-	}
-	else {
-		sd->flag |= SD_EMISSION;
-		sd->closure_emission_background = weight;
-	}
+  if (sd->flag & SD_EMISSION) {
+    sd->closure_emission_background += weight;
+  }
+  else {
+    sd->flag |= SD_EMISSION;
+    sd->closure_emission_background = weight;
+  }
 }
 
 /* EMISSION CLOSURE */
 
 ccl_device void emission_setup(ShaderData *sd, const float3 weight)
 {
-	if(sd->flag & SD_EMISSION) {
-		sd->closure_emission_background += weight;
-	}
-	else {
-		sd->flag |= SD_EMISSION;
-		sd->closure_emission_background = weight;
-	}
+  if (sd->flag & SD_EMISSION) {
+    sd->closure_emission_background += weight;
+  }
+  else {
+    sd->flag |= SD_EMISSION;
+    sd->closure_emission_background = weight;
+  }
 }
 
 /* return the probability distribution function in the direction I,
@@ -63,21 +63,21 @@ ccl_device void emission_setup(ShaderData *sd, const float3 weight)
  * the PDF computed by sample(). */
 ccl_device float emissive_pdf(const float3 Ng, const float3 I)
 {
-	float cosNO = fabsf(dot(Ng, I));
-	return (cosNO > 0.0f)? 1.0f: 0.0f;
+  float cosNO = fabsf(dot(Ng, I));
+  return (cosNO > 0.0f) ? 1.0f : 0.0f;
 }
 
-ccl_device void emissive_sample(const float3 Ng, float randu, float randv,
-	float3 *omega_out, float *pdf)
+ccl_device void emissive_sample(
+    const float3 Ng, float randu, float randv, float3 *omega_out, float *pdf)
 {
-	/* todo: not implemented and used yet */
+  /* todo: not implemented and used yet */
 }
 
 ccl_device float3 emissive_simple_eval(const float3 Ng, const float3 I)
 {
-	float res = emissive_pdf(Ng, I);
+  float res = emissive_pdf(Ng, I);
 
-	return make_float3(res, res, res);
+  return make_float3(res, res, res);
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/closure/volume.h b/intern/cycles/kernel/closure/volume.h
index 872d06c936a..473bc0e8a82 100644
--- a/intern/cycles/kernel/closure/volume.h
+++ b/intern/cycles/kernel/closure/volume.h
@@ -23,21 +23,21 @@ CCL_NAMESPACE_BEGIN
 
 ccl_device void volume_extinction_setup(ShaderData *sd, float3 weight)
 {
-	if(sd->flag & SD_EXTINCTION) {
-		sd->closure_transparent_extinction += weight;
-	}
-	else {
-		sd->flag |= SD_EXTINCTION;
-		sd->closure_transparent_extinction = weight;
-	}
+  if (sd->flag & SD_EXTINCTION) {
+    sd->closure_transparent_extinction += weight;
+  }
+  else {
+    sd->flag |= SD_EXTINCTION;
+    sd->closure_transparent_extinction = weight;
+  }
 }
 
 /* HENYEY-GREENSTEIN CLOSURE */
 
 typedef ccl_addr_space struct HenyeyGreensteinVolume {
-	SHADER_CLOSURE_BASE;
+  SHADER_CLOSURE_BASE;
 
-	float g;
+  float g;
 } HenyeyGreensteinVolume;
 
 /* Given cosine between rays, return probability density that a photon bounces
@@ -45,119 +45,152 @@ typedef ccl_addr_space struct HenyeyGreensteinVolume {
  * uniform sphere. g=0 uniform diffuse-like, g=1 close to sharp single ray. */
 ccl_device float single_peaked_henyey_greenstein(float cos_theta, float g)
 {
-	return ((1.0f - g * g) / safe_powf(1.0f + g * g - 2.0f * g * cos_theta, 1.5f)) * (M_1_PI_F * 0.25f);
+  return ((1.0f - g * g) / safe_powf(1.0f + g * g - 2.0f * g * cos_theta, 1.5f)) *
+         (M_1_PI_F * 0.25f);
 };
 
 ccl_device int volume_henyey_greenstein_setup(HenyeyGreensteinVolume *volume)
 {
-	volume->type = CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID;
+  volume->type = CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID;
 
-	/* clamp anisotropy to avoid delta function */
-	volume->g = signf(volume->g) * min(fabsf(volume->g), 1.0f - 1e-3f);
+  /* clamp anisotropy to avoid delta function */
+  volume->g = signf(volume->g) * min(fabsf(volume->g), 1.0f - 1e-3f);
 
-	return SD_SCATTER;
+  return SD_SCATTER;
 }
 
 ccl_device bool volume_henyey_greenstein_merge(const ShaderClosure *a, const ShaderClosure *b)
 {
-	const HenyeyGreensteinVolume *volume_a = (const HenyeyGreensteinVolume*)a;
-	const HenyeyGreensteinVolume *volume_b = (const HenyeyGreensteinVolume*)b;
+  const HenyeyGreensteinVolume *volume_a = (const HenyeyGreensteinVolume *)a;
+  const HenyeyGreensteinVolume *volume_b = (const HenyeyGreensteinVolume *)b;
 
-	return (volume_a->g == volume_b->g);
+  return (volume_a->g == volume_b->g);
 }
 
-ccl_device float3 volume_henyey_greenstein_eval_phase(const ShaderClosure *sc, const float3 I, float3 omega_in, float *pdf)
+ccl_device float3 volume_henyey_greenstein_eval_phase(const ShaderClosure *sc,
+                                                      const float3 I,
+                                                      float3 omega_in,
+                                                      float *pdf)
 {
-	const HenyeyGreensteinVolume *volume = (const HenyeyGreensteinVolume*)sc;
-	float g = volume->g;
-
-	/* note that I points towards the viewer */
-	if(fabsf(g) < 1e-3f) {
-		*pdf = M_1_PI_F * 0.25f;
-	}
-	else {
-		float cos_theta = dot(-I, omega_in);
-		*pdf = single_peaked_henyey_greenstein(cos_theta, g);
-	}
-
-	return make_float3(*pdf, *pdf, *pdf);
+  const HenyeyGreensteinVolume *volume = (const HenyeyGreensteinVolume *)sc;
+  float g = volume->g;
+
+  /* note that I points towards the viewer */
+  if (fabsf(g) < 1e-3f) {
+    *pdf = M_1_PI_F * 0.25f;
+  }
+  else {
+    float cos_theta = dot(-I, omega_in);
+    *pdf = single_peaked_henyey_greenstein(cos_theta, g);
+  }
+
+  return make_float3(*pdf, *pdf, *pdf);
 }
 
-ccl_device float3 henyey_greenstrein_sample(float3 D, float g, float randu, float randv, float *pdf)
+ccl_device float3
+henyey_greenstrein_sample(float3 D, float g, float randu, float randv, float *pdf)
 {
-	/* match pdf for small g */
-	float cos_theta;
-	bool isotropic = fabsf(g) < 1e-3f;
-
-	if(isotropic) {
-		cos_theta = (1.0f - 2.0f * randu);
-		if(pdf) {
-			*pdf = M_1_PI_F * 0.25f;
-		}
-	}
-	else {
-		float k = (1.0f - g * g) / (1.0f - g + 2.0f * g * randu);
-		cos_theta = (1.0f + g * g - k * k) / (2.0f * g);
-		if(pdf) {
-			*pdf = single_peaked_henyey_greenstein(cos_theta, g);
-		}
-	}
-
-	float sin_theta = safe_sqrtf(1.0f - cos_theta * cos_theta);
-	float phi = M_2PI_F * randv;
-	float3 dir = make_float3(sin_theta * cosf(phi), sin_theta * sinf(phi), cos_theta);
-
-	float3 T, B;
-	make_orthonormals(D, &T, &B);
-	dir = dir.x * T + dir.y * B + dir.z * D;
-
-	return dir;
+  /* match pdf for small g */
+  float cos_theta;
+  bool isotropic = fabsf(g) < 1e-3f;
+
+  if (isotropic) {
+    cos_theta = (1.0f - 2.0f * randu);
+    if (pdf) {
+      *pdf = M_1_PI_F * 0.25f;
+    }
+  }
+  else {
+    float k = (1.0f - g * g) / (1.0f - g + 2.0f * g * randu);
+    cos_theta = (1.0f + g * g - k * k) / (2.0f * g);
+    if (pdf) {
+      *pdf = single_peaked_henyey_greenstein(cos_theta, g);
+    }
+  }
+
+  float sin_theta = safe_sqrtf(1.0f - cos_theta * cos_theta);
+  float phi = M_2PI_F * randv;
+  float3 dir = make_float3(sin_theta * cosf(phi), sin_theta * sinf(phi), cos_theta);
+
+  float3 T, B;
+  make_orthonormals(D, &T, &B);
+  dir = dir.x * T + dir.y * B + dir.z * D;
+
+  return dir;
 }
 
-ccl_device int volume_henyey_greenstein_sample(const ShaderClosure *sc, float3 I, float3 dIdx, float3 dIdy, float randu, float randv,
-	float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf)
+ccl_device int volume_henyey_greenstein_sample(const ShaderClosure *sc,
+                                               float3 I,
+                                               float3 dIdx,
+                                               float3 dIdy,
+                                               float randu,
+                                               float randv,
+                                               float3 *eval,
+                                               float3 *omega_in,
+                                               float3 *domega_in_dx,
+                                               float3 *domega_in_dy,
+                                               float *pdf)
 {
-	const HenyeyGreensteinVolume *volume = (const HenyeyGreensteinVolume*)sc;
-	float g = volume->g;
+  const HenyeyGreensteinVolume *volume = (const HenyeyGreensteinVolume *)sc;
+  float g = volume->g;
 
-	/* note that I points towards the viewer and so is used negated */
-	*omega_in = henyey_greenstrein_sample(-I, g, randu, randv, pdf);
-	*eval = make_float3(*pdf, *pdf, *pdf); /* perfect importance sampling */
+  /* note that I points towards the viewer and so is used negated */
+  *omega_in = henyey_greenstrein_sample(-I, g, randu, randv, pdf);
+  *eval = make_float3(*pdf, *pdf, *pdf); /* perfect importance sampling */
 
 #ifdef __RAY_DIFFERENTIALS__
-	/* todo: implement ray differential estimation */
-	*domega_in_dx = make_float3(0.0f, 0.0f, 0.0f);
-	*domega_in_dy = make_float3(0.0f, 0.0f, 0.0f);
+  /* todo: implement ray differential estimation */
+  *domega_in_dx = make_float3(0.0f, 0.0f, 0.0f);
+  *domega_in_dy = make_float3(0.0f, 0.0f, 0.0f);
 #endif
 
-	return LABEL_VOLUME_SCATTER;
+  return LABEL_VOLUME_SCATTER;
 }
 
 /* VOLUME CLOSURE */
 
-ccl_device float3 volume_phase_eval(const ShaderData *sd, const ShaderClosure *sc, float3 omega_in, float *pdf)
+ccl_device float3 volume_phase_eval(const ShaderData *sd,
+                                    const ShaderClosure *sc,
+                                    float3 omega_in,
+                                    float *pdf)
 {
-	kernel_assert(sc->type == CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID);
+  kernel_assert(sc->type == CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID);
 
-	return volume_henyey_greenstein_eval_phase(sc, sd->I, omega_in, pdf);
+  return volume_henyey_greenstein_eval_phase(sc, sd->I, omega_in, pdf);
 }
 
-ccl_device int volume_phase_sample(const ShaderData *sd, const ShaderClosure *sc, float randu,
-	float randv, float3 *eval, float3 *omega_in, differential3 *domega_in, float *pdf)
+ccl_device int volume_phase_sample(const ShaderData *sd,
+                                   const ShaderClosure *sc,
+                                   float randu,
+                                   float randv,
+                                   float3 *eval,
+                                   float3 *omega_in,
+                                   differential3 *domega_in,
+                                   float *pdf)
 {
-	int label;
-
-	switch(sc->type) {
-		case CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID:
-			label = volume_henyey_greenstein_sample(sc, sd->I, sd->dI.dx, sd->dI.dy, randu, randv, eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
-			break;
-		default:
-			*eval = make_float3(0.0f, 0.0f, 0.0f);
-			label = LABEL_NONE;
-			break;
-	}
-
-	return label;
+  int label;
+
+  switch (sc->type) {
+    case CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID:
+      label = volume_henyey_greenstein_sample(sc,
+                                              sd->I,
+                                              sd->dI.dx,
+                                              sd->dI.dy,
+                                              randu,
+                                              randv,
+                                              eval,
+                                              omega_in,
+                                              &domega_in->dx,
+                                              &domega_in->dy,
+                                              pdf);
+      break;
+    default:
+      *eval = make_float3(0.0f, 0.0f, 0.0f);
+      label = LABEL_NONE;
+      break;
+  }
+
+  return label;
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/filter/filter.h b/intern/cycles/kernel/filter/filter.h
index 4209d69ee73..b067e53a8bf 100644
--- a/intern/cycles/kernel/filter/filter.h
+++ b/intern/cycles/kernel/filter/filter.h
@@ -25,8 +25,8 @@
 
 CCL_NAMESPACE_BEGIN
 
-#define KERNEL_NAME_JOIN(x, y, z) x ## _ ## y ## _ ## z
-#define KERNEL_NAME_EVAL(arch, name)  KERNEL_NAME_JOIN(kernel, arch, name)
+#define KERNEL_NAME_JOIN(x, y, z) x##_##y##_##z
+#define KERNEL_NAME_EVAL(arch, name) KERNEL_NAME_JOIN(kernel, arch, name)
 #define KERNEL_FUNCTION_FULL_NAME(name) KERNEL_NAME_EVAL(KERNEL_ARCH, name)
 
 #define KERNEL_ARCH cpu
@@ -49,4 +49,4 @@ CCL_NAMESPACE_BEGIN
 
 CCL_NAMESPACE_END
 
-#endif  /* __FILTER_H__ */
+#endif /* __FILTER_H__ */
diff --git a/intern/cycles/kernel/filter/filter_defines.h b/intern/cycles/kernel/filter/filter_defines.h
index cb04aac35f4..0e51eeef92f 100644
--- a/intern/cycles/kernel/filter/filter_defines.h
+++ b/intern/cycles/kernel/filter/filter_defines.h
@@ -18,59 +18,56 @@
 #define __FILTER_DEFINES_H__
 
 #define DENOISE_FEATURES 11
-#define TRANSFORM_SIZE (DENOISE_FEATURES*DENOISE_FEATURES)
-#define XTWX_SIZE      (((DENOISE_FEATURES+1)*(DENOISE_FEATURES+2))/2)
-#define XTWY_SIZE      (DENOISE_FEATURES+1)
+#define TRANSFORM_SIZE (DENOISE_FEATURES * DENOISE_FEATURES)
+#define XTWX_SIZE (((DENOISE_FEATURES + 1) * (DENOISE_FEATURES + 2)) / 2)
+#define XTWY_SIZE (DENOISE_FEATURES + 1)
 
 #define DENOISE_MAX_FRAMES 16
 
 typedef struct TileInfo {
-	int offsets[9];
-	int strides[9];
-	int x[4];
-	int y[4];
-	int from_render;
-	int frames[DENOISE_MAX_FRAMES];
-	int num_frames;
-	/* TODO(lukas): CUDA doesn't have uint64_t... */
+  int offsets[9];
+  int strides[9];
+  int x[4];
+  int y[4];
+  int from_render;
+  int frames[DENOISE_MAX_FRAMES];
+  int num_frames;
+  /* TODO(lukas): CUDA doesn't have uint64_t... */
 #ifdef __KERNEL_OPENCL__
-	ccl_global float *buffers[9];
+  ccl_global float *buffers[9];
 #else
-	long long int buffers[9];
+  long long int buffers[9];
 #endif
 } TileInfo;
 
 #ifdef __KERNEL_OPENCL__
-#  define CCL_FILTER_TILE_INFO ccl_global TileInfo* tile_info,  \
-                               ccl_global float *tile_buffer_1, \
-                               ccl_global float *tile_buffer_2, \
-                               ccl_global float *tile_buffer_3, \
-                               ccl_global float *tile_buffer_4, \
-                               ccl_global float *tile_buffer_5, \
-                               ccl_global float *tile_buffer_6, \
-                               ccl_global float *tile_buffer_7, \
-                               ccl_global float *tile_buffer_8, \
-                               ccl_global float *tile_buffer_9
-#  define CCL_FILTER_TILE_INFO_ARG tile_info, \
-                                   tile_buffer_1, tile_buffer_2, tile_buffer_3, \
-                                   tile_buffer_4, tile_buffer_5, tile_buffer_6, \
-                                   tile_buffer_7, tile_buffer_8, tile_buffer_9
-#  define ccl_get_tile_buffer(id) (id == 0 ? tile_buffer_1 \
-                                   : id == 1 ? tile_buffer_2 \
-                                   : id == 2 ? tile_buffer_3 \
-                                   : id == 3 ? tile_buffer_4 \
-                                   : id == 4 ? tile_buffer_5 \
-                                   : id == 5 ? tile_buffer_6 \
-                                   : id == 6 ? tile_buffer_7 \
-                                   : id == 7 ? tile_buffer_8 \
-                                   : tile_buffer_9)
+#  define CCL_FILTER_TILE_INFO \
+    ccl_global TileInfo *tile_info, ccl_global float *tile_buffer_1, \
+        ccl_global float *tile_buffer_2, ccl_global float *tile_buffer_3, \
+        ccl_global float *tile_buffer_4, ccl_global float *tile_buffer_5, \
+        ccl_global float *tile_buffer_6, ccl_global float *tile_buffer_7, \
+        ccl_global float *tile_buffer_8, ccl_global float *tile_buffer_9
+#  define CCL_FILTER_TILE_INFO_ARG \
+    tile_info, tile_buffer_1, tile_buffer_2, tile_buffer_3, tile_buffer_4, tile_buffer_5, \
+        tile_buffer_6, tile_buffer_7, tile_buffer_8, tile_buffer_9
+#  define ccl_get_tile_buffer(id) \
+    (id == 0 ? tile_buffer_1 : \
+               id == 1 ? \
+               tile_buffer_2 : \
+               id == 2 ? \
+               tile_buffer_3 : \
+               id == 3 ? tile_buffer_4 : \
+                         id == 4 ? tile_buffer_5 : \
+                                   id == 5 ? tile_buffer_6 : \
+                                             id == 6 ? tile_buffer_7 : \
+                                                       id == 7 ? tile_buffer_8 : tile_buffer_9)
 #else
 #  ifdef __KERNEL_CUDA__
-#    define CCL_FILTER_TILE_INFO ccl_global TileInfo* tile_info
+#    define CCL_FILTER_TILE_INFO ccl_global TileInfo *tile_info
 #  else
-#    define CCL_FILTER_TILE_INFO TileInfo* tile_info
+#    define CCL_FILTER_TILE_INFO TileInfo *tile_info
 #  endif
 #  define ccl_get_tile_buffer(id) (tile_info->buffers[id])
 #endif
 
-#endif  /* __FILTER_DEFINES_H__*/
+#endif /* __FILTER_DEFINES_H__*/
diff --git a/intern/cycles/kernel/filter/filter_features.h b/intern/cycles/kernel/filter/filter_features.h
index e1ea6487aa9..809ccfe8be6 100644
--- a/intern/cycles/kernel/filter/filter_features.h
+++ b/intern/cycles/kernel/filter/filter_features.h
@@ -14,22 +14,25 @@
  * limitations under the License.
  */
 
- CCL_NAMESPACE_BEGIN
+CCL_NAMESPACE_BEGIN
 
 #define ccl_get_feature(buffer, pass) (buffer)[(pass)*pass_stride]
 
 /* Loop over the pixels in the range [low.x, high.x) x [low.y, high.y).+ * pixel_buffer always points to the current pixel in the first pass.
  * Repeat the loop for every secondary frame if there are any. */
-#define FOR_PIXEL_WINDOW     for(int frame = 0; frame < tile_info->num_frames; frame++) { \
-                                 pixel.z = tile_info->frames[frame]; \
-                                 pixel_buffer = buffer + (low.y - rect.y)*buffer_w + (low.x - rect.x) + frame*frame_stride; \
-                                 for(pixel.y = low.y; pixel.y < high.y; pixel.y++) { \
-                                     for(pixel.x = low.x; pixel.x < high.x; pixel.x++, pixel_buffer++) {
+#define FOR_PIXEL_WINDOW \
+  for (int frame = 0; frame < tile_info->num_frames; frame++) { \
+    pixel.z = tile_info->frames[frame]; \
+    pixel_buffer = buffer + (low.y - rect.y) * buffer_w + (low.x - rect.x) + \
+                   frame * frame_stride; \
+    for (pixel.y = low.y; pixel.y < high.y; pixel.y++) { \
+      for (pixel.x = low.x; pixel.x < high.x; pixel.x++, pixel_buffer++) {
 
-#define END_FOR_PIXEL_WINDOW         } \
-                                     pixel_buffer += buffer_w - (high.x - low.x); \
-                                 } \
-                             }
+#define END_FOR_PIXEL_WINDOW \
+  } \
+  pixel_buffer += buffer_w - (high.x - low.x); \
+  } \
+  }
 
 ccl_device_inline void filter_get_features(int3 pixel,
                                            const ccl_global float *ccl_restrict buffer,
@@ -38,24 +41,24 @@ ccl_device_inline void filter_get_features(int3 pixel,
                                            const float *ccl_restrict mean,
                                            int pass_stride)
 {
-	features[0] = pixel.x;
-	features[1] = pixel.y;
-	features[2] = fabsf(ccl_get_feature(buffer, 0));
-	features[3] = ccl_get_feature(buffer, 1);
-	features[4] = ccl_get_feature(buffer, 2);
-	features[5] = ccl_get_feature(buffer, 3);
-	features[6] = ccl_get_feature(buffer, 4);
-	features[7] = ccl_get_feature(buffer, 5);
-	features[8] = ccl_get_feature(buffer, 6);
-	features[9] = ccl_get_feature(buffer, 7);
-	if(use_time) {
-		features[10] = pixel.z;
-	}
-	if(mean) {
-		for(int i = 0; i < (use_time? 11 : 10); i++) {
-			features[i] -= mean[i];
-		}
-	}
+  features[0] = pixel.x;
+  features[1] = pixel.y;
+  features[2] = fabsf(ccl_get_feature(buffer, 0));
+  features[3] = ccl_get_feature(buffer, 1);
+  features[4] = ccl_get_feature(buffer, 2);
+  features[5] = ccl_get_feature(buffer, 3);
+  features[6] = ccl_get_feature(buffer, 4);
+  features[7] = ccl_get_feature(buffer, 5);
+  features[8] = ccl_get_feature(buffer, 6);
+  features[9] = ccl_get_feature(buffer, 7);
+  if (use_time) {
+    features[10] = pixel.z;
+  }
+  if (mean) {
+    for (int i = 0; i < (use_time ? 11 : 10); i++) {
+      features[i] -= mean[i];
+    }
+  }
 }
 
 ccl_device_inline void filter_get_feature_scales(int3 pixel,
@@ -65,38 +68,39 @@ ccl_device_inline void filter_get_feature_scales(int3 pixel,
                                                  const float *ccl_restrict mean,
                                                  int pass_stride)
 {
-	scales[0] = fabsf(pixel.x - mean[0]);
-	scales[1] = fabsf(pixel.y - mean[1]);
-	scales[2] = fabsf(fabsf(ccl_get_feature(buffer, 0)) - mean[2]);
-	scales[3] = len_squared(make_float3(ccl_get_feature(buffer, 1) - mean[3],
-	                                    ccl_get_feature(buffer, 2) - mean[4],
-	                                    ccl_get_feature(buffer, 3) - mean[5]));
-	scales[4] = fabsf(ccl_get_feature(buffer, 4) - mean[6]);
-	scales[5] = len_squared(make_float3(ccl_get_feature(buffer, 5) - mean[7],
-	                                    ccl_get_feature(buffer, 6) - mean[8],
-	                                    ccl_get_feature(buffer, 7) - mean[9]));
-	if(use_time) {
-		scales[6] = fabsf(pixel.z - mean[10]);
-	}
+  scales[0] = fabsf(pixel.x - mean[0]);
+  scales[1] = fabsf(pixel.y - mean[1]);
+  scales[2] = fabsf(fabsf(ccl_get_feature(buffer, 0)) - mean[2]);
+  scales[3] = len_squared(make_float3(ccl_get_feature(buffer, 1) - mean[3],
+                                      ccl_get_feature(buffer, 2) - mean[4],
+                                      ccl_get_feature(buffer, 3) - mean[5]));
+  scales[4] = fabsf(ccl_get_feature(buffer, 4) - mean[6]);
+  scales[5] = len_squared(make_float3(ccl_get_feature(buffer, 5) - mean[7],
+                                      ccl_get_feature(buffer, 6) - mean[8],
+                                      ccl_get_feature(buffer, 7) - mean[9]));
+  if (use_time) {
+    scales[6] = fabsf(pixel.z - mean[10]);
+  }
 }
 
 ccl_device_inline void filter_calculate_scale(float *scale, bool use_time)
 {
-	scale[0] = 1.0f/max(scale[0], 0.01f);
-	scale[1] = 1.0f/max(scale[1], 0.01f);
-	scale[2] = 1.0f/max(scale[2], 0.01f);
-	if(use_time) {
-		scale[10] = 1.0f/max(scale[6], 0.01f);
-	}
-	scale[6] = 1.0f/max(scale[4], 0.01f);
-	scale[7] = scale[8] = scale[9] = 1.0f/max(sqrtf(scale[5]), 0.01f);
-	scale[3] = scale[4] = scale[5] = 1.0f/max(sqrtf(scale[3]), 0.01f);
+  scale[0] = 1.0f / max(scale[0], 0.01f);
+  scale[1] = 1.0f / max(scale[1], 0.01f);
+  scale[2] = 1.0f / max(scale[2], 0.01f);
+  if (use_time) {
+    scale[10] = 1.0f / max(scale[6], 0.01f);
+  }
+  scale[6] = 1.0f / max(scale[4], 0.01f);
+  scale[7] = scale[8] = scale[9] = 1.0f / max(sqrtf(scale[5]), 0.01f);
+  scale[3] = scale[4] = scale[5] = 1.0f / max(sqrtf(scale[3]), 0.01f);
 }
 
 ccl_device_inline float3 filter_get_color(const ccl_global float *ccl_restrict buffer,
                                           int pass_stride)
 {
-	return make_float3(ccl_get_feature(buffer, 8), ccl_get_feature(buffer, 9), ccl_get_feature(buffer, 10));
+  return make_float3(
+      ccl_get_feature(buffer, 8), ccl_get_feature(buffer, 9), ccl_get_feature(buffer, 10));
 }
 
 ccl_device_inline void design_row_add(float *design_row,
@@ -107,42 +111,44 @@ ccl_device_inline void design_row_add(float *design_row,
                                       float feature,
                                       int transform_row_stride)
 {
-	for(int i = 0; i < rank; i++) {
-		design_row[1+i] += transform[(row*transform_row_stride + i)*stride]*feature;
-	}
+  for (int i = 0; i < rank; i++) {
+    design_row[1 + i] += transform[(row * transform_row_stride + i) * stride] * feature;
+  }
 }
 
 /* Fill the design row. */
-ccl_device_inline void filter_get_design_row_transform(int3 p_pixel,
-                                                       const ccl_global float *ccl_restrict p_buffer,
-                                                       int3 q_pixel,
-                                                       const ccl_global float *ccl_restrict q_buffer,
-                                                       int pass_stride,
-                                                       int rank,
-                                                       float *design_row,
-                                                       const ccl_global float *ccl_restrict transform,
-                                                       int stride,
-                                                       bool use_time)
+ccl_device_inline void filter_get_design_row_transform(
+    int3 p_pixel,
+    const ccl_global float *ccl_restrict p_buffer,
+    int3 q_pixel,
+    const ccl_global float *ccl_restrict q_buffer,
+    int pass_stride,
+    int rank,
+    float *design_row,
+    const ccl_global float *ccl_restrict transform,
+    int stride,
+    bool use_time)
 {
-	int num_features = use_time? 11 : 10;
+  int num_features = use_time ? 11 : 10;
 
-	design_row[0] = 1.0f;
-	math_vector_zero(design_row+1, rank);
+  design_row[0] = 1.0f;
+  math_vector_zero(design_row + 1, rank);
 
-#define DESIGN_ROW_ADD(I, F) design_row_add(design_row, rank, transform, stride, I, F, num_features);
-	DESIGN_ROW_ADD(0, q_pixel.x - p_pixel.x);
-	DESIGN_ROW_ADD(1, q_pixel.y - p_pixel.y);
-	DESIGN_ROW_ADD(2, fabsf(ccl_get_feature(q_buffer, 0)) - fabsf(ccl_get_feature(p_buffer, 0)));
-	DESIGN_ROW_ADD(3,       ccl_get_feature(q_buffer, 1)  -       ccl_get_feature(p_buffer, 1));
-	DESIGN_ROW_ADD(4,       ccl_get_feature(q_buffer, 2)  -       ccl_get_feature(p_buffer, 2));
-	DESIGN_ROW_ADD(5,       ccl_get_feature(q_buffer, 3)  -       ccl_get_feature(p_buffer, 3));
-	DESIGN_ROW_ADD(6,       ccl_get_feature(q_buffer, 4)  -       ccl_get_feature(p_buffer, 4));
-	DESIGN_ROW_ADD(7,       ccl_get_feature(q_buffer, 5)  -       ccl_get_feature(p_buffer, 5));
-	DESIGN_ROW_ADD(8,       ccl_get_feature(q_buffer, 6)  -       ccl_get_feature(p_buffer, 6));
-	DESIGN_ROW_ADD(9,       ccl_get_feature(q_buffer, 7)  -       ccl_get_feature(p_buffer, 7));
-	if(use_time) {
-		DESIGN_ROW_ADD(10, q_pixel.z - p_pixel.z)
-	}
+#define DESIGN_ROW_ADD(I, F) \
+  design_row_add(design_row, rank, transform, stride, I, F, num_features);
+  DESIGN_ROW_ADD(0, q_pixel.x - p_pixel.x);
+  DESIGN_ROW_ADD(1, q_pixel.y - p_pixel.y);
+  DESIGN_ROW_ADD(2, fabsf(ccl_get_feature(q_buffer, 0)) - fabsf(ccl_get_feature(p_buffer, 0)));
+  DESIGN_ROW_ADD(3, ccl_get_feature(q_buffer, 1) - ccl_get_feature(p_buffer, 1));
+  DESIGN_ROW_ADD(4, ccl_get_feature(q_buffer, 2) - ccl_get_feature(p_buffer, 2));
+  DESIGN_ROW_ADD(5, ccl_get_feature(q_buffer, 3) - ccl_get_feature(p_buffer, 3));
+  DESIGN_ROW_ADD(6, ccl_get_feature(q_buffer, 4) - ccl_get_feature(p_buffer, 4));
+  DESIGN_ROW_ADD(7, ccl_get_feature(q_buffer, 5) - ccl_get_feature(p_buffer, 5));
+  DESIGN_ROW_ADD(8, ccl_get_feature(q_buffer, 6) - ccl_get_feature(p_buffer, 6));
+  DESIGN_ROW_ADD(9, ccl_get_feature(q_buffer, 7) - ccl_get_feature(p_buffer, 7));
+  if (use_time) {
+    DESIGN_ROW_ADD(10, q_pixel.z - p_pixel.z)
+  }
 #undef DESIGN_ROW_ADD
 }
 
diff --git a/intern/cycles/kernel/filter/filter_features_sse.h b/intern/cycles/kernel/filter/filter_features_sse.h
index 5dd001ffb93..1e0d6e93453 100644
--- a/intern/cycles/kernel/filter/filter_features_sse.h
+++ b/intern/cycles/kernel/filter/filter_features_sse.h
@@ -22,22 +22,27 @@ CCL_NAMESPACE_BEGIN
  * pixel_buffer always points to the first of the 4 current pixel in the first pass.
  * x4 and y4 contain the coordinates of the four pixels, active_pixels contains a mask that's set for all pixels within the window.
  * Repeat the loop for every secondary frame if there are any. */
-#define FOR_PIXEL_WINDOW_SSE     for(int frame = 0; frame < tile_info->num_frames; frame++) { \
-                                     pixel.z = tile_info->frames[frame]; \
-                                     pixel_buffer = buffer + (low.y - rect.y)*buffer_w + (low.x - rect.x) + frame*frame_stride; \
-                                     float4 t4 = make_float4(pixel.z); \
-                                     for(pixel.y = low.y; pixel.y < high.y; pixel.y++) { \
-                                         float4 y4 = make_float4(pixel.y); \
-                                         for(pixel.x = low.x; pixel.x < high.x; pixel.x += 4, pixel_buffer += 4) { \
-                                             float4 x4 = make_float4(pixel.x) + make_float4(0.0f, 1.0f, 2.0f, 3.0f); \
-                                             int4 active_pixels = x4 < make_float4(high.x);
+#define FOR_PIXEL_WINDOW_SSE \
+  for (int frame = 0; frame < tile_info->num_frames; frame++) { \
+    pixel.z = tile_info->frames[frame]; \
+    pixel_buffer = buffer + (low.y - rect.y) * buffer_w + (low.x - rect.x) + \
+                   frame * frame_stride; \
+    float4 t4 = make_float4(pixel.z); \
+    for (pixel.y = low.y; pixel.y < high.y; pixel.y++) { \
+      float4 y4 = make_float4(pixel.y); \
+      for (pixel.x = low.x; pixel.x < high.x; pixel.x += 4, pixel_buffer += 4) { \
+        float4 x4 = make_float4(pixel.x) + make_float4(0.0f, 1.0f, 2.0f, 3.0f); \
+        int4 active_pixels = x4 < make_float4(high.x);
 
-#define END_FOR_PIXEL_WINDOW_SSE         } \
-                                         pixel_buffer += buffer_w - (high.x - low.x); \
-                                     } \
-                                 }
+#define END_FOR_PIXEL_WINDOW_SSE \
+  } \
+  pixel_buffer += buffer_w - (high.x - low.x); \
+  } \
+  }
 
-ccl_device_inline void filter_get_features_sse(float4 x, float4 y, float4 t,
+ccl_device_inline void filter_get_features_sse(float4 x,
+                                               float4 y,
+                                               float4 t,
                                                int4 active_pixels,
                                                const float *ccl_restrict buffer,
                                                float4 *features,
@@ -45,33 +50,35 @@ ccl_device_inline void filter_get_features_sse(float4 x, float4 y, float4 t,
                                                const float4 *ccl_restrict mean,
                                                int pass_stride)
 {
-	int num_features = use_time? 11 : 10;
+  int num_features = use_time ? 11 : 10;
 
-	features[0] = x;
-	features[1] = y;
-	features[2] = fabs(ccl_get_feature_sse(0));
-	features[3] = ccl_get_feature_sse(1);
-	features[4] = ccl_get_feature_sse(2);
-	features[5] = ccl_get_feature_sse(3);
-	features[6] = ccl_get_feature_sse(4);
-	features[7] = ccl_get_feature_sse(5);
-	features[8] = ccl_get_feature_sse(6);
-	features[9] = ccl_get_feature_sse(7);
-	if(use_time) {
-		features[10] = t;
-	}
+  features[0] = x;
+  features[1] = y;
+  features[2] = fabs(ccl_get_feature_sse(0));
+  features[3] = ccl_get_feature_sse(1);
+  features[4] = ccl_get_feature_sse(2);
+  features[5] = ccl_get_feature_sse(3);
+  features[6] = ccl_get_feature_sse(4);
+  features[7] = ccl_get_feature_sse(5);
+  features[8] = ccl_get_feature_sse(6);
+  features[9] = ccl_get_feature_sse(7);
+  if (use_time) {
+    features[10] = t;
+  }
 
-	if(mean) {
-		for(int i = 0; i < num_features; i++) {
-			features[i] = features[i] - mean[i];
-		}
-	}
-	for(int i = 0; i < num_features; i++) {
-		features[i] = mask(active_pixels, features[i]);
-	}
+  if (mean) {
+    for (int i = 0; i < num_features; i++) {
+      features[i] = features[i] - mean[i];
+    }
+  }
+  for (int i = 0; i < num_features; i++) {
+    features[i] = mask(active_pixels, features[i]);
+  }
 }
 
-ccl_device_inline void filter_get_feature_scales_sse(float4 x, float4 y, float4 t,
+ccl_device_inline void filter_get_feature_scales_sse(float4 x,
+                                                     float4 y,
+                                                     float4 t,
                                                      int4 active_pixels,
                                                      const float *ccl_restrict buffer,
                                                      float4 *scales,
@@ -79,36 +86,34 @@ ccl_device_inline void filter_get_feature_scales_sse(float4 x, float4 y, float4
                                                      const float4 *ccl_restrict mean,
                                                      int pass_stride)
 {
-	scales[0] = fabs(x - mean[0]);
-	scales[1] = fabs(y - mean[1]);
-	scales[2] = fabs(fabs(ccl_get_feature_sse(0)) - mean[2]);
-	scales[3] = sqr(ccl_get_feature_sse(1) - mean[3]) +
-	            sqr(ccl_get_feature_sse(2) - mean[4]) +
-	            sqr(ccl_get_feature_sse(3) - mean[5]);
-	scales[4] = fabs(ccl_get_feature_sse(4) - mean[6]);
-	scales[5] = sqr(ccl_get_feature_sse(5) - mean[7]) +
-	            sqr(ccl_get_feature_sse(6) - mean[8]) +
-	            sqr(ccl_get_feature_sse(7) - mean[9]);
-	if(use_time) {
-		scales[6] = fabs(t - mean[10]);
-	}
+  scales[0] = fabs(x - mean[0]);
+  scales[1] = fabs(y - mean[1]);
+  scales[2] = fabs(fabs(ccl_get_feature_sse(0)) - mean[2]);
+  scales[3] = sqr(ccl_get_feature_sse(1) - mean[3]) + sqr(ccl_get_feature_sse(2) - mean[4]) +
+              sqr(ccl_get_feature_sse(3) - mean[5]);
+  scales[4] = fabs(ccl_get_feature_sse(4) - mean[6]);
+  scales[5] = sqr(ccl_get_feature_sse(5) - mean[7]) + sqr(ccl_get_feature_sse(6) - mean[8]) +
+              sqr(ccl_get_feature_sse(7) - mean[9]);
+  if (use_time) {
+    scales[6] = fabs(t - mean[10]);
+  }
 
-	for(int i = 0; i < (use_time? 7 : 6); i++)
-		scales[i] = mask(active_pixels, scales[i]);
+  for (int i = 0; i < (use_time ? 7 : 6); i++)
+    scales[i] = mask(active_pixels, scales[i]);
 }
 
 ccl_device_inline void filter_calculate_scale_sse(float4 *scale, bool use_time)
 {
-	scale[0] = rcp(max(reduce_max(scale[0]), make_float4(0.01f)));
-	scale[1] = rcp(max(reduce_max(scale[1]), make_float4(0.01f)));
-	scale[2] = rcp(max(reduce_max(scale[2]), make_float4(0.01f)));
-	if(use_time) {
-		scale[10] = rcp(max(reduce_max(scale[6]), make_float4(0.01f)));;
-	}
-	scale[6] = rcp(max(reduce_max(scale[4]), make_float4(0.01f)));
-	scale[7] = scale[8] = scale[9] = rcp(max(reduce_max(sqrt(scale[5])), make_float4(0.01f)));
-	scale[3] = scale[4] = scale[5] = rcp(max(reduce_max(sqrt(scale[3])), make_float4(0.01f)));
+  scale[0] = rcp(max(reduce_max(scale[0]), make_float4(0.01f)));
+  scale[1] = rcp(max(reduce_max(scale[1]), make_float4(0.01f)));
+  scale[2] = rcp(max(reduce_max(scale[2]), make_float4(0.01f)));
+  if (use_time) {
+    scale[10] = rcp(max(reduce_max(scale[6]), make_float4(0.01f)));
+    ;
+  }
+  scale[6] = rcp(max(reduce_max(scale[4]), make_float4(0.01f)));
+  scale[7] = scale[8] = scale[9] = rcp(max(reduce_max(sqrt(scale[5])), make_float4(0.01f)));
+  scale[3] = scale[4] = scale[5] = rcp(max(reduce_max(sqrt(scale[3])), make_float4(0.01f)));
 }
 
-
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/filter/filter_nlm_cpu.h b/intern/cycles/kernel/filter/filter_nlm_cpu.h
index 9eb3c603a4a..a94266a8786 100644
--- a/intern/cycles/kernel/filter/filter_nlm_cpu.h
+++ b/intern/cycles/kernel/filter/filter_nlm_cpu.h
@@ -16,10 +16,11 @@
 
 CCL_NAMESPACE_BEGIN
 
-#define load4_a(buf, ofs) (*((float4*) ((buf) + (ofs))))
-#define load4_u(buf, ofs) load_float4((buf)+(ofs))
+#define load4_a(buf, ofs) (*((float4 *)((buf) + (ofs))))
+#define load4_u(buf, ofs) load_float4((buf) + (ofs))
 
-ccl_device_inline void kernel_filter_nlm_calc_difference(int dx, int dy,
+ccl_device_inline void kernel_filter_nlm_calc_difference(int dx,
+                                                         int dy,
                                                          const float *ccl_restrict weight_image,
                                                          const float *ccl_restrict variance_image,
                                                          const float *ccl_restrict scale_image,
@@ -31,122 +32,117 @@ ccl_device_inline void kernel_filter_nlm_calc_difference(int dx, int dy,
                                                          float a,
                                                          float k_2)
 {
-	/* Strides need to be aligned to 16 bytes. */
-	kernel_assert((stride % 4) == 0 && (channel_offset % 4) == 0);
-
-	int aligned_lowx = rect.x & (~3);
-	const int numChannels = (channel_offset > 0)? 3 : 1;
-	const float4 channel_fac = make_float4(1.0f / numChannels);
-
-	for(int y = rect.y; y < rect.w; y++) {
-		int idx_p = y*stride + aligned_lowx;
-		int idx_q = (y+dy)*stride + aligned_lowx + dx + frame_offset;
-		for(int x = aligned_lowx; x < rect.z; x += 4, idx_p += 4, idx_q += 4) {
-			float4 diff = make_float4(0.0f);
-			float4 scale_fac;
-			if(scale_image) {
-				scale_fac = clamp(load4_a(scale_image, idx_p) / load4_u(scale_image, idx_q),
-				                  make_float4(0.25f), make_float4(4.0f));
-			}
-			else {
-				scale_fac = make_float4(1.0f);
-			}
-			for(int c = 0, chan_ofs = 0; c < numChannels; c++, chan_ofs += channel_offset) {
-				/* idx_p is guaranteed to be aligned, but idx_q isn't. */
-				float4 color_p = load4_a(weight_image, idx_p + chan_ofs);
-				float4 color_q = scale_fac*load4_u(weight_image, idx_q + chan_ofs);
-				float4 cdiff = color_p - color_q;
-				float4 var_p = load4_a(variance_image, idx_p + chan_ofs);
-				float4 var_q = sqr(scale_fac)*load4_u(variance_image, idx_q + chan_ofs);
-				diff += (cdiff*cdiff - a*(var_p + min(var_p, var_q))) / (make_float4(1e-8f) + k_2*(var_p+var_q));
-			}
-			load4_a(difference_image, idx_p) = diff*channel_fac;
-		}
-	}
+  /* Strides need to be aligned to 16 bytes. */
+  kernel_assert((stride % 4) == 0 && (channel_offset % 4) == 0);
+
+  int aligned_lowx = rect.x & (~3);
+  const int numChannels = (channel_offset > 0) ? 3 : 1;
+  const float4 channel_fac = make_float4(1.0f / numChannels);
+
+  for (int y = rect.y; y < rect.w; y++) {
+    int idx_p = y * stride + aligned_lowx;
+    int idx_q = (y + dy) * stride + aligned_lowx + dx + frame_offset;
+    for (int x = aligned_lowx; x < rect.z; x += 4, idx_p += 4, idx_q += 4) {
+      float4 diff = make_float4(0.0f);
+      float4 scale_fac;
+      if (scale_image) {
+        scale_fac = clamp(load4_a(scale_image, idx_p) / load4_u(scale_image, idx_q),
+                          make_float4(0.25f),
+                          make_float4(4.0f));
+      }
+      else {
+        scale_fac = make_float4(1.0f);
+      }
+      for (int c = 0, chan_ofs = 0; c < numChannels; c++, chan_ofs += channel_offset) {
+        /* idx_p is guaranteed to be aligned, but idx_q isn't. */
+        float4 color_p = load4_a(weight_image, idx_p + chan_ofs);
+        float4 color_q = scale_fac * load4_u(weight_image, idx_q + chan_ofs);
+        float4 cdiff = color_p - color_q;
+        float4 var_p = load4_a(variance_image, idx_p + chan_ofs);
+        float4 var_q = sqr(scale_fac) * load4_u(variance_image, idx_q + chan_ofs);
+        diff += (cdiff * cdiff - a * (var_p + min(var_p, var_q))) /
+                (make_float4(1e-8f) + k_2 * (var_p + var_q));
+      }
+      load4_a(difference_image, idx_p) = diff * channel_fac;
+    }
+  }
 }
 
-ccl_device_inline void kernel_filter_nlm_blur(const float *ccl_restrict difference_image,
-                                              float *out_image,
-                                              int4 rect,
-                                              int stride,
-                                              int f)
+ccl_device_inline void kernel_filter_nlm_blur(
+    const float *ccl_restrict difference_image, float *out_image, int4 rect, int stride, int f)
 {
-	int aligned_lowx = round_down(rect.x, 4);
-	for(int y = rect.y; y < rect.w; y++) {
-		const int low = max(rect.y, y-f);
-		const int high = min(rect.w, y+f+1);
-		for(int x = aligned_lowx; x < rect.z; x += 4) {
-			load4_a(out_image, y*stride + x) = make_float4(0.0f);
-		}
-		for(int y1 = low; y1 < high; y1++) {
-			for(int x = aligned_lowx; x < rect.z; x += 4) {
-				load4_a(out_image, y*stride + x) += load4_a(difference_image, y1*stride + x);
-			}
-		}
-		float fac = 1.0f/(high - low);
-		for(int x = aligned_lowx; x < rect.z; x += 4) {
-			load4_a(out_image, y*stride + x) *= fac;
-		}
-	}
+  int aligned_lowx = round_down(rect.x, 4);
+  for (int y = rect.y; y < rect.w; y++) {
+    const int low = max(rect.y, y - f);
+    const int high = min(rect.w, y + f + 1);
+    for (int x = aligned_lowx; x < rect.z; x += 4) {
+      load4_a(out_image, y * stride + x) = make_float4(0.0f);
+    }
+    for (int y1 = low; y1 < high; y1++) {
+      for (int x = aligned_lowx; x < rect.z; x += 4) {
+        load4_a(out_image, y * stride + x) += load4_a(difference_image, y1 * stride + x);
+      }
+    }
+    float fac = 1.0f / (high - low);
+    for (int x = aligned_lowx; x < rect.z; x += 4) {
+      load4_a(out_image, y * stride + x) *= fac;
+    }
+  }
 }
 
-ccl_device_inline void nlm_blur_horizontal(const float *ccl_restrict difference_image,
-                                           float *out_image,
-                                           int4 rect,
-                                           int stride,
-                                           int f)
+ccl_device_inline void nlm_blur_horizontal(
+    const float *ccl_restrict difference_image, float *out_image, int4 rect, int stride, int f)
 {
-	int aligned_lowx = round_down(rect.x, 4);
-	for(int y = rect.y; y < rect.w; y++) {
-		for(int x = aligned_lowx; x < rect.z; x += 4) {
-			load4_a(out_image, y*stride + x) = make_float4(0.0f);
-		}
-	}
-
-	for(int dx = -f; dx <= f; dx++) {
-		aligned_lowx = round_down(rect.x - min(0, dx), 4);
-		int highx = rect.z - max(0, dx);
-		int4 lowx4 = make_int4(rect.x - min(0, dx));
-		int4 highx4 = make_int4(rect.z - max(0, dx));
-		for(int y = rect.y; y < rect.w; y++) {
-			for(int x = aligned_lowx; x < highx; x += 4) {
-				int4 x4 = make_int4(x) + make_int4(0, 1, 2, 3);
-				int4 active = (x4 >= lowx4) & (x4 < highx4);
-
-				float4 diff = load4_u(difference_image, y*stride + x + dx);
-				load4_a(out_image, y*stride + x) += mask(active, diff);
-			}
-		}
-	}
-
-	aligned_lowx = round_down(rect.x, 4);
-	for(int y = rect.y; y < rect.w; y++) {
-		for(int x = aligned_lowx; x < rect.z; x += 4) {
-			float4 x4 = make_float4(x) + make_float4(0.0f, 1.0f, 2.0f, 3.0f);
-			float4 low = max(make_float4(rect.x), x4 - make_float4(f));
-			float4 high = min(make_float4(rect.z), x4 + make_float4(f+1));
-			load4_a(out_image, y*stride + x) *= rcp(high - low);
-		}
-	}
+  int aligned_lowx = round_down(rect.x, 4);
+  for (int y = rect.y; y < rect.w; y++) {
+    for (int x = aligned_lowx; x < rect.z; x += 4) {
+      load4_a(out_image, y * stride + x) = make_float4(0.0f);
+    }
+  }
+
+  for (int dx = -f; dx <= f; dx++) {
+    aligned_lowx = round_down(rect.x - min(0, dx), 4);
+    int highx = rect.z - max(0, dx);
+    int4 lowx4 = make_int4(rect.x - min(0, dx));
+    int4 highx4 = make_int4(rect.z - max(0, dx));
+    for (int y = rect.y; y < rect.w; y++) {
+      for (int x = aligned_lowx; x < highx; x += 4) {
+        int4 x4 = make_int4(x) + make_int4(0, 1, 2, 3);
+        int4 active = (x4 >= lowx4) & (x4 < highx4);
+
+        float4 diff = load4_u(difference_image, y * stride + x + dx);
+        load4_a(out_image, y * stride + x) += mask(active, diff);
+      }
+    }
+  }
+
+  aligned_lowx = round_down(rect.x, 4);
+  for (int y = rect.y; y < rect.w; y++) {
+    for (int x = aligned_lowx; x < rect.z; x += 4) {
+      float4 x4 = make_float4(x) + make_float4(0.0f, 1.0f, 2.0f, 3.0f);
+      float4 low = max(make_float4(rect.x), x4 - make_float4(f));
+      float4 high = min(make_float4(rect.z), x4 + make_float4(f + 1));
+      load4_a(out_image, y * stride + x) *= rcp(high - low);
+    }
+  }
 }
 
-ccl_device_inline void kernel_filter_nlm_calc_weight(const float *ccl_restrict difference_image,
-                                                     float *out_image,
-                                                     int4 rect,
-                                                     int stride,
-                                                     int f)
+ccl_device_inline void kernel_filter_nlm_calc_weight(
+    const float *ccl_restrict difference_image, float *out_image, int4 rect, int stride, int f)
 {
-	nlm_blur_horizontal(difference_image, out_image, rect, stride, f);
-
-	int aligned_lowx = round_down(rect.x, 4);
-	for(int y = rect.y; y < rect.w; y++) {
-		for(int x = aligned_lowx; x < rect.z; x += 4) {
-			load4_a(out_image, y*stride + x) = fast_expf4(-max(load4_a(out_image, y*stride + x), make_float4(0.0f)));
-		}
-	}
+  nlm_blur_horizontal(difference_image, out_image, rect, stride, f);
+
+  int aligned_lowx = round_down(rect.x, 4);
+  for (int y = rect.y; y < rect.w; y++) {
+    for (int x = aligned_lowx; x < rect.z; x += 4) {
+      load4_a(out_image, y * stride + x) = fast_expf4(
+          -max(load4_a(out_image, y * stride + x), make_float4(0.0f)));
+    }
+  }
 }
 
-ccl_device_inline void kernel_filter_nlm_update_output(int dx, int dy,
+ccl_device_inline void kernel_filter_nlm_update_output(int dx,
+                                                       int dy,
                                                        const float *ccl_restrict difference_image,
                                                        const float *ccl_restrict image,
                                                        float *temp_image,
@@ -157,33 +153,36 @@ ccl_device_inline void kernel_filter_nlm_update_output(int dx, int dy,
                                                        int stride,
                                                        int f)
 {
-	nlm_blur_horizontal(difference_image, temp_image, rect, stride, f);
+  nlm_blur_horizontal(difference_image, temp_image, rect, stride, f);
 
-	int aligned_lowx = round_down(rect.x, 4);
-	for(int y = rect.y; y < rect.w; y++) {
-		for(int x = aligned_lowx; x < rect.z; x += 4) {
-			int4 x4 = make_int4(x) + make_int4(0, 1, 2, 3);
-			int4 active = (x4 >= make_int4(rect.x)) & (x4 < make_int4(rect.z));
+  int aligned_lowx = round_down(rect.x, 4);
+  for (int y = rect.y; y < rect.w; y++) {
+    for (int x = aligned_lowx; x < rect.z; x += 4) {
+      int4 x4 = make_int4(x) + make_int4(0, 1, 2, 3);
+      int4 active = (x4 >= make_int4(rect.x)) & (x4 < make_int4(rect.z));
 
-			int idx_p = y*stride + x, idx_q = (y+dy)*stride + (x+dx);
+      int idx_p = y * stride + x, idx_q = (y + dy) * stride + (x + dx);
 
-			float4 weight = load4_a(temp_image, idx_p);
-			load4_a(accum_image, idx_p) += mask(active, weight);
+      float4 weight = load4_a(temp_image, idx_p);
+      load4_a(accum_image, idx_p) += mask(active, weight);
 
-			float4 val = load4_u(image, idx_q);
-			if(channel_offset) {
-				val += load4_u(image, idx_q + channel_offset);
-				val += load4_u(image, idx_q + 2*channel_offset);
-				val *= 1.0f/3.0f;
-			}
+      float4 val = load4_u(image, idx_q);
+      if (channel_offset) {
+        val += load4_u(image, idx_q + channel_offset);
+        val += load4_u(image, idx_q + 2 * channel_offset);
+        val *= 1.0f / 3.0f;
+      }
 
-			load4_a(out_image, idx_p) += mask(active, weight*val);
-		}
-	}
+      load4_a(out_image, idx_p) += mask(active, weight * val);
+    }
+  }
 }
 
-ccl_device_inline void kernel_filter_nlm_construct_gramian(int dx, int dy, int t,
-                                                           const float *ccl_restrict difference_image,
+ccl_device_inline void kernel_filter_nlm_construct_gramian(int dx,
+                                                           int dy,
+                                                           int t,
+                                                           const float *ccl_restrict
+                                                               difference_image,
                                                            const float *ccl_restrict buffer,
                                                            float *transform,
                                                            int *rank,
@@ -191,40 +190,49 @@ ccl_device_inline void kernel_filter_nlm_construct_gramian(int dx, int dy, int t
                                                            float3 *XtWY,
                                                            int4 rect,
                                                            int4 filter_window,
-                                                           int stride, int f,
+                                                           int stride,
+                                                           int f,
                                                            int pass_stride,
                                                            int frame_offset,
                                                            bool use_time)
 {
-	int4 clip_area = rect_clip(rect, filter_window);
-	/* fy and fy are in filter-window-relative coordinates, while x and y are in feature-window-relative coordinates. */
-	for(int y = clip_area.y; y < clip_area.w; y++) {
-		for(int x = clip_area.x; x < clip_area.z; x++) {
-			const int low = max(rect.x, x-f);
-			const int high = min(rect.z, x+f+1);
-			float sum = 0.0f;
-			for(int x1 = low; x1 < high; x1++) {
-				sum += difference_image[y*stride + x1];
-			}
-			float weight = sum * (1.0f/(high - low));
-
-			int storage_ofs = coord_to_local_index(filter_window, x, y);
-			float  *l_transform = transform + storage_ofs*TRANSFORM_SIZE;
-			float  *l_XtWX = XtWX + storage_ofs*XTWX_SIZE;
-			float3 *l_XtWY = XtWY + storage_ofs*XTWY_SIZE;
-			int    *l_rank = rank + storage_ofs;
-
-			kernel_filter_construct_gramian(x, y, 1,
-			                                dx, dy, t,
-			                                stride,
-			                                pass_stride,
-			                                frame_offset,
-			                                use_time,
-			                                buffer,
-			                                l_transform, l_rank,
-			                                weight, l_XtWX, l_XtWY, 0);
-		}
-	}
+  int4 clip_area = rect_clip(rect, filter_window);
+  /* fx and fy are in filter-window-relative coordinates, while x and y are in feature-window-relative coordinates. */
+  for (int y = clip_area.y; y < clip_area.w; y++) {
+    for (int x = clip_area.x; x < clip_area.z; x++) {
+      const int low = max(rect.x, x - f);
+      const int high = min(rect.z, x + f + 1);
+      float sum = 0.0f;
+      for (int x1 = low; x1 < high; x1++) {
+        sum += difference_image[y * stride + x1];
+      }
+      float weight = sum * (1.0f / (high - low));
+
+      int storage_ofs = coord_to_local_index(filter_window, x, y);
+      float *l_transform = transform + storage_ofs * TRANSFORM_SIZE;
+      float *l_XtWX = XtWX + storage_ofs * XTWX_SIZE;
+      float3 *l_XtWY = XtWY + storage_ofs * XTWY_SIZE;
+      int *l_rank = rank + storage_ofs;
+
+      kernel_filter_construct_gramian(x,
+                                      y,
+                                      1,
+                                      dx,
+                                      dy,
+                                      t,
+                                      stride,
+                                      pass_stride,
+                                      frame_offset,
+                                      use_time,
+                                      buffer,
+                                      l_transform,
+                                      l_rank,
+                                      weight,
+                                      l_XtWX,
+                                      l_XtWY,
+                                      0);
+    }
+  }
 }
 
 ccl_device_inline void kernel_filter_nlm_normalize(float *out_image,
@@ -232,11 +240,11 @@ ccl_device_inline void kernel_filter_nlm_normalize(float *out_image,
                                                    int4 rect,
                                                    int w)
 {
-	for(int y = rect.y; y < rect.w; y++) {
-		for(int x = rect.x; x < rect.z; x++) {
-			out_image[y*w+x] /= accum_image[y*w+x];
-		}
-	}
+  for (int y = rect.y; y < rect.w; y++) {
+    for (int x = rect.x; x < rect.z; x++) {
+      out_image[y * w + x] /= accum_image[y * w + x];
+    }
+  }
 }
 
 #undef load4_a
diff --git a/intern/cycles/kernel/filter/filter_nlm_gpu.h b/intern/cycles/kernel/filter/filter_nlm_gpu.h
index 12636393243..650c743f34f 100644
--- a/intern/cycles/kernel/filter/filter_nlm_gpu.h
+++ b/intern/cycles/kernel/filter/filter_nlm_gpu.h
@@ -24,203 +24,232 @@ CCL_NAMESPACE_BEGIN
  * Window is the rect that should be processed.
  * co is filled with (x, y, dx, dy).
  */
-ccl_device_inline bool get_nlm_coords_window(int w, int h, int r, int stride,
-                                             int4 *rect, int4 *co, int *ofs,
-                                             int4 window)
+ccl_device_inline bool get_nlm_coords_window(
+    int w, int h, int r, int stride, int4 *rect, int4 *co, int *ofs, int4 window)
 {
-	/* Determine the pixel offset that this thread should apply. */
-	int s = 2*r+1;
-	int si = ccl_global_id(1);
-	int sx = si % s;
-	int sy = si / s;
-	if(sy >= s) {
-		return false;
-	}
-
-	/* Pixels still need to lie inside the denoising buffer after applying the offset,
-	 * so determine the area for which this is the case. */
-	int dx = sx - r;
-	int dy = sy - r;
-
-	*rect = make_int4(max(0, -dx),     max(0, -dy),
-	              w - max(0,  dx), h - max(0,  dy));
-
-	/* Find the intersection of the area that we want to process (window) and the area
-	 * that can be processed (rect) to get the final area for this offset. */
-	int4 clip_area = rect_clip(window, *rect);
-
-	/* If the radius is larger than one of the sides of the window,
-	 * there will be shifts for which there is no usable pixel at all. */
-	if(!rect_is_valid(clip_area)) {
-		return false;
-	}
-
-	/* Map the linear thread index to pixels inside the clip area. */
-	int x, y;
-	if(!local_index_to_coord(clip_area, ccl_global_id(0), &x, &y)) {
-		return false;
-	}
-
-	*co = make_int4(x, y, dx, dy);
-
-	*ofs = (sy*s + sx) * stride;
-
-	return true;
+  /* Determine the pixel offset that this thread should apply. */
+  int s = 2 * r + 1;
+  int si = ccl_global_id(1);
+  int sx = si % s;
+  int sy = si / s;
+  if (sy >= s) {
+    return false;
+  }
+
+  /* Pixels still need to lie inside the denoising buffer after applying the offset,
+   * so determine the area for which this is the case. */
+  int dx = sx - r;
+  int dy = sy - r;
+
+  *rect = make_int4(max(0, -dx), max(0, -dy), w - max(0, dx), h - max(0, dy));
+
+  /* Find the intersection of the area that we want to process (window) and the area
+   * that can be processed (rect) to get the final area for this offset. */
+  int4 clip_area = rect_clip(window, *rect);
+
+  /* If the radius is larger than one of the sides of the window,
+   * there will be shifts for which there is no usable pixel at all. */
+  if (!rect_is_valid(clip_area)) {
+    return false;
+  }
+
+  /* Map the linear thread index to pixels inside the clip area. */
+  int x, y;
+  if (!local_index_to_coord(clip_area, ccl_global_id(0), &x, &y)) {
+    return false;
+  }
+
+  *co = make_int4(x, y, dx, dy);
+
+  *ofs = (sy * s + sx) * stride;
+
+  return true;
 }
 
-ccl_device_inline bool get_nlm_coords(int w, int h, int r, int stride,
-                                      int4 *rect, int4 *co, int *ofs)
+ccl_device_inline bool get_nlm_coords(
+    int w, int h, int r, int stride, int4 *rect, int4 *co, int *ofs)
 {
-	return get_nlm_coords_window(w, h, r, stride, rect, co, ofs, make_int4(0, 0, w, h));
+  return get_nlm_coords_window(w, h, r, stride, rect, co, ofs, make_int4(0, 0, w, h));
 }
 
-ccl_device_inline void kernel_filter_nlm_calc_difference(int x, int y,
-                                                         int dx, int dy,
-                                                         const ccl_global float *ccl_restrict weight_image,
-                                                         const ccl_global float *ccl_restrict variance_image,
-                                                         const ccl_global float *ccl_restrict scale_image,
-                                                         ccl_global float *difference_image,
-                                                         int4 rect, int stride,
-                                                         int channel_offset,
-                                                         int frame_offset,
-                                                         float a, float k_2)
+ccl_device_inline void kernel_filter_nlm_calc_difference(
+    int x,
+    int y,
+    int dx,
+    int dy,
+    const ccl_global float *ccl_restrict weight_image,
+    const ccl_global float *ccl_restrict variance_image,
+    const ccl_global float *ccl_restrict scale_image,
+    ccl_global float *difference_image,
+    int4 rect,
+    int stride,
+    int channel_offset,
+    int frame_offset,
+    float a,
+    float k_2)
 {
-	int idx_p = y*stride + x, idx_q = (y+dy)*stride + (x+dx) + frame_offset;
-	int numChannels = channel_offset? 3 : 1;
-
-	float diff = 0.0f;
-	float scale_fac = 1.0f;
-	if(scale_image) {
-		scale_fac = clamp(scale_image[idx_p] / scale_image[idx_q], 0.25f, 4.0f);
-	}
-
-	for(int c = 0; c < numChannels; c++, idx_p += channel_offset, idx_q += channel_offset) {
-		float cdiff = weight_image[idx_p] - scale_fac*weight_image[idx_q];
-		float pvar = variance_image[idx_p];
-		float qvar = sqr(scale_fac)*variance_image[idx_q];
-		diff += (cdiff*cdiff - a*(pvar + min(pvar, qvar))) / (1e-8f + k_2*(pvar+qvar));
-	}
-	if(numChannels > 1) {
-		diff *= 1.0f/numChannels;
-	}
-	difference_image[y*stride + x] = diff;
+  int idx_p = y * stride + x, idx_q = (y + dy) * stride + (x + dx) + frame_offset;
+  int numChannels = channel_offset ? 3 : 1;
+
+  float diff = 0.0f;
+  float scale_fac = 1.0f;
+  if (scale_image) {
+    scale_fac = clamp(scale_image[idx_p] / scale_image[idx_q], 0.25f, 4.0f);
+  }
+
+  for (int c = 0; c < numChannels; c++, idx_p += channel_offset, idx_q += channel_offset) {
+    float cdiff = weight_image[idx_p] - scale_fac * weight_image[idx_q];
+    float pvar = variance_image[idx_p];
+    float qvar = sqr(scale_fac) * variance_image[idx_q];
+    diff += (cdiff * cdiff - a * (pvar + min(pvar, qvar))) / (1e-8f + k_2 * (pvar + qvar));
+  }
+  if (numChannels > 1) {
+    diff *= 1.0f / numChannels;
+  }
+  difference_image[y * stride + x] = diff;
 }
 
-ccl_device_inline void kernel_filter_nlm_blur(int x, int y,
-                                              const ccl_global float *ccl_restrict difference_image,
+ccl_device_inline void kernel_filter_nlm_blur(int x,
+                                              int y,
+                                              const ccl_global float *ccl_restrict
+                                                  difference_image,
                                               ccl_global float *out_image,
-                                              int4 rect, int stride, int f)
+                                              int4 rect,
+                                              int stride,
+                                              int f)
 {
-	float sum = 0.0f;
-	const int low = max(rect.y, y-f);
-	const int high = min(rect.w, y+f+1);
-	for(int y1 = low; y1 < high; y1++) {
-		sum += difference_image[y1*stride + x];
-	}
-	sum *= 1.0f/(high-low);
-	out_image[y*stride + x] = sum;
+  float sum = 0.0f;
+  const int low = max(rect.y, y - f);
+  const int high = min(rect.w, y + f + 1);
+  for (int y1 = low; y1 < high; y1++) {
+    sum += difference_image[y1 * stride + x];
+  }
+  sum *= 1.0f / (high - low);
+  out_image[y * stride + x] = sum;
 }
 
-ccl_device_inline void kernel_filter_nlm_calc_weight(int x, int y,
-                                                     const ccl_global float *ccl_restrict difference_image,
+ccl_device_inline void kernel_filter_nlm_calc_weight(int x,
+                                                     int y,
+                                                     const ccl_global float *ccl_restrict
+                                                         difference_image,
                                                      ccl_global float *out_image,
-                                                     int4 rect, int stride, int f)
+                                                     int4 rect,
+                                                     int stride,
+                                                     int f)
 {
-	float sum = 0.0f;
-	const int low = max(rect.x, x-f);
-	const int high = min(rect.z, x+f+1);
-	for(int x1 = low; x1 < high; x1++) {
-		sum += difference_image[y*stride + x1];
-	}
-	sum *= 1.0f/(high-low);
-	out_image[y*stride + x] = fast_expf(-max(sum, 0.0f));
+  float sum = 0.0f;
+  const int low = max(rect.x, x - f);
+  const int high = min(rect.z, x + f + 1);
+  for (int x1 = low; x1 < high; x1++) {
+    sum += difference_image[y * stride + x1];
+  }
+  sum *= 1.0f / (high - low);
+  out_image[y * stride + x] = fast_expf(-max(sum, 0.0f));
 }
 
-ccl_device_inline void kernel_filter_nlm_update_output(int x, int y,
-                                                       int dx, int dy,
-                                                       const ccl_global float *ccl_restrict difference_image,
+ccl_device_inline void kernel_filter_nlm_update_output(int x,
+                                                       int y,
+                                                       int dx,
+                                                       int dy,
+                                                       const ccl_global float *ccl_restrict
+                                                           difference_image,
                                                        const ccl_global float *ccl_restrict image,
                                                        ccl_global float *out_image,
                                                        ccl_global float *accum_image,
-                                                       int4 rect, int channel_offset,
-                                                       int stride, int f)
+                                                       int4 rect,
+                                                       int channel_offset,
+                                                       int stride,
+                                                       int f)
 {
-	float sum = 0.0f;
-	const int low = max(rect.x, x-f);
-	const int high = min(rect.z, x+f+1);
-	for(int x1 = low; x1 < high; x1++) {
-		sum += difference_image[y*stride + x1];
-	}
-	sum *= 1.0f/(high-low);
-
-	int idx_p = y*stride + x, idx_q = (y+dy)*stride + (x+dx);
-	if(out_image) {
-		atomic_add_and_fetch_float(accum_image + idx_p, sum);
-
-		float val = image[idx_q];
-		if(channel_offset) {
-			val += image[idx_q + channel_offset];
-			val += image[idx_q + 2*channel_offset];
-			val *= 1.0f/3.0f;
-		}
-		atomic_add_and_fetch_float(out_image + idx_p, sum*val);
-	}
-	else {
-		accum_image[idx_p] = sum;
-	}
+  float sum = 0.0f;
+  const int low = max(rect.x, x - f);
+  const int high = min(rect.z, x + f + 1);
+  for (int x1 = low; x1 < high; x1++) {
+    sum += difference_image[y * stride + x1];
+  }
+  sum *= 1.0f / (high - low);
+
+  int idx_p = y * stride + x, idx_q = (y + dy) * stride + (x + dx);
+  if (out_image) {
+    atomic_add_and_fetch_float(accum_image + idx_p, sum);
+
+    float val = image[idx_q];
+    if (channel_offset) {
+      val += image[idx_q + channel_offset];
+      val += image[idx_q + 2 * channel_offset];
+      val *= 1.0f / 3.0f;
+    }
+    atomic_add_and_fetch_float(out_image + idx_p, sum * val);
+  }
+  else {
+    accum_image[idx_p] = sum;
+  }
 }
 
-ccl_device_inline void kernel_filter_nlm_construct_gramian(int x, int y,
-                                                           int dx, int dy, int t,
-                                                           const ccl_global float *ccl_restrict difference_image,
-                                                           const ccl_global float *ccl_restrict buffer,
-                                                           const ccl_global float *ccl_restrict transform,
-                                                           ccl_global int *rank,
-                                                           ccl_global float *XtWX,
-                                                           ccl_global float3 *XtWY,
-                                                           int4 rect,
-                                                           int4 filter_window,
-                                                           int stride, int f,
-                                                           int pass_stride,
-                                                           int frame_offset,
-                                                           bool use_time,
-                                                           int localIdx)
+ccl_device_inline void kernel_filter_nlm_construct_gramian(
+    int x,
+    int y,
+    int dx,
+    int dy,
+    int t,
+    const ccl_global float *ccl_restrict difference_image,
+    const ccl_global float *ccl_restrict buffer,
+    const ccl_global float *ccl_restrict transform,
+    ccl_global int *rank,
+    ccl_global float *XtWX,
+    ccl_global float3 *XtWY,
+    int4 rect,
+    int4 filter_window,
+    int stride,
+    int f,
+    int pass_stride,
+    int frame_offset,
+    bool use_time,
+    int localIdx)
 {
-	const int low = max(rect.x, x-f);
-	const int high = min(rect.z, x+f+1);
-	float sum = 0.0f;
-	for(int x1 = low; x1 < high; x1++) {
-		sum += difference_image[y*stride + x1];
-	}
-	float weight = sum * (1.0f/(high - low));
-
-	/* Reconstruction data is only stored for pixels inside the filter window,
-	 * so compute the pixels's index in there. */
-	int storage_ofs = coord_to_local_index(filter_window, x, y);
-	transform += storage_ofs;
-	rank += storage_ofs;
-	XtWX += storage_ofs;
-	XtWY += storage_ofs;
-
-	kernel_filter_construct_gramian(x, y,
-	                                rect_size(filter_window),
-	                                dx, dy, t,
-	                                stride,
-	                                pass_stride,
-	                                frame_offset,
-	                                use_time,
-	                                buffer,
-	                                transform, rank,
-	                                weight, XtWX, XtWY,
-	                                localIdx);
+  const int low = max(rect.x, x - f);
+  const int high = min(rect.z, x + f + 1);
+  float sum = 0.0f;
+  for (int x1 = low; x1 < high; x1++) {
+    sum += difference_image[y * stride + x1];
+  }
+  float weight = sum * (1.0f / (high - low));
+
+  /* Reconstruction data is only stored for pixels inside the filter window,
+   * so compute the pixel's index in there. */
+  int storage_ofs = coord_to_local_index(filter_window, x, y);
+  transform += storage_ofs;
+  rank += storage_ofs;
+  XtWX += storage_ofs;
+  XtWY += storage_ofs;
+
+  kernel_filter_construct_gramian(x,
+                                  y,
+                                  rect_size(filter_window),
+                                  dx,
+                                  dy,
+                                  t,
+                                  stride,
+                                  pass_stride,
+                                  frame_offset,
+                                  use_time,
+                                  buffer,
+                                  transform,
+                                  rank,
+                                  weight,
+                                  XtWX,
+                                  XtWY,
+                                  localIdx);
 }
 
-ccl_device_inline void kernel_filter_nlm_normalize(int x, int y,
+ccl_device_inline void kernel_filter_nlm_normalize(int x,
+                                                   int y,
                                                    ccl_global float *out_image,
-                                                   const ccl_global float *ccl_restrict accum_image,
+                                                   const ccl_global float *ccl_restrict
+                                                       accum_image,
                                                    int stride)
 {
-	out_image[y*stride + x] /= accum_image[y*stride + x];
+  out_image[y * stride + x] /= accum_image[y * stride + x];
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/filter/filter_prefilter.h b/intern/cycles/kernel/filter/filter_prefilter.h
index e24f4feb28d..8211311313d 100644
--- a/intern/cycles/kernel/filter/filter_prefilter.h
+++ b/intern/cycles/kernel/filter/filter_prefilter.h
@@ -27,7 +27,8 @@ CCL_NAMESPACE_BEGIN
  */
 ccl_device void kernel_filter_divide_shadow(int sample,
                                             CCL_FILTER_TILE_INFO,
-                                            int x, int y,
+                                            int x,
+                                            int y,
                                             ccl_global float *unfilteredA,
                                             ccl_global float *unfilteredB,
                                             ccl_global float *sampleVariance,
@@ -37,37 +38,39 @@ ccl_device void kernel_filter_divide_shadow(int sample,
                                             int buffer_pass_stride,
                                             int buffer_denoising_offset)
 {
-	int xtile = (x < tile_info->x[1])? 0: ((x < tile_info->x[2])? 1: 2);
-	int ytile = (y < tile_info->y[1])? 0: ((y < tile_info->y[2])? 1: 2);
-	int tile = ytile*3+xtile;
+  int xtile = (x < tile_info->x[1]) ? 0 : ((x < tile_info->x[2]) ? 1 : 2);
+  int ytile = (y < tile_info->y[1]) ? 0 : ((y < tile_info->y[2]) ? 1 : 2);
+  int tile = ytile * 3 + xtile;
 
-	int offset = tile_info->offsets[tile];
-	int stride = tile_info->strides[tile];
-	const ccl_global float *ccl_restrict center_buffer = (ccl_global float*) ccl_get_tile_buffer(tile);
-	center_buffer += (y*stride + x + offset)*buffer_pass_stride;
-	center_buffer += buffer_denoising_offset + 14;
+  int offset = tile_info->offsets[tile];
+  int stride = tile_info->strides[tile];
+  const ccl_global float *ccl_restrict center_buffer = (ccl_global float *)ccl_get_tile_buffer(
+      tile);
+  center_buffer += (y * stride + x + offset) * buffer_pass_stride;
+  center_buffer += buffer_denoising_offset + 14;
 
-	int buffer_w = align_up(rect.z - rect.x, 4);
-	int idx = (y-rect.y)*buffer_w + (x - rect.x);
-	unfilteredA[idx] = center_buffer[1] / max(center_buffer[0], 1e-7f);
-	unfilteredB[idx] = center_buffer[4] / max(center_buffer[3], 1e-7f);
+  int buffer_w = align_up(rect.z - rect.x, 4);
+  int idx = (y - rect.y) * buffer_w + (x - rect.x);
+  unfilteredA[idx] = center_buffer[1] / max(center_buffer[0], 1e-7f);
+  unfilteredB[idx] = center_buffer[4] / max(center_buffer[3], 1e-7f);
 
-	float varA = center_buffer[2];
-	float varB = center_buffer[5];
-	int odd_sample = (sample+1)/2;
-	int even_sample = sample/2;
+  float varA = center_buffer[2];
+  float varB = center_buffer[5];
+  int odd_sample = (sample + 1) / 2;
+  int even_sample = sample / 2;
 
-	/* Approximate variance as E[x^2] - 1/N * (E[x])^2, since online variance
-	 * update does not work efficiently with atomics in the kernel. */
-	varA = max(0.0f, varA - unfilteredA[idx]*unfilteredA[idx]*odd_sample);
-	varB = max(0.0f, varB - unfilteredB[idx]*unfilteredB[idx]*even_sample);
+  /* Approximate variance as E[x^2] - 1/N * (E[x])^2, since online variance
+   * update does not work efficiently with atomics in the kernel. */
+  varA = max(0.0f, varA - unfilteredA[idx] * unfilteredA[idx] * odd_sample);
+  varB = max(0.0f, varB - unfilteredB[idx] * unfilteredB[idx] * even_sample);
 
-	varA /= max(odd_sample - 1, 1);
-	varB /= max(even_sample - 1, 1);
+  varA /= max(odd_sample - 1, 1);
+  varB /= max(even_sample - 1, 1);
 
-	sampleVariance[idx]  = 0.5f*(varA + varB) / sample;
-	sampleVarianceV[idx] = 0.5f * (varA - varB) * (varA - varB) / (sample*sample);
-	bufferVariance[idx]  = 0.5f * (unfilteredA[idx] - unfilteredB[idx]) * (unfilteredA[idx] - unfilteredB[idx]);
+  sampleVariance[idx] = 0.5f * (varA + varB) / sample;
+  sampleVarianceV[idx] = 0.5f * (varA - varB) * (varA - varB) / (sample * sample);
+  bufferVariance[idx] = 0.5f * (unfilteredA[idx] - unfilteredB[idx]) *
+                        (unfilteredA[idx] - unfilteredB[idx]);
 }
 
 /* Load a regular feature from the render buffers into the denoise buffer.
@@ -80,55 +83,65 @@ ccl_device void kernel_filter_divide_shadow(int sample,
  */
 ccl_device void kernel_filter_get_feature(int sample,
                                           CCL_FILTER_TILE_INFO,
-                                          int m_offset, int v_offset,
-                                          int x, int y,
+                                          int m_offset,
+                                          int v_offset,
+                                          int x,
+                                          int y,
                                           ccl_global float *mean,
                                           ccl_global float *variance,
                                           float scale,
-                                          int4 rect, int buffer_pass_stride,
+                                          int4 rect,
+                                          int buffer_pass_stride,
                                           int buffer_denoising_offset)
 {
-	int xtile = (x < tile_info->x[1])? 0: ((x < tile_info->x[2])? 1: 2);
-	int ytile = (y < tile_info->y[1])? 0: ((y < tile_info->y[2])? 1: 2);
-	int tile = ytile*3+xtile;
-	ccl_global float *center_buffer = ((ccl_global float*) ccl_get_tile_buffer(tile)) + (tile_info->offsets[tile] + y*tile_info->strides[tile] + x)*buffer_pass_stride + buffer_denoising_offset;
+  int xtile = (x < tile_info->x[1]) ? 0 : ((x < tile_info->x[2]) ? 1 : 2);
+  int ytile = (y < tile_info->y[1]) ? 0 : ((y < tile_info->y[2]) ? 1 : 2);
+  int tile = ytile * 3 + xtile;
+  ccl_global float *center_buffer = ((ccl_global float *)ccl_get_tile_buffer(tile)) +
+                                    (tile_info->offsets[tile] + y * tile_info->strides[tile] + x) *
+                                        buffer_pass_stride +
+                                    buffer_denoising_offset;
 
-	int buffer_w = align_up(rect.z - rect.x, 4);
-	int idx = (y-rect.y)*buffer_w + (x - rect.x);
+  int buffer_w = align_up(rect.z - rect.x, 4);
+  int idx = (y - rect.y) * buffer_w + (x - rect.x);
 
-	float val = scale * center_buffer[m_offset];
-	mean[idx] = val;
+  float val = scale * center_buffer[m_offset];
+  mean[idx] = val;
 
-	if(v_offset >= 0) {
-		if(sample > 1) {
-			/* Approximate variance as E[x^2] - 1/N * (E[x])^2, since online variance
-			 * update does not work efficiently with atomics in the kernel. */
-			variance[idx] = max(0.0f, (center_buffer[v_offset] - val*val*sample) / (sample * (sample-1)));
-		}
-		else {
-			/* Can't compute variance with single sample, just set it very high. */
-			variance[idx] = 1e10f;
-		}
-	}
+  if (v_offset >= 0) {
+    if (sample > 1) {
+      /* Approximate variance as E[x^2] - 1/N * (E[x])^2, since online variance
+       * update does not work efficiently with atomics in the kernel. */
+      variance[idx] = max(
+          0.0f, (center_buffer[v_offset] - val * val * sample) / (sample * (sample - 1)));
+    }
+    else {
+      /* Can't compute variance with single sample, just set it very high. */
+      variance[idx] = 1e10f;
+    }
+  }
 }
 
 ccl_device void kernel_filter_write_feature(int sample,
-                                            int x, int y,
+                                            int x,
+                                            int y,
                                             int4 buffer_params,
                                             ccl_global float *from,
                                             ccl_global float *buffer,
                                             int out_offset,
                                             int4 rect)
 {
-	ccl_global float *combined_buffer = buffer + (y*buffer_params.y + x + buffer_params.x)*buffer_params.z;
+  ccl_global float *combined_buffer = buffer + (y * buffer_params.y + x + buffer_params.x) *
+                                                   buffer_params.z;
 
-	int buffer_w = align_up(rect.z - rect.x, 4);
-	int idx = (y-rect.y)*buffer_w + (x - rect.x);
+  int buffer_w = align_up(rect.z - rect.x, 4);
+  int idx = (y - rect.y) * buffer_w + (x - rect.x);
 
-	combined_buffer[out_offset] = from[idx];
+  combined_buffer[out_offset] = from[idx];
 }
 
-ccl_device void kernel_filter_detect_outliers(int x, int y,
+ccl_device void kernel_filter_detect_outliers(int x,
+                                              int y,
                                               ccl_global float *image,
                                               ccl_global float *variance,
                                               ccl_global float *depth,
@@ -136,123 +149,131 @@ ccl_device void kernel_filter_detect_outliers(int x, int y,
                                               int4 rect,
                                               int pass_stride)
 {
-	int buffer_w = align_up(rect.z - rect.x, 4);
+  int buffer_w = align_up(rect.z - rect.x, 4);
 
-	int n = 0;
-	float values[25];
-	float pixel_variance, max_variance = 0.0f;
-	for(int y1 = max(y-2, rect.y); y1 < min(y+3, rect.w); y1++) {
-		for(int x1 = max(x-2, rect.x); x1 < min(x+3, rect.z); x1++) {
-			int idx = (y1-rect.y)*buffer_w + (x1-rect.x);
-			float3 color = make_float3(image[idx], image[idx+pass_stride], image[idx+2*pass_stride]);
-			color = max(color, make_float3(0.0f, 0.0f, 0.0f));
-			float L = average(color);
+  int n = 0;
+  float values[25];
+  float pixel_variance, max_variance = 0.0f;
+  for (int y1 = max(y - 2, rect.y); y1 < min(y + 3, rect.w); y1++) {
+    for (int x1 = max(x - 2, rect.x); x1 < min(x + 3, rect.z); x1++) {
+      int idx = (y1 - rect.y) * buffer_w + (x1 - rect.x);
+      float3 color = make_float3(
+          image[idx], image[idx + pass_stride], image[idx + 2 * pass_stride]);
+      color = max(color, make_float3(0.0f, 0.0f, 0.0f));
+      float L = average(color);
 
-			/* Find the position of L. */
-			int i;
-			for(i = 0; i < n; i++) {
-				if(values[i] > L) break;
-			}
-			/* Make space for L by shifting all following values to the right. */
-			for(int j = n; j > i; j--) {
-				values[j] = values[j-1];
-			}
-			/* Insert L. */
-			values[i] = L;
-			n++;
+      /* Find the position of L. */
+      int i;
+      for (i = 0; i < n; i++) {
+        if (values[i] > L)
+          break;
+      }
+      /* Make space for L by shifting all following values to the right. */
+      for (int j = n; j > i; j--) {
+        values[j] = values[j - 1];
+      }
+      /* Insert L. */
+      values[i] = L;
+      n++;
 
-			float3 pixel_var = make_float3(variance[idx], variance[idx+pass_stride], variance[idx+2*pass_stride]);
-			float var = average(pixel_var);
-			if((x1 == x) && (y1 == y)) {
-				pixel_variance = (pixel_var.x < 0.0f || pixel_var.y < 0.0f || pixel_var.z < 0.0f)? -1.0f : var;
-			}
-			else {
-				max_variance = max(max_variance, var);
-			}
-		}
-	}
+      float3 pixel_var = make_float3(
+          variance[idx], variance[idx + pass_stride], variance[idx + 2 * pass_stride]);
+      float var = average(pixel_var);
+      if ((x1 == x) && (y1 == y)) {
+        pixel_variance = (pixel_var.x < 0.0f || pixel_var.y < 0.0f || pixel_var.z < 0.0f) ? -1.0f :
+                                                                                            var;
+      }
+      else {
+        max_variance = max(max_variance, var);
+      }
+    }
+  }
 
-	max_variance += 1e-4f;
+  max_variance += 1e-4f;
 
-	int idx = (y-rect.y)*buffer_w + (x-rect.x);
-	float3 color = make_float3(image[idx], image[idx+pass_stride], image[idx+2*pass_stride]);
-	color = max(color, make_float3(0.0f, 0.0f, 0.0f));
-	float L = average(color);
+  int idx = (y - rect.y) * buffer_w + (x - rect.x);
+  float3 color = make_float3(image[idx], image[idx + pass_stride], image[idx + 2 * pass_stride]);
+  color = max(color, make_float3(0.0f, 0.0f, 0.0f));
+  float L = average(color);
 
-	float ref = 2.0f*values[(int)(n*0.75f)];
+  float ref = 2.0f * values[(int)(n * 0.75f)];
 
-	/* Slightly offset values to avoid false positives in (almost) black areas. */
-	max_variance += 1e-5f;
-	ref -= 1e-5f;
+  /* Slightly offset values to avoid false positives in (almost) black areas. */
+  max_variance += 1e-5f;
+  ref -= 1e-5f;
 
-	if(L > ref) {
-		/* The pixel appears to be an outlier.
-		 * However, it may just be a legitimate highlight. Therefore, it is checked how likely it is that the pixel
-		 * should actually be at the reference value:
-		 * If the reference is within the 3-sigma interval, the pixel is assumed to be a statistical outlier.
-		 * Otherwise, it is very unlikely that the pixel should be darker, which indicates a legitimate highlight.
-		 */
+  if (L > ref) {
+    /* The pixel appears to be an outlier.
+     * However, it may just be a legitimate highlight. Therefore, it is checked how likely it is that the pixel
+     * should actually be at the reference value:
+     * If the reference is within the 3-sigma interval, the pixel is assumed to be a statistical outlier.
+     * Otherwise, it is very unlikely that the pixel should be darker, which indicates a legitimate highlight.
+     */
 
-		if(pixel_variance < 0.0f || pixel_variance > 9.0f * max_variance) {
-			depth[idx] = -depth[idx];
-			color *= ref/L;
-			variance[idx] = variance[idx + pass_stride] = variance[idx + 2*pass_stride] = max_variance;
-		}
-		else {
-			float stddev = sqrtf(pixel_variance);
-			if(L - 3*stddev < ref) {
-				/* The pixel is an outlier, so negate the depth value to mark it as one.
-				* Also, scale its brightness down to the outlier threshold to avoid trouble with the NLM weights. */
-				depth[idx] = -depth[idx];
-				float fac = ref/L;
-				color *= fac;
-				variance[idx              ] *= fac*fac;
-				variance[idx + pass_stride] *= fac*fac;
-				variance[idx+2*pass_stride] *= fac*fac;
-			}
-		}
-	}
-	out[idx              ] = color.x;
-	out[idx + pass_stride] = color.y;
-	out[idx+2*pass_stride] = color.z;
+    if (pixel_variance < 0.0f || pixel_variance > 9.0f * max_variance) {
+      depth[idx] = -depth[idx];
+      color *= ref / L;
+      variance[idx] = variance[idx + pass_stride] = variance[idx + 2 * pass_stride] = max_variance;
+    }
+    else {
+      float stddev = sqrtf(pixel_variance);
+      if (L - 3 * stddev < ref) {
+        /* The pixel is an outlier, so negate the depth value to mark it as one.
+        * Also, scale its brightness down to the outlier threshold to avoid trouble with the NLM weights. */
+        depth[idx] = -depth[idx];
+        float fac = ref / L;
+        color *= fac;
+        variance[idx] *= fac * fac;
+        variance[idx + pass_stride] *= fac * fac;
+        variance[idx + 2 * pass_stride] *= fac * fac;
+      }
+    }
+  }
+  out[idx] = color.x;
+  out[idx + pass_stride] = color.y;
+  out[idx + 2 * pass_stride] = color.z;
 }
 
 /* Combine A/B buffers.
  * Calculates the combined mean and the buffer variance. */
-ccl_device void kernel_filter_combine_halves(int x, int y,
+ccl_device void kernel_filter_combine_halves(int x,
+                                             int y,
                                              ccl_global float *mean,
                                              ccl_global float *variance,
                                              ccl_global float *a,
                                              ccl_global float *b,
-                                             int4 rect, int r)
+                                             int4 rect,
+                                             int r)
 {
-	int buffer_w = align_up(rect.z - rect.x, 4);
-	int idx = (y-rect.y)*buffer_w + (x - rect.x);
+  int buffer_w = align_up(rect.z - rect.x, 4);
+  int idx = (y - rect.y) * buffer_w + (x - rect.x);
 
-	if(mean)     mean[idx] = 0.5f * (a[idx]+b[idx]);
-	if(variance) {
-		if(r == 0) variance[idx] = 0.25f * (a[idx]-b[idx])*(a[idx]-b[idx]);
-		else {
-			variance[idx] = 0.0f;
-			float values[25];
-			int numValues = 0;
-			for(int py = max(y-r, rect.y); py < min(y+r+1, rect.w); py++) {
-				for(int px = max(x-r, rect.x); px < min(x+r+1, rect.z); px++) {
-					int pidx = (py-rect.y)*buffer_w + (px-rect.x);
-					values[numValues++] = 0.25f * (a[pidx]-b[pidx])*(a[pidx]-b[pidx]);
-				}
-			}
-			/* Insertion-sort the variances (fast enough for 25 elements). */
-			for(int i = 1; i < numValues; i++) {
-				float v = values[i];
-				int j;
-				for(j = i-1; j >= 0 && values[j] > v; j--)
-					values[j+1] = values[j];
-				values[j+1] = v;
-			}
-			variance[idx] = values[(7*numValues)/8];
-		}
-	}
+  if (mean)
+    mean[idx] = 0.5f * (a[idx] + b[idx]);
+  if (variance) {
+    if (r == 0)
+      variance[idx] = 0.25f * (a[idx] - b[idx]) * (a[idx] - b[idx]);
+    else {
+      variance[idx] = 0.0f;
+      float values[25];
+      int numValues = 0;
+      for (int py = max(y - r, rect.y); py < min(y + r + 1, rect.w); py++) {
+        for (int px = max(x - r, rect.x); px < min(x + r + 1, rect.z); px++) {
+          int pidx = (py - rect.y) * buffer_w + (px - rect.x);
+          values[numValues++] = 0.25f * (a[pidx] - b[pidx]) * (a[pidx] - b[pidx]);
+        }
+      }
+      /* Insertion-sort the variances (fast enough for 25 elements). */
+      for (int i = 1; i < numValues; i++) {
+        float v = values[i];
+        int j;
+        for (j = i - 1; j >= 0 && values[j] > v; j--)
+          values[j + 1] = values[j];
+        values[j + 1] = v;
+      }
+      variance[idx] = values[(7 * numValues) / 8];
+    }
+  }
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/filter/filter_reconstruction.h b/intern/cycles/kernel/filter/filter_reconstruction.h
index ceda8f71f98..850f20584da 100644
--- a/intern/cycles/kernel/filter/filter_reconstruction.h
+++ b/intern/cycles/kernel/filter/filter_reconstruction.h
@@ -16,63 +16,75 @@
 
 CCL_NAMESPACE_BEGIN
 
-ccl_device_inline void kernel_filter_construct_gramian(int x, int y,
+ccl_device_inline void kernel_filter_construct_gramian(int x,
+                                                       int y,
                                                        int storage_stride,
-                                                       int dx, int dy, int t,
+                                                       int dx,
+                                                       int dy,
+                                                       int t,
                                                        int buffer_stride,
                                                        int pass_stride,
                                                        int frame_offset,
                                                        bool use_time,
                                                        const ccl_global float *ccl_restrict buffer,
-                                                       const ccl_global float *ccl_restrict transform,
+                                                       const ccl_global float *ccl_restrict
+                                                           transform,
                                                        ccl_global int *rank,
                                                        float weight,
                                                        ccl_global float *XtWX,
                                                        ccl_global float3 *XtWY,
                                                        int localIdx)
 {
-	if(weight < 1e-3f) {
-		return;
-	}
+  if (weight < 1e-3f) {
+    return;
+  }
 
-	int p_offset =  y     * buffer_stride +  x;
-	int q_offset = (y+dy) * buffer_stride + (x+dx) + frame_offset;
+  int p_offset = y * buffer_stride + x;
+  int q_offset = (y + dy) * buffer_stride + (x + dx) + frame_offset;
 
 #ifdef __KERNEL_GPU__
-	const int stride = storage_stride;
+  const int stride = storage_stride;
 #else
-	const int stride = 1;
-	(void) storage_stride;
+  const int stride = 1;
+  (void)storage_stride;
 #endif
 
 #ifdef __KERNEL_CUDA__
-	ccl_local float shared_design_row[(DENOISE_FEATURES+1)*CCL_MAX_LOCAL_SIZE];
-	ccl_local_param float *design_row = shared_design_row + localIdx*(DENOISE_FEATURES+1);
+  ccl_local float shared_design_row[(DENOISE_FEATURES + 1) * CCL_MAX_LOCAL_SIZE];
+  ccl_local_param float *design_row = shared_design_row + localIdx * (DENOISE_FEATURES + 1);
 #else
-	float design_row[DENOISE_FEATURES+1];
+  float design_row[DENOISE_FEATURES + 1];
 #endif
 
-	float3 q_color = filter_get_color(buffer + q_offset, pass_stride);
+  float3 q_color = filter_get_color(buffer + q_offset, pass_stride);
 
-	/* If the pixel was flagged as an outlier during prefiltering, skip it. */
-	if(ccl_get_feature(buffer + q_offset, 0) < 0.0f) {
-		return;
-	}
+  /* If the pixel was flagged as an outlier during prefiltering, skip it. */
+  if (ccl_get_feature(buffer + q_offset, 0) < 0.0f) {
+    return;
+  }
 
-	filter_get_design_row_transform(make_int3(x, y, t),       buffer + p_offset,
-	                                make_int3(x+dx, y+dy, t), buffer + q_offset,
-	                                pass_stride, *rank, design_row, transform, stride, use_time);
+  filter_get_design_row_transform(make_int3(x, y, t),
+                                  buffer + p_offset,
+                                  make_int3(x + dx, y + dy, t),
+                                  buffer + q_offset,
+                                  pass_stride,
+                                  *rank,
+                                  design_row,
+                                  transform,
+                                  stride,
+                                  use_time);
 
 #ifdef __KERNEL_GPU__
-	math_trimatrix_add_gramian_strided(XtWX, (*rank)+1, design_row, weight, stride);
-	math_vec3_add_strided(XtWY, (*rank)+1, design_row, weight * q_color, stride);
+  math_trimatrix_add_gramian_strided(XtWX, (*rank) + 1, design_row, weight, stride);
+  math_vec3_add_strided(XtWY, (*rank) + 1, design_row, weight * q_color, stride);
 #else
-	math_trimatrix_add_gramian(XtWX, (*rank)+1, design_row, weight);
-	math_vec3_add(XtWY, (*rank)+1, design_row, weight * q_color);
+  math_trimatrix_add_gramian(XtWX, (*rank) + 1, design_row, weight);
+  math_vec3_add(XtWY, (*rank) + 1, design_row, weight * q_color);
 #endif
 }
 
-ccl_device_inline void kernel_filter_finalize(int x, int y,
+ccl_device_inline void kernel_filter_finalize(int x,
+                                              int y,
                                               ccl_global float *buffer,
                                               ccl_global int *rank,
                                               int storage_stride,
@@ -82,47 +94,47 @@ ccl_device_inline void kernel_filter_finalize(int x, int y,
                                               int sample)
 {
 #ifdef __KERNEL_GPU__
-	const int stride = storage_stride;
+  const int stride = storage_stride;
 #else
-	const int stride = 1;
-	(void) storage_stride;
+  const int stride = 1;
+  (void)storage_stride;
 #endif
 
-	if(XtWX[0] < 1e-3f) {
-		/* There is not enough information to determine a denoised result.
-		 * As a fallback, keep the original value of the pixel. */
-		 return;
-	}
-
-	/* The weighted average of pixel colors (essentially, the NLM-filtered image).
-	 * In case the solution of the linear model fails due to numerical issues or
-	 * returns non-sensical negative values, fall back to this value. */
-	float3 mean_color = XtWY[0]/XtWX[0];
-
-	math_trimatrix_vec3_solve(XtWX, XtWY, (*rank)+1, stride);
-
-	float3 final_color = XtWY[0];
-	if(!isfinite3_safe(final_color) ||
-	   (final_color.x < -0.01f || final_color.y < -0.01f || final_color.z < -0.01f))
-	{
-		final_color = mean_color;
-	}
-
-	/* Clamp pixel value to positive values. */
-	final_color = max(final_color, make_float3(0.0f, 0.0f, 0.0f));
-
-	ccl_global float *combined_buffer = buffer + (y*buffer_params.y + x + buffer_params.x)*buffer_params.z;
-	if(buffer_params.w >= 0) {
-		final_color *= sample;
-		if(buffer_params.w > 0) {
-			final_color.x += combined_buffer[buffer_params.w+0];
-			final_color.y += combined_buffer[buffer_params.w+1];
-			final_color.z += combined_buffer[buffer_params.w+2];
-		}
-	}
-	combined_buffer[0] = final_color.x;
-	combined_buffer[1] = final_color.y;
-	combined_buffer[2] = final_color.z;
+  if (XtWX[0] < 1e-3f) {
+    /* There is not enough information to determine a denoised result.
+     * As a fallback, keep the original value of the pixel. */
+    return;
+  }
+
+  /* The weighted average of pixel colors (essentially, the NLM-filtered image).
+   * In case the solution of the linear model fails due to numerical issues or
+   * returns non-sensical negative values, fall back to this value. */
+  float3 mean_color = XtWY[0] / XtWX[0];
+
+  math_trimatrix_vec3_solve(XtWX, XtWY, (*rank) + 1, stride);
+
+  float3 final_color = XtWY[0];
+  if (!isfinite3_safe(final_color) ||
+      (final_color.x < -0.01f || final_color.y < -0.01f || final_color.z < -0.01f)) {
+    final_color = mean_color;
+  }
+
+  /* Clamp pixel value to positive values. */
+  final_color = max(final_color, make_float3(0.0f, 0.0f, 0.0f));
+
+  ccl_global float *combined_buffer = buffer + (y * buffer_params.y + x + buffer_params.x) *
+                                                   buffer_params.z;
+  if (buffer_params.w >= 0) {
+    final_color *= sample;
+    if (buffer_params.w > 0) {
+      final_color.x += combined_buffer[buffer_params.w + 0];
+      final_color.y += combined_buffer[buffer_params.w + 1];
+      final_color.z += combined_buffer[buffer_params.w + 2];
+    }
+  }
+  combined_buffer[0] = final_color.x;
+  combined_buffer[1] = final_color.y;
+  combined_buffer[2] = final_color.z;
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/filter/filter_transform.h b/intern/cycles/kernel/filter/filter_transform.h
index 94e27bb02fd..69e3c7c458d 100644
--- a/intern/cycles/kernel/filter/filter_transform.h
+++ b/intern/cycles/kernel/filter/filter_transform.h
@@ -18,92 +18,101 @@ CCL_NAMESPACE_BEGIN
 
 ccl_device void kernel_filter_construct_transform(const float *ccl_restrict buffer,
                                                   CCL_FILTER_TILE_INFO,
-                                                  int x, int y, int4 rect,
-                                                  int pass_stride, int frame_stride,
+                                                  int x,
+                                                  int y,
+                                                  int4 rect,
+                                                  int pass_stride,
+                                                  int frame_stride,
                                                   bool use_time,
-                                                  float *transform, int *rank,
-                                                  int radius, float pca_threshold)
+                                                  float *transform,
+                                                  int *rank,
+                                                  int radius,
+                                                  float pca_threshold)
 {
-	int buffer_w = align_up(rect.z - rect.x, 4);
-
-	float features[DENOISE_FEATURES];
-
-	const float *ccl_restrict pixel_buffer;
-	int3 pixel;
-
-	int num_features = use_time? 11 : 10;
-
-	/* === Calculate denoising window. === */
-	int2 low  = make_int2(max(rect.x, x - radius),
-	                      max(rect.y, y - radius));
-	int2 high = make_int2(min(rect.z, x + radius + 1),
-	                      min(rect.w, y + radius + 1));
-	int num_pixels = (high.y - low.y) * (high.x - low.x) * tile_info->num_frames;
-
-	/* === Shift feature passes to have mean 0. === */
-	float feature_means[DENOISE_FEATURES];
-	math_vector_zero(feature_means, num_features);
-	FOR_PIXEL_WINDOW {
-		filter_get_features(pixel, pixel_buffer, features, use_time, NULL, pass_stride);
-		math_vector_add(feature_means, features, num_features);
-	} END_FOR_PIXEL_WINDOW
-
-	math_vector_scale(feature_means, 1.0f / num_pixels, num_features);
-
-	/* === Scale the shifted feature passes to a range of [-1; 1], will be baked into the transform later. === */
-	float feature_scale[DENOISE_FEATURES];
-	math_vector_zero(feature_scale, num_features);
-
-	FOR_PIXEL_WINDOW {
-		filter_get_feature_scales(pixel, pixel_buffer, features, use_time, feature_means, pass_stride);
-		math_vector_max(feature_scale, features, num_features);
-	} END_FOR_PIXEL_WINDOW
-
-	filter_calculate_scale(feature_scale, use_time);
-
-	/* === Generate the feature transformation. ===
-	 * This transformation maps the num_features-dimentional feature space to a reduced feature (r-feature) space
-	 * which generally has fewer dimensions. This mainly helps to prevent overfitting. */
-	float feature_matrix[DENOISE_FEATURES*DENOISE_FEATURES];
-	math_matrix_zero(feature_matrix, num_features);
-	FOR_PIXEL_WINDOW {
-		filter_get_features(pixel, pixel_buffer, features, use_time, feature_means, pass_stride);
-		math_vector_mul(features, feature_scale, num_features);
-		math_matrix_add_gramian(feature_matrix, num_features, features, 1.0f);
-	} END_FOR_PIXEL_WINDOW
-
-	math_matrix_jacobi_eigendecomposition(feature_matrix, transform, num_features, 1);
-	*rank = 0;
-	/* Prevent overfitting when a small window is used. */
-	int max_rank = min(num_features, num_pixels/3);
-	if(pca_threshold < 0.0f) {
-		float threshold_energy = 0.0f;
-		for(int i = 0; i < num_features; i++) {
-			threshold_energy += feature_matrix[i*num_features+i];
-		}
-		threshold_energy *= 1.0f - (-pca_threshold);
-
-		float reduced_energy = 0.0f;
-		for(int i = 0; i < max_rank; i++, (*rank)++) {
-			if(i >= 2 && reduced_energy >= threshold_energy)
-				break;
-			float s = feature_matrix[i*num_features+i];
-			reduced_energy += s;
-		}
-	}
-	else {
-		for(int i = 0; i < max_rank; i++, (*rank)++) {
-			float s = feature_matrix[i*num_features+i];
-			if(i >= 2 && sqrtf(s) < pca_threshold)
-				break;
-		}
-	}
-
-	/* Bake the feature scaling into the transformation matrix. */
-	for(int i = 0; i < (*rank); i++) {
-		math_vector_mul(transform + i*num_features, feature_scale, num_features);
-	}
-	math_matrix_transpose(transform, num_features, 1);
+  int buffer_w = align_up(rect.z - rect.x, 4);
+
+  float features[DENOISE_FEATURES];
+
+  const float *ccl_restrict pixel_buffer;
+  int3 pixel;
+
+  int num_features = use_time ? 11 : 10;
+
+  /* === Calculate denoising window. === */
+  int2 low = make_int2(max(rect.x, x - radius), max(rect.y, y - radius));
+  int2 high = make_int2(min(rect.z, x + radius + 1), min(rect.w, y + radius + 1));
+  int num_pixels = (high.y - low.y) * (high.x - low.x) * tile_info->num_frames;
+
+  /* === Shift feature passes to have mean 0. === */
+  float feature_means[DENOISE_FEATURES];
+  math_vector_zero(feature_means, num_features);
+  FOR_PIXEL_WINDOW
+  {
+    filter_get_features(pixel, pixel_buffer, features, use_time, NULL, pass_stride);
+    math_vector_add(feature_means, features, num_features);
+  }
+  END_FOR_PIXEL_WINDOW
+
+  math_vector_scale(feature_means, 1.0f / num_pixels, num_features);
+
+  /* === Scale the shifted feature passes to a range of [-1; 1], will be baked into the transform later. === */
+  float feature_scale[DENOISE_FEATURES];
+  math_vector_zero(feature_scale, num_features);
+
+  FOR_PIXEL_WINDOW
+  {
+    filter_get_feature_scales(pixel, pixel_buffer, features, use_time, feature_means, pass_stride);
+    math_vector_max(feature_scale, features, num_features);
+  }
+  END_FOR_PIXEL_WINDOW
+
+  filter_calculate_scale(feature_scale, use_time);
+
+  /* === Generate the feature transformation. ===
+   * This transformation maps the num_features-dimentional feature space to a reduced feature (r-feature) space
+   * which generally has fewer dimensions. This mainly helps to prevent overfitting. */
+  float feature_matrix[DENOISE_FEATURES * DENOISE_FEATURES];
+  math_matrix_zero(feature_matrix, num_features);
+  FOR_PIXEL_WINDOW
+  {
+    filter_get_features(pixel, pixel_buffer, features, use_time, feature_means, pass_stride);
+    math_vector_mul(features, feature_scale, num_features);
+    math_matrix_add_gramian(feature_matrix, num_features, features, 1.0f);
+  }
+  END_FOR_PIXEL_WINDOW
+
+  math_matrix_jacobi_eigendecomposition(feature_matrix, transform, num_features, 1);
+  *rank = 0;
+  /* Prevent overfitting when a small window is used. */
+  int max_rank = min(num_features, num_pixels / 3);
+  if (pca_threshold < 0.0f) {
+    float threshold_energy = 0.0f;
+    for (int i = 0; i < num_features; i++) {
+      threshold_energy += feature_matrix[i * num_features + i];
+    }
+    threshold_energy *= 1.0f - (-pca_threshold);
+
+    float reduced_energy = 0.0f;
+    for (int i = 0; i < max_rank; i++, (*rank)++) {
+      if (i >= 2 && reduced_energy >= threshold_energy)
+        break;
+      float s = feature_matrix[i * num_features + i];
+      reduced_energy += s;
+    }
+  }
+  else {
+    for (int i = 0; i < max_rank; i++, (*rank)++) {
+      float s = feature_matrix[i * num_features + i];
+      if (i >= 2 && sqrtf(s) < pca_threshold)
+        break;
+    }
+  }
+
+  /* Bake the feature scaling into the transformation matrix. */
+  for (int i = 0; i < (*rank); i++) {
+    math_vector_mul(transform + i * num_features, feature_scale, num_features);
+  }
+  math_matrix_transpose(transform, num_features, 1);
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/filter/filter_transform_gpu.h b/intern/cycles/kernel/filter/filter_transform_gpu.h
index ed8ddcb49b1..89cddfd927f 100644
--- a/intern/cycles/kernel/filter/filter_transform_gpu.h
+++ b/intern/cycles/kernel/filter/filter_transform_gpu.h
@@ -18,106 +18,110 @@ CCL_NAMESPACE_BEGIN
 
 ccl_device void kernel_filter_construct_transform(const ccl_global float *ccl_restrict buffer,
                                                   CCL_FILTER_TILE_INFO,
-                                                  int x, int y, int4 rect,
-                                                  int pass_stride, int frame_stride,
+                                                  int x,
+                                                  int y,
+                                                  int4 rect,
+                                                  int pass_stride,
+                                                  int frame_stride,
                                                   bool use_time,
                                                   ccl_global float *transform,
                                                   ccl_global int *rank,
-                                                  int radius, float pca_threshold,
-                                                  int transform_stride, int localIdx)
+                                                  int radius,
+                                                  float pca_threshold,
+                                                  int transform_stride,
+                                                  int localIdx)
 {
-	int buffer_w = align_up(rect.z - rect.x, 4);
+  int buffer_w = align_up(rect.z - rect.x, 4);
 
 #ifdef __KERNEL_CUDA__
-	ccl_local float shared_features[DENOISE_FEATURES*CCL_MAX_LOCAL_SIZE];
-	ccl_local_param float *features = shared_features + localIdx*DENOISE_FEATURES;
+  ccl_local float shared_features[DENOISE_FEATURES * CCL_MAX_LOCAL_SIZE];
+  ccl_local_param float *features = shared_features + localIdx * DENOISE_FEATURES;
 #else
-	float features[DENOISE_FEATURES];
+  float features[DENOISE_FEATURES];
 #endif
 
-	int num_features = use_time? 11 : 10;
-
-	/* === Calculate denoising window. === */
-	int2 low  = make_int2(max(rect.x, x - radius),
-	                      max(rect.y, y - radius));
-	int2 high = make_int2(min(rect.z, x + radius + 1),
-	                      min(rect.w, y + radius + 1));
-	int num_pixels = (high.y - low.y) * (high.x - low.x) * tile_info->num_frames;
-	const ccl_global float *ccl_restrict pixel_buffer;
-	int3 pixel;
-
-
-
-
-	/* === Shift feature passes to have mean 0. === */
-	float feature_means[DENOISE_FEATURES];
-	math_vector_zero(feature_means, num_features);
-	FOR_PIXEL_WINDOW {
-		filter_get_features(pixel, pixel_buffer, features, use_time, NULL, pass_stride);
-		math_vector_add(feature_means, features, num_features);
-	} END_FOR_PIXEL_WINDOW
-
-	math_vector_scale(feature_means, 1.0f / num_pixels, num_features);
-
-	/* === Scale the shifted feature passes to a range of [-1; 1], will be baked into the transform later. === */
-	float feature_scale[DENOISE_FEATURES];
-	math_vector_zero(feature_scale, num_features);
-
-	FOR_PIXEL_WINDOW {
-		filter_get_feature_scales(pixel, pixel_buffer, features, use_time, feature_means, pass_stride);
-		math_vector_max(feature_scale, features, num_features);
-	} END_FOR_PIXEL_WINDOW
-
-	filter_calculate_scale(feature_scale, use_time);
-
-
-
-	/* === Generate the feature transformation. ===
-	 * This transformation maps the num_features-dimentional feature space to a reduced feature (r-feature) space
-	 * which generally has fewer dimensions. This mainly helps to prevent overfitting. */
-	float feature_matrix[DENOISE_FEATURES*DENOISE_FEATURES];
-	math_matrix_zero(feature_matrix, num_features);
-	FOR_PIXEL_WINDOW {
-		filter_get_features(pixel, pixel_buffer, features, use_time, feature_means, pass_stride);
-		math_vector_mul(features, feature_scale, num_features);
-		math_matrix_add_gramian(feature_matrix, num_features, features, 1.0f);
-	} END_FOR_PIXEL_WINDOW
-
-	math_matrix_jacobi_eigendecomposition(feature_matrix, transform, num_features, transform_stride);
-	*rank = 0;
-	/* Prevent overfitting when a small window is used. */
-	int max_rank = min(num_features, num_pixels/3);
-	if(pca_threshold < 0.0f) {
-		float threshold_energy = 0.0f;
-		for(int i = 0; i < num_features; i++) {
-			threshold_energy += feature_matrix[i*num_features+i];
-		}
-		threshold_energy *= 1.0f - (-pca_threshold);
-
-		float reduced_energy = 0.0f;
-		for(int i = 0; i < max_rank; i++, (*rank)++) {
-			if(i >= 2 && reduced_energy >= threshold_energy)
-				break;
-			float s = feature_matrix[i*num_features+i];
-			reduced_energy += s;
-		}
-	}
-	else {
-		for(int i = 0; i < max_rank; i++, (*rank)++) {
-			float s = feature_matrix[i*num_features+i];
-			if(i >= 2 && sqrtf(s) < pca_threshold)
-				break;
-		}
-	}
-
-	math_matrix_transpose(transform, num_features, transform_stride);
-
-	/* Bake the feature scaling into the transformation matrix. */
-	for(int i = 0; i < num_features; i++) {
-		for(int j = 0; j < (*rank); j++) {
-			transform[(i*num_features + j)*transform_stride] *= feature_scale[i];
-		}
-	}
+  int num_features = use_time ? 11 : 10;
+
+  /* === Calculate denoising window. === */
+  int2 low = make_int2(max(rect.x, x - radius), max(rect.y, y - radius));
+  int2 high = make_int2(min(rect.z, x + radius + 1), min(rect.w, y + radius + 1));
+  int num_pixels = (high.y - low.y) * (high.x - low.x) * tile_info->num_frames;
+  const ccl_global float *ccl_restrict pixel_buffer;
+  int3 pixel;
+
+  /* === Shift feature passes to have mean 0. === */
+  float feature_means[DENOISE_FEATURES];
+  math_vector_zero(feature_means, num_features);
+  FOR_PIXEL_WINDOW
+  {
+    filter_get_features(pixel, pixel_buffer, features, use_time, NULL, pass_stride);
+    math_vector_add(feature_means, features, num_features);
+  }
+  END_FOR_PIXEL_WINDOW
+
+  math_vector_scale(feature_means, 1.0f / num_pixels, num_features);
+
+  /* === Scale the shifted feature passes to a range of [-1; 1], will be baked into the transform later. === */
+  float feature_scale[DENOISE_FEATURES];
+  math_vector_zero(feature_scale, num_features);
+
+  FOR_PIXEL_WINDOW
+  {
+    filter_get_feature_scales(pixel, pixel_buffer, features, use_time, feature_means, pass_stride);
+    math_vector_max(feature_scale, features, num_features);
+  }
+  END_FOR_PIXEL_WINDOW
+
+  filter_calculate_scale(feature_scale, use_time);
+
+  /* === Generate the feature transformation. ===
+   * This transformation maps the num_features-dimentional feature space to a reduced feature (r-feature) space
+   * which generally has fewer dimensions. This mainly helps to prevent overfitting. */
+  float feature_matrix[DENOISE_FEATURES * DENOISE_FEATURES];
+  math_matrix_zero(feature_matrix, num_features);
+  FOR_PIXEL_WINDOW
+  {
+    filter_get_features(pixel, pixel_buffer, features, use_time, feature_means, pass_stride);
+    math_vector_mul(features, feature_scale, num_features);
+    math_matrix_add_gramian(feature_matrix, num_features, features, 1.0f);
+  }
+  END_FOR_PIXEL_WINDOW
+
+  math_matrix_jacobi_eigendecomposition(feature_matrix, transform, num_features, transform_stride);
+  *rank = 0;
+  /* Prevent overfitting when a small window is used. */
+  int max_rank = min(num_features, num_pixels / 3);
+  if (pca_threshold < 0.0f) {
+    float threshold_energy = 0.0f;
+    for (int i = 0; i < num_features; i++) {
+      threshold_energy += feature_matrix[i * num_features + i];
+    }
+    threshold_energy *= 1.0f - (-pca_threshold);
+
+    float reduced_energy = 0.0f;
+    for (int i = 0; i < max_rank; i++, (*rank)++) {
+      if (i >= 2 && reduced_energy >= threshold_energy)
+        break;
+      float s = feature_matrix[i * num_features + i];
+      reduced_energy += s;
+    }
+  }
+  else {
+    for (int i = 0; i < max_rank; i++, (*rank)++) {
+      float s = feature_matrix[i * num_features + i];
+      if (i >= 2 && sqrtf(s) < pca_threshold)
+        break;
+    }
+  }
+
+  math_matrix_transpose(transform, num_features, transform_stride);
+
+  /* Bake the feature scaling into the transformation matrix. */
+  for (int i = 0; i < num_features; i++) {
+    for (int j = 0; j < (*rank); j++) {
+      transform[(i * num_features + j) * transform_stride] *= feature_scale[i];
+    }
+  }
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/filter/filter_transform_sse.h b/intern/cycles/kernel/filter/filter_transform_sse.h
index 10bd3e477e9..22397b292db 100644
--- a/intern/cycles/kernel/filter/filter_transform_sse.h
+++ b/intern/cycles/kernel/filter/filter_transform_sse.h
@@ -18,98 +18,110 @@ CCL_NAMESPACE_BEGIN
 
 ccl_device void kernel_filter_construct_transform(const float *ccl_restrict buffer,
                                                   CCL_FILTER_TILE_INFO,
-                                                  int x, int y, int4 rect,
-                                                  int pass_stride, int frame_stride,
+                                                  int x,
+                                                  int y,
+                                                  int4 rect,
+                                                  int pass_stride,
+                                                  int frame_stride,
                                                   bool use_time,
-                                                  float *transform, int *rank,
-                                                  int radius, float pca_threshold)
+                                                  float *transform,
+                                                  int *rank,
+                                                  int radius,
+                                                  float pca_threshold)
 {
-	int buffer_w = align_up(rect.z - rect.x, 4);
-
-	float4 features[DENOISE_FEATURES];
-	const float *ccl_restrict pixel_buffer;
-	int3 pixel;
-
-	int num_features = use_time? 11 : 10;
-
-	/* === Calculate denoising window. === */
-	int2 low  = make_int2(max(rect.x, x - radius),
-	                      max(rect.y, y - radius));
-	int2 high = make_int2(min(rect.z, x + radius + 1),
-	                      min(rect.w, y + radius + 1));
-	int num_pixels = (high.y - low.y) * (high.x - low.x) * tile_info->num_frames;
-
-	/* === Shift feature passes to have mean 0. === */
-	float4 feature_means[DENOISE_FEATURES];
-	math_vector_zero_sse(feature_means, num_features);
-	FOR_PIXEL_WINDOW_SSE {
-		filter_get_features_sse(x4, y4, t4, active_pixels, pixel_buffer, features, use_time, NULL, pass_stride);
-		math_vector_add_sse(feature_means, num_features, features);
-	} END_FOR_PIXEL_WINDOW_SSE
-
-	float4 pixel_scale = make_float4(1.0f / num_pixels);
-	for(int i = 0; i < num_features; i++) {
-		feature_means[i] = reduce_add(feature_means[i]) * pixel_scale;
-	}
-
-	/* === Scale the shifted feature passes to a range of [-1; 1], will be baked into the transform later. === */
-	float4 feature_scale[DENOISE_FEATURES];
-	math_vector_zero_sse(feature_scale, num_features);
-	FOR_PIXEL_WINDOW_SSE {
-		filter_get_feature_scales_sse(x4, y4, t4, active_pixels, pixel_buffer, features, use_time, feature_means, pass_stride);
-		math_vector_max_sse(feature_scale, features, num_features);
-	} END_FOR_PIXEL_WINDOW_SSE
-
-	filter_calculate_scale_sse(feature_scale, use_time);
-
-	/* === Generate the feature transformation. ===
-	 * This transformation maps the num_features-dimentional feature space to a reduced feature (r-feature) space
-	 * which generally has fewer dimensions. This mainly helps to prevent overfitting. */
-	float4 feature_matrix_sse[DENOISE_FEATURES*DENOISE_FEATURES];
-	math_matrix_zero_sse(feature_matrix_sse, num_features);
-	FOR_PIXEL_WINDOW_SSE {
-		filter_get_features_sse(x4, y4, t4, active_pixels, pixel_buffer, features, use_time, feature_means, pass_stride);
-		math_vector_mul_sse(features, num_features, feature_scale);
-		math_matrix_add_gramian_sse(feature_matrix_sse, num_features, features, make_float4(1.0f));
-	} END_FOR_PIXEL_WINDOW_SSE
-
-	float feature_matrix[DENOISE_FEATURES*DENOISE_FEATURES];
-	math_matrix_hsum(feature_matrix, num_features, feature_matrix_sse);
-
-	math_matrix_jacobi_eigendecomposition(feature_matrix, transform, num_features, 1);
-
-	*rank = 0;
-	/* Prevent overfitting when a small window is used. */
-	int max_rank = min(num_features, num_pixels/3);
-	if(pca_threshold < 0.0f) {
-		float threshold_energy = 0.0f;
-		for(int i = 0; i < num_features; i++) {
-			threshold_energy += feature_matrix[i*num_features+i];
-		}
-		threshold_energy *= 1.0f - (-pca_threshold);
-
-		float reduced_energy = 0.0f;
-		for(int i = 0; i < max_rank; i++, (*rank)++) {
-			if(i >= 2 && reduced_energy >= threshold_energy)
-				break;
-			float s = feature_matrix[i*num_features+i];
-			reduced_energy += s;
-		}
-	}
-	else {
-		for(int i = 0; i < max_rank; i++, (*rank)++) {
-			float s = feature_matrix[i*num_features+i];
-			if(i >= 2 && sqrtf(s) < pca_threshold)
-				break;
-		}
-	}
-
-	math_matrix_transpose(transform, num_features, 1);
-
-	/* Bake the feature scaling into the transformation matrix. */
-	for(int i = 0; i < num_features; i++) {
-		math_vector_scale(transform + i*num_features, feature_scale[i][0], *rank);
-	}
+  int buffer_w = align_up(rect.z - rect.x, 4);
+
+  float4 features[DENOISE_FEATURES];
+  const float *ccl_restrict pixel_buffer;
+  int3 pixel;
+
+  int num_features = use_time ? 11 : 10;
+
+  /* === Calculate denoising window. === */
+  int2 low = make_int2(max(rect.x, x - radius), max(rect.y, y - radius));
+  int2 high = make_int2(min(rect.z, x + radius + 1), min(rect.w, y + radius + 1));
+  int num_pixels = (high.y - low.y) * (high.x - low.x) * tile_info->num_frames;
+
+  /* === Shift feature passes to have mean 0. === */
+  float4 feature_means[DENOISE_FEATURES];
+  math_vector_zero_sse(feature_means, num_features);
+  FOR_PIXEL_WINDOW_SSE
+  {
+    filter_get_features_sse(
+        x4, y4, t4, active_pixels, pixel_buffer, features, use_time, NULL, pass_stride);
+    math_vector_add_sse(feature_means, num_features, features);
+  }
+  END_FOR_PIXEL_WINDOW_SSE
+
+  float4 pixel_scale = make_float4(1.0f / num_pixels);
+  for (int i = 0; i < num_features; i++) {
+    feature_means[i] = reduce_add(feature_means[i]) * pixel_scale;
+  }
+
+  /* === Scale the shifted feature passes to a range of [-1; 1], will be baked into the transform later. === */
+  float4 feature_scale[DENOISE_FEATURES];
+  math_vector_zero_sse(feature_scale, num_features);
+  FOR_PIXEL_WINDOW_SSE
+  {
+    filter_get_feature_scales_sse(
+        x4, y4, t4, active_pixels, pixel_buffer, features, use_time, feature_means, pass_stride);
+    math_vector_max_sse(feature_scale, features, num_features);
+  }
+  END_FOR_PIXEL_WINDOW_SSE
+
+  filter_calculate_scale_sse(feature_scale, use_time);
+
+  /* === Generate the feature transformation. ===
+   * This transformation maps the num_features-dimensional feature space to a reduced feature (r-feature) space
+   * which generally has fewer dimensions. This mainly helps to prevent overfitting. */
+  float4 feature_matrix_sse[DENOISE_FEATURES * DENOISE_FEATURES];
+  math_matrix_zero_sse(feature_matrix_sse, num_features);
+  FOR_PIXEL_WINDOW_SSE
+  {
+    filter_get_features_sse(
+        x4, y4, t4, active_pixels, pixel_buffer, features, use_time, feature_means, pass_stride);
+    math_vector_mul_sse(features, num_features, feature_scale);
+    math_matrix_add_gramian_sse(feature_matrix_sse, num_features, features, make_float4(1.0f));
+  }
+  END_FOR_PIXEL_WINDOW_SSE
+
+  float feature_matrix[DENOISE_FEATURES * DENOISE_FEATURES];
+  math_matrix_hsum(feature_matrix, num_features, feature_matrix_sse);
+
+  math_matrix_jacobi_eigendecomposition(feature_matrix, transform, num_features, 1);
+
+  *rank = 0;
+  /* Prevent overfitting when a small window is used. */
+  int max_rank = min(num_features, num_pixels / 3);
+  if (pca_threshold < 0.0f) {
+    float threshold_energy = 0.0f;
+    for (int i = 0; i < num_features; i++) {
+      threshold_energy += feature_matrix[i * num_features + i];
+    }
+    threshold_energy *= 1.0f - (-pca_threshold);
+
+    float reduced_energy = 0.0f;
+    for (int i = 0; i < max_rank; i++, (*rank)++) {
+      if (i >= 2 && reduced_energy >= threshold_energy)
+        break;
+      float s = feature_matrix[i * num_features + i];
+      reduced_energy += s;
+    }
+  }
+  else {
+    for (int i = 0; i < max_rank; i++, (*rank)++) {
+      float s = feature_matrix[i * num_features + i];
+      if (i >= 2 && sqrtf(s) < pca_threshold)
+        break;
+    }
+  }
+
+  math_matrix_transpose(transform, num_features, 1);
+
+  /* Bake the feature scaling into the transformation matrix. */
+  for (int i = 0; i < num_features; i++) {
+    math_vector_scale(transform + i * num_features, feature_scale[i][0], *rank);
+  }
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/geom/geom_attribute.h b/intern/cycles/kernel/geom/geom_attribute.h
index e991f3d685a..456608bfa22 100644
--- a/intern/cycles/kernel/geom/geom_attribute.h
+++ b/intern/cycles/kernel/geom/geom_attribute.h
@@ -30,81 +30,83 @@ ccl_device_inline uint subd_triangle_patch(KernelGlobals *kg, const ShaderData *
 ccl_device_inline uint attribute_primitive_type(KernelGlobals *kg, const ShaderData *sd)
 {
 #ifdef __HAIR__
-	if(sd->type & PRIMITIVE_ALL_CURVE) {
-		return ATTR_PRIM_CURVE;
-	}
-	else
+  if (sd->type & PRIMITIVE_ALL_CURVE) {
+    return ATTR_PRIM_CURVE;
+  }
+  else
 #endif
-	if(subd_triangle_patch(kg, sd) != ~0) {
-		return ATTR_PRIM_SUBD;
-	}
-	else {
-		return ATTR_PRIM_TRIANGLE;
-	}
+      if (subd_triangle_patch(kg, sd) != ~0) {
+    return ATTR_PRIM_SUBD;
+  }
+  else {
+    return ATTR_PRIM_TRIANGLE;
+  }
 }
 
 ccl_device_inline AttributeDescriptor attribute_not_found()
 {
-	const AttributeDescriptor desc = {ATTR_ELEMENT_NONE, (NodeAttributeType)0, 0, ATTR_STD_NOT_FOUND};
-	return desc;
+  const AttributeDescriptor desc = {
+      ATTR_ELEMENT_NONE, (NodeAttributeType)0, 0, ATTR_STD_NOT_FOUND};
+  return desc;
 }
 
 /* Find attribute based on ID */
 
 ccl_device_inline uint object_attribute_map_offset(KernelGlobals *kg, int object)
 {
-	return kernel_tex_fetch(__objects, object).attribute_map_offset;
+  return kernel_tex_fetch(__objects, object).attribute_map_offset;
 }
 
-ccl_device_inline AttributeDescriptor find_attribute(KernelGlobals *kg, const ShaderData *sd, uint id)
+ccl_device_inline AttributeDescriptor find_attribute(KernelGlobals *kg,
+                                                     const ShaderData *sd,
+                                                     uint id)
 {
-	if(sd->object == OBJECT_NONE) {
-		return attribute_not_found();
-	}
-
-	/* for SVM, find attribute by unique id */
-	uint attr_offset = object_attribute_map_offset(kg, sd->object);
-	attr_offset += attribute_primitive_type(kg, sd);
-	uint4 attr_map = kernel_tex_fetch(__attributes_map, attr_offset);
-
-	while(attr_map.x != id) {
-		if(UNLIKELY(attr_map.x == ATTR_STD_NONE)) {
-			return attribute_not_found();
-		}
-		attr_offset += ATTR_PRIM_TYPES;
-		attr_map = kernel_tex_fetch(__attributes_map, attr_offset);
-	}
-
-	AttributeDescriptor desc;
-	desc.element = (AttributeElement)attr_map.y;
-
-	if(sd->prim == PRIM_NONE &&
-	   desc.element != ATTR_ELEMENT_MESH &&
-	   desc.element != ATTR_ELEMENT_VOXEL &&
-	   desc.element != ATTR_ELEMENT_OBJECT)
-	{
-		return attribute_not_found();
-	}
-
-	/* return result */
-	desc.offset = (attr_map.y == ATTR_ELEMENT_NONE) ? (int)ATTR_STD_NOT_FOUND : (int)attr_map.z;
-	desc.type = (NodeAttributeType)(attr_map.w & 0xff);
-	desc.flags = (AttributeFlag)(attr_map.w >> 8);
-
-	return desc;
+  if (sd->object == OBJECT_NONE) {
+    return attribute_not_found();
+  }
+
+  /* for SVM, find attribute by unique id */
+  uint attr_offset = object_attribute_map_offset(kg, sd->object);
+  attr_offset += attribute_primitive_type(kg, sd);
+  uint4 attr_map = kernel_tex_fetch(__attributes_map, attr_offset);
+
+  while (attr_map.x != id) {
+    if (UNLIKELY(attr_map.x == ATTR_STD_NONE)) {
+      return attribute_not_found();
+    }
+    attr_offset += ATTR_PRIM_TYPES;
+    attr_map = kernel_tex_fetch(__attributes_map, attr_offset);
+  }
+
+  AttributeDescriptor desc;
+  desc.element = (AttributeElement)attr_map.y;
+
+  if (sd->prim == PRIM_NONE && desc.element != ATTR_ELEMENT_MESH &&
+      desc.element != ATTR_ELEMENT_VOXEL && desc.element != ATTR_ELEMENT_OBJECT) {
+    return attribute_not_found();
+  }
+
+  /* return result */
+  desc.offset = (attr_map.y == ATTR_ELEMENT_NONE) ? (int)ATTR_STD_NOT_FOUND : (int)attr_map.z;
+  desc.type = (NodeAttributeType)(attr_map.w & 0xff);
+  desc.flags = (AttributeFlag)(attr_map.w >> 8);
+
+  return desc;
 }
 
 /* Transform matrix attribute on meshes */
 
-ccl_device Transform primitive_attribute_matrix(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc)
+ccl_device Transform primitive_attribute_matrix(KernelGlobals *kg,
+                                                const ShaderData *sd,
+                                                const AttributeDescriptor desc)
 {
-	Transform tfm;
+  Transform tfm;
 
-	tfm.x = kernel_tex_fetch(__attributes_float3, desc.offset + 0);
-	tfm.y = kernel_tex_fetch(__attributes_float3, desc.offset + 1);
-	tfm.z = kernel_tex_fetch(__attributes_float3, desc.offset + 2);
+  tfm.x = kernel_tex_fetch(__attributes_float3, desc.offset + 0);
+  tfm.y = kernel_tex_fetch(__attributes_float3, desc.offset + 1);
+  tfm.z = kernel_tex_fetch(__attributes_float3, desc.offset + 2);
 
-	return tfm;
+  return tfm;
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/geom/geom_curve.h b/intern/cycles/kernel/geom/geom_curve.h
index 9b60cf6d56b..e0aacb434eb 100644
--- a/intern/cycles/kernel/geom/geom_curve.h
+++ b/intern/cycles/kernel/geom/geom_curve.h
@@ -27,169 +27,199 @@ CCL_NAMESPACE_BEGIN
 
 ccl_device_inline float3 curvetangent(float t, float3 p0, float3 p1, float3 p2, float3 p3)
 {
-	float fc = 0.71f;
-	float data[4];
-	float t2 = t * t;
-	data[0] = -3.0f * fc          * t2  + 4.0f * fc * t                  - fc;
-	data[1] =  3.0f * (2.0f - fc) * t2  + 2.0f * (fc - 3.0f) * t;
-	data[2] =  3.0f * (fc - 2.0f) * t2  + 2.0f * (3.0f - 2.0f * fc) * t  + fc;
-	data[3] =  3.0f * fc          * t2  - 2.0f * fc * t;
-	return data[0] * p0 + data[1] * p1 + data[2] * p2 + data[3] * p3;
+  float fc = 0.71f;
+  float data[4];
+  float t2 = t * t;
+  data[0] = -3.0f * fc * t2 + 4.0f * fc * t - fc;
+  data[1] = 3.0f * (2.0f - fc) * t2 + 2.0f * (fc - 3.0f) * t;
+  data[2] = 3.0f * (fc - 2.0f) * t2 + 2.0f * (3.0f - 2.0f * fc) * t + fc;
+  data[3] = 3.0f * fc * t2 - 2.0f * fc * t;
+  return data[0] * p0 + data[1] * p1 + data[2] * p2 + data[3] * p3;
 }
 
 ccl_device_inline float3 curvepoint(float t, float3 p0, float3 p1, float3 p2, float3 p3)
 {
-	float data[4];
-	float fc = 0.71f;
-	float t2 = t * t;
-	float t3 = t2 * t;
-	data[0] = -fc          * t3  + 2.0f * fc          * t2 - fc * t;
-	data[1] =  (2.0f - fc) * t3  + (fc - 3.0f)        * t2 + 1.0f;
-	data[2] =  (fc - 2.0f) * t3  + (3.0f - 2.0f * fc) * t2 + fc * t;
-	data[3] =  fc          * t3  - fc * t2;
-	return data[0] * p0 + data[1] * p1 + data[2] * p2 + data[3] * p3;
+  float data[4];
+  float fc = 0.71f;
+  float t2 = t * t;
+  float t3 = t2 * t;
+  data[0] = -fc * t3 + 2.0f * fc * t2 - fc * t;
+  data[1] = (2.0f - fc) * t3 + (fc - 3.0f) * t2 + 1.0f;
+  data[2] = (fc - 2.0f) * t3 + (3.0f - 2.0f * fc) * t2 + fc * t;
+  data[3] = fc * t3 - fc * t2;
+  return data[0] * p0 + data[1] * p1 + data[2] * p2 + data[3] * p3;
 }
 
 /* Reading attributes on various curve elements */
 
-ccl_device float curve_attribute_float(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float *dx, float *dy)
+ccl_device float curve_attribute_float(
+    KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float *dx, float *dy)
 {
-	if(desc.element == ATTR_ELEMENT_CURVE) {
-#ifdef __RAY_DIFFERENTIALS__
-		if(dx) *dx = 0.0f;
-		if(dy) *dy = 0.0f;
-#endif
-
-		return kernel_tex_fetch(__attributes_float, desc.offset + sd->prim);
-	}
-	else if(desc.element == ATTR_ELEMENT_CURVE_KEY || desc.element == ATTR_ELEMENT_CURVE_KEY_MOTION) {
-		float4 curvedata = kernel_tex_fetch(__curves, sd->prim);
-		int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type);
-		int k1 = k0 + 1;
-
-		float f0 = kernel_tex_fetch(__attributes_float, desc.offset + k0);
-		float f1 = kernel_tex_fetch(__attributes_float, desc.offset + k1);
-
-#ifdef __RAY_DIFFERENTIALS__
-		if(dx) *dx = sd->du.dx*(f1 - f0);
-		if(dy) *dy = 0.0f;
-#endif
-
-		return (1.0f - sd->u)*f0 + sd->u*f1;
-	}
-	else {
-#ifdef __RAY_DIFFERENTIALS__
-		if(dx) *dx = 0.0f;
-		if(dy) *dy = 0.0f;
-#endif
-
-		return 0.0f;
-	}
+  if (desc.element == ATTR_ELEMENT_CURVE) {
+#  ifdef __RAY_DIFFERENTIALS__
+    if (dx)
+      *dx = 0.0f;
+    if (dy)
+      *dy = 0.0f;
+#  endif
+
+    return kernel_tex_fetch(__attributes_float, desc.offset + sd->prim);
+  }
+  else if (desc.element == ATTR_ELEMENT_CURVE_KEY ||
+           desc.element == ATTR_ELEMENT_CURVE_KEY_MOTION) {
+    float4 curvedata = kernel_tex_fetch(__curves, sd->prim);
+    int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type);
+    int k1 = k0 + 1;
+
+    float f0 = kernel_tex_fetch(__attributes_float, desc.offset + k0);
+    float f1 = kernel_tex_fetch(__attributes_float, desc.offset + k1);
+
+#  ifdef __RAY_DIFFERENTIALS__
+    if (dx)
+      *dx = sd->du.dx * (f1 - f0);
+    if (dy)
+      *dy = 0.0f;
+#  endif
+
+    return (1.0f - sd->u) * f0 + sd->u * f1;
+  }
+  else {
+#  ifdef __RAY_DIFFERENTIALS__
+    if (dx)
+      *dx = 0.0f;
+    if (dy)
+      *dy = 0.0f;
+#  endif
+
+    return 0.0f;
+  }
 }
 
-ccl_device float2 curve_attribute_float2(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float2 *dx, float2 *dy)
+ccl_device float2 curve_attribute_float2(KernelGlobals *kg,
+                                         const ShaderData *sd,
+                                         const AttributeDescriptor desc,
+                                         float2 *dx,
+                                         float2 *dy)
 {
-	if(desc.element == ATTR_ELEMENT_CURVE) {
-		/* idea: we can't derive any useful differentials here, but for tiled
-		 * mipmap image caching it would be useful to avoid reading the highest
-		 * detail level always. maybe a derivative based on the hair density
-		 * could be computed somehow? */
-#ifdef __RAY_DIFFERENTIALS__
-		if(dx) *dx = make_float2(0.0f, 0.0f);
-		if(dy) *dy = make_float2(0.0f, 0.0f);
-#endif
-
-		return kernel_tex_fetch(__attributes_float2, desc.offset + sd->prim);
-	}
-	else if(desc.element == ATTR_ELEMENT_CURVE_KEY || desc.element == ATTR_ELEMENT_CURVE_KEY_MOTION) {
-		float4 curvedata = kernel_tex_fetch(__curves, sd->prim);
-		int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type);
-		int k1 = k0 + 1;
-
-		float2 f0 = kernel_tex_fetch(__attributes_float2, desc.offset + k0);
-		float2 f1 = kernel_tex_fetch(__attributes_float2, desc.offset + k1);
-
-#ifdef __RAY_DIFFERENTIALS__
-		if(dx) *dx = sd->du.dx*(f1 - f0);
-		if(dy) *dy = make_float2(0.0f, 0.0f);
-#endif
-
-		return (1.0f - sd->u)*f0 + sd->u*f1;
-	}
-	else {
-#ifdef __RAY_DIFFERENTIALS__
-		if(dx) *dx = make_float2(0.0f, 0.0f);
-		if(dy) *dy = make_float2(0.0f, 0.0f);
-#endif
-
-		return make_float2(0.0f, 0.0f);
-	}
+  if (desc.element == ATTR_ELEMENT_CURVE) {
+    /* idea: we can't derive any useful differentials here, but for tiled
+     * mipmap image caching it would be useful to avoid reading the highest
+     * detail level always. maybe a derivative based on the hair density
+     * could be computed somehow? */
+#  ifdef __RAY_DIFFERENTIALS__
+    if (dx)
+      *dx = make_float2(0.0f, 0.0f);
+    if (dy)
+      *dy = make_float2(0.0f, 0.0f);
+#  endif
+
+    return kernel_tex_fetch(__attributes_float2, desc.offset + sd->prim);
+  }
+  else if (desc.element == ATTR_ELEMENT_CURVE_KEY ||
+           desc.element == ATTR_ELEMENT_CURVE_KEY_MOTION) {
+    float4 curvedata = kernel_tex_fetch(__curves, sd->prim);
+    int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type);
+    int k1 = k0 + 1;
+
+    float2 f0 = kernel_tex_fetch(__attributes_float2, desc.offset + k0);
+    float2 f1 = kernel_tex_fetch(__attributes_float2, desc.offset + k1);
+
+#  ifdef __RAY_DIFFERENTIALS__
+    if (dx)
+      *dx = sd->du.dx * (f1 - f0);
+    if (dy)
+      *dy = make_float2(0.0f, 0.0f);
+#  endif
+
+    return (1.0f - sd->u) * f0 + sd->u * f1;
+  }
+  else {
+#  ifdef __RAY_DIFFERENTIALS__
+    if (dx)
+      *dx = make_float2(0.0f, 0.0f);
+    if (dy)
+      *dy = make_float2(0.0f, 0.0f);
+#  endif
+
+    return make_float2(0.0f, 0.0f);
+  }
 }
 
-ccl_device float3 curve_attribute_float3(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float3 *dx, float3 *dy)
+ccl_device float3 curve_attribute_float3(KernelGlobals *kg,
+                                         const ShaderData *sd,
+                                         const AttributeDescriptor desc,
+                                         float3 *dx,
+                                         float3 *dy)
 {
-	if(desc.element == ATTR_ELEMENT_CURVE) {
-		/* idea: we can't derive any useful differentials here, but for tiled
-		 * mipmap image caching it would be useful to avoid reading the highest
-		 * detail level always. maybe a derivative based on the hair density
-		 * could be computed somehow? */
-#ifdef __RAY_DIFFERENTIALS__
-		if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f);
-		if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f);
-#endif
-
-		return float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + sd->prim));
-	}
-	else if(desc.element == ATTR_ELEMENT_CURVE_KEY || desc.element == ATTR_ELEMENT_CURVE_KEY_MOTION) {
-		float4 curvedata = kernel_tex_fetch(__curves, sd->prim);
-		int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type);
-		int k1 = k0 + 1;
-
-		float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + k0));
-		float3 f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + k1));
-
-#ifdef __RAY_DIFFERENTIALS__
-		if(dx) *dx = sd->du.dx*(f1 - f0);
-		if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f);
-#endif
-
-		return (1.0f - sd->u)*f0 + sd->u*f1;
-	}
-	else {
-#ifdef __RAY_DIFFERENTIALS__
-		if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f);
-		if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f);
-#endif
-
-		return make_float3(0.0f, 0.0f, 0.0f);
-	}
+  if (desc.element == ATTR_ELEMENT_CURVE) {
+    /* idea: we can't derive any useful differentials here, but for tiled
+     * mipmap image caching it would be useful to avoid reading the highest
+     * detail level always. maybe a derivative based on the hair density
+     * could be computed somehow? */
+#  ifdef __RAY_DIFFERENTIALS__
+    if (dx)
+      *dx = make_float3(0.0f, 0.0f, 0.0f);
+    if (dy)
+      *dy = make_float3(0.0f, 0.0f, 0.0f);
+#  endif
+
+    return float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + sd->prim));
+  }
+  else if (desc.element == ATTR_ELEMENT_CURVE_KEY ||
+           desc.element == ATTR_ELEMENT_CURVE_KEY_MOTION) {
+    float4 curvedata = kernel_tex_fetch(__curves, sd->prim);
+    int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type);
+    int k1 = k0 + 1;
+
+    float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + k0));
+    float3 f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + k1));
+
+#  ifdef __RAY_DIFFERENTIALS__
+    if (dx)
+      *dx = sd->du.dx * (f1 - f0);
+    if (dy)
+      *dy = make_float3(0.0f, 0.0f, 0.0f);
+#  endif
+
+    return (1.0f - sd->u) * f0 + sd->u * f1;
+  }
+  else {
+#  ifdef __RAY_DIFFERENTIALS__
+    if (dx)
+      *dx = make_float3(0.0f, 0.0f, 0.0f);
+    if (dy)
+      *dy = make_float3(0.0f, 0.0f, 0.0f);
+#  endif
+
+    return make_float3(0.0f, 0.0f, 0.0f);
+  }
 }
 
 /* Curve thickness */
 
 ccl_device float curve_thickness(KernelGlobals *kg, ShaderData *sd)
 {
-	float r = 0.0f;
+  float r = 0.0f;
 
-	if(sd->type & PRIMITIVE_ALL_CURVE) {
-		float4 curvedata = kernel_tex_fetch(__curves, sd->prim);
-		int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type);
-		int k1 = k0 + 1;
+  if (sd->type & PRIMITIVE_ALL_CURVE) {
+    float4 curvedata = kernel_tex_fetch(__curves, sd->prim);
+    int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type);
+    int k1 = k0 + 1;
 
-		float4 P_curve[2];
+    float4 P_curve[2];
 
-		if(sd->type & PRIMITIVE_CURVE) {
-			P_curve[0]= kernel_tex_fetch(__curve_keys, k0);
-			P_curve[1]= kernel_tex_fetch(__curve_keys, k1);
-		}
-		else {
-			motion_curve_keys(kg, sd->object, sd->prim, sd->time, k0, k1, P_curve);
-		}
+    if (sd->type & PRIMITIVE_CURVE) {
+      P_curve[0] = kernel_tex_fetch(__curve_keys, k0);
+      P_curve[1] = kernel_tex_fetch(__curve_keys, k1);
+    }
+    else {
+      motion_curve_keys(kg, sd->object, sd->prim, sd->time, k0, k1, P_curve);
+    }
 
-		r = (P_curve[1].w - P_curve[0].w) * sd->u + P_curve[0].w;
-	}
+    r = (P_curve[1].w - P_curve[0].w) * sd->u + P_curve[0].w;
+  }
 
-	return r*2.0f;
+  return r * 2.0f;
 }
 
 /* Curve location for motion pass, linear interpolation between keys and
@@ -197,89 +227,98 @@ ccl_device float curve_thickness(KernelGlobals *kg, ShaderData *sd)
 
 ccl_device float3 curve_motion_center_location(KernelGlobals *kg, ShaderData *sd)
 {
-	float4 curvedata = kernel_tex_fetch(__curves, sd->prim);
-	int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type);
-	int k1 = k0 + 1;
+  float4 curvedata = kernel_tex_fetch(__curves, sd->prim);
+  int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type);
+  int k1 = k0 + 1;
 
-	float4 P_curve[2];
+  float4 P_curve[2];
 
-	P_curve[0]= kernel_tex_fetch(__curve_keys, k0);
-	P_curve[1]= kernel_tex_fetch(__curve_keys, k1);
+  P_curve[0] = kernel_tex_fetch(__curve_keys, k0);
+  P_curve[1] = kernel_tex_fetch(__curve_keys, k1);
 
-	return float4_to_float3(P_curve[1]) * sd->u + float4_to_float3(P_curve[0]) * (1.0f - sd->u);
+  return float4_to_float3(P_curve[1]) * sd->u + float4_to_float3(P_curve[0]) * (1.0f - sd->u);
 }
 
 /* Curve tangent normal */
 
 ccl_device float3 curve_tangent_normal(KernelGlobals *kg, ShaderData *sd)
 {
-	float3 tgN = make_float3(0.0f,0.0f,0.0f);
+  float3 tgN = make_float3(0.0f, 0.0f, 0.0f);
 
-	if(sd->type & PRIMITIVE_ALL_CURVE) {
+  if (sd->type & PRIMITIVE_ALL_CURVE) {
 
-		tgN = -(-sd->I - sd->dPdu * (dot(sd->dPdu,-sd->I) / len_squared(sd->dPdu)));
-		tgN = normalize(tgN);
+    tgN = -(-sd->I - sd->dPdu * (dot(sd->dPdu, -sd->I) / len_squared(sd->dPdu)));
+    tgN = normalize(tgN);
 
-		/* need to find suitable scaled gd for corrected normal */
-#if 0
-		tgN = normalize(tgN - gd * sd->dPdu);
-#endif
-	}
+    /* TODO: find a suitably scaled gd for the corrected normal. */
+#  if 0
+    tgN = normalize(tgN - gd * sd->dPdu);
+#  endif
+  }
 
-	return tgN;
+  return tgN;
 }
 
 /* Curve bounds utility function */
 
-ccl_device_inline void curvebounds(float *lower, float *upper, float *extremta, float *extrema, float *extremtb, float *extremb, float p0, float p1, float p2, float p3)
+ccl_device_inline void curvebounds(float *lower,
+                                   float *upper,
+                                   float *extremta,
+                                   float *extrema,
+                                   float *extremtb,
+                                   float *extremb,
+                                   float p0,
+                                   float p1,
+                                   float p2,
+                                   float p3)
 {
-	float halfdiscroot = (p2 * p2 - 3 * p3 * p1);
-	float ta = -1.0f;
-	float tb = -1.0f;
-
-	*extremta = -1.0f;
-	*extremtb = -1.0f;
-	*upper = p0;
-	*lower = (p0 + p1) + (p2 + p3);
-	*extrema = *upper;
-	*extremb = *lower;
-
-	if(*lower >= *upper) {
-		*upper = *lower;
-		*lower = p0;
-	}
-
-	if(halfdiscroot >= 0) {
-		float inv3p3 = (1.0f/3.0f)/p3;
-		halfdiscroot = sqrtf(halfdiscroot);
-		ta = (-p2 - halfdiscroot) * inv3p3;
-		tb = (-p2 + halfdiscroot) * inv3p3;
-	}
-
-	float t2;
-	float t3;
-
-	if(ta > 0.0f && ta < 1.0f) {
-		t2 = ta * ta;
-		t3 = t2 * ta;
-		*extremta = ta;
-		*extrema = p3 * t3 + p2 * t2 + p1 * ta + p0;
-
-		*upper = fmaxf(*extrema, *upper);
-		*lower = fminf(*extrema, *lower);
-	}
-
-	if(tb > 0.0f && tb < 1.0f) {
-		t2 = tb * tb;
-		t3 = t2 * tb;
-		*extremtb = tb;
-		*extremb = p3 * t3 + p2 * t2 + p1 * tb + p0;
-
-		*upper = fmaxf(*extremb, *upper);
-		*lower = fminf(*extremb, *lower);
-	}
+  float halfdiscroot = (p2 * p2 - 3 * p3 * p1);
+  float ta = -1.0f;
+  float tb = -1.0f;
+
+  *extremta = -1.0f;
+  *extremtb = -1.0f;
+  *upper = p0;
+  *lower = (p0 + p1) + (p2 + p3);
+  *extrema = *upper;
+  *extremb = *lower;
+
+  if (*lower >= *upper) {
+    *upper = *lower;
+    *lower = p0;
+  }
+
+  if (halfdiscroot >= 0) {
+    float inv3p3 = (1.0f / 3.0f) / p3;
+    halfdiscroot = sqrtf(halfdiscroot);
+    ta = (-p2 - halfdiscroot) * inv3p3;
+    tb = (-p2 + halfdiscroot) * inv3p3;
+  }
+
+  float t2;
+  float t3;
+
+  if (ta > 0.0f && ta < 1.0f) {
+    t2 = ta * ta;
+    t3 = t2 * ta;
+    *extremta = ta;
+    *extrema = p3 * t3 + p2 * t2 + p1 * ta + p0;
+
+    *upper = fmaxf(*extrema, *upper);
+    *lower = fminf(*extrema, *lower);
+  }
+
+  if (tb > 0.0f && tb < 1.0f) {
+    t2 = tb * tb;
+    t3 = t2 * tb;
+    *extremtb = tb;
+    *extremb = p3 * t3 + p2 * t2 + p1 * tb + p0;
+
+    *upper = fmaxf(*extremb, *upper);
+    *lower = fminf(*extremb, *lower);
+  }
 }
 
-#endif  /* __HAIR__ */
+#endif /* __HAIR__ */
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/geom/geom_curve_intersect.h b/intern/cycles/kernel/geom/geom_curve_intersect.h
index 5cf8713e3a8..5fd277c2f99 100644
--- a/intern/cycles/kernel/geom/geom_curve_intersect.h
+++ b/intern/cycles/kernel/geom/geom_curve_intersect.h
@@ -18,484 +18,534 @@ CCL_NAMESPACE_BEGIN
 
 #ifdef __HAIR__
 
-#ifdef __KERNEL_SSE2__
+#  ifdef __KERNEL_SSE2__
 ccl_device_inline ssef transform_point_T3(const ssef t[3], const ssef &a)
 {
-	return madd(shuffle<0>(a), t[0], madd(shuffle<1>(a), t[1], shuffle<2>(a) * t[2]));
+  return madd(shuffle<0>(a), t[0], madd(shuffle<1>(a), t[1], shuffle<2>(a) * t[2]));
 }
-#endif
+#  endif
 
 /* On CPU pass P and dir by reference to aligned vector. */
-ccl_device_forceinline bool cardinal_curve_intersect(
-        KernelGlobals *kg,
-        Intersection *isect,
-        const float3 ccl_ref P,
-        const float3 ccl_ref dir,
-        uint visibility,
-        int object,
-        int curveAddr,
-        float time,
-        int type,
-        uint *lcg_state,
-        float difl,
-        float extmax)
+ccl_device_forceinline bool cardinal_curve_intersect(KernelGlobals *kg,
+                                                     Intersection *isect,
+                                                     const float3 ccl_ref P,
+                                                     const float3 ccl_ref dir,
+                                                     uint visibility,
+                                                     int object,
+                                                     int curveAddr,
+                                                     float time,
+                                                     int type,
+                                                     uint *lcg_state,
+                                                     float difl,
+                                                     float extmax)
 {
-	const bool is_curve_primitive = (type & PRIMITIVE_CURVE);
-
-	if(!is_curve_primitive && kernel_data.bvh.use_bvh_steps) {
-		const float2 prim_time = kernel_tex_fetch(__prim_time, curveAddr);
-		if(time < prim_time.x || time > prim_time.y) {
-			return false;
-		}
-	}
-
-	int segment = PRIMITIVE_UNPACK_SEGMENT(type);
-	float epsilon = 0.0f;
-	float r_st, r_en;
-
-	int depth = kernel_data.curve.subdivisions;
-	int flags = kernel_data.curve.curveflags;
-	int prim = kernel_tex_fetch(__prim_index, curveAddr);
-
-#ifdef __KERNEL_SSE2__
-	ssef vdir = load4f(dir);
-	ssef vcurve_coef[4];
-	const float3 *curve_coef = (float3 *)vcurve_coef;
-
-	{
-		ssef dtmp = vdir * vdir;
-		ssef d_ss = mm_sqrt(dtmp + shuffle<2>(dtmp));
-		ssef rd_ss = load1f_first(1.0f) / d_ss;
-
-		ssei v00vec = load4i((ssei *)&kg->__curves.data[prim]);
-		int2 &v00 = (int2 &)v00vec;
-
-		int k0 = v00.x + segment;
-		int k1 = k0 + 1;
-		int ka = max(k0 - 1, v00.x);
-		int kb = min(k1 + 1, v00.x + v00.y - 1);
-
-#if defined(__KERNEL_AVX2__) && defined(__KERNEL_SSE__) && (!defined(_MSC_VER) || _MSC_VER > 1800)
-		avxf P_curve_0_1, P_curve_2_3;
-		if(is_curve_primitive) {
-			P_curve_0_1 = _mm256_loadu2_m128(&kg->__curve_keys.data[k0].x, &kg->__curve_keys.data[ka].x);
-			P_curve_2_3 = _mm256_loadu2_m128(&kg->__curve_keys.data[kb].x, &kg->__curve_keys.data[k1].x);
-		}
-		else {
-			int fobject = (object == OBJECT_NONE) ? kernel_tex_fetch(__prim_object, curveAddr) : object;
-			motion_cardinal_curve_keys_avx(kg, fobject, prim, time, ka, k0, k1, kb, &P_curve_0_1,&P_curve_2_3);
-		}
-#else  /* __KERNEL_AVX2__ */
-		ssef P_curve[4];
-
-		if(is_curve_primitive) {
-			P_curve[0] = load4f(&kg->__curve_keys.data[ka].x);
-			P_curve[1] = load4f(&kg->__curve_keys.data[k0].x);
-			P_curve[2] = load4f(&kg->__curve_keys.data[k1].x);
-			P_curve[3] = load4f(&kg->__curve_keys.data[kb].x);
-		}
-		else {
-			int fobject = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, curveAddr): object;
-			motion_cardinal_curve_keys(kg, fobject, prim, time, ka, k0, k1, kb, (float4*)&P_curve);
-		}
-#endif  /* __KERNEL_AVX2__ */
-
-		ssef rd_sgn = set_sign_bit<0, 1, 1, 1>(shuffle<0>(rd_ss));
-		ssef mul_zxxy = shuffle<2, 0, 0, 1>(vdir) * rd_sgn;
-		ssef mul_yz = shuffle<1, 2, 1, 2>(vdir) * mul_zxxy;
-		ssef mul_shuf = shuffle<0, 1, 2, 3>(mul_zxxy, mul_yz);
-		ssef vdir0 = vdir & cast(ssei(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0));
-
-		ssef htfm0 = shuffle<0, 2, 0, 3>(mul_shuf, vdir0);
-		ssef htfm1 = shuffle<1, 0, 1, 3>(load1f_first(extract<0>(d_ss)), vdir0);
-		ssef htfm2 = shuffle<1, 3, 2, 3>(mul_shuf, vdir0);
-
-#if defined(__KERNEL_AVX2__) && defined(__KERNEL_SSE__) && (!defined(_MSC_VER) || _MSC_VER > 1800)
-		const avxf vPP = _mm256_broadcast_ps(&P.m128);
-		const avxf htfm00 = avxf(htfm0.m128, htfm0.m128);
-		const avxf htfm11 = avxf(htfm1.m128, htfm1.m128);
-		const avxf htfm22 = avxf(htfm2.m128, htfm2.m128);
-
-		const avxf p01 = madd(shuffle<0>(P_curve_0_1 - vPP),
-		                      htfm00,
-		                      madd(shuffle<1>(P_curve_0_1 - vPP),
-		                           htfm11,
-		                           shuffle<2>(P_curve_0_1 - vPP) * htfm22));
-		const avxf p23 = madd(shuffle<0>(P_curve_2_3 - vPP),
-		                      htfm00,
-		                      madd(shuffle<1>(P_curve_2_3 - vPP),
-		                           htfm11,
-		                           shuffle<2>(P_curve_2_3 - vPP)*htfm22));
-
-		const ssef p0 = _mm256_castps256_ps128(p01);
-		const ssef p1 = _mm256_extractf128_ps(p01, 1);
-		const ssef p2 = _mm256_castps256_ps128(p23);
-		const ssef p3 = _mm256_extractf128_ps(p23, 1);
-
-		const ssef P_curve_1 = _mm256_extractf128_ps(P_curve_0_1, 1);
-		r_st = ((float4 &)P_curve_1).w;
-		const ssef P_curve_2 = _mm256_castps256_ps128(P_curve_2_3);
-		r_en = ((float4 &)P_curve_2).w;
-#else  /* __KERNEL_AVX2__ */
-		ssef htfm[] = { htfm0, htfm1, htfm2 };
-		ssef vP = load4f(P);
-		ssef p0 = transform_point_T3(htfm, P_curve[0] - vP);
-		ssef p1 = transform_point_T3(htfm, P_curve[1] - vP);
-		ssef p2 = transform_point_T3(htfm, P_curve[2] - vP);
-		ssef p3 = transform_point_T3(htfm, P_curve[3] - vP);
-
-		r_st = ((float4 &)P_curve[1]).w;
-		r_en = ((float4 &)P_curve[2]).w;
-#endif  /* __KERNEL_AVX2__ */
-
-		float fc = 0.71f;
-		ssef vfc = ssef(fc);
-		ssef vfcxp3 = vfc * p3;
-
-		vcurve_coef[0] = p1;
-		vcurve_coef[1] = vfc * (p2 - p0);
-		vcurve_coef[2] = madd(ssef(fc * 2.0f), p0, madd(ssef(fc - 3.0f), p1, msub(ssef(3.0f - 2.0f * fc), p2, vfcxp3)));
-		vcurve_coef[3] = msub(ssef(fc - 2.0f), p2 - p1, msub(vfc, p0, vfcxp3));
-
-	}
-#else
-	float3 curve_coef[4];
-
-	/* curve Intersection check */
-	/* obtain curve parameters */
-	{
-		/* ray transform created - this should be created at beginning of intersection loop */
-		Transform htfm;
-		float d = sqrtf(dir.x * dir.x + dir.z * dir.z);
-		htfm = make_transform(
-			dir.z / d, 0, -dir.x /d, 0,
-			-dir.x * dir.y /d, d, -dir.y * dir.z /d, 0,
-			dir.x, dir.y, dir.z, 0);
-
-		float4 v00 = kernel_tex_fetch(__curves, prim);
-
-		int k0 = __float_as_int(v00.x) + segment;
-		int k1 = k0 + 1;
-
-		int ka = max(k0 - 1,__float_as_int(v00.x));
-		int kb = min(k1 + 1,__float_as_int(v00.x) + __float_as_int(v00.y) - 1);
-
-		float4 P_curve[4];
-
-		if(is_curve_primitive) {
-			P_curve[0] = kernel_tex_fetch(__curve_keys, ka);
-			P_curve[1] = kernel_tex_fetch(__curve_keys, k0);
-			P_curve[2] = kernel_tex_fetch(__curve_keys, k1);
-			P_curve[3] = kernel_tex_fetch(__curve_keys, kb);
-		}
-		else {
-			int fobject = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, curveAddr): object;
-			motion_cardinal_curve_keys(kg, fobject, prim, time, ka, k0, k1, kb, P_curve);
-		}
-
-		float3 p0 = transform_point(&htfm, float4_to_float3(P_curve[0]) - P);
-		float3 p1 = transform_point(&htfm, float4_to_float3(P_curve[1]) - P);
-		float3 p2 = transform_point(&htfm, float4_to_float3(P_curve[2]) - P);
-		float3 p3 = transform_point(&htfm, float4_to_float3(P_curve[3]) - P);
-
-		float fc = 0.71f;
-		curve_coef[0] = p1;
-		curve_coef[1] = -fc*p0 + fc*p2;
-		curve_coef[2] = 2.0f * fc * p0 + (fc - 3.0f) * p1 + (3.0f - 2.0f * fc) * p2 - fc * p3;
-		curve_coef[3] = -fc * p0 + (2.0f - fc) * p1 + (fc - 2.0f) * p2 + fc * p3;
-		r_st = P_curve[1].w;
-		r_en = P_curve[2].w;
-	}
-#endif
-
-	float r_curr = max(r_st, r_en);
-
-	if((flags & CURVE_KN_RIBBONS) || !(flags & CURVE_KN_BACKFACING))
-		epsilon = 2 * r_curr;
-
-	/* find bounds - this is slow for cubic curves */
-	float upper, lower;
-
-	float zextrem[4];
-	curvebounds(&lower, &upper, &zextrem[0], &zextrem[1], &zextrem[2], &zextrem[3], curve_coef[0].z, curve_coef[1].z, curve_coef[2].z, curve_coef[3].z);
-	if(lower - r_curr > isect->t || upper + r_curr < epsilon)
-		return false;
-
-	/* minimum width extension */
-	float mw_extension = min(difl * fabsf(upper), extmax);
-	float r_ext = mw_extension + r_curr;
-
-	float xextrem[4];
-	curvebounds(&lower, &upper, &xextrem[0], &xextrem[1], &xextrem[2], &xextrem[3], curve_coef[0].x, curve_coef[1].x, curve_coef[2].x, curve_coef[3].x);
-	if(lower > r_ext || upper < -r_ext)
-		return false;
-
-	float yextrem[4];
-	curvebounds(&lower, &upper, &yextrem[0], &yextrem[1], &yextrem[2], &yextrem[3], curve_coef[0].y, curve_coef[1].y, curve_coef[2].y, curve_coef[3].y);
-	if(lower > r_ext || upper < -r_ext)
-		return false;
-
-	/* setup recurrent loop */
-	int level = 1 << depth;
-	int tree = 0;
-	float resol = 1.0f / (float)level;
-	bool hit = false;
-
-	/* begin loop */
-	while(!(tree >> (depth))) {
-		const float i_st = tree * resol;
-		const float i_en = i_st + (level * resol);
-
-#ifdef __KERNEL_SSE2__
-		ssef vi_st = ssef(i_st), vi_en = ssef(i_en);
-		ssef vp_st = madd(madd(madd(vcurve_coef[3], vi_st, vcurve_coef[2]), vi_st, vcurve_coef[1]), vi_st, vcurve_coef[0]);
-		ssef vp_en = madd(madd(madd(vcurve_coef[3], vi_en, vcurve_coef[2]), vi_en, vcurve_coef[1]), vi_en, vcurve_coef[0]);
-
-		ssef vbmin = min(vp_st, vp_en);
-		ssef vbmax = max(vp_st, vp_en);
-
-		float3 &bmin = (float3 &)vbmin, &bmax = (float3 &)vbmax;
-		float &bminx = bmin.x, &bminy = bmin.y, &bminz = bmin.z;
-		float &bmaxx = bmax.x, &bmaxy = bmax.y, &bmaxz = bmax.z;
-		float3 &p_st = (float3 &)vp_st, &p_en = (float3 &)vp_en;
-#else
-		float3 p_st = ((curve_coef[3] * i_st + curve_coef[2]) * i_st + curve_coef[1]) * i_st + curve_coef[0];
-		float3 p_en = ((curve_coef[3] * i_en + curve_coef[2]) * i_en + curve_coef[1]) * i_en + curve_coef[0];
-
-		float bminx = min(p_st.x, p_en.x);
-		float bmaxx = max(p_st.x, p_en.x);
-		float bminy = min(p_st.y, p_en.y);
-		float bmaxy = max(p_st.y, p_en.y);
-		float bminz = min(p_st.z, p_en.z);
-		float bmaxz = max(p_st.z, p_en.z);
-#endif
-
-		if(xextrem[0] >= i_st && xextrem[0] <= i_en) {
-			bminx = min(bminx,xextrem[1]);
-			bmaxx = max(bmaxx,xextrem[1]);
-		}
-		if(xextrem[2] >= i_st && xextrem[2] <= i_en) {
-			bminx = min(bminx,xextrem[3]);
-			bmaxx = max(bmaxx,xextrem[3]);
-		}
-		if(yextrem[0] >= i_st && yextrem[0] <= i_en) {
-			bminy = min(bminy,yextrem[1]);
-			bmaxy = max(bmaxy,yextrem[1]);
-		}
-		if(yextrem[2] >= i_st && yextrem[2] <= i_en) {
-			bminy = min(bminy,yextrem[3]);
-			bmaxy = max(bmaxy,yextrem[3]);
-		}
-		if(zextrem[0] >= i_st && zextrem[0] <= i_en) {
-			bminz = min(bminz,zextrem[1]);
-			bmaxz = max(bmaxz,zextrem[1]);
-		}
-		if(zextrem[2] >= i_st && zextrem[2] <= i_en) {
-			bminz = min(bminz,zextrem[3]);
-			bmaxz = max(bmaxz,zextrem[3]);
-		}
-
-		float r1 = r_st + (r_en - r_st) * i_st;
-		float r2 = r_st + (r_en - r_st) * i_en;
-		r_curr = max(r1, r2);
-
-		mw_extension = min(difl * fabsf(bmaxz), extmax);
-		float r_ext = mw_extension + r_curr;
-		float coverage = 1.0f;
-
-		if(bminz - r_curr > isect->t || bmaxz + r_curr < epsilon || bminx > r_ext|| bmaxx < -r_ext|| bminy > r_ext|| bmaxy < -r_ext) {
-			/* the bounding box does not overlap the square centered at O */
-			tree += level;
-			level = tree & -tree;
-		}
-		else if(level == 1) {
-
-			/* the maximum recursion depth is reached.
-			 * check if dP0.(Q-P0)>=0 and dPn.(Pn-Q)>=0.
-			 * dP* is reversed if necessary.*/
-			float t = isect->t;
-			float u = 0.0f;
-			float gd = 0.0f;
-
-			if(flags & CURVE_KN_RIBBONS) {
-				float3 tg = (p_en - p_st);
-#ifdef __KERNEL_SSE__
-				const float3 tg_sq = tg * tg;
-				float w = tg_sq.x + tg_sq.y;
-#else
-				float w = tg.x * tg.x + tg.y * tg.y;
-#endif
-				if(w == 0) {
-					tree++;
-					level = tree & -tree;
-					continue;
-				}
-#ifdef __KERNEL_SSE__
-				const float3 p_sttg = p_st * tg;
-				w = -(p_sttg.x + p_sttg.y) / w;
-#else
-				w = -(p_st.x * tg.x + p_st.y * tg.y) / w;
-#endif
-				w = saturate(w);
-
-				/* compute u on the curve segment */
-				u = i_st * (1 - w) + i_en * w;
-				r_curr = r_st + (r_en - r_st) * u;
-				/* compare x-y distances */
-				float3 p_curr = ((curve_coef[3] * u + curve_coef[2]) * u + curve_coef[1]) * u + curve_coef[0];
-
-				float3 dp_st = (3 * curve_coef[3] * i_st + 2 * curve_coef[2]) * i_st + curve_coef[1];
-				if(dot(tg, dp_st)< 0)
-					dp_st *= -1;
-				if(dot(dp_st, -p_st) + p_curr.z * dp_st.z < 0) {
-					tree++;
-					level = tree & -tree;
-					continue;
-				}
-				float3 dp_en = (3 * curve_coef[3] * i_en + 2 * curve_coef[2]) * i_en + curve_coef[1];
-				if(dot(tg, dp_en) < 0)
-					dp_en *= -1;
-				if(dot(dp_en, p_en) - p_curr.z * dp_en.z < 0) {
-					tree++;
-					level = tree & -tree;
-					continue;
-				}
-
-				/* compute coverage */
-				float r_ext = r_curr;
-				coverage = 1.0f;
-				if(difl != 0.0f) {
-					mw_extension = min(difl * fabsf(bmaxz), extmax);
-					r_ext = mw_extension + r_curr;
-#ifdef __KERNEL_SSE__
-					const float3 p_curr_sq = p_curr * p_curr;
-					const float3 dxxx(_mm_sqrt_ss(_mm_hadd_ps(p_curr_sq.m128, p_curr_sq.m128)));
-					float d = dxxx.x;
-#else
-					float d = sqrtf(p_curr.x * p_curr.x + p_curr.y * p_curr.y);
-#endif
-					float d0 = d - r_curr;
-					float d1 = d + r_curr;
-					float inv_mw_extension = 1.0f/mw_extension;
-					if(d0 >= 0)
-						coverage = (min(d1 * inv_mw_extension, 1.0f) - min(d0 * inv_mw_extension, 1.0f)) * 0.5f;
-					else  // inside
-						coverage = (min(d1 * inv_mw_extension, 1.0f) + min(-d0 * inv_mw_extension, 1.0f)) * 0.5f;
-				}
-
-				if(p_curr.x * p_curr.x + p_curr.y * p_curr.y >= r_ext * r_ext || p_curr.z <= epsilon || isect->t < p_curr.z) {
-					tree++;
-					level = tree & -tree;
-					continue;
-				}
-
-				t = p_curr.z;
-
-				/* stochastic fade from minimum width */
-				if(difl != 0.0f && lcg_state) {
-					if(coverage != 1.0f && (lcg_step_float(lcg_state) > coverage))
-						return hit;
-				}
-			}
-			else {
-				float l = len(p_en - p_st);
-				/* minimum width extension */
-				float or1 = r1;
-				float or2 = r2;
-
-				if(difl != 0.0f) {
-					mw_extension = min(len(p_st - P) * difl, extmax);
-					or1 = r1 < mw_extension ? mw_extension : r1;
-					mw_extension = min(len(p_en - P) * difl, extmax);
-					or2 = r2 < mw_extension ? mw_extension : r2;
-				}
-				/* --- */
-				float invl = 1.0f/l;
-				float3 tg = (p_en - p_st) * invl;
-				gd = (or2 - or1) * invl;
-				float difz = -dot(p_st,tg);
-				float cyla = 1.0f - (tg.z * tg.z * (1 + gd*gd));
-				float invcyla = 1.0f/cyla;
-				float halfb = (-p_st.z - tg.z*(difz + gd*(difz*gd + or1)));
-				float tcentre = -halfb*invcyla;
-				float zcentre = difz + (tg.z * tcentre);
-				float3 tdif = - p_st;
-				tdif.z += tcentre;
-				float tdifz = dot(tdif,tg);
-				float tb = 2*(tdif.z - tg.z*(tdifz + gd*(tdifz*gd + or1)));
-				float tc = dot(tdif,tdif) - tdifz * tdifz * (1 + gd*gd) - or1*or1 - 2*or1*tdifz*gd;
-				float td = tb*tb - 4*cyla*tc;
-				if(td < 0.0f) {
-					tree++;
-					level = tree & -tree;
-					continue;
-				}
-
-				float rootd = sqrtf(td);
-				float correction = (-tb - rootd) * 0.5f * invcyla;
-				t = tcentre + correction;
-
-				float3 dp_st = (3 * curve_coef[3] * i_st + 2 * curve_coef[2]) * i_st + curve_coef[1];
-				if(dot(tg, dp_st)< 0)
-					dp_st *= -1;
-				float3 dp_en = (3 * curve_coef[3] * i_en + 2 * curve_coef[2]) * i_en + curve_coef[1];
-				if(dot(tg, dp_en) < 0)
-					dp_en *= -1;
-
-				if(flags & CURVE_KN_BACKFACING && (dot(dp_st, -p_st) + t * dp_st.z < 0 || dot(dp_en, p_en) - t * dp_en.z < 0 || isect->t < t || t <= 0.0f)) {
-					correction = (-tb + rootd) * 0.5f * invcyla;
-					t = tcentre + correction;
-				}
-
-				if(dot(dp_st, -p_st) + t * dp_st.z < 0 || dot(dp_en, p_en) - t * dp_en.z < 0 || isect->t < t || t <= 0.0f) {
-					tree++;
-					level = tree & -tree;
-					continue;
-				}
-
-				float w = (zcentre + (tg.z * correction)) * invl;
-				w = saturate(w);
-				/* compute u on the curve segment */
-				u = i_st * (1 - w) + i_en * w;
-
-				/* stochastic fade from minimum width */
-				if(difl != 0.0f && lcg_state) {
-					r_curr = r1 + (r2 - r1) * w;
-					r_ext = or1 + (or2 - or1) * w;
-					coverage = r_curr/r_ext;
-
-					if(coverage != 1.0f && (lcg_step_float(lcg_state) > coverage))
-						return hit;
-				}
-			}
-			/* we found a new intersection */
-
-#ifdef __VISIBILITY_FLAG__
-			/* visibility flag test. we do it here under the assumption
-			 * that most triangles are culled by node flags */
-			if(kernel_tex_fetch(__prim_visibility, curveAddr) & visibility)
-#endif
-			{
-				/* record intersection */
-				isect->t = t;
-				isect->u = u;
-				isect->v = gd;
-				isect->prim = curveAddr;
-				isect->object = object;
-				isect->type = type;
-				hit = true;
-			}
-
-			tree++;
-			level = tree & -tree;
-		}
-		else {
-			/* split the curve into two curves and process */
-			level = level >> 1;
-		}
-	}
-
-	return hit;
+  const bool is_curve_primitive = (type & PRIMITIVE_CURVE);
+
+  if (!is_curve_primitive && kernel_data.bvh.use_bvh_steps) {
+    const float2 prim_time = kernel_tex_fetch(__prim_time, curveAddr);
+    if (time < prim_time.x || time > prim_time.y) {
+      return false;
+    }
+  }
+
+  int segment = PRIMITIVE_UNPACK_SEGMENT(type);
+  float epsilon = 0.0f;
+  float r_st, r_en;
+
+  int depth = kernel_data.curve.subdivisions;
+  int flags = kernel_data.curve.curveflags;
+  int prim = kernel_tex_fetch(__prim_index, curveAddr);
+
+#  ifdef __KERNEL_SSE2__
+  ssef vdir = load4f(dir);
+  ssef vcurve_coef[4];
+  const float3 *curve_coef = (float3 *)vcurve_coef;
+
+  {
+    ssef dtmp = vdir * vdir;
+    ssef d_ss = mm_sqrt(dtmp + shuffle<2>(dtmp));
+    ssef rd_ss = load1f_first(1.0f) / d_ss;
+
+    ssei v00vec = load4i((ssei *)&kg->__curves.data[prim]);
+    int2 &v00 = (int2 &)v00vec;
+
+    int k0 = v00.x + segment;
+    int k1 = k0 + 1;
+    int ka = max(k0 - 1, v00.x);
+    int kb = min(k1 + 1, v00.x + v00.y - 1);
+
+#    if defined(__KERNEL_AVX2__) && defined(__KERNEL_SSE__) && \
+        (!defined(_MSC_VER) || _MSC_VER > 1800)
+    avxf P_curve_0_1, P_curve_2_3;
+    if (is_curve_primitive) {
+      P_curve_0_1 = _mm256_loadu2_m128(&kg->__curve_keys.data[k0].x, &kg->__curve_keys.data[ka].x);
+      P_curve_2_3 = _mm256_loadu2_m128(&kg->__curve_keys.data[kb].x, &kg->__curve_keys.data[k1].x);
+    }
+    else {
+      int fobject = (object == OBJECT_NONE) ? kernel_tex_fetch(__prim_object, curveAddr) : object;
+      motion_cardinal_curve_keys_avx(
+          kg, fobject, prim, time, ka, k0, k1, kb, &P_curve_0_1, &P_curve_2_3);
+    }
+#    else  /* __KERNEL_AVX2__ */
+    ssef P_curve[4];
+
+    if (is_curve_primitive) {
+      P_curve[0] = load4f(&kg->__curve_keys.data[ka].x);
+      P_curve[1] = load4f(&kg->__curve_keys.data[k0].x);
+      P_curve[2] = load4f(&kg->__curve_keys.data[k1].x);
+      P_curve[3] = load4f(&kg->__curve_keys.data[kb].x);
+    }
+    else {
+      int fobject = (object == OBJECT_NONE) ? kernel_tex_fetch(__prim_object, curveAddr) : object;
+      motion_cardinal_curve_keys(kg, fobject, prim, time, ka, k0, k1, kb, (float4 *)&P_curve);
+    }
+#    endif /* __KERNEL_AVX2__ */
+
+    ssef rd_sgn = set_sign_bit<0, 1, 1, 1>(shuffle<0>(rd_ss));
+    ssef mul_zxxy = shuffle<2, 0, 0, 1>(vdir) * rd_sgn;
+    ssef mul_yz = shuffle<1, 2, 1, 2>(vdir) * mul_zxxy;
+    ssef mul_shuf = shuffle<0, 1, 2, 3>(mul_zxxy, mul_yz);
+    ssef vdir0 = vdir & cast(ssei(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0));
+
+    ssef htfm0 = shuffle<0, 2, 0, 3>(mul_shuf, vdir0);
+    ssef htfm1 = shuffle<1, 0, 1, 3>(load1f_first(extract<0>(d_ss)), vdir0);
+    ssef htfm2 = shuffle<1, 3, 2, 3>(mul_shuf, vdir0);
+
+#    if defined(__KERNEL_AVX2__) && defined(__KERNEL_SSE__) && \
+        (!defined(_MSC_VER) || _MSC_VER > 1800)
+    const avxf vPP = _mm256_broadcast_ps(&P.m128);
+    const avxf htfm00 = avxf(htfm0.m128, htfm0.m128);
+    const avxf htfm11 = avxf(htfm1.m128, htfm1.m128);
+    const avxf htfm22 = avxf(htfm2.m128, htfm2.m128);
+
+    const avxf p01 = madd(
+        shuffle<0>(P_curve_0_1 - vPP),
+        htfm00,
+        madd(shuffle<1>(P_curve_0_1 - vPP), htfm11, shuffle<2>(P_curve_0_1 - vPP) * htfm22));
+    const avxf p23 = madd(
+        shuffle<0>(P_curve_2_3 - vPP),
+        htfm00,
+        madd(shuffle<1>(P_curve_2_3 - vPP), htfm11, shuffle<2>(P_curve_2_3 - vPP) * htfm22));
+
+    const ssef p0 = _mm256_castps256_ps128(p01);
+    const ssef p1 = _mm256_extractf128_ps(p01, 1);
+    const ssef p2 = _mm256_castps256_ps128(p23);
+    const ssef p3 = _mm256_extractf128_ps(p23, 1);
+
+    const ssef P_curve_1 = _mm256_extractf128_ps(P_curve_0_1, 1);
+    r_st = ((float4 &)P_curve_1).w;
+    const ssef P_curve_2 = _mm256_castps256_ps128(P_curve_2_3);
+    r_en = ((float4 &)P_curve_2).w;
+#    else  /* __KERNEL_AVX2__ */
+    ssef htfm[] = {htfm0, htfm1, htfm2};
+    ssef vP = load4f(P);
+    ssef p0 = transform_point_T3(htfm, P_curve[0] - vP);
+    ssef p1 = transform_point_T3(htfm, P_curve[1] - vP);
+    ssef p2 = transform_point_T3(htfm, P_curve[2] - vP);
+    ssef p3 = transform_point_T3(htfm, P_curve[3] - vP);
+
+    r_st = ((float4 &)P_curve[1]).w;
+    r_en = ((float4 &)P_curve[2]).w;
+#    endif /* __KERNEL_AVX2__ */
+
+    float fc = 0.71f;
+    ssef vfc = ssef(fc);
+    ssef vfcxp3 = vfc * p3;
+
+    vcurve_coef[0] = p1;
+    vcurve_coef[1] = vfc * (p2 - p0);
+    vcurve_coef[2] = madd(
+        ssef(fc * 2.0f), p0, madd(ssef(fc - 3.0f), p1, msub(ssef(3.0f - 2.0f * fc), p2, vfcxp3)));
+    vcurve_coef[3] = msub(ssef(fc - 2.0f), p2 - p1, msub(vfc, p0, vfcxp3));
+  }
+#  else
+  float3 curve_coef[4];
+
+  /* curve Intersection check */
+  /* obtain curve parameters */
+  {
+    /* ray transform created - this should be created at beginning of intersection loop */
+    Transform htfm;
+    float d = sqrtf(dir.x * dir.x + dir.z * dir.z);
+    htfm = make_transform(dir.z / d,
+                          0,
+                          -dir.x / d,
+                          0,
+                          -dir.x * dir.y / d,
+                          d,
+                          -dir.y * dir.z / d,
+                          0,
+                          dir.x,
+                          dir.y,
+                          dir.z,
+                          0);
+
+    float4 v00 = kernel_tex_fetch(__curves, prim);
+
+    int k0 = __float_as_int(v00.x) + segment;
+    int k1 = k0 + 1;
+
+    int ka = max(k0 - 1, __float_as_int(v00.x));
+    int kb = min(k1 + 1, __float_as_int(v00.x) + __float_as_int(v00.y) - 1);
+
+    float4 P_curve[4];
+
+    if (is_curve_primitive) {
+      P_curve[0] = kernel_tex_fetch(__curve_keys, ka);
+      P_curve[1] = kernel_tex_fetch(__curve_keys, k0);
+      P_curve[2] = kernel_tex_fetch(__curve_keys, k1);
+      P_curve[3] = kernel_tex_fetch(__curve_keys, kb);
+    }
+    else {
+      int fobject = (object == OBJECT_NONE) ? kernel_tex_fetch(__prim_object, curveAddr) : object;
+      motion_cardinal_curve_keys(kg, fobject, prim, time, ka, k0, k1, kb, P_curve);
+    }
+
+    float3 p0 = transform_point(&htfm, float4_to_float3(P_curve[0]) - P);
+    float3 p1 = transform_point(&htfm, float4_to_float3(P_curve[1]) - P);
+    float3 p2 = transform_point(&htfm, float4_to_float3(P_curve[2]) - P);
+    float3 p3 = transform_point(&htfm, float4_to_float3(P_curve[3]) - P);
+
+    float fc = 0.71f;
+    curve_coef[0] = p1;
+    curve_coef[1] = -fc * p0 + fc * p2;
+    curve_coef[2] = 2.0f * fc * p0 + (fc - 3.0f) * p1 + (3.0f - 2.0f * fc) * p2 - fc * p3;
+    curve_coef[3] = -fc * p0 + (2.0f - fc) * p1 + (fc - 2.0f) * p2 + fc * p3;
+    r_st = P_curve[1].w;
+    r_en = P_curve[2].w;
+  }
+#  endif
+
+  float r_curr = max(r_st, r_en);
+
+  if ((flags & CURVE_KN_RIBBONS) || !(flags & CURVE_KN_BACKFACING))
+    epsilon = 2 * r_curr;
+
+  /* find bounds - this is slow for cubic curves */
+  float upper, lower;
+
+  float zextrem[4];
+  curvebounds(&lower,
+              &upper,
+              &zextrem[0],
+              &zextrem[1],
+              &zextrem[2],
+              &zextrem[3],
+              curve_coef[0].z,
+              curve_coef[1].z,
+              curve_coef[2].z,
+              curve_coef[3].z);
+  if (lower - r_curr > isect->t || upper + r_curr < epsilon)
+    return false;
+
+  /* minimum width extension */
+  float mw_extension = min(difl * fabsf(upper), extmax);
+  float r_ext = mw_extension + r_curr;
+
+  float xextrem[4];
+  curvebounds(&lower,
+              &upper,
+              &xextrem[0],
+              &xextrem[1],
+              &xextrem[2],
+              &xextrem[3],
+              curve_coef[0].x,
+              curve_coef[1].x,
+              curve_coef[2].x,
+              curve_coef[3].x);
+  if (lower > r_ext || upper < -r_ext)
+    return false;
+
+  float yextrem[4];
+  curvebounds(&lower,
+              &upper,
+              &yextrem[0],
+              &yextrem[1],
+              &yextrem[2],
+              &yextrem[3],
+              curve_coef[0].y,
+              curve_coef[1].y,
+              curve_coef[2].y,
+              curve_coef[3].y);
+  if (lower > r_ext || upper < -r_ext)
+    return false;
+
+  /* setup recurrent loop */
+  int level = 1 << depth;
+  int tree = 0;
+  float resol = 1.0f / (float)level;
+  bool hit = false;
+
+  /* begin loop */
+  while (!(tree >> (depth))) {
+    const float i_st = tree * resol;
+    const float i_en = i_st + (level * resol);
+
+#  ifdef __KERNEL_SSE2__
+    ssef vi_st = ssef(i_st), vi_en = ssef(i_en);
+    ssef vp_st = madd(madd(madd(vcurve_coef[3], vi_st, vcurve_coef[2]), vi_st, vcurve_coef[1]),
+                      vi_st,
+                      vcurve_coef[0]);
+    ssef vp_en = madd(madd(madd(vcurve_coef[3], vi_en, vcurve_coef[2]), vi_en, vcurve_coef[1]),
+                      vi_en,
+                      vcurve_coef[0]);
+
+    ssef vbmin = min(vp_st, vp_en);
+    ssef vbmax = max(vp_st, vp_en);
+
+    float3 &bmin = (float3 &)vbmin, &bmax = (float3 &)vbmax;
+    float &bminx = bmin.x, &bminy = bmin.y, &bminz = bmin.z;
+    float &bmaxx = bmax.x, &bmaxy = bmax.y, &bmaxz = bmax.z;
+    float3 &p_st = (float3 &)vp_st, &p_en = (float3 &)vp_en;
+#  else
+    float3 p_st = ((curve_coef[3] * i_st + curve_coef[2]) * i_st + curve_coef[1]) * i_st +
+                  curve_coef[0];
+    float3 p_en = ((curve_coef[3] * i_en + curve_coef[2]) * i_en + curve_coef[1]) * i_en +
+                  curve_coef[0];
+
+    float bminx = min(p_st.x, p_en.x);
+    float bmaxx = max(p_st.x, p_en.x);
+    float bminy = min(p_st.y, p_en.y);
+    float bmaxy = max(p_st.y, p_en.y);
+    float bminz = min(p_st.z, p_en.z);
+    float bmaxz = max(p_st.z, p_en.z);
+#  endif
+
+    if (xextrem[0] >= i_st && xextrem[0] <= i_en) {
+      bminx = min(bminx, xextrem[1]);
+      bmaxx = max(bmaxx, xextrem[1]);
+    }
+    if (xextrem[2] >= i_st && xextrem[2] <= i_en) {
+      bminx = min(bminx, xextrem[3]);
+      bmaxx = max(bmaxx, xextrem[3]);
+    }
+    if (yextrem[0] >= i_st && yextrem[0] <= i_en) {
+      bminy = min(bminy, yextrem[1]);
+      bmaxy = max(bmaxy, yextrem[1]);
+    }
+    if (yextrem[2] >= i_st && yextrem[2] <= i_en) {
+      bminy = min(bminy, yextrem[3]);
+      bmaxy = max(bmaxy, yextrem[3]);
+    }
+    if (zextrem[0] >= i_st && zextrem[0] <= i_en) {
+      bminz = min(bminz, zextrem[1]);
+      bmaxz = max(bmaxz, zextrem[1]);
+    }
+    if (zextrem[2] >= i_st && zextrem[2] <= i_en) {
+      bminz = min(bminz, zextrem[3]);
+      bmaxz = max(bmaxz, zextrem[3]);
+    }
+
+    float r1 = r_st + (r_en - r_st) * i_st;
+    float r2 = r_st + (r_en - r_st) * i_en;
+    r_curr = max(r1, r2);
+
+    mw_extension = min(difl * fabsf(bmaxz), extmax);
+    float r_ext = mw_extension + r_curr;
+    float coverage = 1.0f;
+
+    if (bminz - r_curr > isect->t || bmaxz + r_curr < epsilon || bminx > r_ext || bmaxx < -r_ext ||
+        bminy > r_ext || bmaxy < -r_ext) {
+      /* the bounding box does not overlap the square centered at O */
+      tree += level;
+      level = tree & -tree;
+    }
+    else if (level == 1) {
+
+      /* the maximum recursion depth is reached.
+       * check if dP0.(Q-P0)>=0 and dPn.(Pn-Q)>=0.
+       * dP* is reversed if necessary.*/
+      float t = isect->t;
+      float u = 0.0f;
+      float gd = 0.0f;
+
+      if (flags & CURVE_KN_RIBBONS) {
+        float3 tg = (p_en - p_st);
+#  ifdef __KERNEL_SSE__
+        const float3 tg_sq = tg * tg;
+        float w = tg_sq.x + tg_sq.y;
+#  else
+        float w = tg.x * tg.x + tg.y * tg.y;
+#  endif
+        if (w == 0) {
+          tree++;
+          level = tree & -tree;
+          continue;
+        }
+#  ifdef __KERNEL_SSE__
+        const float3 p_sttg = p_st * tg;
+        w = -(p_sttg.x + p_sttg.y) / w;
+#  else
+        w = -(p_st.x * tg.x + p_st.y * tg.y) / w;
+#  endif
+        w = saturate(w);
+
+        /* compute u on the curve segment */
+        u = i_st * (1 - w) + i_en * w;
+        r_curr = r_st + (r_en - r_st) * u;
+        /* compare x-y distances */
+        float3 p_curr = ((curve_coef[3] * u + curve_coef[2]) * u + curve_coef[1]) * u +
+                        curve_coef[0];
+
+        float3 dp_st = (3 * curve_coef[3] * i_st + 2 * curve_coef[2]) * i_st + curve_coef[1];
+        if (dot(tg, dp_st) < 0)
+          dp_st *= -1;
+        if (dot(dp_st, -p_st) + p_curr.z * dp_st.z < 0) {
+          tree++;
+          level = tree & -tree;
+          continue;
+        }
+        float3 dp_en = (3 * curve_coef[3] * i_en + 2 * curve_coef[2]) * i_en + curve_coef[1];
+        if (dot(tg, dp_en) < 0)
+          dp_en *= -1;
+        if (dot(dp_en, p_en) - p_curr.z * dp_en.z < 0) {
+          tree++;
+          level = tree & -tree;
+          continue;
+        }
+
+        /* compute coverage */
+        float r_ext = r_curr;
+        coverage = 1.0f;
+        if (difl != 0.0f) {
+          mw_extension = min(difl * fabsf(bmaxz), extmax);
+          r_ext = mw_extension + r_curr;
+#  ifdef __KERNEL_SSE__
+          const float3 p_curr_sq = p_curr * p_curr;
+          const float3 dxxx(_mm_sqrt_ss(_mm_hadd_ps(p_curr_sq.m128, p_curr_sq.m128)));
+          float d = dxxx.x;
+#  else
+          float d = sqrtf(p_curr.x * p_curr.x + p_curr.y * p_curr.y);
+#  endif
+          float d0 = d - r_curr;
+          float d1 = d + r_curr;
+          float inv_mw_extension = 1.0f / mw_extension;
+          if (d0 >= 0)
+            coverage = (min(d1 * inv_mw_extension, 1.0f) - min(d0 * inv_mw_extension, 1.0f)) *
+                       0.5f;
+          else  // inside
+            coverage = (min(d1 * inv_mw_extension, 1.0f) + min(-d0 * inv_mw_extension, 1.0f)) *
+                       0.5f;
+        }
+
+        if (p_curr.x * p_curr.x + p_curr.y * p_curr.y >= r_ext * r_ext || p_curr.z <= epsilon ||
+            isect->t < p_curr.z) {
+          tree++;
+          level = tree & -tree;
+          continue;
+        }
+
+        t = p_curr.z;
+
+        /* stochastic fade from minimum width */
+        if (difl != 0.0f && lcg_state) {
+          if (coverage != 1.0f && (lcg_step_float(lcg_state) > coverage))
+            return hit;
+        }
+      }
+      else {
+        float l = len(p_en - p_st);
+        /* minimum width extension */
+        float or1 = r1;
+        float or2 = r2;
+
+        if (difl != 0.0f) {
+          mw_extension = min(len(p_st - P) * difl, extmax);
+          or1 = r1 < mw_extension ? mw_extension : r1;
+          mw_extension = min(len(p_en - P) * difl, extmax);
+          or2 = r2 < mw_extension ? mw_extension : r2;
+        }
+        /* --- */
+        float invl = 1.0f / l;
+        float3 tg = (p_en - p_st) * invl;
+        gd = (or2 - or1) * invl;
+        float difz = -dot(p_st, tg);
+        float cyla = 1.0f - (tg.z * tg.z * (1 + gd * gd));
+        float invcyla = 1.0f / cyla;
+        float halfb = (-p_st.z - tg.z * (difz + gd * (difz * gd + or1)));
+        float tcentre = -halfb * invcyla;
+        float zcentre = difz + (tg.z * tcentre);
+        float3 tdif = -p_st;
+        tdif.z += tcentre;
+        float tdifz = dot(tdif, tg);
+        float tb = 2 * (tdif.z - tg.z * (tdifz + gd * (tdifz * gd + or1)));
+        float tc = dot(tdif, tdif) - tdifz * tdifz * (1 + gd * gd) - or1 * or1 -
+                   2 * or1 * tdifz * gd;
+        float td = tb * tb - 4 * cyla * tc;
+        if (td < 0.0f) {
+          tree++;
+          level = tree & -tree;
+          continue;
+        }
+
+        float rootd = sqrtf(td);
+        float correction = (-tb - rootd) * 0.5f * invcyla;
+        t = tcentre + correction;
+
+        float3 dp_st = (3 * curve_coef[3] * i_st + 2 * curve_coef[2]) * i_st + curve_coef[1];
+        if (dot(tg, dp_st) < 0)
+          dp_st *= -1;
+        float3 dp_en = (3 * curve_coef[3] * i_en + 2 * curve_coef[2]) * i_en + curve_coef[1];
+        if (dot(tg, dp_en) < 0)
+          dp_en *= -1;
+
+        if (flags & CURVE_KN_BACKFACING &&
+            (dot(dp_st, -p_st) + t * dp_st.z < 0 || dot(dp_en, p_en) - t * dp_en.z < 0 ||
+             isect->t < t || t <= 0.0f)) {
+          correction = (-tb + rootd) * 0.5f * invcyla;
+          t = tcentre + correction;
+        }
+
+        if (dot(dp_st, -p_st) + t * dp_st.z < 0 || dot(dp_en, p_en) - t * dp_en.z < 0 ||
+            isect->t < t || t <= 0.0f) {
+          tree++;
+          level = tree & -tree;
+          continue;
+        }
+
+        float w = (zcentre + (tg.z * correction)) * invl;
+        w = saturate(w);
+        /* compute u on the curve segment */
+        u = i_st * (1 - w) + i_en * w;
+
+        /* stochastic fade from minimum width */
+        if (difl != 0.0f && lcg_state) {
+          r_curr = r1 + (r2 - r1) * w;
+          r_ext = or1 + (or2 - or1) * w;
+          coverage = r_curr / r_ext;
+
+          if (coverage != 1.0f && (lcg_step_float(lcg_state) > coverage))
+            return hit;
+        }
+      }
+      /* we found a new intersection */
+
+#  ifdef __VISIBILITY_FLAG__
+      /* visibility flag test. we do it here under the assumption
+       * that most triangles are culled by node flags */
+      if (kernel_tex_fetch(__prim_visibility, curveAddr) & visibility)
+#  endif
+      {
+        /* record intersection */
+        isect->t = t;
+        isect->u = u;
+        isect->v = gd;
+        isect->prim = curveAddr;
+        isect->object = object;
+        isect->type = type;
+        hit = true;
+      }
+
+      tree++;
+      level = tree & -tree;
+    }
+    else {
+      /* split the curve into two curves and process */
+      level = level >> 1;
+    }
+  }
+
+  return hit;
 }
 
 ccl_device_forceinline bool curve_intersect(KernelGlobals *kg,
@@ -511,245 +561,247 @@ ccl_device_forceinline bool curve_intersect(KernelGlobals *kg,
                                             float difl,
                                             float extmax)
 {
-	/* define few macros to minimize code duplication for SSE */
-#ifndef __KERNEL_SSE2__
-#  define len3_squared(x) len_squared(x)
-#  define len3(x) len(x)
-#  define dot3(x, y) dot(x, y)
-#endif
-
-	const bool is_curve_primitive = (type & PRIMITIVE_CURVE);
-
-	if(!is_curve_primitive && kernel_data.bvh.use_bvh_steps) {
-		const float2 prim_time = kernel_tex_fetch(__prim_time, curveAddr);
-		if(time < prim_time.x || time > prim_time.y) {
-			return false;
-		}
-	}
-
-	int segment = PRIMITIVE_UNPACK_SEGMENT(type);
-	/* curve Intersection check */
-	int flags = kernel_data.curve.curveflags;
-
-	int prim = kernel_tex_fetch(__prim_index, curveAddr);
-	float4 v00 = kernel_tex_fetch(__curves, prim);
-
-	int cnum = __float_as_int(v00.x);
-	int k0 = cnum + segment;
-	int k1 = k0 + 1;
-
-#ifndef __KERNEL_SSE2__
-	float4 P_curve[2];
-
-	if(is_curve_primitive) {
-		P_curve[0] = kernel_tex_fetch(__curve_keys, k0);
-		P_curve[1] = kernel_tex_fetch(__curve_keys, k1);
-	}
-	else {
-		int fobject = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, curveAddr): object;
-		motion_curve_keys(kg, fobject, prim, time, k0, k1, P_curve);
-	}
-
-	float or1 = P_curve[0].w;
-	float or2 = P_curve[1].w;
-	float3 p1 = float4_to_float3(P_curve[0]);
-	float3 p2 = float4_to_float3(P_curve[1]);
-
-	/* minimum width extension */
-	float r1 = or1;
-	float r2 = or2;
-	float3 dif = P - p1;
-	float3 dif_second = P - p2;
-	if(difl != 0.0f) {
-		float pixelsize = min(len3(dif) * difl, extmax);
-		r1 = or1 < pixelsize ? pixelsize : or1;
-		pixelsize = min(len3(dif_second) * difl, extmax);
-		r2 = or2 < pixelsize ? pixelsize : or2;
-	}
-	/* --- */
-
-	float3 p21_diff = p2 - p1;
-	float3 sphere_dif1 = (dif + dif_second) * 0.5f;
-	float3 dir = direction;
-	float sphere_b_tmp = dot3(dir, sphere_dif1);
-	float3 sphere_dif2 = sphere_dif1 - sphere_b_tmp * dir;
-#else
-	ssef P_curve[2];
-
-	if(is_curve_primitive) {
-		P_curve[0] = load4f(&kg->__curve_keys.data[k0].x);
-		P_curve[1] = load4f(&kg->__curve_keys.data[k1].x);
-	}
-	else {
-		int fobject = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, curveAddr): object;
-		motion_curve_keys(kg, fobject, prim, time, k0, k1, (float4*)&P_curve);
-	}
-
-	const ssef or12 = shuffle<3, 3, 3, 3>(P_curve[0], P_curve[1]);
-
-	ssef r12 = or12;
-	const ssef vP = load4f(P);
-	const ssef dif = vP - P_curve[0];
-	const ssef dif_second = vP - P_curve[1];
-	if(difl != 0.0f) {
-		const ssef len1_sq = len3_squared_splat(dif);
-		const ssef len2_sq = len3_squared_splat(dif_second);
-		const ssef len12 = mm_sqrt(shuffle<0, 0, 0, 0>(len1_sq, len2_sq));
-		const ssef pixelsize12 = min(len12 * difl, ssef(extmax));
-		r12 = max(or12, pixelsize12);
-	}
-	float or1 = extract<0>(or12), or2 = extract<0>(shuffle<2>(or12));
-	float r1 = extract<0>(r12), r2 = extract<0>(shuffle<2>(r12));
-
-	const ssef p21_diff = P_curve[1] - P_curve[0];
-	const ssef sphere_dif1 = (dif + dif_second) * 0.5f;
-	const ssef dir = load4f(direction);
-	const ssef sphere_b_tmp = dot3_splat(dir, sphere_dif1);
-	const ssef sphere_dif2 = nmadd(sphere_b_tmp, dir, sphere_dif1);
-#endif
-
-	float mr = max(r1, r2);
-	float l = len3(p21_diff);
-	float invl = 1.0f / l;
-	float sp_r = mr + 0.5f * l;
-
-	float sphere_b = dot3(dir, sphere_dif2);
-	float sdisc = sphere_b * sphere_b - len3_squared(sphere_dif2) + sp_r * sp_r;
-
-	if(sdisc < 0.0f)
-		return false;
-
-	/* obtain parameters and test midpoint distance for suitable modes */
-#ifndef __KERNEL_SSE2__
-	float3 tg = p21_diff * invl;
-#else
-	const ssef tg = p21_diff * invl;
-#endif
-	float gd = (r2 - r1) * invl;
-
-	float dirz = dot3(dir, tg);
-	float difz = dot3(dif, tg);
-
-	float a = 1.0f - (dirz*dirz*(1 + gd*gd));
-
-	float halfb = dot3(dir, dif) - dirz*(difz + gd*(difz*gd + r1));
-
-	float tcentre = -halfb/a;
-	float zcentre = difz + (dirz * tcentre);
-
-	if((tcentre > isect->t) && !(flags & CURVE_KN_ACCURATE))
-		return false;
-	if((zcentre < 0 || zcentre > l) && !(flags & CURVE_KN_ACCURATE) && !(flags & CURVE_KN_INTERSECTCORRECTION))
-		return false;
-
-	/* test minimum separation */
-#ifndef __KERNEL_SSE2__
-	float3 cprod = cross(tg, dir);
-	float cprod2sq = len3_squared(cross(tg, dif));
-#else
-	const ssef cprod = cross(tg, dir);
-	float cprod2sq = len3_squared(cross_zxy(tg, dif));
-#endif
-	float cprodsq = len3_squared(cprod);
-	float distscaled = dot3(cprod, dif);
-
-	if(cprodsq == 0)
-		distscaled = cprod2sq;
-	else
-		distscaled = (distscaled*distscaled)/cprodsq;
-
-	if(distscaled > mr*mr)
-		return false;
-
-	/* calculate true intersection */
-#ifndef __KERNEL_SSE2__
-	float3 tdif = dif + tcentre * dir;
-#else
-	const ssef tdif = madd(ssef(tcentre), dir, dif);
-#endif
-	float tdifz = dot3(tdif, tg);
-	float tdifma = tdifz*gd + r1;
-	float tb = 2*(dot3(dir, tdif) - dirz*(tdifz + gd*tdifma));
-	float tc = dot3(tdif, tdif) - tdifz*tdifz - tdifma*tdifma;
-	float td = tb*tb - 4*a*tc;
-
-	if(td < 0.0f)
-		return false;
-
-	float rootd = 0.0f;
-	float correction = 0.0f;
-	if(flags & CURVE_KN_ACCURATE) {
-		rootd = sqrtf(td);
-		correction = ((-tb - rootd)/(2*a));
-	}
-
-	float t = tcentre + correction;
-
-	if(t < isect->t) {
-
-		if(flags & CURVE_KN_INTERSECTCORRECTION) {
-			rootd = sqrtf(td);
-			correction = ((-tb - rootd)/(2*a));
-			t = tcentre + correction;
-		}
-
-		float z = zcentre + (dirz * correction);
-		// bool backface = false;
-
-		if(flags & CURVE_KN_BACKFACING && (t < 0.0f || z < 0 || z > l)) {
-			// backface = true;
-			correction = ((-tb + rootd)/(2*a));
-			t = tcentre + correction;
-			z = zcentre + (dirz * correction);
-		}
-
-		/* stochastic fade from minimum width */
-		float adjradius = or1 + z * (or2 - or1) * invl;
-		adjradius = adjradius / (r1 + z * gd);
-		if(lcg_state && adjradius != 1.0f) {
-			if(lcg_step_float(lcg_state) > adjradius)
-				return false;
-		}
-		/* --- */
-
-		if(t > 0.0f && t < isect->t && z >= 0 && z <= l) {
-
-			if(flags & CURVE_KN_ENCLOSEFILTER) {
-				float enc_ratio = 1.01f;
-				if((difz > -r1 * enc_ratio) && (dot3(dif_second, tg) < r2 * enc_ratio)) {
-					float a2 = 1.0f - (dirz*dirz*(1 + gd*gd*enc_ratio*enc_ratio));
-					float c2 = dot3(dif, dif) - difz * difz * (1 + gd*gd*enc_ratio*enc_ratio) - r1*r1*enc_ratio*enc_ratio - 2*r1*difz*gd*enc_ratio;
-					if(a2*c2 < 0.0f)
-						return false;
-				}
-			}
-
-#ifdef __VISIBILITY_FLAG__
-			/* visibility flag test. we do it here under the assumption
-			 * that most triangles are culled by node flags */
-			if(kernel_tex_fetch(__prim_visibility, curveAddr) & visibility)
-#endif
-			{
-				/* record intersection */
-				isect->t = t;
-				isect->u = z*invl;
-				isect->v = gd;
-				isect->prim = curveAddr;
-				isect->object = object;
-				isect->type = type;
-
-				return true;
-			}
-		}
-	}
-
-	return false;
-
-#ifndef __KERNEL_SSE2__
-#  undef len3_squared
-#  undef len3
-#  undef dot3
-#endif
+  /* define few macros to minimize code duplication for SSE */
+#  ifndef __KERNEL_SSE2__
+#    define len3_squared(x) len_squared(x)
+#    define len3(x) len(x)
+#    define dot3(x, y) dot(x, y)
+#  endif
+
+  const bool is_curve_primitive = (type & PRIMITIVE_CURVE);
+
+  if (!is_curve_primitive && kernel_data.bvh.use_bvh_steps) {
+    const float2 prim_time = kernel_tex_fetch(__prim_time, curveAddr);
+    if (time < prim_time.x || time > prim_time.y) {
+      return false;
+    }
+  }
+
+  int segment = PRIMITIVE_UNPACK_SEGMENT(type);
+  /* curve Intersection check */
+  int flags = kernel_data.curve.curveflags;
+
+  int prim = kernel_tex_fetch(__prim_index, curveAddr);
+  float4 v00 = kernel_tex_fetch(__curves, prim);
+
+  int cnum = __float_as_int(v00.x);
+  int k0 = cnum + segment;
+  int k1 = k0 + 1;
+
+#  ifndef __KERNEL_SSE2__
+  float4 P_curve[2];
+
+  if (is_curve_primitive) {
+    P_curve[0] = kernel_tex_fetch(__curve_keys, k0);
+    P_curve[1] = kernel_tex_fetch(__curve_keys, k1);
+  }
+  else {
+    int fobject = (object == OBJECT_NONE) ? kernel_tex_fetch(__prim_object, curveAddr) : object;
+    motion_curve_keys(kg, fobject, prim, time, k0, k1, P_curve);
+  }
+
+  float or1 = P_curve[0].w;
+  float or2 = P_curve[1].w;
+  float3 p1 = float4_to_float3(P_curve[0]);
+  float3 p2 = float4_to_float3(P_curve[1]);
+
+  /* minimum width extension */
+  float r1 = or1;
+  float r2 = or2;
+  float3 dif = P - p1;
+  float3 dif_second = P - p2;
+  if (difl != 0.0f) {
+    float pixelsize = min(len3(dif) * difl, extmax);
+    r1 = or1 < pixelsize ? pixelsize : or1;
+    pixelsize = min(len3(dif_second) * difl, extmax);
+    r2 = or2 < pixelsize ? pixelsize : or2;
+  }
+  /* --- */
+
+  float3 p21_diff = p2 - p1;
+  float3 sphere_dif1 = (dif + dif_second) * 0.5f;
+  float3 dir = direction;
+  float sphere_b_tmp = dot3(dir, sphere_dif1);
+  float3 sphere_dif2 = sphere_dif1 - sphere_b_tmp * dir;
+#  else
+  ssef P_curve[2];
+
+  if (is_curve_primitive) {
+    P_curve[0] = load4f(&kg->__curve_keys.data[k0].x);
+    P_curve[1] = load4f(&kg->__curve_keys.data[k1].x);
+  }
+  else {
+    int fobject = (object == OBJECT_NONE) ? kernel_tex_fetch(__prim_object, curveAddr) : object;
+    motion_curve_keys(kg, fobject, prim, time, k0, k1, (float4 *)&P_curve);
+  }
+
+  const ssef or12 = shuffle<3, 3, 3, 3>(P_curve[0], P_curve[1]);
+
+  ssef r12 = or12;
+  const ssef vP = load4f(P);
+  const ssef dif = vP - P_curve[0];
+  const ssef dif_second = vP - P_curve[1];
+  if (difl != 0.0f) {
+    const ssef len1_sq = len3_squared_splat(dif);
+    const ssef len2_sq = len3_squared_splat(dif_second);
+    const ssef len12 = mm_sqrt(shuffle<0, 0, 0, 0>(len1_sq, len2_sq));
+    const ssef pixelsize12 = min(len12 * difl, ssef(extmax));
+    r12 = max(or12, pixelsize12);
+  }
+  float or1 = extract<0>(or12), or2 = extract<0>(shuffle<2>(or12));
+  float r1 = extract<0>(r12), r2 = extract<0>(shuffle<2>(r12));
+
+  const ssef p21_diff = P_curve[1] - P_curve[0];
+  const ssef sphere_dif1 = (dif + dif_second) * 0.5f;
+  const ssef dir = load4f(direction);
+  const ssef sphere_b_tmp = dot3_splat(dir, sphere_dif1);
+  const ssef sphere_dif2 = nmadd(sphere_b_tmp, dir, sphere_dif1);
+#  endif
+
+  float mr = max(r1, r2);
+  float l = len3(p21_diff);
+  float invl = 1.0f / l;
+  float sp_r = mr + 0.5f * l;
+
+  float sphere_b = dot3(dir, sphere_dif2);
+  float sdisc = sphere_b * sphere_b - len3_squared(sphere_dif2) + sp_r * sp_r;
+
+  if (sdisc < 0.0f)
+    return false;
+
+    /* obtain parameters and test midpoint distance for suitable modes */
+#  ifndef __KERNEL_SSE2__
+  float3 tg = p21_diff * invl;
+#  else
+  const ssef tg = p21_diff * invl;
+#  endif
+  float gd = (r2 - r1) * invl;
+
+  float dirz = dot3(dir, tg);
+  float difz = dot3(dif, tg);
+
+  float a = 1.0f - (dirz * dirz * (1 + gd * gd));
+
+  float halfb = dot3(dir, dif) - dirz * (difz + gd * (difz * gd + r1));
+
+  float tcentre = -halfb / a;
+  float zcentre = difz + (dirz * tcentre);
+
+  if ((tcentre > isect->t) && !(flags & CURVE_KN_ACCURATE))
+    return false;
+  if ((zcentre < 0 || zcentre > l) && !(flags & CURVE_KN_ACCURATE) &&
+      !(flags & CURVE_KN_INTERSECTCORRECTION))
+    return false;
+
+    /* test minimum separation */
+#  ifndef __KERNEL_SSE2__
+  float3 cprod = cross(tg, dir);
+  float cprod2sq = len3_squared(cross(tg, dif));
+#  else
+  const ssef cprod = cross(tg, dir);
+  float cprod2sq = len3_squared(cross_zxy(tg, dif));
+#  endif
+  float cprodsq = len3_squared(cprod);
+  float distscaled = dot3(cprod, dif);
+
+  if (cprodsq == 0)
+    distscaled = cprod2sq;
+  else
+    distscaled = (distscaled * distscaled) / cprodsq;
+
+  if (distscaled > mr * mr)
+    return false;
+
+    /* calculate true intersection */
+#  ifndef __KERNEL_SSE2__
+  float3 tdif = dif + tcentre * dir;
+#  else
+  const ssef tdif = madd(ssef(tcentre), dir, dif);
+#  endif
+  float tdifz = dot3(tdif, tg);
+  float tdifma = tdifz * gd + r1;
+  float tb = 2 * (dot3(dir, tdif) - dirz * (tdifz + gd * tdifma));
+  float tc = dot3(tdif, tdif) - tdifz * tdifz - tdifma * tdifma;
+  float td = tb * tb - 4 * a * tc;
+
+  if (td < 0.0f)
+    return false;
+
+  float rootd = 0.0f;
+  float correction = 0.0f;
+  if (flags & CURVE_KN_ACCURATE) {
+    rootd = sqrtf(td);
+    correction = ((-tb - rootd) / (2 * a));
+  }
+
+  float t = tcentre + correction;
+
+  if (t < isect->t) {
+
+    if (flags & CURVE_KN_INTERSECTCORRECTION) {
+      rootd = sqrtf(td);
+      correction = ((-tb - rootd) / (2 * a));
+      t = tcentre + correction;
+    }
+
+    float z = zcentre + (dirz * correction);
+    // bool backface = false;
+
+    if (flags & CURVE_KN_BACKFACING && (t < 0.0f || z < 0 || z > l)) {
+      // backface = true;
+      correction = ((-tb + rootd) / (2 * a));
+      t = tcentre + correction;
+      z = zcentre + (dirz * correction);
+    }
+
+    /* stochastic fade from minimum width */
+    float adjradius = or1 + z * (or2 - or1) * invl;
+    adjradius = adjradius / (r1 + z * gd);
+    if (lcg_state && adjradius != 1.0f) {
+      if (lcg_step_float(lcg_state) > adjradius)
+        return false;
+    }
+    /* --- */
+
+    if (t > 0.0f && t < isect->t && z >= 0 && z <= l) {
+
+      if (flags & CURVE_KN_ENCLOSEFILTER) {
+        float enc_ratio = 1.01f;
+        if ((difz > -r1 * enc_ratio) && (dot3(dif_second, tg) < r2 * enc_ratio)) {
+          float a2 = 1.0f - (dirz * dirz * (1 + gd * gd * enc_ratio * enc_ratio));
+          float c2 = dot3(dif, dif) - difz * difz * (1 + gd * gd * enc_ratio * enc_ratio) -
+                     r1 * r1 * enc_ratio * enc_ratio - 2 * r1 * difz * gd * enc_ratio;
+          if (a2 * c2 < 0.0f)
+            return false;
+        }
+      }
+
+#  ifdef __VISIBILITY_FLAG__
+      /* visibility flag test. we do it here under the assumption
+       * that most triangles are culled by node flags */
+      if (kernel_tex_fetch(__prim_visibility, curveAddr) & visibility)
+#  endif
+      {
+        /* record intersection */
+        isect->t = t;
+        isect->u = z * invl;
+        isect->v = gd;
+        isect->prim = curveAddr;
+        isect->object = object;
+        isect->type = type;
+
+        return true;
+      }
+    }
+  }
+
+  return false;
+
+#  ifndef __KERNEL_SSE2__
+#    undef len3_squared
+#    undef len3
+#    undef dot3
+#  endif
 }
 
 ccl_device_inline float3 curve_refine(KernelGlobals *kg,
@@ -757,154 +809,154 @@ ccl_device_inline float3 curve_refine(KernelGlobals *kg,
                                       const Intersection *isect,
                                       const Ray *ray)
 {
-	int flag = kernel_data.curve.curveflags;
-	float t = isect->t;
-	float3 P = ray->P;
-	float3 D = ray->D;
-
-	if(isect->object != OBJECT_NONE) {
-#ifdef __OBJECT_MOTION__
-		Transform tfm = sd->ob_itfm;
-#else
-		Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM);
-#endif
-
-		P = transform_point(&tfm, P);
-		D = transform_direction(&tfm, D*t);
-		D = normalize_len(D, &t);
-	}
-
-	int prim = kernel_tex_fetch(__prim_index, isect->prim);
-	float4 v00 = kernel_tex_fetch(__curves, prim);
-
-	int k0 = __float_as_int(v00.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type);
-	int k1 = k0 + 1;
-
-	float3 tg;
-
-	if(flag & CURVE_KN_INTERPOLATE) {
-		int ka = max(k0 - 1,__float_as_int(v00.x));
-		int kb = min(k1 + 1,__float_as_int(v00.x) + __float_as_int(v00.y) - 1);
-
-		float4 P_curve[4];
-
-		if(sd->type & PRIMITIVE_CURVE) {
-			P_curve[0] = kernel_tex_fetch(__curve_keys, ka);
-			P_curve[1] = kernel_tex_fetch(__curve_keys, k0);
-			P_curve[2] = kernel_tex_fetch(__curve_keys, k1);
-			P_curve[3] = kernel_tex_fetch(__curve_keys, kb);
-		}
-		else {
-			motion_cardinal_curve_keys(kg, sd->object, sd->prim, sd->time, ka, k0, k1, kb, P_curve);
-		}
-
-		float3 p[4];
-		p[0] = float4_to_float3(P_curve[0]);
-		p[1] = float4_to_float3(P_curve[1]);
-		p[2] = float4_to_float3(P_curve[2]);
-		p[3] = float4_to_float3(P_curve[3]);
-
-		P = P + D*t;
-
-#ifdef __UV__
-		sd->u = isect->u;
-		sd->v = 0.0f;
-#endif
-
-		tg = normalize(curvetangent(isect->u, p[0], p[1], p[2], p[3]));
-
-		if(kernel_data.curve.curveflags & CURVE_KN_RIBBONS) {
-			sd->Ng = normalize(-(D - tg * (dot(tg, D))));
-		}
-		else {
-#ifdef __EMBREE__
- 			if(kernel_data.bvh.scene) {
- 				sd->Ng = normalize(isect->Ng);
- 			}
- 			else
-#endif
-			{
-				/* direction from inside to surface of curve */
-				float3 p_curr = curvepoint(isect->u, p[0], p[1], p[2], p[3]);
-				sd->Ng = normalize(P - p_curr);
-
-				/* adjustment for changing radius */
-				float gd = isect->v;
-
-				if(gd != 0.0f) {
-					sd->Ng = sd->Ng - gd * tg;
-					sd->Ng = normalize(sd->Ng);
-				}
-			}
-		}
-
-		/* todo: sometimes the normal is still so that this is detected as
-		 * backfacing even if cull backfaces is enabled */
-
-		sd->N = sd->Ng;
-	}
-	else {
-		float4 P_curve[2];
-
-		if(sd->type & PRIMITIVE_CURVE) {
-			P_curve[0]= kernel_tex_fetch(__curve_keys, k0);
-			P_curve[1]= kernel_tex_fetch(__curve_keys, k1);
-		}
-		else {
-			motion_curve_keys(kg, sd->object, sd->prim, sd->time, k0, k1, P_curve);
-		}
-
-		float l = 1.0f;
-		tg = normalize_len(float4_to_float3(P_curve[1] - P_curve[0]), &l);
-
-		P = P + D*t;
-
-		float3 dif = P - float4_to_float3(P_curve[0]);
-
-#ifdef __UV__
-		sd->u = dot(dif,tg)/l;
-		sd->v = 0.0f;
-#endif
-
-		if(flag & CURVE_KN_TRUETANGENTGNORMAL) {
-			sd->Ng = -(D - tg * dot(tg, D));
-			sd->Ng = normalize(sd->Ng);
-		}
-		else {
-			float gd = isect->v;
-
-			/* direction from inside to surface of curve */
-			float denom = fmaxf(P_curve[0].w + sd->u * l * gd, 1e-8f);
-			sd->Ng = (dif - tg * sd->u * l) / denom;
-
-			/* adjustment for changing radius */
-			if(gd != 0.0f) {
-				sd->Ng = sd->Ng - gd * tg;
-			}
-
-			sd->Ng = normalize(sd->Ng);
-		}
-
-		sd->N = sd->Ng;
-	}
-
-#ifdef __DPDU__
-	/* dPdu/dPdv */
-	sd->dPdu = tg;
-	sd->dPdv = cross(tg, sd->Ng);
-#endif
-
-	if(isect->object != OBJECT_NONE) {
-#ifdef __OBJECT_MOTION__
-		Transform tfm = sd->ob_tfm;
-#else
-		Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM);
-#endif
-
-		P = transform_point(&tfm, P);
-	}
-
-	return P;
+  int flag = kernel_data.curve.curveflags;
+  float t = isect->t;
+  float3 P = ray->P;
+  float3 D = ray->D;
+
+  if (isect->object != OBJECT_NONE) {
+#  ifdef __OBJECT_MOTION__
+    Transform tfm = sd->ob_itfm;
+#  else
+    Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM);
+#  endif
+
+    P = transform_point(&tfm, P);
+    D = transform_direction(&tfm, D * t);
+    D = normalize_len(D, &t);
+  }
+
+  int prim = kernel_tex_fetch(__prim_index, isect->prim);
+  float4 v00 = kernel_tex_fetch(__curves, prim);
+
+  int k0 = __float_as_int(v00.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type);
+  int k1 = k0 + 1;
+
+  float3 tg;
+
+  if (flag & CURVE_KN_INTERPOLATE) {
+    int ka = max(k0 - 1, __float_as_int(v00.x));
+    int kb = min(k1 + 1, __float_as_int(v00.x) + __float_as_int(v00.y) - 1);
+
+    float4 P_curve[4];
+
+    if (sd->type & PRIMITIVE_CURVE) {
+      P_curve[0] = kernel_tex_fetch(__curve_keys, ka);
+      P_curve[1] = kernel_tex_fetch(__curve_keys, k0);
+      P_curve[2] = kernel_tex_fetch(__curve_keys, k1);
+      P_curve[3] = kernel_tex_fetch(__curve_keys, kb);
+    }
+    else {
+      motion_cardinal_curve_keys(kg, sd->object, sd->prim, sd->time, ka, k0, k1, kb, P_curve);
+    }
+
+    float3 p[4];
+    p[0] = float4_to_float3(P_curve[0]);
+    p[1] = float4_to_float3(P_curve[1]);
+    p[2] = float4_to_float3(P_curve[2]);
+    p[3] = float4_to_float3(P_curve[3]);
+
+    P = P + D * t;
+
+#  ifdef __UV__
+    sd->u = isect->u;
+    sd->v = 0.0f;
+#  endif
+
+    tg = normalize(curvetangent(isect->u, p[0], p[1], p[2], p[3]));
+
+    if (kernel_data.curve.curveflags & CURVE_KN_RIBBONS) {
+      sd->Ng = normalize(-(D - tg * (dot(tg, D))));
+    }
+    else {
+#  ifdef __EMBREE__
+      if (kernel_data.bvh.scene) {
+        sd->Ng = normalize(isect->Ng);
+      }
+      else
+#  endif
+      {
+        /* direction from inside to surface of curve */
+        float3 p_curr = curvepoint(isect->u, p[0], p[1], p[2], p[3]);
+        sd->Ng = normalize(P - p_curr);
+
+        /* adjustment for changing radius */
+        float gd = isect->v;
+
+        if (gd != 0.0f) {
+          sd->Ng = sd->Ng - gd * tg;
+          sd->Ng = normalize(sd->Ng);
+        }
+      }
+    }
+
+    /* todo: sometimes the normal is still so that this is detected as
+     * backfacing even if cull backfaces is enabled */
+
+    sd->N = sd->Ng;
+  }
+  else {
+    float4 P_curve[2];
+
+    if (sd->type & PRIMITIVE_CURVE) {
+      P_curve[0] = kernel_tex_fetch(__curve_keys, k0);
+      P_curve[1] = kernel_tex_fetch(__curve_keys, k1);
+    }
+    else {
+      motion_curve_keys(kg, sd->object, sd->prim, sd->time, k0, k1, P_curve);
+    }
+
+    float l = 1.0f;
+    tg = normalize_len(float4_to_float3(P_curve[1] - P_curve[0]), &l);
+
+    P = P + D * t;
+
+    float3 dif = P - float4_to_float3(P_curve[0]);
+
+#  ifdef __UV__
+    sd->u = dot(dif, tg) / l;
+    sd->v = 0.0f;
+#  endif
+
+    if (flag & CURVE_KN_TRUETANGENTGNORMAL) {
+      sd->Ng = -(D - tg * dot(tg, D));
+      sd->Ng = normalize(sd->Ng);
+    }
+    else {
+      float gd = isect->v;
+
+      /* direction from inside to surface of curve */
+      float denom = fmaxf(P_curve[0].w + sd->u * l * gd, 1e-8f);
+      sd->Ng = (dif - tg * sd->u * l) / denom;
+
+      /* adjustment for changing radius */
+      if (gd != 0.0f) {
+        sd->Ng = sd->Ng - gd * tg;
+      }
+
+      sd->Ng = normalize(sd->Ng);
+    }
+
+    sd->N = sd->Ng;
+  }
+
+#  ifdef __DPDU__
+  /* dPdu/dPdv */
+  sd->dPdu = tg;
+  sd->dPdv = cross(tg, sd->Ng);
+#  endif
+
+  if (isect->object != OBJECT_NONE) {
+#  ifdef __OBJECT_MOTION__
+    Transform tfm = sd->ob_tfm;
+#  else
+    Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM);
+#  endif
+
+    P = transform_point(&tfm, P);
+  }
+
+  return P;
 }
 
 #endif
diff --git a/intern/cycles/kernel/geom/geom_motion_curve.h b/intern/cycles/kernel/geom/geom_motion_curve.h
index 5cc22ae2155..7380c506bf4 100644
--- a/intern/cycles/kernel/geom/geom_motion_curve.h
+++ b/intern/cycles/kernel/geom/geom_motion_curve.h
@@ -25,96 +25,116 @@ CCL_NAMESPACE_BEGIN
 
 #ifdef __HAIR__
 
-ccl_device_inline int find_attribute_curve_motion(KernelGlobals *kg, int object, uint id, AttributeElement *elem)
+ccl_device_inline int find_attribute_curve_motion(KernelGlobals *kg,
+                                                  int object,
+                                                  uint id,
+                                                  AttributeElement *elem)
 {
-	/* todo: find a better (faster) solution for this, maybe store offset per object.
-	 *
-	 * NOTE: currently it's not a bottleneck because in test scenes the loop below runs
-	 * zero iterations and rendering is really slow with motion curves. For until other
-	 * areas are speed up it's probably not so crucial to optimize this out.
-	 */
-	uint attr_offset = object_attribute_map_offset(kg, object) + ATTR_PRIM_CURVE;
-	uint4 attr_map = kernel_tex_fetch(__attributes_map, attr_offset);
-
-	while(attr_map.x != id) {
-		attr_offset += ATTR_PRIM_TYPES;
-		attr_map = kernel_tex_fetch(__attributes_map, attr_offset);
-	}
-
-	*elem = (AttributeElement)attr_map.y;
-
-	/* return result */
-	return (attr_map.y == ATTR_ELEMENT_NONE) ? (int)ATTR_STD_NOT_FOUND : (int)attr_map.z;
+  /* todo: find a better (faster) solution for this, maybe store offset per object.
+   *
+   * NOTE: currently it's not a bottleneck because in test scenes the loop below runs
+   * zero iterations and rendering is really slow with motion curves. For until other
+   * areas are speed up it's probably not so crucial to optimize this out.
+   */
+  uint attr_offset = object_attribute_map_offset(kg, object) + ATTR_PRIM_CURVE;
+  uint4 attr_map = kernel_tex_fetch(__attributes_map, attr_offset);
+
+  while (attr_map.x != id) {
+    attr_offset += ATTR_PRIM_TYPES;
+    attr_map = kernel_tex_fetch(__attributes_map, attr_offset);
+  }
+
+  *elem = (AttributeElement)attr_map.y;
+
+  /* return result */
+  return (attr_map.y == ATTR_ELEMENT_NONE) ? (int)ATTR_STD_NOT_FOUND : (int)attr_map.z;
 }
 
-ccl_device_inline void motion_curve_keys_for_step(KernelGlobals *kg, int offset, int numkeys, int numsteps, int step, int k0, int k1, float4 keys[2])
+ccl_device_inline void motion_curve_keys_for_step(KernelGlobals *kg,
+                                                  int offset,
+                                                  int numkeys,
+                                                  int numsteps,
+                                                  int step,
+                                                  int k0,
+                                                  int k1,
+                                                  float4 keys[2])
 {
-	if(step == numsteps) {
-		/* center step: regular key location */
-		keys[0] = kernel_tex_fetch(__curve_keys, k0);
-		keys[1] = kernel_tex_fetch(__curve_keys, k1);
-	}
-	else {
-		/* center step is not stored in this array */
-		if(step > numsteps)
-			step--;
-
-		offset += step*numkeys;
-
-		keys[0] = kernel_tex_fetch(__attributes_float3, offset + k0);
-		keys[1] = kernel_tex_fetch(__attributes_float3, offset + k1);
-	}
+  if (step == numsteps) {
+    /* center step: regular key location */
+    keys[0] = kernel_tex_fetch(__curve_keys, k0);
+    keys[1] = kernel_tex_fetch(__curve_keys, k1);
+  }
+  else {
+    /* center step is not stored in this array */
+    if (step > numsteps)
+      step--;
+
+    offset += step * numkeys;
+
+    keys[0] = kernel_tex_fetch(__attributes_float3, offset + k0);
+    keys[1] = kernel_tex_fetch(__attributes_float3, offset + k1);
+  }
 }
 
 /* return 2 curve key locations */
-ccl_device_inline void motion_curve_keys(KernelGlobals *kg, int object, int prim, float time, int k0, int k1, float4 keys[2])
+ccl_device_inline void motion_curve_keys(
+    KernelGlobals *kg, int object, int prim, float time, int k0, int k1, float4 keys[2])
 {
-	/* get motion info */
-	int numsteps, numkeys;
-	object_motion_info(kg, object, &numsteps, NULL, &numkeys);
+  /* get motion info */
+  int numsteps, numkeys;
+  object_motion_info(kg, object, &numsteps, NULL, &numkeys);
 
-	/* figure out which steps we need to fetch and their interpolation factor */
-	int maxstep = numsteps*2;
-	int step = min((int)(time*maxstep), maxstep-1);
-	float t = time*maxstep - step;
+  /* figure out which steps we need to fetch and their interpolation factor */
+  int maxstep = numsteps * 2;
+  int step = min((int)(time * maxstep), maxstep - 1);
+  float t = time * maxstep - step;
 
-	/* find attribute */
-	AttributeElement elem;
-	int offset = find_attribute_curve_motion(kg, object, ATTR_STD_MOTION_VERTEX_POSITION, &elem);
-	kernel_assert(offset != ATTR_STD_NOT_FOUND);
+  /* find attribute */
+  AttributeElement elem;
+  int offset = find_attribute_curve_motion(kg, object, ATTR_STD_MOTION_VERTEX_POSITION, &elem);
+  kernel_assert(offset != ATTR_STD_NOT_FOUND);
 
-	/* fetch key coordinates */
-	float4 next_keys[2];
+  /* fetch key coordinates */
+  float4 next_keys[2];
 
-	motion_curve_keys_for_step(kg, offset, numkeys, numsteps, step, k0, k1, keys);
-	motion_curve_keys_for_step(kg, offset, numkeys, numsteps, step+1, k0, k1, next_keys);
+  motion_curve_keys_for_step(kg, offset, numkeys, numsteps, step, k0, k1, keys);
+  motion_curve_keys_for_step(kg, offset, numkeys, numsteps, step + 1, k0, k1, next_keys);
 
-	/* interpolate between steps */
-	keys[0] = (1.0f - t)*keys[0] + t*next_keys[0];
-	keys[1] = (1.0f - t)*keys[1] + t*next_keys[1];
+  /* interpolate between steps */
+  keys[0] = (1.0f - t) * keys[0] + t * next_keys[0];
+  keys[1] = (1.0f - t) * keys[1] + t * next_keys[1];
 }
 
-ccl_device_inline void motion_cardinal_curve_keys_for_step(KernelGlobals *kg, int offset, int numkeys, int numsteps, int step, int k0, int k1, int k2, int k3, float4 keys[4])
+ccl_device_inline void motion_cardinal_curve_keys_for_step(KernelGlobals *kg,
+                                                           int offset,
+                                                           int numkeys,
+                                                           int numsteps,
+                                                           int step,
+                                                           int k0,
+                                                           int k1,
+                                                           int k2,
+                                                           int k3,
+                                                           float4 keys[4])
 {
-	if(step == numsteps) {
-		/* center step: regular key location */
-		keys[0] = kernel_tex_fetch(__curve_keys, k0);
-		keys[1] = kernel_tex_fetch(__curve_keys, k1);
-		keys[2] = kernel_tex_fetch(__curve_keys, k2);
-		keys[3] = kernel_tex_fetch(__curve_keys, k3);
-	}
-	else {
-		/* center step is not stored in this array */
-		if(step > numsteps)
-			step--;
-
-		offset += step*numkeys;
-
-		keys[0] = kernel_tex_fetch(__attributes_float3, offset + k0);
-		keys[1] = kernel_tex_fetch(__attributes_float3, offset + k1);
-		keys[2] = kernel_tex_fetch(__attributes_float3, offset + k2);
-		keys[3] = kernel_tex_fetch(__attributes_float3, offset + k3);
-	}
+  if (step == numsteps) {
+    /* center step: regular key location */
+    keys[0] = kernel_tex_fetch(__curve_keys, k0);
+    keys[1] = kernel_tex_fetch(__curve_keys, k1);
+    keys[2] = kernel_tex_fetch(__curve_keys, k2);
+    keys[3] = kernel_tex_fetch(__curve_keys, k3);
+  }
+  else {
+    /* center step is not stored in this array */
+    if (step > numsteps)
+      step--;
+
+    offset += step * numkeys;
+
+    keys[0] = kernel_tex_fetch(__attributes_float3, offset + k0);
+    keys[1] = kernel_tex_fetch(__attributes_float3, offset + k1);
+    keys[2] = kernel_tex_fetch(__attributes_float3, offset + k2);
+    keys[3] = kernel_tex_fetch(__attributes_float3, offset + k3);
+  }
 }
 
 /* return 2 curve key locations */
@@ -122,37 +142,41 @@ ccl_device_inline void motion_cardinal_curve_keys(KernelGlobals *kg,
                                                   int object,
                                                   int prim,
                                                   float time,
-                                                  int k0, int k1, int k2, int k3,
+                                                  int k0,
+                                                  int k1,
+                                                  int k2,
+                                                  int k3,
                                                   float4 keys[4])
 {
-	/* get motion info */
-	int numsteps, numkeys;
-	object_motion_info(kg, object, &numsteps, NULL, &numkeys);
-
-	/* figure out which steps we need to fetch and their interpolation factor */
-	int maxstep = numsteps*2;
-	int step = min((int)(time*maxstep), maxstep-1);
-	float t = time*maxstep - step;
-
-	/* find attribute */
-	AttributeElement elem;
-	int offset = find_attribute_curve_motion(kg, object, ATTR_STD_MOTION_VERTEX_POSITION, &elem);
-	kernel_assert(offset != ATTR_STD_NOT_FOUND);
-
-	/* fetch key coordinates */
-	float4 next_keys[4];
-
-	motion_cardinal_curve_keys_for_step(kg, offset, numkeys, numsteps, step, k0, k1, k2, k3, keys);
-	motion_cardinal_curve_keys_for_step(kg, offset, numkeys, numsteps, step+1, k0, k1, k2, k3, next_keys);
-
-	/* interpolate between steps */
-	keys[0] = (1.0f - t)*keys[0] + t*next_keys[0];
-	keys[1] = (1.0f - t)*keys[1] + t*next_keys[1];
-	keys[2] = (1.0f - t)*keys[2] + t*next_keys[2];
-	keys[3] = (1.0f - t)*keys[3] + t*next_keys[3];
+  /* get motion info */
+  int numsteps, numkeys;
+  object_motion_info(kg, object, &numsteps, NULL, &numkeys);
+
+  /* figure out which steps we need to fetch and their interpolation factor */
+  int maxstep = numsteps * 2;
+  int step = min((int)(time * maxstep), maxstep - 1);
+  float t = time * maxstep - step;
+
+  /* find attribute */
+  AttributeElement elem;
+  int offset = find_attribute_curve_motion(kg, object, ATTR_STD_MOTION_VERTEX_POSITION, &elem);
+  kernel_assert(offset != ATTR_STD_NOT_FOUND);
+
+  /* fetch key coordinates */
+  float4 next_keys[4];
+
+  motion_cardinal_curve_keys_for_step(kg, offset, numkeys, numsteps, step, k0, k1, k2, k3, keys);
+  motion_cardinal_curve_keys_for_step(
+      kg, offset, numkeys, numsteps, step + 1, k0, k1, k2, k3, next_keys);
+
+  /* interpolate between steps */
+  keys[0] = (1.0f - t) * keys[0] + t * next_keys[0];
+  keys[1] = (1.0f - t) * keys[1] + t * next_keys[1];
+  keys[2] = (1.0f - t) * keys[2] + t * next_keys[2];
+  keys[3] = (1.0f - t) * keys[3] + t * next_keys[3];
 }
 
-#if defined(__KERNEL_AVX2__) && defined(__KERNEL_SSE__)
+#  if defined(__KERNEL_AVX2__) && defined(__KERNEL_SSE__)
 /* Similar to above, but returns keys as pair of two AVX registers with each
  * holding two float4.
  */
@@ -160,56 +184,44 @@ ccl_device_inline void motion_cardinal_curve_keys_avx(KernelGlobals *kg,
                                                       int object,
                                                       int prim,
                                                       float time,
-                                                      int k0, int k1,
-                                                      int k2, int k3,
+                                                      int k0,
+                                                      int k1,
+                                                      int k2,
+                                                      int k3,
                                                       avxf *out_keys_0_1,
                                                       avxf *out_keys_2_3)
 {
-	/* Get motion info. */
-	int numsteps, numkeys;
-	object_motion_info(kg, object, &numsteps, NULL, &numkeys);
-
-	/* Figure out which steps we need to fetch and their interpolation factor. */
-	int maxstep = numsteps * 2;
-	int step = min((int)(time*maxstep), maxstep - 1);
-	float t = time*maxstep - step;
-
-	/* Find attribute. */
-	AttributeElement elem;
-	int offset = find_attribute_curve_motion(kg,
-	                                         object,
-	                                         ATTR_STD_MOTION_VERTEX_POSITION,
-	                                         &elem);
-	kernel_assert(offset != ATTR_STD_NOT_FOUND);
-
-	/* Fetch key coordinates. */
-	float4 next_keys[4];
-	float4 keys[4];
-	motion_cardinal_curve_keys_for_step(kg,
-	                                    offset,
-	                                    numkeys,
-	                                    numsteps,
-	                                    step,
-	                                    k0, k1, k2, k3,
-	                                    keys);
-	motion_cardinal_curve_keys_for_step(kg,
-	                                    offset,
-	                                    numkeys,
-	                                    numsteps,
-	                                    step + 1,
-	                                    k0, k1, k2, k3,
-	                                    next_keys);
-
-	const avxf keys_0_1 = avxf(keys[0].m128, keys[1].m128);
-	const avxf keys_2_3 = avxf(keys[2].m128, keys[3].m128);
-	const avxf next_keys_0_1 = avxf(next_keys[0].m128, next_keys[1].m128);
-	const avxf next_keys_2_3 = avxf(next_keys[2].m128, next_keys[3].m128);
-
-	/* Interpolate between steps. */
-	*out_keys_0_1 = (1.0f - t) * keys_0_1 + t*next_keys_0_1;
-	*out_keys_2_3 = (1.0f - t) * keys_2_3 + t*next_keys_2_3;
+  /* Get motion info. */
+  int numsteps, numkeys;
+  object_motion_info(kg, object, &numsteps, NULL, &numkeys);
+
+  /* Figure out which steps we need to fetch and their interpolation factor. */
+  int maxstep = numsteps * 2;
+  int step = min((int)(time * maxstep), maxstep - 1);
+  float t = time * maxstep - step;
+
+  /* Find attribute. */
+  AttributeElement elem;
+  int offset = find_attribute_curve_motion(kg, object, ATTR_STD_MOTION_VERTEX_POSITION, &elem);
+  kernel_assert(offset != ATTR_STD_NOT_FOUND);
+
+  /* Fetch key coordinates. */
+  float4 next_keys[4];
+  float4 keys[4];
+  motion_cardinal_curve_keys_for_step(kg, offset, numkeys, numsteps, step, k0, k1, k2, k3, keys);
+  motion_cardinal_curve_keys_for_step(
+      kg, offset, numkeys, numsteps, step + 1, k0, k1, k2, k3, next_keys);
+
+  const avxf keys_0_1 = avxf(keys[0].m128, keys[1].m128);
+  const avxf keys_2_3 = avxf(keys[2].m128, keys[3].m128);
+  const avxf next_keys_0_1 = avxf(next_keys[0].m128, next_keys[1].m128);
+  const avxf next_keys_2_3 = avxf(next_keys[2].m128, next_keys[3].m128);
+
+  /* Interpolate between steps. */
+  *out_keys_0_1 = (1.0f - t) * keys_0_1 + t * next_keys_0_1;
+  *out_keys_2_3 = (1.0f - t) * keys_2_3 + t * next_keys_2_3;
 }
-#endif
+#  endif
 
 #endif
 
diff --git a/intern/cycles/kernel/geom/geom_motion_triangle.h b/intern/cycles/kernel/geom/geom_motion_triangle.h
index 64f6d027b99..53d6b92dd7e 100644
--- a/intern/cycles/kernel/geom/geom_motion_triangle.h
+++ b/intern/cycles/kernel/geom/geom_motion_triangle.h
@@ -29,127 +29,145 @@ CCL_NAMESPACE_BEGIN
 
 /* Time interpolation of vertex positions and normals */
 
-ccl_device_inline int find_attribute_motion(KernelGlobals *kg, int object, uint id, AttributeElement *elem)
+ccl_device_inline int find_attribute_motion(KernelGlobals *kg,
+                                            int object,
+                                            uint id,
+                                            AttributeElement *elem)
 {
-	/* todo: find a better (faster) solution for this, maybe store offset per object */
-	uint attr_offset = object_attribute_map_offset(kg, object);
-	uint4 attr_map = kernel_tex_fetch(__attributes_map, attr_offset);
+  /* todo: find a better (faster) solution for this, maybe store offset per object */
+  uint attr_offset = object_attribute_map_offset(kg, object);
+  uint4 attr_map = kernel_tex_fetch(__attributes_map, attr_offset);
 
-	while(attr_map.x != id) {
-		attr_offset += ATTR_PRIM_TYPES;
-		attr_map = kernel_tex_fetch(__attributes_map, attr_offset);
-	}
+  while (attr_map.x != id) {
+    attr_offset += ATTR_PRIM_TYPES;
+    attr_map = kernel_tex_fetch(__attributes_map, attr_offset);
+  }
 
-	*elem = (AttributeElement)attr_map.y;
+  *elem = (AttributeElement)attr_map.y;
 
-	/* return result */
-	return (attr_map.y == ATTR_ELEMENT_NONE) ? (int)ATTR_STD_NOT_FOUND : (int)attr_map.z;
+  /* return result */
+  return (attr_map.y == ATTR_ELEMENT_NONE) ? (int)ATTR_STD_NOT_FOUND : (int)attr_map.z;
 }
 
-ccl_device_inline void motion_triangle_verts_for_step(KernelGlobals *kg, uint4 tri_vindex, int offset, int numverts, int numsteps, int step, float3 verts[3])
+ccl_device_inline void motion_triangle_verts_for_step(KernelGlobals *kg,
+                                                      uint4 tri_vindex,
+                                                      int offset,
+                                                      int numverts,
+                                                      int numsteps,
+                                                      int step,
+                                                      float3 verts[3])
 {
-	if(step == numsteps) {
-		/* center step: regular vertex location */
-		verts[0] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+0));
-		verts[1] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+1));
-		verts[2] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+2));
-	}
-	else {
-		/* center step not store in this array */
-		if(step > numsteps)
-			step--;
-
-		offset += step*numverts;
-
-		verts[0] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.x));
-		verts[1] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.y));
-		verts[2] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.z));
-	}
+  if (step == numsteps) {
+    /* center step: regular vertex location */
+    verts[0] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 0));
+    verts[1] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 1));
+    verts[2] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 2));
+  }
+  else {
+    /* center step not store in this array */
+    if (step > numsteps)
+      step--;
+
+    offset += step * numverts;
+
+    verts[0] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.x));
+    verts[1] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.y));
+    verts[2] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.z));
+  }
 }
 
-ccl_device_inline void motion_triangle_normals_for_step(KernelGlobals *kg, uint4 tri_vindex, int offset, int numverts, int numsteps, int step, float3 normals[3])
+ccl_device_inline void motion_triangle_normals_for_step(KernelGlobals *kg,
+                                                        uint4 tri_vindex,
+                                                        int offset,
+                                                        int numverts,
+                                                        int numsteps,
+                                                        int step,
+                                                        float3 normals[3])
 {
-	if(step == numsteps) {
-		/* center step: regular vertex location */
-		normals[0] = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.x));
-		normals[1] = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.y));
-		normals[2] = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.z));
-	}
-	else {
-		/* center step is not stored in this array */
-		if(step > numsteps)
-			step--;
-
-		offset += step*numverts;
-
-		normals[0] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.x));
-		normals[1] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.y));
-		normals[2] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.z));
-	}
+  if (step == numsteps) {
+    /* center step: regular vertex location */
+    normals[0] = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.x));
+    normals[1] = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.y));
+    normals[2] = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.z));
+  }
+  else {
+    /* center step is not stored in this array */
+    if (step > numsteps)
+      step--;
+
+    offset += step * numverts;
+
+    normals[0] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.x));
+    normals[1] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.y));
+    normals[2] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.z));
+  }
 }
 
-ccl_device_inline void motion_triangle_vertices(KernelGlobals *kg, int object, int prim, float time, float3 verts[3])
+ccl_device_inline void motion_triangle_vertices(
+    KernelGlobals *kg, int object, int prim, float time, float3 verts[3])
 {
-	/* get motion info */
-	int numsteps, numverts;
-	object_motion_info(kg, object, &numsteps, &numverts, NULL);
-
-	/* figure out which steps we need to fetch and their interpolation factor */
-	int maxstep = numsteps*2;
-	int step = min((int)(time*maxstep), maxstep-1);
-	float t = time*maxstep - step;
-
-	/* find attribute */
-	AttributeElement elem;
-	int offset = find_attribute_motion(kg, object, ATTR_STD_MOTION_VERTEX_POSITION, &elem);
-	kernel_assert(offset != ATTR_STD_NOT_FOUND);
-
-	/* fetch vertex coordinates */
-	float3 next_verts[3];
-	uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim);
-
-	motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step, verts);
-	motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step+1, next_verts);
-
-	/* interpolate between steps */
-	verts[0] = (1.0f - t)*verts[0] + t*next_verts[0];
-	verts[1] = (1.0f - t)*verts[1] + t*next_verts[1];
-	verts[2] = (1.0f - t)*verts[2] + t*next_verts[2];
+  /* get motion info */
+  int numsteps, numverts;
+  object_motion_info(kg, object, &numsteps, &numverts, NULL);
+
+  /* figure out which steps we need to fetch and their interpolation factor */
+  int maxstep = numsteps * 2;
+  int step = min((int)(time * maxstep), maxstep - 1);
+  float t = time * maxstep - step;
+
+  /* find attribute */
+  AttributeElement elem;
+  int offset = find_attribute_motion(kg, object, ATTR_STD_MOTION_VERTEX_POSITION, &elem);
+  kernel_assert(offset != ATTR_STD_NOT_FOUND);
+
+  /* fetch vertex coordinates */
+  float3 next_verts[3];
+  uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim);
+
+  motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step, verts);
+  motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step + 1, next_verts);
+
+  /* interpolate between steps */
+  verts[0] = (1.0f - t) * verts[0] + t * next_verts[0];
+  verts[1] = (1.0f - t) * verts[1] + t * next_verts[1];
+  verts[2] = (1.0f - t) * verts[2] + t * next_verts[2];
 }
 
-ccl_device_inline float3 motion_triangle_smooth_normal(KernelGlobals *kg, float3 Ng, int object, int prim, float u, float v, float time)
+ccl_device_inline float3 motion_triangle_smooth_normal(
+    KernelGlobals *kg, float3 Ng, int object, int prim, float u, float v, float time)
 {
-	/* get motion info */
-	int numsteps, numverts;
-	object_motion_info(kg, object, &numsteps, &numverts, NULL);
-
-	/* figure out which steps we need to fetch and their interpolation factor */
-	int maxstep = numsteps*2;
-	int step = min((int)(time*maxstep), maxstep-1);
-	float t = time*maxstep - step;
-
-	/* find attribute */
-	AttributeElement elem;
-	int offset = find_attribute_motion(kg, object, ATTR_STD_MOTION_VERTEX_NORMAL, &elem);
-	kernel_assert(offset != ATTR_STD_NOT_FOUND);
-
-	/* fetch normals */
-	float3 normals[3], next_normals[3];
-	uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim);
-
-	motion_triangle_normals_for_step(kg, tri_vindex, offset, numverts, numsteps, step, normals);
-	motion_triangle_normals_for_step(kg, tri_vindex, offset, numverts, numsteps, step+1, next_normals);
-
-	/* interpolate between steps */
-	normals[0] = (1.0f - t)*normals[0] + t*next_normals[0];
-	normals[1] = (1.0f - t)*normals[1] + t*next_normals[1];
-	normals[2] = (1.0f - t)*normals[2] + t*next_normals[2];
-
-	/* interpolate between vertices */
-	float w = 1.0f - u - v;
-	float3 N = safe_normalize(u*normals[0] + v*normals[1] + w*normals[2]);
-
-	return is_zero(N)? Ng: N;
+  /* get motion info */
+  int numsteps, numverts;
+  object_motion_info(kg, object, &numsteps, &numverts, NULL);
+
+  /* figure out which steps we need to fetch and their interpolation factor */
+  int maxstep = numsteps * 2;
+  int step = min((int)(time * maxstep), maxstep - 1);
+  float t = time * maxstep - step;
+
+  /* find attribute */
+  AttributeElement elem;
+  int offset = find_attribute_motion(kg, object, ATTR_STD_MOTION_VERTEX_NORMAL, &elem);
+  kernel_assert(offset != ATTR_STD_NOT_FOUND);
+
+  /* fetch normals */
+  float3 normals[3], next_normals[3];
+  uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim);
+
+  motion_triangle_normals_for_step(kg, tri_vindex, offset, numverts, numsteps, step, normals);
+  motion_triangle_normals_for_step(
+      kg, tri_vindex, offset, numverts, numsteps, step + 1, next_normals);
+
+  /* interpolate between steps */
+  normals[0] = (1.0f - t) * normals[0] + t * next_normals[0];
+  normals[1] = (1.0f - t) * normals[1] + t * next_normals[1];
+  normals[2] = (1.0f - t) * normals[2] + t * next_normals[2];
+
+  /* interpolate between vertices */
+  float w = 1.0f - u - v;
+  float3 N = safe_normalize(u * normals[0] + v * normals[1] + w * normals[2]);
+
+  return is_zero(N) ? Ng : N;
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/geom/geom_motion_triangle_intersect.h b/intern/cycles/kernel/geom/geom_motion_triangle_intersect.h
index ec7bfad7349..49d4829af38 100644
--- a/intern/cycles/kernel/geom/geom_motion_triangle_intersect.h
+++ b/intern/cycles/kernel/geom/geom_motion_triangle_intersect.h
@@ -32,64 +32,57 @@ CCL_NAMESPACE_BEGIN
  * a closer distance.
  */
 
-ccl_device_inline float3 motion_triangle_refine(KernelGlobals *kg,
-                                                ShaderData *sd,
-                                                const Intersection *isect,
-                                                const Ray *ray,
-                                                float3 verts[3])
+ccl_device_inline float3 motion_triangle_refine(
+    KernelGlobals *kg, ShaderData *sd, const Intersection *isect, const Ray *ray, float3 verts[3])
 {
-	float3 P = ray->P;
-	float3 D = ray->D;
-	float t = isect->t;
+  float3 P = ray->P;
+  float3 D = ray->D;
+  float t = isect->t;
 
 #ifdef __INTERSECTION_REFINE__
-	if(isect->object != OBJECT_NONE) {
-		if(UNLIKELY(t == 0.0f)) {
-			return P;
-		}
+  if (isect->object != OBJECT_NONE) {
+    if (UNLIKELY(t == 0.0f)) {
+      return P;
+    }
 #  ifdef __OBJECT_MOTION__
-		Transform tfm = sd->ob_itfm;
+    Transform tfm = sd->ob_itfm;
 #  else
-		Transform tfm = object_fetch_transform(kg,
-		                                       isect->object,
-		                                       OBJECT_INVERSE_TRANSFORM);
+    Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM);
 #  endif
 
-		P = transform_point(&tfm, P);
-		D = transform_direction(&tfm, D*t);
-		D = normalize_len(D, &t);
-	}
+    P = transform_point(&tfm, P);
+    D = transform_direction(&tfm, D * t);
+    D = normalize_len(D, &t);
+  }
 
-	P = P + D*t;
+  P = P + D * t;
 
-	/* Compute refined intersection distance. */
-	const float3 e1 = verts[0] - verts[2];
-	const float3 e2 = verts[1] - verts[2];
-	const float3 s1 = cross(D, e2);
+  /* Compute refined intersection distance. */
+  const float3 e1 = verts[0] - verts[2];
+  const float3 e2 = verts[1] - verts[2];
+  const float3 s1 = cross(D, e2);
 
-	const float invdivisor = 1.0f/dot(s1, e1);
-	const float3 d = P - verts[2];
-	const float3 s2 = cross(d, e1);
-	float rt = dot(e2, s2)*invdivisor;
+  const float invdivisor = 1.0f / dot(s1, e1);
+  const float3 d = P - verts[2];
+  const float3 s2 = cross(d, e1);
+  float rt = dot(e2, s2) * invdivisor;
 
-	/* Compute refined position. */
-	P = P + D*rt;
+  /* Compute refined position. */
+  P = P + D * rt;
 
-	if(isect->object != OBJECT_NONE) {
+  if (isect->object != OBJECT_NONE) {
 #  ifdef __OBJECT_MOTION__
-		Transform tfm = sd->ob_tfm;
+    Transform tfm = sd->ob_tfm;
 #  else
-		Transform tfm = object_fetch_transform(kg,
-		                                       isect->object,
-		                                       OBJECT_TRANSFORM);
+    Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM);
 #  endif
 
-		P = transform_point(&tfm, P);
-	}
+    P = transform_point(&tfm, P);
+  }
 
-	return P;
+  return P;
 #else
-	return P + D*t;
+  return P + D * t;
 #endif
 }
 
@@ -103,116 +96,112 @@ ccl_device_noinline
 #  else
 ccl_device_inline
 #  endif
-float3 motion_triangle_refine_local(KernelGlobals *kg,
-                                    ShaderData *sd,
-                                    const Intersection *isect,
-                                    const Ray *ray,
-                                    float3 verts[3])
+    float3
+    motion_triangle_refine_local(KernelGlobals *kg,
+                                 ShaderData *sd,
+                                 const Intersection *isect,
+                                 const Ray *ray,
+                                 float3 verts[3])
 {
-	float3 P = ray->P;
-	float3 D = ray->D;
-	float t = isect->t;
+  float3 P = ray->P;
+  float3 D = ray->D;
+  float t = isect->t;
 
 #  ifdef __INTERSECTION_REFINE__
-	if(isect->object != OBJECT_NONE) {
+  if (isect->object != OBJECT_NONE) {
 #    ifdef __OBJECT_MOTION__
-		Transform tfm = sd->ob_itfm;
+    Transform tfm = sd->ob_itfm;
 #    else
-		Transform tfm = object_fetch_transform(kg,
-		                                       isect->object,
-		                                       OBJECT_INVERSE_TRANSFORM);
+    Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM);
 #    endif
 
-		P = transform_point(&tfm, P);
-		D = transform_direction(&tfm, D);
-		D = normalize(D);
-	}
+    P = transform_point(&tfm, P);
+    D = transform_direction(&tfm, D);
+    D = normalize(D);
+  }
 
-	P = P + D*t;
+  P = P + D * t;
 
-	/* compute refined intersection distance */
-	const float3 e1 = verts[0] - verts[2];
-	const float3 e2 = verts[1] - verts[2];
-	const float3 s1 = cross(D, e2);
+  /* compute refined intersection distance */
+  const float3 e1 = verts[0] - verts[2];
+  const float3 e2 = verts[1] - verts[2];
+  const float3 s1 = cross(D, e2);
 
-	const float invdivisor = 1.0f/dot(s1, e1);
-	const float3 d = P - verts[2];
-	const float3 s2 = cross(d, e1);
-	float rt = dot(e2, s2)*invdivisor;
+  const float invdivisor = 1.0f / dot(s1, e1);
+  const float3 d = P - verts[2];
+  const float3 s2 = cross(d, e1);
+  float rt = dot(e2, s2) * invdivisor;
 
-	P = P + D*rt;
+  P = P + D * rt;
 
-	if(isect->object != OBJECT_NONE) {
+  if (isect->object != OBJECT_NONE) {
 #    ifdef __OBJECT_MOTION__
-		Transform tfm = sd->ob_tfm;
+    Transform tfm = sd->ob_tfm;
 #    else
-		Transform tfm = object_fetch_transform(kg,
-		                                       isect->object,
-		                                       OBJECT_TRANSFORM);
+    Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM);
 #    endif
 
-		P = transform_point(&tfm, P);
-	}
+    P = transform_point(&tfm, P);
+  }
 
-	return P;
+  return P;
 #  else  /* __INTERSECTION_REFINE__ */
-	return P + D*t;
-#  endif  /* __INTERSECTION_REFINE__ */
+  return P + D * t;
+#  endif /* __INTERSECTION_REFINE__ */
 }
-#endif  /* __BVH_LOCAL__ */
-
+#endif /* __BVH_LOCAL__ */
 
 /* Ray intersection. We simply compute the vertex positions at the given ray
  * time and do a ray intersection with the resulting triangle.
  */
 
-ccl_device_inline bool motion_triangle_intersect(
-        KernelGlobals *kg,
-        Intersection *isect,
-        float3 P,
-        float3 dir,
-        float time,
-        uint visibility,
-        int object,
-        int prim_addr)
+ccl_device_inline bool motion_triangle_intersect(KernelGlobals *kg,
+                                                 Intersection *isect,
+                                                 float3 P,
+                                                 float3 dir,
+                                                 float time,
+                                                 uint visibility,
+                                                 int object,
+                                                 int prim_addr)
 {
-	/* Primitive index for vertex location lookup. */
-	int prim = kernel_tex_fetch(__prim_index, prim_addr);
-	int fobject = (object == OBJECT_NONE)
-	                  ? kernel_tex_fetch(__prim_object, prim_addr)
-	                  : object;
-	/* Get vertex locations for intersection. */
-	float3 verts[3];
-	motion_triangle_vertices(kg, fobject, prim, time, verts);
-	/* Ray-triangle intersection, unoptimized. */
-	float t, u, v;
-	if(ray_triangle_intersect(P,
-	                          dir,
-	                          isect->t,
+  /* Primitive index for vertex location lookup. */
+  int prim = kernel_tex_fetch(__prim_index, prim_addr);
+  int fobject = (object == OBJECT_NONE) ? kernel_tex_fetch(__prim_object, prim_addr) : object;
+  /* Get vertex locations for intersection. */
+  float3 verts[3];
+  motion_triangle_vertices(kg, fobject, prim, time, verts);
+  /* Ray-triangle intersection, unoptimized. */
+  float t, u, v;
+  if (ray_triangle_intersect(P,
+                             dir,
+                             isect->t,
 #if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__)
-	                          (ssef*)verts,
+                             (ssef *)verts,
 #else
-	                          verts[0], verts[1], verts[2],
+                             verts[0],
+                             verts[1],
+                             verts[2],
 #endif
-	                          &u, &v, &t))
-	{
+                             &u,
+                             &v,
+                             &t)) {
 #ifdef __VISIBILITY_FLAG__
-		/* Visibility flag test. we do it here under the assumption
-		 * that most triangles are culled by node flags.
-		 */
-		if(kernel_tex_fetch(__prim_visibility, prim_addr) & visibility)
+    /* Visibility flag test. we do it here under the assumption
+     * that most triangles are culled by node flags.
+     */
+    if (kernel_tex_fetch(__prim_visibility, prim_addr) & visibility)
 #endif
-		{
-			isect->t = t;
-			isect->u = u;
-			isect->v = v;
-			isect->prim = prim_addr;
-			isect->object = object;
-			isect->type = PRIMITIVE_MOTION_TRIANGLE;
-			return true;
-		}
-	}
-	return false;
+    {
+      isect->t = t;
+      isect->u = u;
+      isect->v = v;
+      isect->prim = prim_addr;
+      isect->object = object;
+      isect->type = PRIMITIVE_MOTION_TRIANGLE;
+      return true;
+    }
+  }
+  return false;
 }
 
 /* Special ray intersection routines for local intersections. In that case we
@@ -221,101 +210,102 @@ ccl_device_inline bool motion_triangle_intersect(
  * Returns whether traversal should be stopped.
  */
 #ifdef __BVH_LOCAL__
-ccl_device_inline bool motion_triangle_intersect_local(
-        KernelGlobals *kg,
-        LocalIntersection *local_isect,
-        float3 P,
-        float3 dir,
-        float time,
-        int object,
-        int local_object,
-        int prim_addr,
-        float tmax,
-        uint *lcg_state,
-        int max_hits)
+ccl_device_inline bool motion_triangle_intersect_local(KernelGlobals *kg,
+                                                       LocalIntersection *local_isect,
+                                                       float3 P,
+                                                       float3 dir,
+                                                       float time,
+                                                       int object,
+                                                       int local_object,
+                                                       int prim_addr,
+                                                       float tmax,
+                                                       uint *lcg_state,
+                                                       int max_hits)
 {
-	/* Only intersect with matching object, for instanced objects we
-	 * already know we are only intersecting the right object. */
-	if(object == OBJECT_NONE) {
-		if(kernel_tex_fetch(__prim_object, prim_addr) != local_object) {
-			return false;
-		}
-	}
-
-	/* Primitive index for vertex location lookup. */
-	int prim = kernel_tex_fetch(__prim_index, prim_addr);
-	/* Get vertex locations for intersection. */
-	float3 verts[3];
-	motion_triangle_vertices(kg, local_object, prim, time, verts);
-	/* Ray-triangle intersection, unoptimized. */
-	float t, u, v;
-	if(!ray_triangle_intersect(P,
-	                           dir,
-	                           tmax,
-#if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__)
-	                           (ssef*)verts,
-#else
-	                           verts[0], verts[1], verts[2],
-#endif
-	                           &u, &v, &t))
-	{
-		return false;
-	}
-
-	/* If no actual hit information is requested, just return here. */
-	if(max_hits == 0) {
-		return true;
-	}
-
-	int hit;
-	if(lcg_state) {
-		/* Record up to max_hits intersections. */
-		for(int i = min(max_hits, local_isect->num_hits) - 1; i >= 0; --i) {
-			if(local_isect->hits[i].t == t) {
-				return false;
-			}
-		}
-
-		local_isect->num_hits++;
-
-		if(local_isect->num_hits <= max_hits) {
-			hit = local_isect->num_hits - 1;
-		}
-		else {
-			/* Reservoir sampling: if we are at the maximum number of
-			 * hits, randomly replace element or skip it.
-			 */
-			hit = lcg_step_uint(lcg_state) % local_isect->num_hits;
-
-			if(hit >= max_hits)
-				return false;
-		}
-	}
-	else {
-		/* Record closest intersection only. */
-		if(local_isect->num_hits && t > local_isect->hits[0].t) {
-			return false;
-		}
-
-		hit = 0;
-		local_isect->num_hits = 1;
-	}
-
-	/* Record intersection. */
-	Intersection *isect = &local_isect->hits[hit];
-	isect->t = t;
-	isect->u = u;
-	isect->v = v;
-	isect->prim = prim_addr;
-	isect->object = object;
-	isect->type = PRIMITIVE_MOTION_TRIANGLE;
-
-	/* Record geometric normal. */
-	local_isect->Ng[hit] = normalize(cross(verts[1] - verts[0],
-	                                       verts[2] - verts[0]));
-
-	return false;
+  /* Only intersect with matching object, for instanced objects we
+   * already know we are only intersecting the right object. */
+  if (object == OBJECT_NONE) {
+    if (kernel_tex_fetch(__prim_object, prim_addr) != local_object) {
+      return false;
+    }
+  }
+
+  /* Primitive index for vertex location lookup. */
+  int prim = kernel_tex_fetch(__prim_index, prim_addr);
+  /* Get vertex locations for intersection. */
+  float3 verts[3];
+  motion_triangle_vertices(kg, local_object, prim, time, verts);
+  /* Ray-triangle intersection, unoptimized. */
+  float t, u, v;
+  if (!ray_triangle_intersect(P,
+                              dir,
+                              tmax,
+#  if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__)
+                              (ssef *)verts,
+#  else
+                              verts[0],
+                              verts[1],
+                              verts[2],
+#  endif
+                              &u,
+                              &v,
+                              &t)) {
+    return false;
+  }
+
+  /* If no actual hit information is requested, just return here. */
+  if (max_hits == 0) {
+    return true;
+  }
+
+  int hit;
+  if (lcg_state) {
+    /* Record up to max_hits intersections. */
+    for (int i = min(max_hits, local_isect->num_hits) - 1; i >= 0; --i) {
+      if (local_isect->hits[i].t == t) {
+        return false;
+      }
+    }
+
+    local_isect->num_hits++;
+
+    if (local_isect->num_hits <= max_hits) {
+      hit = local_isect->num_hits - 1;
+    }
+    else {
+      /* Reservoir sampling: if we are at the maximum number of
+       * hits, randomly replace element or skip it.
+       */
+      hit = lcg_step_uint(lcg_state) % local_isect->num_hits;
+
+      if (hit >= max_hits)
+        return false;
+    }
+  }
+  else {
+    /* Record closest intersection only. */
+    if (local_isect->num_hits && t > local_isect->hits[0].t) {
+      return false;
+    }
+
+    hit = 0;
+    local_isect->num_hits = 1;
+  }
+
+  /* Record intersection. */
+  Intersection *isect = &local_isect->hits[hit];
+  isect->t = t;
+  isect->u = u;
+  isect->v = v;
+  isect->prim = prim_addr;
+  isect->object = object;
+  isect->type = PRIMITIVE_MOTION_TRIANGLE;
+
+  /* Record geometric normal. */
+  local_isect->Ng[hit] = normalize(cross(verts[1] - verts[0], verts[2] - verts[0]));
+
+  return false;
 }
-#endif  /* __BVH_LOCAL__ */
+#endif /* __BVH_LOCAL__ */
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/geom/geom_motion_triangle_shader.h b/intern/cycles/kernel/geom/geom_motion_triangle_shader.h
index e91a4be96ba..5333e82b346 100644
--- a/intern/cycles/kernel/geom/geom_motion_triangle_shader.h
+++ b/intern/cycles/kernel/geom/geom_motion_triangle_shader.h
@@ -32,91 +32,80 @@ CCL_NAMESPACE_BEGIN
  * normals */
 
 /* return 3 triangle vertex normals */
-ccl_device_noinline void motion_triangle_shader_setup(KernelGlobals *kg,
-                                                      ShaderData *sd, const
-                                                      Intersection *isect,
-                                                      const Ray *ray,
-                                                      bool is_local)
+ccl_device_noinline void motion_triangle_shader_setup(
+    KernelGlobals *kg, ShaderData *sd, const Intersection *isect, const Ray *ray, bool is_local)
 {
-	/* Get shader. */
-	sd->shader = kernel_tex_fetch(__tri_shader, sd->prim);
-	/* Get motion info. */
-	/* TODO(sergey): This logic is really similar to motion_triangle_vertices(),
-	 * can we de-duplicate something here?
-	 */
-	int numsteps, numverts;
-	object_motion_info(kg, sd->object, &numsteps, &numverts, NULL);
-	/* Figure out which steps we need to fetch and their interpolation factor. */
-	int maxstep = numsteps*2;
-	int step = min((int)(sd->time*maxstep), maxstep-1);
-	float t = sd->time*maxstep - step;
-	/* Find attribute. */
-	AttributeElement elem;
-	int offset = find_attribute_motion(kg, sd->object,
-	                                   ATTR_STD_MOTION_VERTEX_POSITION,
-	                                   &elem);
-	kernel_assert(offset != ATTR_STD_NOT_FOUND);
-	/* Fetch vertex coordinates. */
-	float3 verts[3], next_verts[3];
-	uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim);
-	motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step, verts);
-	motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step+1, next_verts);
-	/* Interpolate between steps. */
-	verts[0] = (1.0f - t)*verts[0] + t*next_verts[0];
-	verts[1] = (1.0f - t)*verts[1] + t*next_verts[1];
-	verts[2] = (1.0f - t)*verts[2] + t*next_verts[2];
-	/* Compute refined position. */
+  /* Get shader. */
+  sd->shader = kernel_tex_fetch(__tri_shader, sd->prim);
+  /* Get motion info. */
+  /* TODO(sergey): This logic is really similar to motion_triangle_vertices(),
+   * can we de-duplicate something here?
+   */
+  int numsteps, numverts;
+  object_motion_info(kg, sd->object, &numsteps, &numverts, NULL);
+  /* Figure out which steps we need to fetch and their interpolation factor. */
+  int maxstep = numsteps * 2;
+  int step = min((int)(sd->time * maxstep), maxstep - 1);
+  float t = sd->time * maxstep - step;
+  /* Find attribute. */
+  AttributeElement elem;
+  int offset = find_attribute_motion(kg, sd->object, ATTR_STD_MOTION_VERTEX_POSITION, &elem);
+  kernel_assert(offset != ATTR_STD_NOT_FOUND);
+  /* Fetch vertex coordinates. */
+  float3 verts[3], next_verts[3];
+  uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim);
+  motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step, verts);
+  motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step + 1, next_verts);
+  /* Interpolate between steps. */
+  verts[0] = (1.0f - t) * verts[0] + t * next_verts[0];
+  verts[1] = (1.0f - t) * verts[1] + t * next_verts[1];
+  verts[2] = (1.0f - t) * verts[2] + t * next_verts[2];
+  /* Compute refined position. */
 #ifdef __BVH_LOCAL__
-	if(is_local) {
-		sd->P = motion_triangle_refine_local(kg,
-		                                     sd,
-		                                     isect,
-		                                     ray,
-		                                     verts);
-	}
-	else
-#endif  /*  __BVH_LOCAL__*/
-	{
-		sd->P = motion_triangle_refine(kg, sd, isect, ray, verts);
-	}
-	/* Compute face normal. */
-	float3 Ng;
-	if(sd->object_flag & SD_OBJECT_NEGATIVE_SCALE_APPLIED) {
-		Ng = normalize(cross(verts[2] - verts[0], verts[1] - verts[0]));
-	}
-	else {
-		Ng = normalize(cross(verts[1] - verts[0], verts[2] - verts[0]));
-	}
-	sd->Ng = Ng;
-	sd->N = Ng;
-	/* Compute derivatives of P w.r.t. uv. */
+  if (is_local) {
+    sd->P = motion_triangle_refine_local(kg, sd, isect, ray, verts);
+  }
+  else
+#endif /*  __BVH_LOCAL__*/
+  {
+    sd->P = motion_triangle_refine(kg, sd, isect, ray, verts);
+  }
+  /* Compute face normal. */
+  float3 Ng;
+  if (sd->object_flag & SD_OBJECT_NEGATIVE_SCALE_APPLIED) {
+    Ng = normalize(cross(verts[2] - verts[0], verts[1] - verts[0]));
+  }
+  else {
+    Ng = normalize(cross(verts[1] - verts[0], verts[2] - verts[0]));
+  }
+  sd->Ng = Ng;
+  sd->N = Ng;
+  /* Compute derivatives of P w.r.t. uv. */
 #ifdef __DPDU__
-	sd->dPdu = (verts[0] - verts[2]);
-	sd->dPdv = (verts[1] - verts[2]);
+  sd->dPdu = (verts[0] - verts[2]);
+  sd->dPdv = (verts[1] - verts[2]);
 #endif
-	/* Compute smooth normal. */
-	if(sd->shader & SHADER_SMOOTH_NORMAL) {
-		/* Find attribute. */
-		AttributeElement elem;
-		int offset = find_attribute_motion(kg,
-		                                   sd->object,
-		                                   ATTR_STD_MOTION_VERTEX_NORMAL,
-		                                   &elem);
-		kernel_assert(offset != ATTR_STD_NOT_FOUND);
-		/* Fetch vertex coordinates. */
-		float3 normals[3], next_normals[3];
-		motion_triangle_normals_for_step(kg, tri_vindex, offset, numverts, numsteps, step, normals);
-		motion_triangle_normals_for_step(kg, tri_vindex, offset, numverts, numsteps, step+1, next_normals);
-		/* Interpolate between steps. */
-		normals[0] = (1.0f - t)*normals[0] + t*next_normals[0];
-		normals[1] = (1.0f - t)*normals[1] + t*next_normals[1];
-		normals[2] = (1.0f - t)*normals[2] + t*next_normals[2];
-		/* Interpolate between vertices. */
-		float u = sd->u;
-		float v = sd->v;
-		float w = 1.0f - u - v;
-		sd->N = (u*normals[0] + v*normals[1] + w*normals[2]);
-	}
+  /* Compute smooth normal. */
+  if (sd->shader & SHADER_SMOOTH_NORMAL) {
+    /* Find attribute. */
+    AttributeElement elem;
+    int offset = find_attribute_motion(kg, sd->object, ATTR_STD_MOTION_VERTEX_NORMAL, &elem);
+    kernel_assert(offset != ATTR_STD_NOT_FOUND);
+    /* Fetch vertex normals. */
+    float3 normals[3], next_normals[3];
+    motion_triangle_normals_for_step(kg, tri_vindex, offset, numverts, numsteps, step, normals);
+    motion_triangle_normals_for_step(
+        kg, tri_vindex, offset, numverts, numsteps, step + 1, next_normals);
+    /* Interpolate between steps. */
+    normals[0] = (1.0f - t) * normals[0] + t * next_normals[0];
+    normals[1] = (1.0f - t) * normals[1] + t * next_normals[1];
+    normals[2] = (1.0f - t) * normals[2] + t * next_normals[2];
+    /* Interpolate between vertices. */
+    float u = sd->u;
+    float v = sd->v;
+    float w = 1.0f - u - v;
+    sd->N = (u * normals[0] + v * normals[1] + w * normals[2]);
+  }
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/geom/geom_object.h b/intern/cycles/kernel/geom/geom_object.h
index 669c932d720..2792fd64c61 100644
--- a/intern/cycles/kernel/geom/geom_object.h
+++ b/intern/cycles/kernel/geom/geom_object.h
@@ -27,131 +27,143 @@ CCL_NAMESPACE_BEGIN
 /* Object attributes, for now a fixed size and contents */
 
 enum ObjectTransform {
-	OBJECT_TRANSFORM = 0,
-	OBJECT_INVERSE_TRANSFORM = 1,
+  OBJECT_TRANSFORM = 0,
+  OBJECT_INVERSE_TRANSFORM = 1,
 };
 
-enum ObjectVectorTransform {
-	OBJECT_PASS_MOTION_PRE = 0,
-	OBJECT_PASS_MOTION_POST = 1
-};
+enum ObjectVectorTransform { OBJECT_PASS_MOTION_PRE = 0, OBJECT_PASS_MOTION_POST = 1 };
 
 /* Object to world space transformation */
 
-ccl_device_inline Transform object_fetch_transform(KernelGlobals *kg, int object, enum ObjectTransform type)
+ccl_device_inline Transform object_fetch_transform(KernelGlobals *kg,
+                                                   int object,
+                                                   enum ObjectTransform type)
 {
-	if(type == OBJECT_INVERSE_TRANSFORM) {
-		return kernel_tex_fetch(__objects, object).itfm;
-	}
-	else {
-		return kernel_tex_fetch(__objects, object).tfm;
-	}
+  if (type == OBJECT_INVERSE_TRANSFORM) {
+    return kernel_tex_fetch(__objects, object).itfm;
+  }
+  else {
+    return kernel_tex_fetch(__objects, object).tfm;
+  }
 }
 
 /* Lamp to world space transformation */
 
 ccl_device_inline Transform lamp_fetch_transform(KernelGlobals *kg, int lamp, bool inverse)
 {
-	if(inverse) {
-		return kernel_tex_fetch(__lights, lamp).itfm;
-	}
-	else {
-		return kernel_tex_fetch(__lights, lamp).tfm;
-	}
+  if (inverse) {
+    return kernel_tex_fetch(__lights, lamp).itfm;
+  }
+  else {
+    return kernel_tex_fetch(__lights, lamp).tfm;
+  }
 }
 
 /* Object to world space transformation for motion vectors */
 
-ccl_device_inline Transform object_fetch_motion_pass_transform(KernelGlobals *kg, int object, enum ObjectVectorTransform type)
+ccl_device_inline Transform object_fetch_motion_pass_transform(KernelGlobals *kg,
+                                                               int object,
+                                                               enum ObjectVectorTransform type)
 {
-	int offset = object*OBJECT_MOTION_PASS_SIZE + (int)type;
-	return kernel_tex_fetch(__object_motion_pass, offset);
+  int offset = object * OBJECT_MOTION_PASS_SIZE + (int)type;
+  return kernel_tex_fetch(__object_motion_pass, offset);
 }
 
 /* Motion blurred object transformations */
 
 #ifdef __OBJECT_MOTION__
-ccl_device_inline Transform object_fetch_transform_motion(KernelGlobals *kg, int object, float time)
-{
-	const uint motion_offset = kernel_tex_fetch(__objects, object).motion_offset;
-	const ccl_global DecomposedTransform *motion = &kernel_tex_fetch(__object_motion, motion_offset);
-	const uint num_steps = kernel_tex_fetch(__objects, object).numsteps * 2 + 1;
-
-	Transform tfm;
-#ifdef __EMBREE__
-	if(kernel_data.bvh.scene) {
-		transform_motion_array_interpolate_straight(&tfm, motion, num_steps, time);
-	}
-	else
-#endif
-	transform_motion_array_interpolate(&tfm, motion, num_steps, time);
+ccl_device_inline Transform object_fetch_transform_motion(KernelGlobals *kg,
+                                                          int object,
+                                                          float time)
+{
+  const uint motion_offset = kernel_tex_fetch(__objects, object).motion_offset;
+  const ccl_global DecomposedTransform *motion = &kernel_tex_fetch(__object_motion, motion_offset);
+  const uint num_steps = kernel_tex_fetch(__objects, object).numsteps * 2 + 1;
+
+  Transform tfm;
+#  ifdef __EMBREE__
+  if (kernel_data.bvh.scene) {
+    transform_motion_array_interpolate_straight(&tfm, motion, num_steps, time);
+  }
+  else
+#  endif
+    transform_motion_array_interpolate(&tfm, motion, num_steps, time);
 
-	return tfm;
+  return tfm;
 }
 
-ccl_device_inline Transform object_fetch_transform_motion_test(KernelGlobals *kg, int object, float time, Transform *itfm)
+ccl_device_inline Transform object_fetch_transform_motion_test(KernelGlobals *kg,
+                                                               int object,
+                                                               float time,
+                                                               Transform *itfm)
 {
-	int object_flag = kernel_tex_fetch(__object_flag, object);
-	if(object_flag & SD_OBJECT_MOTION) {
-		/* if we do motion blur */
-		Transform tfm = object_fetch_transform_motion(kg, object, time);
+  int object_flag = kernel_tex_fetch(__object_flag, object);
+  if (object_flag & SD_OBJECT_MOTION) {
+    /* if we do motion blur */
+    Transform tfm = object_fetch_transform_motion(kg, object, time);
 
-		if(itfm)
-			*itfm = transform_quick_inverse(tfm);
+    if (itfm)
+      *itfm = transform_quick_inverse(tfm);
 
-		return tfm;
-	}
-	else {
-		Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM);
-		if(itfm)
-			*itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
+    return tfm;
+  }
+  else {
+    Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM);
+    if (itfm)
+      *itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
 
-		return tfm;
-	}
+    return tfm;
+  }
 }
 #endif
 
 /* Transform position from object to world space */
 
-ccl_device_inline void object_position_transform(KernelGlobals *kg, const ShaderData *sd, float3 *P)
+ccl_device_inline void object_position_transform(KernelGlobals *kg,
+                                                 const ShaderData *sd,
+                                                 float3 *P)
 {
 #ifdef __OBJECT_MOTION__
-	*P = transform_point_auto(&sd->ob_tfm, *P);
+  *P = transform_point_auto(&sd->ob_tfm, *P);
 #else
-	Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM);
-	*P = transform_point(&tfm, *P);
+  Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM);
+  *P = transform_point(&tfm, *P);
 #endif
 }
 
 /* Transform position from world to object space */
 
-ccl_device_inline void object_inverse_position_transform(KernelGlobals *kg, const ShaderData *sd, float3 *P)
+ccl_device_inline void object_inverse_position_transform(KernelGlobals *kg,
+                                                         const ShaderData *sd,
+                                                         float3 *P)
 {
 #ifdef __OBJECT_MOTION__
-	*P = transform_point_auto(&sd->ob_itfm, *P);
+  *P = transform_point_auto(&sd->ob_itfm, *P);
 #else
-	Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM);
-	*P = transform_point(&tfm, *P);
+  Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM);
+  *P = transform_point(&tfm, *P);
 #endif
 }
 
 /* Transform normal from world to object space */
 
-ccl_device_inline void object_inverse_normal_transform(KernelGlobals *kg, const ShaderData *sd, float3 *N)
+ccl_device_inline void object_inverse_normal_transform(KernelGlobals *kg,
+                                                       const ShaderData *sd,
+                                                       float3 *N)
 {
 #ifdef __OBJECT_MOTION__
-	if((sd->object != OBJECT_NONE) || (sd->type == PRIMITIVE_LAMP)) {
-		*N = normalize(transform_direction_transposed_auto(&sd->ob_tfm, *N));
-	}
+  if ((sd->object != OBJECT_NONE) || (sd->type == PRIMITIVE_LAMP)) {
+    *N = normalize(transform_direction_transposed_auto(&sd->ob_tfm, *N));
+  }
 #else
-	if(sd->object != OBJECT_NONE) {
-		Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM);
-		*N = normalize(transform_direction_transposed(&tfm, *N));
-	}
-	else if(sd->type == PRIMITIVE_LAMP) {
-		Transform tfm = lamp_fetch_transform(kg, sd->lamp, false);
-		*N = normalize(transform_direction_transposed(&tfm, *N));
-	}
+  if (sd->object != OBJECT_NONE) {
+    Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM);
+    *N = normalize(transform_direction_transposed(&tfm, *N));
+  }
+  else if (sd->type == PRIMITIVE_LAMP) {
+    Transform tfm = lamp_fetch_transform(kg, sd->lamp, false);
+    *N = normalize(transform_direction_transposed(&tfm, *N));
+  }
 #endif
 }
 
@@ -160,10 +172,10 @@ ccl_device_inline void object_inverse_normal_transform(KernelGlobals *kg, const
 ccl_device_inline void object_normal_transform(KernelGlobals *kg, const ShaderData *sd, float3 *N)
 {
 #ifdef __OBJECT_MOTION__
-	*N = normalize(transform_direction_transposed_auto(&sd->ob_itfm, *N));
+  *N = normalize(transform_direction_transposed_auto(&sd->ob_itfm, *N));
 #else
-	Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM);
-	*N = normalize(transform_direction_transposed(&tfm, *N));
+  Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM);
+  *N = normalize(transform_direction_transposed(&tfm, *N));
 #endif
 }
 
@@ -172,22 +184,24 @@ ccl_device_inline void object_normal_transform(KernelGlobals *kg, const ShaderDa
 ccl_device_inline void object_dir_transform(KernelGlobals *kg, const ShaderData *sd, float3 *D)
 {
 #ifdef __OBJECT_MOTION__
-	*D = transform_direction_auto(&sd->ob_tfm, *D);
+  *D = transform_direction_auto(&sd->ob_tfm, *D);
 #else
-	Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM);
-	*D = transform_direction(&tfm, *D);
+  Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM);
+  *D = transform_direction(&tfm, *D);
 #endif
 }
 
 /* Transform direction vector from world to object space */
 
-ccl_device_inline void object_inverse_dir_transform(KernelGlobals *kg, const ShaderData *sd, float3 *D)
+ccl_device_inline void object_inverse_dir_transform(KernelGlobals *kg,
+                                                    const ShaderData *sd,
+                                                    float3 *D)
 {
 #ifdef __OBJECT_MOTION__
-	*D = transform_direction_auto(&sd->ob_itfm, *D);
+  *D = transform_direction_auto(&sd->ob_itfm, *D);
 #else
-	Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM);
-	*D = transform_direction(&tfm, *D);
+  Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM);
+  *D = transform_direction(&tfm, *D);
 #endif
 }
 
@@ -195,14 +209,14 @@ ccl_device_inline void object_inverse_dir_transform(KernelGlobals *kg, const Sha
 
 ccl_device_inline float3 object_location(KernelGlobals *kg, const ShaderData *sd)
 {
-	if(sd->object == OBJECT_NONE)
-		return make_float3(0.0f, 0.0f, 0.0f);
+  if (sd->object == OBJECT_NONE)
+    return make_float3(0.0f, 0.0f, 0.0f);
 
 #ifdef __OBJECT_MOTION__
-	return make_float3(sd->ob_tfm.x.w, sd->ob_tfm.y.w, sd->ob_tfm.z.w);
+  return make_float3(sd->ob_tfm.x.w, sd->ob_tfm.y.w, sd->ob_tfm.z.w);
 #else
-	Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM);
-	return make_float3(tfm.x.w, tfm.y.w, tfm.z.w);
+  Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM);
+  return make_float3(tfm.x.w, tfm.y.w, tfm.z.w);
 #endif
 }
 
@@ -210,218 +224,211 @@ ccl_device_inline float3 object_location(KernelGlobals *kg, const ShaderData *sd
 
 ccl_device_inline float object_surface_area(KernelGlobals *kg, int object)
 {
-	return kernel_tex_fetch(__objects, object).surface_area;
+  return kernel_tex_fetch(__objects, object).surface_area;
 }
 
 /* Pass ID number of object */
 
 ccl_device_inline float object_pass_id(KernelGlobals *kg, int object)
 {
-	if(object == OBJECT_NONE)
-		return 0.0f;
+  if (object == OBJECT_NONE)
+    return 0.0f;
 
-	return kernel_tex_fetch(__objects, object).pass_id;
+  return kernel_tex_fetch(__objects, object).pass_id;
 }
 
 /* Per lamp random number for shader variation */
 
 ccl_device_inline float lamp_random_number(KernelGlobals *kg, int lamp)
 {
-	if(lamp == LAMP_NONE)
-		return 0.0f;
+  if (lamp == LAMP_NONE)
+    return 0.0f;
 
-	return kernel_tex_fetch(__lights, lamp).random;
+  return kernel_tex_fetch(__lights, lamp).random;
 }
 
 /* Per object random number for shader variation */
 
 ccl_device_inline float object_random_number(KernelGlobals *kg, int object)
 {
-	if(object == OBJECT_NONE)
-		return 0.0f;
+  if (object == OBJECT_NONE)
+    return 0.0f;
 
-	return kernel_tex_fetch(__objects, object).random_number;
+  return kernel_tex_fetch(__objects, object).random_number;
 }
 
 /* Particle ID from which this object was generated */
 
 ccl_device_inline int object_particle_id(KernelGlobals *kg, int object)
 {
-	if(object == OBJECT_NONE)
-		return 0;
+  if (object == OBJECT_NONE)
+    return 0;
 
-	return kernel_tex_fetch(__objects, object).particle_index;
+  return kernel_tex_fetch(__objects, object).particle_index;
 }
 
 /* Generated texture coordinate on surface from where object was instanced */
 
 ccl_device_inline float3 object_dupli_generated(KernelGlobals *kg, int object)
 {
-	if(object == OBJECT_NONE)
-		return make_float3(0.0f, 0.0f, 0.0f);
+  if (object == OBJECT_NONE)
+    return make_float3(0.0f, 0.0f, 0.0f);
 
-	const ccl_global KernelObject *kobject = &kernel_tex_fetch(__objects, object);
-	return make_float3(kobject->dupli_generated[0],
-	                   kobject->dupli_generated[1],
-	                   kobject->dupli_generated[2]);
+  const ccl_global KernelObject *kobject = &kernel_tex_fetch(__objects, object);
+  return make_float3(
+      kobject->dupli_generated[0], kobject->dupli_generated[1], kobject->dupli_generated[2]);
 }
 
 /* UV texture coordinate on surface from where object was instanced */
 
 ccl_device_inline float3 object_dupli_uv(KernelGlobals *kg, int object)
 {
-	if(object == OBJECT_NONE)
-		return make_float3(0.0f, 0.0f, 0.0f);
+  if (object == OBJECT_NONE)
+    return make_float3(0.0f, 0.0f, 0.0f);
 
-	const ccl_global KernelObject *kobject = &kernel_tex_fetch(__objects, object);
-	return make_float3(kobject->dupli_uv[0],
-	                   kobject->dupli_uv[1],
-	                   0.0f);
+  const ccl_global KernelObject *kobject = &kernel_tex_fetch(__objects, object);
+  return make_float3(kobject->dupli_uv[0], kobject->dupli_uv[1], 0.0f);
 }
 
 /* Information about mesh for motion blurred triangles and curves */
 
-ccl_device_inline void object_motion_info(KernelGlobals *kg, int object, int *numsteps, int *numverts, int *numkeys)
+ccl_device_inline void object_motion_info(
+    KernelGlobals *kg, int object, int *numsteps, int *numverts, int *numkeys)
 {
-	if(numkeys) {
-		*numkeys = kernel_tex_fetch(__objects, object).numkeys;
-	}
+  if (numkeys) {
+    *numkeys = kernel_tex_fetch(__objects, object).numkeys;
+  }
 
-	if(numsteps)
-		*numsteps = kernel_tex_fetch(__objects, object).numsteps;
-	if(numverts)
-		*numverts = kernel_tex_fetch(__objects, object).numverts;
+  if (numsteps)
+    *numsteps = kernel_tex_fetch(__objects, object).numsteps;
+  if (numverts)
+    *numverts = kernel_tex_fetch(__objects, object).numverts;
 }
 
 /* Offset to an objects patch map */
 
 ccl_device_inline uint object_patch_map_offset(KernelGlobals *kg, int object)
 {
-	if(object == OBJECT_NONE)
-		return 0;
+  if (object == OBJECT_NONE)
+    return 0;
 
-	return kernel_tex_fetch(__objects, object).patch_map_offset;
+  return kernel_tex_fetch(__objects, object).patch_map_offset;
 }
 
 /* Pass ID for shader */
 
 ccl_device int shader_pass_id(KernelGlobals *kg, const ShaderData *sd)
 {
-	return kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).pass_id;
+  return kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).pass_id;
 }
 
 /* Cryptomatte ID */
 
 ccl_device_inline float object_cryptomatte_id(KernelGlobals *kg, int object)
 {
-	if(object == OBJECT_NONE)
-		return 0.0f;
+  if (object == OBJECT_NONE)
+    return 0.0f;
 
-	return kernel_tex_fetch(__objects, object).cryptomatte_object;
+  return kernel_tex_fetch(__objects, object).cryptomatte_object;
 }
 
 ccl_device_inline float object_cryptomatte_asset_id(KernelGlobals *kg, int object)
 {
-	if(object == OBJECT_NONE)
-		return 0;
+  if (object == OBJECT_NONE)
+    return 0;
 
-	return kernel_tex_fetch(__objects, object).cryptomatte_asset;
+  return kernel_tex_fetch(__objects, object).cryptomatte_asset;
 }
 
 /* Particle data from which object was instanced */
 
 ccl_device_inline uint particle_index(KernelGlobals *kg, int particle)
 {
-	return kernel_tex_fetch(__particles, particle).index;
+  return kernel_tex_fetch(__particles, particle).index;
 }
 
 ccl_device float particle_age(KernelGlobals *kg, int particle)
 {
-	return kernel_tex_fetch(__particles, particle).age;
+  return kernel_tex_fetch(__particles, particle).age;
 }
 
 ccl_device float particle_lifetime(KernelGlobals *kg, int particle)
 {
-	return kernel_tex_fetch(__particles, particle).lifetime;
+  return kernel_tex_fetch(__particles, particle).lifetime;
 }
 
 ccl_device float particle_size(KernelGlobals *kg, int particle)
 {
-	return kernel_tex_fetch(__particles, particle).size;
+  return kernel_tex_fetch(__particles, particle).size;
 }
 
 ccl_device float4 particle_rotation(KernelGlobals *kg, int particle)
 {
-	return kernel_tex_fetch(__particles, particle).rotation;
+  return kernel_tex_fetch(__particles, particle).rotation;
 }
 
 ccl_device float3 particle_location(KernelGlobals *kg, int particle)
 {
-	return float4_to_float3(kernel_tex_fetch(__particles, particle).location);
+  return float4_to_float3(kernel_tex_fetch(__particles, particle).location);
 }
 
 ccl_device float3 particle_velocity(KernelGlobals *kg, int particle)
 {
-	return float4_to_float3(kernel_tex_fetch(__particles, particle).velocity);
+  return float4_to_float3(kernel_tex_fetch(__particles, particle).velocity);
 }
 
 ccl_device float3 particle_angular_velocity(KernelGlobals *kg, int particle)
 {
-	return float4_to_float3(kernel_tex_fetch(__particles, particle).angular_velocity);
+  return float4_to_float3(kernel_tex_fetch(__particles, particle).angular_velocity);
 }
 
 /* Object intersection in BVH */
 
 ccl_device_inline float3 bvh_clamp_direction(float3 dir)
 {
-	/* clamp absolute values by exp2f(-80.0f) to avoid division by zero when calculating inverse direction */
+  /* clamp absolute values by exp2f(-80.0f) to avoid division by zero when calculating inverse direction */
 #if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE2__)
-	const ssef oopes(8.271806E-25f,8.271806E-25f,8.271806E-25f,0.0f);
-	const ssef mask = _mm_cmpgt_ps(fabs(dir), oopes);
-	const ssef signdir = signmsk(dir.m128) | oopes;
+  const ssef oopes(8.271806E-25f, 8.271806E-25f, 8.271806E-25f, 0.0f);
+  const ssef mask = _mm_cmpgt_ps(fabs(dir), oopes);
+  const ssef signdir = signmsk(dir.m128) | oopes;
 #  ifndef __KERNEL_AVX__
-	ssef res = mask & ssef(dir);
-	res = _mm_or_ps(res,_mm_andnot_ps(mask, signdir));
+  ssef res = mask & ssef(dir);
+  res = _mm_or_ps(res, _mm_andnot_ps(mask, signdir));
 #  else
-	ssef res = _mm_blendv_ps(signdir, dir, mask);
+  ssef res = _mm_blendv_ps(signdir, dir, mask);
 #  endif
-	return float3(res);
+  return float3(res);
 #else  /* __KERNEL_SSE__ && __KERNEL_SSE2__ */
-	const float ooeps = 8.271806E-25f;
-	return make_float3((fabsf(dir.x) > ooeps)? dir.x: copysignf(ooeps, dir.x),
-	                   (fabsf(dir.y) > ooeps)? dir.y: copysignf(ooeps, dir.y),
-	                   (fabsf(dir.z) > ooeps)? dir.z: copysignf(ooeps, dir.z));
-#endif  /* __KERNEL_SSE__ && __KERNEL_SSE2__ */
+  const float ooeps = 8.271806E-25f;
+  return make_float3((fabsf(dir.x) > ooeps) ? dir.x : copysignf(ooeps, dir.x),
+                     (fabsf(dir.y) > ooeps) ? dir.y : copysignf(ooeps, dir.y),
+                     (fabsf(dir.z) > ooeps) ? dir.z : copysignf(ooeps, dir.z));
+#endif /* __KERNEL_SSE__ && __KERNEL_SSE2__ */
 }
 
 ccl_device_inline float3 bvh_inverse_direction(float3 dir)
 {
-	return rcp(dir);
+  return rcp(dir);
 }
 
 /* Transform ray into object space to enter static object in BVH */
 
-ccl_device_inline float bvh_instance_push(KernelGlobals *kg,
-                                          int object,
-                                          const Ray *ray,
-                                          float3 *P,
-                                          float3 *dir,
-                                          float3 *idir,
-                                          float t)
+ccl_device_inline float bvh_instance_push(
+    KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *dir, float3 *idir, float t)
 {
-	Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
+  Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
 
-	*P = transform_point(&tfm, ray->P);
+  *P = transform_point(&tfm, ray->P);
 
-	float len;
-	*dir = bvh_clamp_direction(normalize_len(transform_direction(&tfm, ray->D), &len));
-	*idir = bvh_inverse_direction(*dir);
+  float len;
+  *dir = bvh_clamp_direction(normalize_len(transform_direction(&tfm, ray->D), &len));
+  *idir = bvh_inverse_direction(*dir);
 
-	if(t != FLT_MAX) {
-		t *= len;
-	}
+  if (t != FLT_MAX) {
+    t *= len;
+  }
 
-	return t;
+  return t;
 }
 
 #ifdef __QBVH__
@@ -440,85 +447,85 @@ ccl_device_inline void qbvh_instance_push(KernelGlobals *kg,
                                           float *t,
                                           float *t1)
 {
-	Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
+  Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
 
-	*P = transform_point(&tfm, ray->P);
+  *P = transform_point(&tfm, ray->P);
 
-	float len;
-	*dir = bvh_clamp_direction(normalize_len(transform_direction(&tfm, ray->D), &len));
-	*idir = bvh_inverse_direction(*dir);
+  float len;
+  *dir = bvh_clamp_direction(normalize_len(transform_direction(&tfm, ray->D), &len));
+  *idir = bvh_inverse_direction(*dir);
 
-	if(*t != FLT_MAX)
-		*t *= len;
+  if (*t != FLT_MAX)
+    *t *= len;
 
-	if(*t1 != -FLT_MAX)
-		*t1 *= len;
+  if (*t1 != -FLT_MAX)
+    *t1 *= len;
 }
 #endif
 
 /* Transform ray to exit static object in BVH */
 
-ccl_device_inline float bvh_instance_pop(KernelGlobals *kg,
-                                         int object,
-                                         const Ray *ray,
-                                         float3 *P,
-                                         float3 *dir,
-                                         float3 *idir,
-                                         float t)
+ccl_device_inline float bvh_instance_pop(
+    KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *dir, float3 *idir, float t)
 {
-	if(t != FLT_MAX) {
-		Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
-		t /= len(transform_direction(&tfm, ray->D));
-	}
+  if (t != FLT_MAX) {
+    Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
+    t /= len(transform_direction(&tfm, ray->D));
+  }
 
-	*P = ray->P;
-	*dir = bvh_clamp_direction(ray->D);
-	*idir = bvh_inverse_direction(*dir);
+  *P = ray->P;
+  *dir = bvh_clamp_direction(ray->D);
+  *idir = bvh_inverse_direction(*dir);
 
-	return t;
+  return t;
 }
 
 /* Same as above, but returns scale factor to apply to multiple intersection distances */
 
-ccl_device_inline void bvh_instance_pop_factor(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *dir, float3 *idir, float *t_fac)
+ccl_device_inline void bvh_instance_pop_factor(KernelGlobals *kg,
+                                               int object,
+                                               const Ray *ray,
+                                               float3 *P,
+                                               float3 *dir,
+                                               float3 *idir,
+                                               float *t_fac)
 {
-	Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
-	*t_fac = 1.0f / len(transform_direction(&tfm, ray->D));
+  Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
+  *t_fac = 1.0f / len(transform_direction(&tfm, ray->D));
 
-	*P = ray->P;
-	*dir = bvh_clamp_direction(ray->D);
-	*idir = bvh_inverse_direction(*dir);
+  *P = ray->P;
+  *dir = bvh_clamp_direction(ray->D);
+  *idir = bvh_inverse_direction(*dir);
 }
 
-
 #ifdef __OBJECT_MOTION__
 /* Transform ray into object space to enter motion blurred object in BVH */
 
 ccl_device_inline float bvh_instance_motion_push(KernelGlobals *kg,
-                                                int object,
-                                                const Ray *ray,
-                                                float3 *P,
-                                                float3 *dir,
-                                                float3 *idir,
-                                                float t,
-                                                Transform *itfm)
+                                                 int object,
+                                                 const Ray *ray,
+                                                 float3 *P,
+                                                 float3 *dir,
+                                                 float3 *idir,
+                                                 float t,
+                                                 Transform *itfm)
 {
-	object_fetch_transform_motion_test(kg, object, ray->time, itfm);
+  object_fetch_transform_motion_test(kg, object, ray->time, itfm);
 
-	*P = transform_point(itfm, ray->P);
+  *P = transform_point(itfm, ray->P);
 
-	float len;
-	*dir = bvh_clamp_direction(normalize_len(transform_direction(itfm, ray->D), &len));
-	*idir = bvh_inverse_direction(*dir);
+  float len;
+  *dir = bvh_clamp_direction(normalize_len(transform_direction(itfm, ray->D), &len));
+  *idir = bvh_inverse_direction(*dir);
 
-	if(t != FLT_MAX) {
-		t *= len;
-	}
+  if (t != FLT_MAX) {
+    t *= len;
+  }
 
-	return t;
+  return t;
 }
 
-#ifdef __QBVH__
+#  ifdef __QBVH__
 /* Same as above, but optimized for QBVH scene intersection,
  * which needs to modify two max distances.
  *
@@ -535,21 +542,21 @@ ccl_device_inline void qbvh_instance_motion_push(KernelGlobals *kg,
                                                  float *t1,
                                                  Transform *itfm)
 {
-	object_fetch_transform_motion_test(kg, object, ray->time, itfm);
+  object_fetch_transform_motion_test(kg, object, ray->time, itfm);
 
-	*P = transform_point(itfm, ray->P);
+  *P = transform_point(itfm, ray->P);
 
-	float len;
-	*dir = bvh_clamp_direction(normalize_len(transform_direction(itfm, ray->D), &len));
-	*idir = bvh_inverse_direction(*dir);
+  float len;
+  *dir = bvh_clamp_direction(normalize_len(transform_direction(itfm, ray->D), &len));
+  *idir = bvh_inverse_direction(*dir);
 
-	if(*t != FLT_MAX)
-		*t *= len;
+  if (*t != FLT_MAX)
+    *t *= len;
 
-	if(*t1 != -FLT_MAX)
-		*t1 *= len;
+  if (*t1 != -FLT_MAX)
+    *t1 *= len;
 }
-#endif
+#  endif
 
 /* Transorm ray to exit motion blurred object in BVH */
 
@@ -562,15 +569,15 @@ ccl_device_inline float bvh_instance_motion_pop(KernelGlobals *kg,
                                                 float t,
                                                 Transform *itfm)
 {
-	if(t != FLT_MAX) {
-		t /= len(transform_direction(itfm, ray->D));
-	}
+  if (t != FLT_MAX) {
+    t /= len(transform_direction(itfm, ray->D));
+  }
 
-	*P = ray->P;
-	*dir = bvh_clamp_direction(ray->D);
-	*idir = bvh_inverse_direction(*dir);
+  *P = ray->P;
+  *dir = bvh_clamp_direction(ray->D);
+  *idir = bvh_inverse_direction(*dir);
 
-	return t;
+  return t;
 }
 
 /* Same as above, but returns scale factor to apply to multiple intersection distances */
@@ -584,10 +591,10 @@ ccl_device_inline void bvh_instance_motion_pop_factor(KernelGlobals *kg,
                                                       float *t_fac,
                                                       Transform *itfm)
 {
-	*t_fac = 1.0f / len(transform_direction(itfm, ray->D));
-	*P = ray->P;
-	*dir = bvh_clamp_direction(ray->D);
-	*idir = bvh_inverse_direction(*dir);
+  *t_fac = 1.0f / len(transform_direction(itfm, ray->D));
+  *P = ray->P;
+  *dir = bvh_clamp_direction(ray->D);
+  *idir = bvh_inverse_direction(*dir);
 }
 
 #endif
@@ -599,30 +606,30 @@ ccl_device_inline void bvh_instance_motion_pop_factor(KernelGlobals *kg,
 
 #ifdef __KERNEL_OPENCL__
 ccl_device_inline void object_position_transform_addrspace(KernelGlobals *kg,
-                                                         const ShaderData *sd,
-                                                         ccl_addr_space float3 *P)
+                                                           const ShaderData *sd,
+                                                           ccl_addr_space float3 *P)
 {
-	float3 private_P = *P;
-	object_position_transform(kg, sd, &private_P);
-	*P = private_P;
+  float3 private_P = *P;
+  object_position_transform(kg, sd, &private_P);
+  *P = private_P;
 }
 
 ccl_device_inline void object_dir_transform_addrspace(KernelGlobals *kg,
                                                       const ShaderData *sd,
                                                       ccl_addr_space float3 *D)
 {
-	float3 private_D = *D;
-	object_dir_transform(kg, sd, &private_D);
-	*D = private_D;
+  float3 private_D = *D;
+  object_dir_transform(kg, sd, &private_D);
+  *D = private_D;
 }
 
 ccl_device_inline void object_normal_transform_addrspace(KernelGlobals *kg,
                                                          const ShaderData *sd,
                                                          ccl_addr_space float3 *N)
 {
-	float3 private_N = *N;
-	object_normal_transform(kg, sd, &private_N);
-	*N = private_N;
+  float3 private_N = *N;
+  object_normal_transform(kg, sd, &private_N);
+  *N = private_N;
 }
 #endif
 
diff --git a/intern/cycles/kernel/geom/geom_patch.h b/intern/cycles/kernel/geom/geom_patch.h
index edb82172959..df19199f68e 100644
--- a/intern/cycles/kernel/geom/geom_patch.h
+++ b/intern/cycles/kernel/geom/geom_patch.h
@@ -27,342 +27,394 @@
 CCL_NAMESPACE_BEGIN
 
 typedef struct PatchHandle {
-	int array_index, patch_index, vert_index;
+  int array_index, patch_index, vert_index;
 } PatchHandle;
 
 ccl_device_inline int patch_map_resolve_quadrant(float median, float *u, float *v)
 {
-	int quadrant = -1;
-
-	if(*u < median) {
-		if(*v < median) {
-			quadrant = 0;
-		}
-		else {
-			quadrant = 1;
-			*v -= median;
-		}
-	}
-	else {
-		if(*v < median) {
-			quadrant = 3;
-		}
-		else {
-			quadrant = 2;
-			*v -= median;
-		}
-		*u -= median;
-	}
-
-	return quadrant;
+  int quadrant = -1;
+
+  if (*u < median) {
+    if (*v < median) {
+      quadrant = 0;
+    }
+    else {
+      quadrant = 1;
+      *v -= median;
+    }
+  }
+  else {
+    if (*v < median) {
+      quadrant = 3;
+    }
+    else {
+      quadrant = 2;
+      *v -= median;
+    }
+    *u -= median;
+  }
+
+  return quadrant;
 }
 
 /* retrieve PatchHandle from patch coords */
 
-ccl_device_inline PatchHandle patch_map_find_patch(KernelGlobals *kg, int object, int patch, float u, float v)
+ccl_device_inline PatchHandle
+patch_map_find_patch(KernelGlobals *kg, int object, int patch, float u, float v)
 {
-	PatchHandle handle;
+  PatchHandle handle;
 
-	kernel_assert((u >= 0.0f) && (u <= 1.0f) && (v >= 0.0f) && (v <= 1.0f));
+  kernel_assert((u >= 0.0f) && (u <= 1.0f) && (v >= 0.0f) && (v <= 1.0f));
 
-	int node = (object_patch_map_offset(kg, object) + patch)/2;
-	float median = 0.5f;
+  int node = (object_patch_map_offset(kg, object) + patch) / 2;
+  float median = 0.5f;
 
-	for(int depth = 0; depth < 0xff; depth++) {
-		float delta = median * 0.5f;
+  for (int depth = 0; depth < 0xff; depth++) {
+    float delta = median * 0.5f;
 
-		int quadrant = patch_map_resolve_quadrant(median, &u, &v);
-		kernel_assert(quadrant >= 0);
+    int quadrant = patch_map_resolve_quadrant(median, &u, &v);
+    kernel_assert(quadrant >= 0);
 
-		uint child = kernel_tex_fetch(__patches, node + quadrant);
+    uint child = kernel_tex_fetch(__patches, node + quadrant);
 
-		/* is the quadrant a hole? */
-		if(!(child & PATCH_MAP_NODE_IS_SET)) {
-			handle.array_index = -1;
-			return handle;
-		}
+    /* is the quadrant a hole? */
+    if (!(child & PATCH_MAP_NODE_IS_SET)) {
+      handle.array_index = -1;
+      return handle;
+    }
 
-		uint index = child & PATCH_MAP_NODE_INDEX_MASK;
+    uint index = child & PATCH_MAP_NODE_INDEX_MASK;
 
-		if(child & PATCH_MAP_NODE_IS_LEAF) {
-			handle.array_index = kernel_tex_fetch(__patches, index + 0);
-			handle.patch_index = kernel_tex_fetch(__patches, index + 1);
-			handle.vert_index = kernel_tex_fetch(__patches, index + 2);
+    if (child & PATCH_MAP_NODE_IS_LEAF) {
+      handle.array_index = kernel_tex_fetch(__patches, index + 0);
+      handle.patch_index = kernel_tex_fetch(__patches, index + 1);
+      handle.vert_index = kernel_tex_fetch(__patches, index + 2);
 
-			return handle;
-		} else {
-			node = index;
-		}
+      return handle;
+    }
+    else {
+      node = index;
+    }
 
-		median = delta;
-	}
+    median = delta;
+  }
 
-	/* no leaf found */
-	kernel_assert(0);
+  /* no leaf found */
+  kernel_assert(0);
 
-	handle.array_index = -1;
-	return handle;
+  handle.array_index = -1;
+  return handle;
 }
 
 ccl_device_inline void patch_eval_bspline_weights(float t, float *point, float *deriv)
 {
-	/* The four uniform cubic B-Spline basis functions evaluated at t */
-	float inv_6 = 1.0f / 6.0f;
-
-	float t2 = t * t;
-	float t3 = t * t2;
-
-	point[0] = inv_6 * (1.0f - 3.0f*(t - t2) - t3);
-	point[1] = inv_6 * (4.0f - 6.0f*t2 + 3.0f*t3);
-	point[2] = inv_6 * (1.0f + 3.0f*(t + t2 - t3));
-	point[3] = inv_6 * t3;
-
-	/* Derivatives of the above four basis functions at t */
-	deriv[0] = -0.5f*t2 + t - 0.5f;
-	deriv[1] =  1.5f*t2 - 2.0f*t;
-	deriv[2] = -1.5f*t2 + t + 0.5f;
-	deriv[3] =  0.5f*t2;
+  /* The four uniform cubic B-Spline basis functions evaluated at t */
+  float inv_6 = 1.0f / 6.0f;
+
+  float t2 = t * t;
+  float t3 = t * t2;
+
+  point[0] = inv_6 * (1.0f - 3.0f * (t - t2) - t3);
+  point[1] = inv_6 * (4.0f - 6.0f * t2 + 3.0f * t3);
+  point[2] = inv_6 * (1.0f + 3.0f * (t + t2 - t3));
+  point[3] = inv_6 * t3;
+
+  /* Derivatives of the above four basis functions at t */
+  deriv[0] = -0.5f * t2 + t - 0.5f;
+  deriv[1] = 1.5f * t2 - 2.0f * t;
+  deriv[2] = -1.5f * t2 + t + 0.5f;
+  deriv[3] = 0.5f * t2;
 }
 
 ccl_device_inline void patch_eval_adjust_boundary_weights(uint bits, float *s, float *t)
 {
-	int boundary = ((bits >> 8) & 0xf);
-
-	if(boundary & 1) {
-		t[2] -= t[0];
-		t[1] += 2*t[0];
-		t[0] = 0;
-	}
-
-	if(boundary & 2) {
-		s[1] -= s[3];
-		s[2] += 2*s[3];
-		s[3] = 0;
-	}
-
-	if(boundary & 4) {
-		t[1] -= t[3];
-		t[2] += 2*t[3];
-		t[3] = 0;
-	}
-
-	if(boundary & 8) {
-		s[2] -= s[0];
-		s[1] += 2*s[0];
-		s[0] = 0;
-	}
+  int boundary = ((bits >> 8) & 0xf);
+
+  if (boundary & 1) {
+    t[2] -= t[0];
+    t[1] += 2 * t[0];
+    t[0] = 0;
+  }
+
+  if (boundary & 2) {
+    s[1] -= s[3];
+    s[2] += 2 * s[3];
+    s[3] = 0;
+  }
+
+  if (boundary & 4) {
+    t[1] -= t[3];
+    t[2] += 2 * t[3];
+    t[3] = 0;
+  }
+
+  if (boundary & 8) {
+    s[2] -= s[0];
+    s[1] += 2 * s[0];
+    s[0] = 0;
+  }
 }
 
 ccl_device_inline int patch_eval_depth(uint patch_bits)
 {
-	return (patch_bits & 0xf);
+  return (patch_bits & 0xf);
 }
 
 ccl_device_inline float patch_eval_param_fraction(uint patch_bits)
 {
-	bool non_quad_root = (patch_bits >> 4) & 0x1;
-	int depth = patch_eval_depth(patch_bits);
-
-	if(non_quad_root) {
-		return 1.0f / (float)(1 << (depth-1));
-	}
-	else {
-		return 1.0f / (float)(1 << depth);
-	}
+  bool non_quad_root = (patch_bits >> 4) & 0x1;
+  int depth = patch_eval_depth(patch_bits);
+
+  if (non_quad_root) {
+    return 1.0f / (float)(1 << (depth - 1));
+  }
+  else {
+    return 1.0f / (float)(1 << depth);
+  }
 }
 
 ccl_device_inline void patch_eval_normalize_coords(uint patch_bits, float *u, float *v)
 {
-	float frac = patch_eval_param_fraction(patch_bits);
+  float frac = patch_eval_param_fraction(patch_bits);
 
-	int iu = (patch_bits >> 22) & 0x3ff;
-	int iv = (patch_bits >> 12) & 0x3ff;
+  int iu = (patch_bits >> 22) & 0x3ff;
+  int iv = (patch_bits >> 12) & 0x3ff;
 
-	/* top left corner */
-	float pu = (float)iu*frac;
-	float pv = (float)iv*frac;
+  /* top left corner */
+  float pu = (float)iu * frac;
+  float pv = (float)iv * frac;
 
-	/* normalize uv coordinates */
-	*u = (*u - pu) / frac;
-	*v = (*v - pv) / frac;
+  /* normalize uv coordinates */
+  *u = (*u - pu) / frac;
+  *v = (*v - pv) / frac;
 }
 
 /* retrieve patch control indices */
 
-ccl_device_inline int patch_eval_indices(KernelGlobals *kg, const PatchHandle *handle, int channel,
+ccl_device_inline int patch_eval_indices(KernelGlobals *kg,
+                                         const PatchHandle *handle,
+                                         int channel,
                                          int indices[PATCH_MAX_CONTROL_VERTS])
 {
-	int index_base = kernel_tex_fetch(__patches, handle->array_index + 2) + handle->vert_index;
+  int index_base = kernel_tex_fetch(__patches, handle->array_index + 2) + handle->vert_index;
 
-	/* XXX: regular patches only */
-	for(int i = 0; i < 16; i++) {
-		indices[i] = kernel_tex_fetch(__patches, index_base + i);
-	}
+  /* XXX: regular patches only */
+  for (int i = 0; i < 16; i++) {
+    indices[i] = kernel_tex_fetch(__patches, index_base + i);
+  }
 
-	return 16;
+  return 16;
 }
 
 /* evaluate patch basis functions */
 
-ccl_device_inline void patch_eval_basis(KernelGlobals *kg, const PatchHandle *handle, float u, float v,
-                                float weights[PATCH_MAX_CONTROL_VERTS],
-                                float weights_du[PATCH_MAX_CONTROL_VERTS],
-                                float weights_dv[PATCH_MAX_CONTROL_VERTS])
+ccl_device_inline void patch_eval_basis(KernelGlobals *kg,
+                                        const PatchHandle *handle,
+                                        float u,
+                                        float v,
+                                        float weights[PATCH_MAX_CONTROL_VERTS],
+                                        float weights_du[PATCH_MAX_CONTROL_VERTS],
+                                        float weights_dv[PATCH_MAX_CONTROL_VERTS])
 {
-	uint patch_bits = kernel_tex_fetch(__patches, handle->patch_index + 1); /* read patch param */
-	float d_scale = 1 << patch_eval_depth(patch_bits);
+  uint patch_bits = kernel_tex_fetch(__patches, handle->patch_index + 1); /* read patch param */
+  float d_scale = 1 << patch_eval_depth(patch_bits);
 
-	bool non_quad_root = (patch_bits >> 4) & 0x1;
-	if(non_quad_root) {
-		d_scale *= 0.5f;
-	}
+  bool non_quad_root = (patch_bits >> 4) & 0x1;
+  if (non_quad_root) {
+    d_scale *= 0.5f;
+  }
 
-	patch_eval_normalize_coords(patch_bits, &u, &v);
+  patch_eval_normalize_coords(patch_bits, &u, &v);
 
-	/* XXX: regular patches only for now. */
+  /* XXX: regular patches only for now. */
 
-	float s[4], t[4], ds[4], dt[4];
+  float s[4], t[4], ds[4], dt[4];
 
-	patch_eval_bspline_weights(u, s, ds);
-	patch_eval_bspline_weights(v, t, dt);
+  patch_eval_bspline_weights(u, s, ds);
+  patch_eval_bspline_weights(v, t, dt);
 
-	patch_eval_adjust_boundary_weights(patch_bits, s, t);
-	patch_eval_adjust_boundary_weights(patch_bits, ds, dt);
+  patch_eval_adjust_boundary_weights(patch_bits, s, t);
+  patch_eval_adjust_boundary_weights(patch_bits, ds, dt);
 
-	for(int k = 0; k < 4; k++) {
-		for(int l = 0; l < 4; l++) {
-			weights[4*k+l] = s[l] * t[k];
-			weights_du[4*k+l] = ds[l] * t[k] * d_scale;
-			weights_dv[4*k+l] = s[l] * dt[k] * d_scale;
-		}
-	}
+  for (int k = 0; k < 4; k++) {
+    for (int l = 0; l < 4; l++) {
+      weights[4 * k + l] = s[l] * t[k];
+      weights_du[4 * k + l] = ds[l] * t[k] * d_scale;
+      weights_dv[4 * k + l] = s[l] * dt[k] * d_scale;
+    }
+  }
 }
 
 /* generic function for evaluating indices and weights from patch coords */
 
-ccl_device_inline int patch_eval_control_verts(KernelGlobals *kg, int object, int patch, float u, float v, int channel,
-                                        int indices[PATCH_MAX_CONTROL_VERTS],
-                                        float weights[PATCH_MAX_CONTROL_VERTS],
-                                        float weights_du[PATCH_MAX_CONTROL_VERTS],
-                                        float weights_dv[PATCH_MAX_CONTROL_VERTS])
+ccl_device_inline int patch_eval_control_verts(KernelGlobals *kg,
+                                               int object,
+                                               int patch,
+                                               float u,
+                                               float v,
+                                               int channel,
+                                               int indices[PATCH_MAX_CONTROL_VERTS],
+                                               float weights[PATCH_MAX_CONTROL_VERTS],
+                                               float weights_du[PATCH_MAX_CONTROL_VERTS],
+                                               float weights_dv[PATCH_MAX_CONTROL_VERTS])
 {
-	PatchHandle handle = patch_map_find_patch(kg, object, patch, u, v);
-	kernel_assert(handle.array_index >= 0);
+  PatchHandle handle = patch_map_find_patch(kg, object, patch, u, v);
+  kernel_assert(handle.array_index >= 0);
 
-	int num_control = patch_eval_indices(kg, &handle, channel, indices);
-	patch_eval_basis(kg, &handle, u, v, weights, weights_du, weights_dv);
+  int num_control = patch_eval_indices(kg, &handle, channel, indices);
+  patch_eval_basis(kg, &handle, u, v, weights, weights_du, weights_dv);
 
-	return num_control;
+  return num_control;
 }
 
 /* functions for evaluating attributes on patches */
 
-ccl_device float patch_eval_float(KernelGlobals *kg, const ShaderData *sd, int offset,
-                                  int patch, float u, float v, int channel,
-                                  float *du, float* dv)
+ccl_device float patch_eval_float(KernelGlobals *kg,
+                                  const ShaderData *sd,
+                                  int offset,
+                                  int patch,
+                                  float u,
+                                  float v,
+                                  int channel,
+                                  float *du,
+                                  float *dv)
 {
-	int indices[PATCH_MAX_CONTROL_VERTS];
-	float weights[PATCH_MAX_CONTROL_VERTS];
-	float weights_du[PATCH_MAX_CONTROL_VERTS];
-	float weights_dv[PATCH_MAX_CONTROL_VERTS];
-
-	int num_control = patch_eval_control_verts(kg, sd->object, patch, u, v, channel,
-	                                           indices, weights, weights_du, weights_dv);
-
-	float val = 0.0f;
-	if(du) *du = 0.0f;
-	if(dv) *dv = 0.0f;
-
-	for(int i = 0; i < num_control; i++) {
-		float v = kernel_tex_fetch(__attributes_float, offset + indices[i]);
-
-		val += v * weights[i];
-		if(du) *du += v * weights_du[i];
-		if(dv) *dv += v * weights_dv[i];
-	}
-
-	return val;
+  int indices[PATCH_MAX_CONTROL_VERTS];
+  float weights[PATCH_MAX_CONTROL_VERTS];
+  float weights_du[PATCH_MAX_CONTROL_VERTS];
+  float weights_dv[PATCH_MAX_CONTROL_VERTS];
+
+  int num_control = patch_eval_control_verts(
+      kg, sd->object, patch, u, v, channel, indices, weights, weights_du, weights_dv);
+
+  float val = 0.0f;
+  if (du)
+    *du = 0.0f;
+  if (dv)
+    *dv = 0.0f;
+
+  for (int i = 0; i < num_control; i++) {
+    float v = kernel_tex_fetch(__attributes_float, offset + indices[i]);
+
+    val += v * weights[i];
+    if (du)
+      *du += v * weights_du[i];
+    if (dv)
+      *dv += v * weights_dv[i];
+  }
+
+  return val;
 }
 
-ccl_device float2 patch_eval_float2(KernelGlobals *kg, const ShaderData *sd, int offset,
-                                    int patch, float u, float v, int channel,
-                                    float2 *du, float2 *dv)
+ccl_device float2 patch_eval_float2(KernelGlobals *kg,
+                                    const ShaderData *sd,
+                                    int offset,
+                                    int patch,
+                                    float u,
+                                    float v,
+                                    int channel,
+                                    float2 *du,
+                                    float2 *dv)
 {
-	int indices[PATCH_MAX_CONTROL_VERTS];
-	float weights[PATCH_MAX_CONTROL_VERTS];
-	float weights_du[PATCH_MAX_CONTROL_VERTS];
-	float weights_dv[PATCH_MAX_CONTROL_VERTS];
-
-	int num_control = patch_eval_control_verts(kg, sd->object, patch, u, v, channel,
-	                                           indices, weights, weights_du, weights_dv);
-
-	float2 val = make_float2(0.0f, 0.0f);
-	if(du) *du = make_float2(0.0f, 0.0f);
-	if(dv) *dv = make_float2(0.0f, 0.0f);
-
-	for(int i = 0; i < num_control; i++) {
-		float2 v = kernel_tex_fetch(__attributes_float2, offset + indices[i]);
-
-		val += v * weights[i];
-		if(du) *du += v * weights_du[i];
-		if(dv) *dv += v * weights_dv[i];
-	}
-
-	return val;
+  int indices[PATCH_MAX_CONTROL_VERTS];
+  float weights[PATCH_MAX_CONTROL_VERTS];
+  float weights_du[PATCH_MAX_CONTROL_VERTS];
+  float weights_dv[PATCH_MAX_CONTROL_VERTS];
+
+  int num_control = patch_eval_control_verts(
+      kg, sd->object, patch, u, v, channel, indices, weights, weights_du, weights_dv);
+
+  float2 val = make_float2(0.0f, 0.0f);
+  if (du)
+    *du = make_float2(0.0f, 0.0f);
+  if (dv)
+    *dv = make_float2(0.0f, 0.0f);
+
+  for (int i = 0; i < num_control; i++) {
+    float2 v = kernel_tex_fetch(__attributes_float2, offset + indices[i]);
+
+    val += v * weights[i];
+    if (du)
+      *du += v * weights_du[i];
+    if (dv)
+      *dv += v * weights_dv[i];
+  }
+
+  return val;
 }
 
-ccl_device float3 patch_eval_float3(KernelGlobals *kg, const ShaderData *sd, int offset,
-                                    int patch, float u, float v, int channel,
-                                    float3 *du, float3 *dv)
+ccl_device float3 patch_eval_float3(KernelGlobals *kg,
+                                    const ShaderData *sd,
+                                    int offset,
+                                    int patch,
+                                    float u,
+                                    float v,
+                                    int channel,
+                                    float3 *du,
+                                    float3 *dv)
 {
-	int indices[PATCH_MAX_CONTROL_VERTS];
-	float weights[PATCH_MAX_CONTROL_VERTS];
-	float weights_du[PATCH_MAX_CONTROL_VERTS];
-	float weights_dv[PATCH_MAX_CONTROL_VERTS];
-
-	int num_control = patch_eval_control_verts(kg, sd->object, patch, u, v, channel,
-	                                           indices, weights, weights_du, weights_dv);
-
-	float3 val = make_float3(0.0f, 0.0f, 0.0f);
-	if(du) *du = make_float3(0.0f, 0.0f, 0.0f);
-	if(dv) *dv = make_float3(0.0f, 0.0f, 0.0f);
-
-	for(int i = 0; i < num_control; i++) {
-		float3 v = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + indices[i]));
-
-		val += v * weights[i];
-		if(du) *du += v * weights_du[i];
-		if(dv) *dv += v * weights_dv[i];
-	}
-
-	return val;
+  int indices[PATCH_MAX_CONTROL_VERTS];
+  float weights[PATCH_MAX_CONTROL_VERTS];
+  float weights_du[PATCH_MAX_CONTROL_VERTS];
+  float weights_dv[PATCH_MAX_CONTROL_VERTS];
+
+  int num_control = patch_eval_control_verts(
+      kg, sd->object, patch, u, v, channel, indices, weights, weights_du, weights_dv);
+
+  float3 val = make_float3(0.0f, 0.0f, 0.0f);
+  if (du)
+    *du = make_float3(0.0f, 0.0f, 0.0f);
+  if (dv)
+    *dv = make_float3(0.0f, 0.0f, 0.0f);
+
+  for (int i = 0; i < num_control; i++) {
+    float3 v = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + indices[i]));
+
+    val += v * weights[i];
+    if (du)
+      *du += v * weights_du[i];
+    if (dv)
+      *dv += v * weights_dv[i];
+  }
+
+  return val;
 }
 
-ccl_device float3 patch_eval_uchar4(KernelGlobals *kg, const ShaderData *sd, int offset,
-                                    int patch, float u, float v, int channel,
-                                    float3 *du, float3 *dv)
+ccl_device float3 patch_eval_uchar4(KernelGlobals *kg,
+                                    const ShaderData *sd,
+                                    int offset,
+                                    int patch,
+                                    float u,
+                                    float v,
+                                    int channel,
+                                    float3 *du,
+                                    float3 *dv)
 {
-	int indices[PATCH_MAX_CONTROL_VERTS];
-	float weights[PATCH_MAX_CONTROL_VERTS];
-	float weights_du[PATCH_MAX_CONTROL_VERTS];
-	float weights_dv[PATCH_MAX_CONTROL_VERTS];
-
-	int num_control = patch_eval_control_verts(kg, sd->object, patch, u, v, channel,
-	                                           indices, weights, weights_du, weights_dv);
-
-	float3 val = make_float3(0.0f, 0.0f, 0.0f);
-	if(du) *du = make_float3(0.0f, 0.0f, 0.0f);
-	if(dv) *dv = make_float3(0.0f, 0.0f, 0.0f);
-
-	for(int i = 0; i < num_control; i++) {
-		float3 v = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, offset + indices[i]));
-
-		val += v * weights[i];
-		if(du) *du += v * weights_du[i];
-		if(dv) *dv += v * weights_dv[i];
-	}
-
-	return val;
+  int indices[PATCH_MAX_CONTROL_VERTS];
+  float weights[PATCH_MAX_CONTROL_VERTS];
+  float weights_du[PATCH_MAX_CONTROL_VERTS];
+  float weights_dv[PATCH_MAX_CONTROL_VERTS];
+
+  int num_control = patch_eval_control_verts(
+      kg, sd->object, patch, u, v, channel, indices, weights, weights_du, weights_dv);
+
+  float3 val = make_float3(0.0f, 0.0f, 0.0f);
+  if (du)
+    *du = make_float3(0.0f, 0.0f, 0.0f);
+  if (dv)
+    *dv = make_float3(0.0f, 0.0f, 0.0f);
+
+  for (int i = 0; i < num_control; i++) {
+    float3 v = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, offset + indices[i]));
+
+    val += v * weights[i];
+    if (du)
+      *du += v * weights_du[i];
+    if (dv)
+      *dv += v * weights_dv[i];
+  }
+
+  return val;
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/geom/geom_primitive.h b/intern/cycles/kernel/geom/geom_primitive.h
index 95d9d1050fb..7f2b52a24c4 100644
--- a/intern/cycles/kernel/geom/geom_primitive.h
+++ b/intern/cycles/kernel/geom/geom_primitive.h
@@ -22,57 +22,59 @@
 CCL_NAMESPACE_BEGIN
 
 /* Generic primitive attribute reading functions */
-ccl_device_inline float primitive_attribute_float(KernelGlobals *kg,
-                                                  const ShaderData *sd,
-                                                  const AttributeDescriptor desc,
-                                                  float *dx, float *dy)
+ccl_device_inline float primitive_attribute_float(
+    KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float *dx, float *dy)
 {
-	if(sd->type & PRIMITIVE_ALL_TRIANGLE) {
-		if(subd_triangle_patch(kg, sd) == ~0)
-			return triangle_attribute_float(kg, sd, desc, dx, dy);
-		else
-			return subd_triangle_attribute_float(kg, sd, desc, dx, dy);
-	}
+  if (sd->type & PRIMITIVE_ALL_TRIANGLE) {
+    if (subd_triangle_patch(kg, sd) == ~0)
+      return triangle_attribute_float(kg, sd, desc, dx, dy);
+    else
+      return subd_triangle_attribute_float(kg, sd, desc, dx, dy);
+  }
 #ifdef __HAIR__
-	else if(sd->type & PRIMITIVE_ALL_CURVE) {
-		return curve_attribute_float(kg, sd, desc, dx, dy);
-	}
+  else if (sd->type & PRIMITIVE_ALL_CURVE) {
+    return curve_attribute_float(kg, sd, desc, dx, dy);
+  }
 #endif
 #ifdef __VOLUME__
-	else if(sd->object != OBJECT_NONE && desc.element == ATTR_ELEMENT_VOXEL) {
-		if(dx) *dx = 0.0f;
-		if(dy) *dy = 0.0f;
-		return volume_attribute_float(kg, sd, desc);
-	}
+  else if (sd->object != OBJECT_NONE && desc.element == ATTR_ELEMENT_VOXEL) {
+    if (dx)
+      *dx = 0.0f;
+    if (dy)
+      *dy = 0.0f;
+    return volume_attribute_float(kg, sd, desc);
+  }
 #endif
-	else {
-		if(dx) *dx = 0.0f;
-		if(dy) *dy = 0.0f;
-		return 0.0f;
-	}
+  else {
+    if (dx)
+      *dx = 0.0f;
+    if (dy)
+      *dy = 0.0f;
+    return 0.0f;
+  }
 }
 
-ccl_device_inline float primitive_surface_attribute_float(KernelGlobals *kg,
-                                                          const ShaderData *sd,
-                                                          const AttributeDescriptor desc,
-                                                          float *dx, float *dy)
+ccl_device_inline float primitive_surface_attribute_float(
+    KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float *dx, float *dy)
 {
-	if(sd->type & PRIMITIVE_ALL_TRIANGLE) {
-		if(subd_triangle_patch(kg, sd) == ~0)
-			return triangle_attribute_float(kg, sd, desc, dx, dy);
-		else
-			return subd_triangle_attribute_float(kg, sd, desc, dx, dy);
-	}
+  if (sd->type & PRIMITIVE_ALL_TRIANGLE) {
+    if (subd_triangle_patch(kg, sd) == ~0)
+      return triangle_attribute_float(kg, sd, desc, dx, dy);
+    else
+      return subd_triangle_attribute_float(kg, sd, desc, dx, dy);
+  }
 #ifdef __HAIR__
-	else if(sd->type & PRIMITIVE_ALL_CURVE) {
-		return curve_attribute_float(kg, sd, desc, dx, dy);
-	}
+  else if (sd->type & PRIMITIVE_ALL_CURVE) {
+    return curve_attribute_float(kg, sd, desc, dx, dy);
+  }
 #endif
-	else {
-		if(dx) *dx = 0.0f;
-		if(dy) *dy = 0.0f;
-		return 0.0f;
-	}
+  else {
+    if (dx)
+      *dx = 0.0f;
+    if (dy)
+      *dy = 0.0f;
+    return 0.0f;
+  }
 }
 
 #ifdef __VOLUME__
@@ -80,120 +82,136 @@ ccl_device_inline float primitive_volume_attribute_float(KernelGlobals *kg,
                                                          const ShaderData *sd,
                                                          const AttributeDescriptor desc)
 {
-	if(sd->object != OBJECT_NONE && desc.element == ATTR_ELEMENT_VOXEL) {
-		return volume_attribute_float(kg, sd, desc);
-	}
-	else {
-		return 0.0f;
-	}
+  if (sd->object != OBJECT_NONE && desc.element == ATTR_ELEMENT_VOXEL) {
+    return volume_attribute_float(kg, sd, desc);
+  }
+  else {
+    return 0.0f;
+  }
 }
 #endif
 
 ccl_device_inline float2 primitive_attribute_float2(KernelGlobals *kg,
                                                     const ShaderData *sd,
                                                     const AttributeDescriptor desc,
-                                                    float2 *dx, float2 *dy)
+                                                    float2 *dx,
+                                                    float2 *dy)
 {
-	if(sd->type & PRIMITIVE_ALL_TRIANGLE) {
-		if(subd_triangle_patch(kg, sd) == ~0)
-			return triangle_attribute_float2(kg, sd, desc, dx, dy);
-		else
-			return subd_triangle_attribute_float2(kg, sd, desc, dx, dy);
-	}
+  if (sd->type & PRIMITIVE_ALL_TRIANGLE) {
+    if (subd_triangle_patch(kg, sd) == ~0)
+      return triangle_attribute_float2(kg, sd, desc, dx, dy);
+    else
+      return subd_triangle_attribute_float2(kg, sd, desc, dx, dy);
+  }
 #ifdef __HAIR__
-	else if(sd->type & PRIMITIVE_ALL_CURVE) {
-		return curve_attribute_float2(kg, sd, desc, dx, dy);
-	}
+  else if (sd->type & PRIMITIVE_ALL_CURVE) {
+    return curve_attribute_float2(kg, sd, desc, dx, dy);
+  }
 #endif
 #ifdef __VOLUME__
-	else if(sd->object != OBJECT_NONE && desc.element == ATTR_ELEMENT_VOXEL) {
-		kernel_assert(0);
-		if(dx) *dx = make_float2(0.0f, 0.0f);
-		if(dy) *dy = make_float2(0.0f, 0.0f);
-		return make_float2(0.0f, 0.0f);
-	}
+  else if (sd->object != OBJECT_NONE && desc.element == ATTR_ELEMENT_VOXEL) {
+    kernel_assert(0);
+    if (dx)
+      *dx = make_float2(0.0f, 0.0f);
+    if (dy)
+      *dy = make_float2(0.0f, 0.0f);
+    return make_float2(0.0f, 0.0f);
+  }
 #endif
-	else {
-		if(dx) *dx = make_float2(0.0f, 0.0f);
-		if(dy) *dy = make_float2(0.0f, 0.0f);
-		return make_float2(0.0f, 0.0f);
-	}
+  else {
+    if (dx)
+      *dx = make_float2(0.0f, 0.0f);
+    if (dy)
+      *dy = make_float2(0.0f, 0.0f);
+    return make_float2(0.0f, 0.0f);
+  }
 }
 
 ccl_device_inline float3 primitive_attribute_float3(KernelGlobals *kg,
                                                     const ShaderData *sd,
                                                     const AttributeDescriptor desc,
-                                                    float3 *dx, float3 *dy)
+                                                    float3 *dx,
+                                                    float3 *dy)
 {
-	if(sd->type & PRIMITIVE_ALL_TRIANGLE) {
-		if(subd_triangle_patch(kg, sd) == ~0)
-			return triangle_attribute_float3(kg, sd, desc, dx, dy);
-		else
-			return subd_triangle_attribute_float3(kg, sd, desc, dx, dy);
-	}
+  if (sd->type & PRIMITIVE_ALL_TRIANGLE) {
+    if (subd_triangle_patch(kg, sd) == ~0)
+      return triangle_attribute_float3(kg, sd, desc, dx, dy);
+    else
+      return subd_triangle_attribute_float3(kg, sd, desc, dx, dy);
+  }
 #ifdef __HAIR__
-	else if(sd->type & PRIMITIVE_ALL_CURVE) {
-		return curve_attribute_float3(kg, sd, desc, dx, dy);
-	}
+  else if (sd->type & PRIMITIVE_ALL_CURVE) {
+    return curve_attribute_float3(kg, sd, desc, dx, dy);
+  }
 #endif
 #ifdef __VOLUME__
-	else if(sd->object != OBJECT_NONE && desc.element == ATTR_ELEMENT_VOXEL) {
-		if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f);
-		if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f);
-		return volume_attribute_float3(kg, sd, desc);
-	}
+  else if (sd->object != OBJECT_NONE && desc.element == ATTR_ELEMENT_VOXEL) {
+    if (dx)
+      *dx = make_float3(0.0f, 0.0f, 0.0f);
+    if (dy)
+      *dy = make_float3(0.0f, 0.0f, 0.0f);
+    return volume_attribute_float3(kg, sd, desc);
+  }
 #endif
-	else {
-		if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f);
-		if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f);
-		return make_float3(0.0f, 0.0f, 0.0f);
-	}
+  else {
+    if (dx)
+      *dx = make_float3(0.0f, 0.0f, 0.0f);
+    if (dy)
+      *dy = make_float3(0.0f, 0.0f, 0.0f);
+    return make_float3(0.0f, 0.0f, 0.0f);
+  }
 }
 
 ccl_device_inline float2 primitive_surface_attribute_float2(KernelGlobals *kg,
                                                             const ShaderData *sd,
                                                             const AttributeDescriptor desc,
-                                                            float2 *dx, float2 *dy)
+                                                            float2 *dx,
+                                                            float2 *dy)
 {
-	if(sd->type & PRIMITIVE_ALL_TRIANGLE) {
-		if(subd_triangle_patch(kg, sd) == ~0)
-			return triangle_attribute_float2(kg, sd, desc, dx, dy);
-		else
-			return subd_triangle_attribute_float2(kg, sd, desc, dx, dy);
-	}
+  if (sd->type & PRIMITIVE_ALL_TRIANGLE) {
+    if (subd_triangle_patch(kg, sd) == ~0)
+      return triangle_attribute_float2(kg, sd, desc, dx, dy);
+    else
+      return subd_triangle_attribute_float2(kg, sd, desc, dx, dy);
+  }
 #ifdef __HAIR__
-	else if(sd->type & PRIMITIVE_ALL_CURVE) {
-		return curve_attribute_float2(kg, sd, desc, dx, dy);
-	}
+  else if (sd->type & PRIMITIVE_ALL_CURVE) {
+    return curve_attribute_float2(kg, sd, desc, dx, dy);
+  }
 #endif
-	else {
-		if(dx) *dx = make_float2(0.0f, 0.0f);
-		if(dy) *dy = make_float2(0.0f, 0.0f);
-		return make_float2(0.0f, 0.0f);
-	}
+  else {
+    if (dx)
+      *dx = make_float2(0.0f, 0.0f);
+    if (dy)
+      *dy = make_float2(0.0f, 0.0f);
+    return make_float2(0.0f, 0.0f);
+  }
 }
 
 ccl_device_inline float3 primitive_surface_attribute_float3(KernelGlobals *kg,
                                                             const ShaderData *sd,
                                                             const AttributeDescriptor desc,
-                                                            float3 *dx, float3 *dy)
+                                                            float3 *dx,
+                                                            float3 *dy)
 {
-	if(sd->type & PRIMITIVE_ALL_TRIANGLE) {
-		if(subd_triangle_patch(kg, sd) == ~0)
-			return triangle_attribute_float3(kg, sd, desc, dx, dy);
-		else
-			return subd_triangle_attribute_float3(kg, sd, desc, dx, dy);
-	}
+  if (sd->type & PRIMITIVE_ALL_TRIANGLE) {
+    if (subd_triangle_patch(kg, sd) == ~0)
+      return triangle_attribute_float3(kg, sd, desc, dx, dy);
+    else
+      return subd_triangle_attribute_float3(kg, sd, desc, dx, dy);
+  }
 #ifdef __HAIR__
-	else if(sd->type & PRIMITIVE_ALL_CURVE) {
-		return curve_attribute_float3(kg, sd, desc, dx, dy);
-	}
+  else if (sd->type & PRIMITIVE_ALL_CURVE) {
+    return curve_attribute_float3(kg, sd, desc, dx, dy);
+  }
 #endif
-	else {
-		if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f);
-		if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f);
-		return make_float3(0.0f, 0.0f, 0.0f);
-	}
+  else {
+    if (dx)
+      *dx = make_float3(0.0f, 0.0f, 0.0f);
+    if (dy)
+      *dy = make_float3(0.0f, 0.0f, 0.0f);
+    return make_float3(0.0f, 0.0f, 0.0f);
+  }
 }
 
 #ifdef __VOLUME__
@@ -201,12 +219,12 @@ ccl_device_inline float3 primitive_volume_attribute_float3(KernelGlobals *kg,
                                                            const ShaderData *sd,
                                                            const AttributeDescriptor desc)
 {
-	if(sd->object != OBJECT_NONE && desc.element == ATTR_ELEMENT_VOXEL) {
-		return volume_attribute_float3(kg, sd, desc);
-	}
-	else {
-		return make_float3(0.0f, 0.0f, 0.0f);
-	}
+  if (sd->object != OBJECT_NONE && desc.element == ATTR_ELEMENT_VOXEL) {
+    return volume_attribute_float3(kg, sd, desc);
+  }
+  else {
+    return make_float3(0.0f, 0.0f, 0.0f);
+  }
 }
 #endif
 
@@ -214,33 +232,33 @@ ccl_device_inline float3 primitive_volume_attribute_float3(KernelGlobals *kg,
 
 ccl_device_inline float3 primitive_uv(KernelGlobals *kg, ShaderData *sd)
 {
-	const AttributeDescriptor desc = find_attribute(kg, sd, ATTR_STD_UV);
+  const AttributeDescriptor desc = find_attribute(kg, sd, ATTR_STD_UV);
 
-	if(desc.offset == ATTR_STD_NOT_FOUND)
-		return make_float3(0.0f, 0.0f, 0.0f);
+  if (desc.offset == ATTR_STD_NOT_FOUND)
+    return make_float3(0.0f, 0.0f, 0.0f);
 
-	float2 uv = primitive_surface_attribute_float2(kg, sd, desc, NULL, NULL);
-	return make_float3(uv.x, uv.y, 1.0f);
+  float2 uv = primitive_surface_attribute_float2(kg, sd, desc, NULL, NULL);
+  return make_float3(uv.x, uv.y, 1.0f);
 }
 
 /* Ptex coordinates */
 
 ccl_device bool primitive_ptex(KernelGlobals *kg, ShaderData *sd, float2 *uv, int *face_id)
 {
-	/* storing ptex data as attributes is not memory efficient but simple for tests */
-	const AttributeDescriptor desc_face_id = find_attribute(kg, sd, ATTR_STD_PTEX_FACE_ID);
-	const AttributeDescriptor desc_uv = find_attribute(kg, sd, ATTR_STD_PTEX_UV);
+  /* storing ptex data as attributes is not memory efficient but simple for tests */
+  const AttributeDescriptor desc_face_id = find_attribute(kg, sd, ATTR_STD_PTEX_FACE_ID);
+  const AttributeDescriptor desc_uv = find_attribute(kg, sd, ATTR_STD_PTEX_UV);
 
-	if(desc_face_id.offset == ATTR_STD_NOT_FOUND || desc_uv.offset == ATTR_STD_NOT_FOUND)
-		return false;
+  if (desc_face_id.offset == ATTR_STD_NOT_FOUND || desc_uv.offset == ATTR_STD_NOT_FOUND)
+    return false;
 
-	float3 uv3 = primitive_surface_attribute_float3(kg, sd, desc_uv, NULL, NULL);
-	float face_id_f = primitive_surface_attribute_float(kg, sd, desc_face_id, NULL, NULL);
+  float3 uv3 = primitive_surface_attribute_float3(kg, sd, desc_uv, NULL, NULL);
+  float face_id_f = primitive_surface_attribute_float(kg, sd, desc_face_id, NULL, NULL);
 
-	*uv = make_float2(uv3.x, uv3.y);
-	*face_id = (int)face_id_f;
+  *uv = make_float2(uv3.x, uv3.y);
+  *face_id = (int)face_id_f;
 
-	return true;
+  return true;
 }
 
 /* Surface tangent */
@@ -248,125 +266,125 @@ ccl_device bool primitive_ptex(KernelGlobals *kg, ShaderData *sd, float2 *uv, in
 ccl_device float3 primitive_tangent(KernelGlobals *kg, ShaderData *sd)
 {
 #ifdef __HAIR__
-	if(sd->type & PRIMITIVE_ALL_CURVE)
+  if (sd->type & PRIMITIVE_ALL_CURVE)
 #  ifdef __DPDU__
-		return normalize(sd->dPdu);
+    return normalize(sd->dPdu);
 #  else
-		return make_float3(0.0f, 0.0f, 0.0f);
+    return make_float3(0.0f, 0.0f, 0.0f);
 #  endif
 #endif
 
-	/* try to create spherical tangent from generated coordinates */
-	const AttributeDescriptor desc = find_attribute(kg, sd, ATTR_STD_GENERATED);
-
-	if(desc.offset != ATTR_STD_NOT_FOUND) {
-		float3 data = primitive_surface_attribute_float3(kg, sd, desc, NULL, NULL);
-		data = make_float3(-(data.y - 0.5f), (data.x - 0.5f), 0.0f);
-		object_normal_transform(kg, sd, &data);
-		return cross(sd->N, normalize(cross(data, sd->N)));
-	}
-	else {
-		/* otherwise use surface derivatives */
+  /* try to create spherical tangent from generated coordinates */
+  const AttributeDescriptor desc = find_attribute(kg, sd, ATTR_STD_GENERATED);
+
+  if (desc.offset != ATTR_STD_NOT_FOUND) {
+    float3 data = primitive_surface_attribute_float3(kg, sd, desc, NULL, NULL);
+    data = make_float3(-(data.y - 0.5f), (data.x - 0.5f), 0.0f);
+    object_normal_transform(kg, sd, &data);
+    return cross(sd->N, normalize(cross(data, sd->N)));
+  }
+  else {
+    /* otherwise use surface derivatives */
 #ifdef __DPDU__
-		return normalize(sd->dPdu);
+    return normalize(sd->dPdu);
 #else
-		return make_float3(0.0f, 0.0f, 0.0f);
+    return make_float3(0.0f, 0.0f, 0.0f);
 #endif
-	}
+  }
 }
 
 /* Motion vector for motion pass */
 
 ccl_device_inline float4 primitive_motion_vector(KernelGlobals *kg, ShaderData *sd)
 {
-	/* center position */
-	float3 center;
+  /* center position */
+  float3 center;
 
 #ifdef __HAIR__
-	bool is_curve_primitive = sd->type & PRIMITIVE_ALL_CURVE;
-	if(is_curve_primitive) {
-		center = curve_motion_center_location(kg, sd);
-
-		if(!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
-			object_position_transform(kg, sd, &center);
-		}
-	}
-	else
+  bool is_curve_primitive = sd->type & PRIMITIVE_ALL_CURVE;
+  if (is_curve_primitive) {
+    center = curve_motion_center_location(kg, sd);
+
+    if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
+      object_position_transform(kg, sd, &center);
+    }
+  }
+  else
 #endif
-		center = sd->P;
+    center = sd->P;
 
-	float3 motion_pre = center, motion_post = center;
+  float3 motion_pre = center, motion_post = center;
 
-	/* deformation motion */
-	AttributeDescriptor desc = find_attribute(kg, sd, ATTR_STD_MOTION_VERTEX_POSITION);
+  /* deformation motion */
+  AttributeDescriptor desc = find_attribute(kg, sd, ATTR_STD_MOTION_VERTEX_POSITION);
 
-	if(desc.offset != ATTR_STD_NOT_FOUND) {
-		/* get motion info */
-		int numverts, numkeys;
-		object_motion_info(kg, sd->object, NULL, &numverts, &numkeys);
+  if (desc.offset != ATTR_STD_NOT_FOUND) {
+    /* get motion info */
+    int numverts, numkeys;
+    object_motion_info(kg, sd->object, NULL, &numverts, &numkeys);
 
-		/* lookup attributes */
-		motion_pre = primitive_surface_attribute_float3(kg, sd, desc, NULL, NULL);
+    /* lookup attributes */
+    motion_pre = primitive_surface_attribute_float3(kg, sd, desc, NULL, NULL);
 
-		desc.offset += (sd->type & PRIMITIVE_ALL_TRIANGLE)? numverts: numkeys;
-		motion_post = primitive_surface_attribute_float3(kg, sd, desc, NULL, NULL);
+    desc.offset += (sd->type & PRIMITIVE_ALL_TRIANGLE) ? numverts : numkeys;
+    motion_post = primitive_surface_attribute_float3(kg, sd, desc, NULL, NULL);
 
 #ifdef __HAIR__
-		if(is_curve_primitive && (sd->object_flag & SD_OBJECT_HAS_VERTEX_MOTION) == 0) {
-			object_position_transform(kg, sd, &motion_pre);
-			object_position_transform(kg, sd, &motion_post);
-		}
+    if (is_curve_primitive && (sd->object_flag & SD_OBJECT_HAS_VERTEX_MOTION) == 0) {
+      object_position_transform(kg, sd, &motion_pre);
+      object_position_transform(kg, sd, &motion_post);
+    }
 #endif
-	}
-
-	/* object motion. note that depending on the mesh having motion vectors, this
-	 * transformation was set match the world/object space of motion_pre/post */
-	Transform tfm;
-
-	tfm = object_fetch_motion_pass_transform(kg, sd->object, OBJECT_PASS_MOTION_PRE);
-	motion_pre = transform_point(&tfm, motion_pre);
-
-	tfm = object_fetch_motion_pass_transform(kg, sd->object, OBJECT_PASS_MOTION_POST);
-	motion_post = transform_point(&tfm, motion_post);
-
-	float3 motion_center;
-
-	/* camera motion, for perspective/orthographic motion.pre/post will be a
-	 * world-to-raster matrix, for panorama it's world-to-camera */
-	if(kernel_data.cam.type != CAMERA_PANORAMA) {
-		ProjectionTransform projection = kernel_data.cam.worldtoraster;
-		motion_center = transform_perspective(&projection, center);
-
-		projection = kernel_data.cam.perspective_pre;
-		motion_pre = transform_perspective(&projection, motion_pre);
-
-		projection = kernel_data.cam.perspective_post;
-		motion_post = transform_perspective(&projection, motion_post);
-	}
-	else {
-		tfm = kernel_data.cam.worldtocamera;
-		motion_center = normalize(transform_point(&tfm, center));
-		motion_center = float2_to_float3(direction_to_panorama(&kernel_data.cam, motion_center));
-		motion_center.x *= kernel_data.cam.width;
-		motion_center.y *= kernel_data.cam.height;
-
-		tfm = kernel_data.cam.motion_pass_pre;
-		motion_pre = normalize(transform_point(&tfm, motion_pre));
-		motion_pre = float2_to_float3(direction_to_panorama(&kernel_data.cam, motion_pre));
-		motion_pre.x *= kernel_data.cam.width;
-		motion_pre.y *= kernel_data.cam.height;
-
-		tfm = kernel_data.cam.motion_pass_post;
-		motion_post = normalize(transform_point(&tfm, motion_post));
-		motion_post = float2_to_float3(direction_to_panorama(&kernel_data.cam, motion_post));
-		motion_post.x *= kernel_data.cam.width;
-		motion_post.y *= kernel_data.cam.height;
-	}
-
-	motion_pre = motion_pre - motion_center;
-	motion_post = motion_center - motion_post;
-
-	return make_float4(motion_pre.x, motion_pre.y, motion_post.x, motion_post.y);
+  }
+
+  /* object motion. note that depending on the mesh having motion vectors, this
+   * transformation was set to match the world/object space of motion_pre/post */
+  Transform tfm;
+
+  tfm = object_fetch_motion_pass_transform(kg, sd->object, OBJECT_PASS_MOTION_PRE);
+  motion_pre = transform_point(&tfm, motion_pre);
+
+  tfm = object_fetch_motion_pass_transform(kg, sd->object, OBJECT_PASS_MOTION_POST);
+  motion_post = transform_point(&tfm, motion_post);
+
+  float3 motion_center;
+
+  /* camera motion, for perspective/orthographic motion.pre/post will be a
+   * world-to-raster matrix, for panorama it's world-to-camera */
+  if (kernel_data.cam.type != CAMERA_PANORAMA) {
+    ProjectionTransform projection = kernel_data.cam.worldtoraster;
+    motion_center = transform_perspective(&projection, center);
+
+    projection = kernel_data.cam.perspective_pre;
+    motion_pre = transform_perspective(&projection, motion_pre);
+
+    projection = kernel_data.cam.perspective_post;
+    motion_post = transform_perspective(&projection, motion_post);
+  }
+  else {
+    tfm = kernel_data.cam.worldtocamera;
+    motion_center = normalize(transform_point(&tfm, center));
+    motion_center = float2_to_float3(direction_to_panorama(&kernel_data.cam, motion_center));
+    motion_center.x *= kernel_data.cam.width;
+    motion_center.y *= kernel_data.cam.height;
+
+    tfm = kernel_data.cam.motion_pass_pre;
+    motion_pre = normalize(transform_point(&tfm, motion_pre));
+    motion_pre = float2_to_float3(direction_to_panorama(&kernel_data.cam, motion_pre));
+    motion_pre.x *= kernel_data.cam.width;
+    motion_pre.y *= kernel_data.cam.height;
+
+    tfm = kernel_data.cam.motion_pass_post;
+    motion_post = normalize(transform_point(&tfm, motion_post));
+    motion_post = float2_to_float3(direction_to_panorama(&kernel_data.cam, motion_post));
+    motion_post.x *= kernel_data.cam.width;
+    motion_post.y *= kernel_data.cam.height;
+  }
+
+  motion_pre = motion_pre - motion_center;
+  motion_post = motion_center - motion_post;
+
+  return make_float4(motion_pre.x, motion_pre.y, motion_post.x, motion_post.y);
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/geom/geom_subd_triangle.h b/intern/cycles/kernel/geom/geom_subd_triangle.h
index 251e070c21f..8d5b3c12833 100644
--- a/intern/cycles/kernel/geom/geom_subd_triangle.h
+++ b/intern/cycles/kernel/geom/geom_subd_triangle.h
@@ -22,455 +22,492 @@ CCL_NAMESPACE_BEGIN
 
 ccl_device_inline uint subd_triangle_patch(KernelGlobals *kg, const ShaderData *sd)
 {
-	return (sd->prim != PRIM_NONE) ? kernel_tex_fetch(__tri_patch, sd->prim) : ~0;
+  return (sd->prim != PRIM_NONE) ? kernel_tex_fetch(__tri_patch, sd->prim) : ~0;
 }
 
 /* UV coords of triangle within patch */
 
-ccl_device_inline void subd_triangle_patch_uv(KernelGlobals *kg, const ShaderData *sd, float2 uv[3])
+ccl_device_inline void subd_triangle_patch_uv(KernelGlobals *kg,
+                                              const ShaderData *sd,
+                                              float2 uv[3])
 {
-	uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim);
+  uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim);
 
-	uv[0] = kernel_tex_fetch(__tri_patch_uv, tri_vindex.x);
-	uv[1] = kernel_tex_fetch(__tri_patch_uv, tri_vindex.y);
-	uv[2] = kernel_tex_fetch(__tri_patch_uv, tri_vindex.z);
+  uv[0] = kernel_tex_fetch(__tri_patch_uv, tri_vindex.x);
+  uv[1] = kernel_tex_fetch(__tri_patch_uv, tri_vindex.y);
+  uv[2] = kernel_tex_fetch(__tri_patch_uv, tri_vindex.z);
 }
 
 /* Vertex indices of patch */
 
 ccl_device_inline uint4 subd_triangle_patch_indices(KernelGlobals *kg, int patch)
 {
-	uint4 indices;
+  uint4 indices;
 
-	indices.x = kernel_tex_fetch(__patches, patch+0);
-	indices.y = kernel_tex_fetch(__patches, patch+1);
-	indices.z = kernel_tex_fetch(__patches, patch+2);
-	indices.w = kernel_tex_fetch(__patches, patch+3);
+  indices.x = kernel_tex_fetch(__patches, patch + 0);
+  indices.y = kernel_tex_fetch(__patches, patch + 1);
+  indices.z = kernel_tex_fetch(__patches, patch + 2);
+  indices.w = kernel_tex_fetch(__patches, patch + 3);
 
-	return indices;
+  return indices;
 }
 
 /* Originating face for patch */
 
 ccl_device_inline uint subd_triangle_patch_face(KernelGlobals *kg, int patch)
 {
-	return kernel_tex_fetch(__patches, patch+4);
+  return kernel_tex_fetch(__patches, patch + 4);
 }
 
 /* Number of corners on originating face */
 
 ccl_device_inline uint subd_triangle_patch_num_corners(KernelGlobals *kg, int patch)
 {
-	return kernel_tex_fetch(__patches, patch+5) & 0xffff;
+  return kernel_tex_fetch(__patches, patch + 5) & 0xffff;
 }
 
 /* Indices of the four corners that are used by the patch */
 
 ccl_device_inline void subd_triangle_patch_corners(KernelGlobals *kg, int patch, int corners[4])
 {
-	uint4 data;
-
-	data.x = kernel_tex_fetch(__patches, patch+4);
-	data.y = kernel_tex_fetch(__patches, patch+5);
-	data.z = kernel_tex_fetch(__patches, patch+6);
-	data.w = kernel_tex_fetch(__patches, patch+7);
-
-	int num_corners = data.y & 0xffff;
-
-	if(num_corners == 4) {
-		/* quad */
-		corners[0] = data.z;
-		corners[1] = data.z+1;
-		corners[2] = data.z+2;
-		corners[3] = data.z+3;
-	}
-	else {
-		/* ngon */
-		int c = data.y >> 16;
-
-		corners[0] = data.z + c;
-		corners[1] = data.z + mod(c+1, num_corners);
-		corners[2] = data.w;
-		corners[3] = data.z + mod(c-1, num_corners);
-	}
+  uint4 data;
+
+  data.x = kernel_tex_fetch(__patches, patch + 4);
+  data.y = kernel_tex_fetch(__patches, patch + 5);
+  data.z = kernel_tex_fetch(__patches, patch + 6);
+  data.w = kernel_tex_fetch(__patches, patch + 7);
+
+  int num_corners = data.y & 0xffff;
+
+  if (num_corners == 4) {
+    /* quad */
+    corners[0] = data.z;
+    corners[1] = data.z + 1;
+    corners[2] = data.z + 2;
+    corners[3] = data.z + 3;
+  }
+  else {
+    /* ngon */
+    int c = data.y >> 16;
+
+    corners[0] = data.z + c;
+    corners[1] = data.z + mod(c + 1, num_corners);
+    corners[2] = data.w;
+    corners[3] = data.z + mod(c - 1, num_corners);
+  }
 }
 
 /* Reading attributes on various subdivision triangle elements */
 
-ccl_device_noinline float subd_triangle_attribute_float(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float *dx, float *dy)
+ccl_device_noinline float subd_triangle_attribute_float(
+    KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float *dx, float *dy)
 {
-	int patch = subd_triangle_patch(kg, sd);
+  int patch = subd_triangle_patch(kg, sd);
 
 #ifdef __PATCH_EVAL__
-	if(desc.flags & ATTR_SUBDIVIDED) {
-		float2 uv[3];
-		subd_triangle_patch_uv(kg, sd, uv);
-
-		float2 dpdu = uv[0] - uv[2];
-		float2 dpdv = uv[1] - uv[2];
-
-		/* p is [s, t] */
-		float2 p = dpdu * sd->u + dpdv * sd->v + uv[2];
-
-		float a, dads, dadt;
-		a = patch_eval_float(kg, sd, desc.offset, patch, p.x, p.y, 0, &dads, &dadt);
+  if (desc.flags & ATTR_SUBDIVIDED) {
+    float2 uv[3];
+    subd_triangle_patch_uv(kg, sd, uv);
+
+    float2 dpdu = uv[0] - uv[2];
+    float2 dpdv = uv[1] - uv[2];
+
+    /* p is [s, t] */
+    float2 p = dpdu * sd->u + dpdv * sd->v + uv[2];
+
+    float a, dads, dadt;
+    a = patch_eval_float(kg, sd, desc.offset, patch, p.x, p.y, 0, &dads, &dadt);
+
+#  ifdef __RAY_DIFFERENTIALS__
+    if (dx || dy) {
+      float dsdu = dpdu.x;
+      float dtdu = dpdu.y;
+      float dsdv = dpdv.x;
+      float dtdv = dpdv.y;
+
+      if (dx) {
+        float dudx = sd->du.dx;
+        float dvdx = sd->dv.dx;
+
+        float dsdx = dsdu * dudx + dsdv * dvdx;
+        float dtdx = dtdu * dudx + dtdv * dvdx;
+
+        *dx = dads * dsdx + dadt * dtdx;
+      }
+      if (dy) {
+        float dudy = sd->du.dy;
+        float dvdy = sd->dv.dy;
+
+        float dsdy = dsdu * dudy + dsdv * dvdy;
+        float dtdy = dtdu * dudy + dtdv * dvdy;
+
+        *dy = dads * dsdy + dadt * dtdy;
+      }
+    }
+#  endif
+
+    return a;
+  }
+  else
+#endif /* __PATCH_EVAL__ */
+      if (desc.element == ATTR_ELEMENT_FACE) {
+    if (dx)
+      *dx = 0.0f;
+    if (dy)
+      *dy = 0.0f;
+
+    return kernel_tex_fetch(__attributes_float, desc.offset + subd_triangle_patch_face(kg, patch));
+  }
+  else if (desc.element == ATTR_ELEMENT_VERTEX || desc.element == ATTR_ELEMENT_VERTEX_MOTION) {
+    float2 uv[3];
+    subd_triangle_patch_uv(kg, sd, uv);
+
+    uint4 v = subd_triangle_patch_indices(kg, patch);
+
+    float f0 = kernel_tex_fetch(__attributes_float, desc.offset + v.x);
+    float f1 = kernel_tex_fetch(__attributes_float, desc.offset + v.y);
+    float f2 = kernel_tex_fetch(__attributes_float, desc.offset + v.z);
+    float f3 = kernel_tex_fetch(__attributes_float, desc.offset + v.w);
+
+    if (subd_triangle_patch_num_corners(kg, patch) != 4) {
+      f1 = (f1 + f0) * 0.5f;
+      f3 = (f3 + f0) * 0.5f;
+    }
+
+    float a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y);
+    float b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y);
+    float c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y);
 
 #ifdef __RAY_DIFFERENTIALS__
-		if(dx || dy) {
-			float dsdu = dpdu.x;
-			float dtdu = dpdu.y;
-			float dsdv = dpdv.x;
-			float dtdv = dpdv.y;
-
-			if(dx) {
-				float dudx = sd->du.dx;
-				float dvdx = sd->dv.dx;
-
-				float dsdx = dsdu*dudx + dsdv*dvdx;
-				float dtdx = dtdu*dudx + dtdv*dvdx;
-
-				*dx = dads*dsdx + dadt*dtdx;
-			}
-			if(dy) {
-				float dudy = sd->du.dy;
-				float dvdy = sd->dv.dy;
-
-				float dsdy = dsdu*dudy + dsdv*dvdy;
-				float dtdy = dtdu*dudy + dtdv*dvdy;
-
-				*dy = dads*dsdy + dadt*dtdy;
-			}
-		}
+    if (dx)
+      *dx = sd->du.dx * a + sd->dv.dx * b - (sd->du.dx + sd->dv.dx) * c;
+    if (dy)
+      *dy = sd->du.dy * a + sd->dv.dy * b - (sd->du.dy + sd->dv.dy) * c;
 #endif
 
-		return a;
-	}
-	else
-#endif  /* __PATCH_EVAL__ */
-	if(desc.element == ATTR_ELEMENT_FACE) {
-		if(dx) *dx = 0.0f;
-		if(dy) *dy = 0.0f;
+    return sd->u * a + sd->v * b + (1.0f - sd->u - sd->v) * c;
+  }
+  else if (desc.element == ATTR_ELEMENT_CORNER) {
+    float2 uv[3];
+    subd_triangle_patch_uv(kg, sd, uv);
 
-		return kernel_tex_fetch(__attributes_float, desc.offset + subd_triangle_patch_face(kg, patch));
-	}
-	else if(desc.element == ATTR_ELEMENT_VERTEX || desc.element == ATTR_ELEMENT_VERTEX_MOTION) {
-		float2 uv[3];
-		subd_triangle_patch_uv(kg, sd, uv);
+    int corners[4];
+    subd_triangle_patch_corners(kg, patch, corners);
 
-		uint4 v = subd_triangle_patch_indices(kg, patch);
+    float f0 = kernel_tex_fetch(__attributes_float, corners[0] + desc.offset);
+    float f1 = kernel_tex_fetch(__attributes_float, corners[1] + desc.offset);
+    float f2 = kernel_tex_fetch(__attributes_float, corners[2] + desc.offset);
+    float f3 = kernel_tex_fetch(__attributes_float, corners[3] + desc.offset);
 
-		float f0 = kernel_tex_fetch(__attributes_float, desc.offset + v.x);
-		float f1 = kernel_tex_fetch(__attributes_float, desc.offset + v.y);
-		float f2 = kernel_tex_fetch(__attributes_float, desc.offset + v.z);
-		float f3 = kernel_tex_fetch(__attributes_float, desc.offset + v.w);
+    if (subd_triangle_patch_num_corners(kg, patch) != 4) {
+      f1 = (f1 + f0) * 0.5f;
+      f3 = (f3 + f0) * 0.5f;
+    }
 
-		if(subd_triangle_patch_num_corners(kg, patch) != 4) {
-			f1 = (f1+f0)*0.5f;
-			f3 = (f3+f0)*0.5f;
-		}
-
-		float a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y);
-		float b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y);
-		float c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y);
+    float a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y);
+    float b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y);
+    float c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y);
 
 #ifdef __RAY_DIFFERENTIALS__
-		if(dx) *dx = sd->du.dx*a + sd->dv.dx*b - (sd->du.dx + sd->dv.dx)*c;
-		if(dy) *dy = sd->du.dy*a + sd->dv.dy*b - (sd->du.dy + sd->dv.dy)*c;
+    if (dx)
+      *dx = sd->du.dx * a + sd->dv.dx * b - (sd->du.dx + sd->dv.dx) * c;
+    if (dy)
+      *dy = sd->du.dy * a + sd->dv.dy * b - (sd->du.dy + sd->dv.dy) * c;
 #endif
 
-		return sd->u*a + sd->v*b + (1.0f - sd->u - sd->v)*c;
-	}
-	else if(desc.element == ATTR_ELEMENT_CORNER) {
-		float2 uv[3];
-		subd_triangle_patch_uv(kg, sd, uv);
-
-		int corners[4];
-		subd_triangle_patch_corners(kg, patch, corners);
+    return sd->u * a + sd->v * b + (1.0f - sd->u - sd->v) * c;
+  }
+  else {
+    if (dx)
+      *dx = 0.0f;
+    if (dy)
+      *dy = 0.0f;
 
-		float f0 = kernel_tex_fetch(__attributes_float, corners[0] + desc.offset);
-		float f1 = kernel_tex_fetch(__attributes_float, corners[1] + desc.offset);
-		float f2 = kernel_tex_fetch(__attributes_float, corners[2] + desc.offset);
-		float f3 = kernel_tex_fetch(__attributes_float, corners[3] + desc.offset);
-
-		if(subd_triangle_patch_num_corners(kg, patch) != 4) {
-			f1 = (f1+f0)*0.5f;
-			f3 = (f3+f0)*0.5f;
-		}
-
-		float a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y);
-		float b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y);
-		float c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y);
-
-#ifdef __RAY_DIFFERENTIALS__
-		if(dx) *dx = sd->du.dx*a + sd->dv.dx*b - (sd->du.dx + sd->dv.dx)*c;
-		if(dy) *dy = sd->du.dy*a + sd->dv.dy*b - (sd->du.dy + sd->dv.dy)*c;
-#endif
-
-		return sd->u*a + sd->v*b + (1.0f - sd->u - sd->v)*c;
-	}
-	else {
-		if(dx) *dx = 0.0f;
-		if(dy) *dy = 0.0f;
-
-		return 0.0f;
-	}
+    return 0.0f;
+  }
 }
 
-ccl_device_noinline float2 subd_triangle_attribute_float2(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float2 *dx, float2 *dy)
+ccl_device_noinline float2 subd_triangle_attribute_float2(KernelGlobals *kg,
+                                                          const ShaderData *sd,
+                                                          const AttributeDescriptor desc,
+                                                          float2 *dx,
+                                                          float2 *dy)
 {
-	int patch = subd_triangle_patch(kg, sd);
+  int patch = subd_triangle_patch(kg, sd);
 
 #ifdef __PATCH_EVAL__
-	if(desc.flags & ATTR_SUBDIVIDED) {
-		float2 uv[3];
-		subd_triangle_patch_uv(kg, sd, uv);
-
-		float2 dpdu = uv[0] - uv[2];
-		float2 dpdv = uv[1] - uv[2];
-
-		/* p is [s, t] */
-		float2 p = dpdu * sd->u + dpdv * sd->v + uv[2];
-
-		float2 a, dads, dadt;
-
-		a = patch_eval_float2(kg, sd, desc.offset, patch, p.x, p.y, 0, &dads, &dadt);
-
-#ifdef __RAY_DIFFERENTIALS__
-		if(dx || dy) {
-			float dsdu = dpdu.x;
-			float dtdu = dpdu.y;
-			float dsdv = dpdv.x;
-			float dtdv = dpdv.y;
-
-			if(dx) {
-				float dudx = sd->du.dx;
-				float dvdx = sd->dv.dx;
-
-				float dsdx = dsdu*dudx + dsdv*dvdx;
-				float dtdx = dtdu*dudx + dtdv*dvdx;
-
-				*dx = dads*dsdx + dadt*dtdx;
-			}
-			if(dy) {
-				float dudy = sd->du.dy;
-				float dvdy = sd->dv.dy;
-
-				float dsdy = dsdu*dudy + dsdv*dvdy;
-				float dtdy = dtdu*dudy + dtdv*dvdy;
-
-				*dy = dads*dsdy + dadt*dtdy;
-			}
-		}
-#endif
-
-		return a;
-	}
-	else
-#endif  /* __PATCH_EVAL__ */
-		if(desc.element == ATTR_ELEMENT_FACE) {
-			if(dx) *dx = make_float2(0.0f, 0.0f);
-			if(dy) *dy = make_float2(0.0f, 0.0f);
-
-			return kernel_tex_fetch(__attributes_float2, desc.offset + subd_triangle_patch_face(kg, patch));
-		}
-		else if(desc.element == ATTR_ELEMENT_VERTEX || desc.element == ATTR_ELEMENT_VERTEX_MOTION) {
-			float2 uv[3];
-			subd_triangle_patch_uv(kg, sd, uv);
-
-			uint4 v = subd_triangle_patch_indices(kg, patch);
-
-			float2 f0 = kernel_tex_fetch(__attributes_float2, desc.offset + v.x);
-			float2 f1 = kernel_tex_fetch(__attributes_float2, desc.offset + v.y);
-			float2 f2 = kernel_tex_fetch(__attributes_float2, desc.offset + v.z);
-			float2 f3 = kernel_tex_fetch(__attributes_float2, desc.offset + v.w);
-
-			if(subd_triangle_patch_num_corners(kg, patch) != 4) {
-				f1 = (f1+f0)*0.5f;
-				f3 = (f3+f0)*0.5f;
-			}
-
-			float2 a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y);
-			float2 b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y);
-			float2 c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y);
+  if (desc.flags & ATTR_SUBDIVIDED) {
+    float2 uv[3];
+    subd_triangle_patch_uv(kg, sd, uv);
+
+    float2 dpdu = uv[0] - uv[2];
+    float2 dpdv = uv[1] - uv[2];
+
+    /* p is [s, t] */
+    float2 p = dpdu * sd->u + dpdv * sd->v + uv[2];
+
+    float2 a, dads, dadt;
+
+    a = patch_eval_float2(kg, sd, desc.offset, patch, p.x, p.y, 0, &dads, &dadt);
+
+#  ifdef __RAY_DIFFERENTIALS__
+    if (dx || dy) {
+      float dsdu = dpdu.x;
+      float dtdu = dpdu.y;
+      float dsdv = dpdv.x;
+      float dtdv = dpdv.y;
+
+      if (dx) {
+        float dudx = sd->du.dx;
+        float dvdx = sd->dv.dx;
+
+        float dsdx = dsdu * dudx + dsdv * dvdx;
+        float dtdx = dtdu * dudx + dtdv * dvdx;
+
+        *dx = dads * dsdx + dadt * dtdx;
+      }
+      if (dy) {
+        float dudy = sd->du.dy;
+        float dvdy = sd->dv.dy;
+
+        float dsdy = dsdu * dudy + dsdv * dvdy;
+        float dtdy = dtdu * dudy + dtdv * dvdy;
+
+        *dy = dads * dsdy + dadt * dtdy;
+      }
+    }
+#  endif
+
+    return a;
+  }
+  else
+#endif /* __PATCH_EVAL__ */
+      if (desc.element == ATTR_ELEMENT_FACE) {
+    if (dx)
+      *dx = make_float2(0.0f, 0.0f);
+    if (dy)
+      *dy = make_float2(0.0f, 0.0f);
+
+    return kernel_tex_fetch(__attributes_float2,
+                            desc.offset + subd_triangle_patch_face(kg, patch));
+  }
+  else if (desc.element == ATTR_ELEMENT_VERTEX || desc.element == ATTR_ELEMENT_VERTEX_MOTION) {
+    float2 uv[3];
+    subd_triangle_patch_uv(kg, sd, uv);
+
+    uint4 v = subd_triangle_patch_indices(kg, patch);
+
+    float2 f0 = kernel_tex_fetch(__attributes_float2, desc.offset + v.x);
+    float2 f1 = kernel_tex_fetch(__attributes_float2, desc.offset + v.y);
+    float2 f2 = kernel_tex_fetch(__attributes_float2, desc.offset + v.z);
+    float2 f3 = kernel_tex_fetch(__attributes_float2, desc.offset + v.w);
+
+    if (subd_triangle_patch_num_corners(kg, patch) != 4) {
+      f1 = (f1 + f0) * 0.5f;
+      f3 = (f3 + f0) * 0.5f;
+    }
+
+    float2 a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y);
+    float2 b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y);
+    float2 c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y);
 
 #ifdef __RAY_DIFFERENTIALS__
-			if(dx) *dx = sd->du.dx*a + sd->dv.dx*b - (sd->du.dx + sd->dv.dx)*c;
-			if(dy) *dy = sd->du.dy*a + sd->dv.dy*b - (sd->du.dy + sd->dv.dy)*c;
+    if (dx)
+      *dx = sd->du.dx * a + sd->dv.dx * b - (sd->du.dx + sd->dv.dx) * c;
+    if (dy)
+      *dy = sd->du.dy * a + sd->dv.dy * b - (sd->du.dy + sd->dv.dy) * c;
 #endif
 
-			return sd->u*a + sd->v*b + (1.0f - sd->u - sd->v)*c;
-		}
-		else if(desc.element == ATTR_ELEMENT_CORNER) {
-			float2 uv[3];
-			subd_triangle_patch_uv(kg, sd, uv);
+    return sd->u * a + sd->v * b + (1.0f - sd->u - sd->v) * c;
+  }
+  else if (desc.element == ATTR_ELEMENT_CORNER) {
+    float2 uv[3];
+    subd_triangle_patch_uv(kg, sd, uv);
 
-			int corners[4];
-			subd_triangle_patch_corners(kg, patch, corners);
+    int corners[4];
+    subd_triangle_patch_corners(kg, patch, corners);
 
-			float2 f0, f1, f2, f3;
+    float2 f0, f1, f2, f3;
 
-			f0 = kernel_tex_fetch(__attributes_float2, corners[0] + desc.offset);
-			f1 = kernel_tex_fetch(__attributes_float2, corners[1] + desc.offset);
-			f2 = kernel_tex_fetch(__attributes_float2, corners[2] + desc.offset);
-			f3 = kernel_tex_fetch(__attributes_float2, corners[3] + desc.offset);
+    f0 = kernel_tex_fetch(__attributes_float2, corners[0] + desc.offset);
+    f1 = kernel_tex_fetch(__attributes_float2, corners[1] + desc.offset);
+    f2 = kernel_tex_fetch(__attributes_float2, corners[2] + desc.offset);
+    f3 = kernel_tex_fetch(__attributes_float2, corners[3] + desc.offset);
 
-			if(subd_triangle_patch_num_corners(kg, patch) != 4) {
-				f1 = (f1+f0)*0.5f;
-				f3 = (f3+f0)*0.5f;
-			}
+    if (subd_triangle_patch_num_corners(kg, patch) != 4) {
+      f1 = (f1 + f0) * 0.5f;
+      f3 = (f3 + f0) * 0.5f;
+    }
 
-			float2 a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y);
-			float2 b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y);
-			float2 c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y);
+    float2 a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y);
+    float2 b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y);
+    float2 c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y);
 
 #ifdef __RAY_DIFFERENTIALS__
-			if(dx) *dx = sd->du.dx*a + sd->dv.dx*b - (sd->du.dx + sd->dv.dx)*c;
-			if(dy) *dy = sd->du.dy*a + sd->dv.dy*b - (sd->du.dy + sd->dv.dy)*c;
+    if (dx)
+      *dx = sd->du.dx * a + sd->dv.dx * b - (sd->du.dx + sd->dv.dx) * c;
+    if (dy)
+      *dy = sd->du.dy * a + sd->dv.dy * b - (sd->du.dy + sd->dv.dy) * c;
 #endif
 
-			return sd->u*a + sd->v*b + (1.0f - sd->u - sd->v)*c;
-		}
-		else {
-			if(dx) *dx = make_float2(0.0f, 0.0f);
-			if(dy) *dy = make_float2(0.0f, 0.0f);
+    return sd->u * a + sd->v * b + (1.0f - sd->u - sd->v) * c;
+  }
+  else {
+    if (dx)
+      *dx = make_float2(0.0f, 0.0f);
+    if (dy)
+      *dy = make_float2(0.0f, 0.0f);
 
-			return make_float2(0.0f, 0.0f);
-		}
+    return make_float2(0.0f, 0.0f);
+  }
 }
 
-ccl_device_noinline float3 subd_triangle_attribute_float3(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float3 *dx, float3 *dy)
+ccl_device_noinline float3 subd_triangle_attribute_float3(KernelGlobals *kg,
+                                                          const ShaderData *sd,
+                                                          const AttributeDescriptor desc,
+                                                          float3 *dx,
+                                                          float3 *dy)
 {
-	int patch = subd_triangle_patch(kg, sd);
+  int patch = subd_triangle_patch(kg, sd);
 
 #ifdef __PATCH_EVAL__
-	if(desc.flags & ATTR_SUBDIVIDED) {
-		float2 uv[3];
-		subd_triangle_patch_uv(kg, sd, uv);
-
-		float2 dpdu = uv[0] - uv[2];
-		float2 dpdv = uv[1] - uv[2];
-
-		/* p is [s, t] */
-		float2 p = dpdu * sd->u + dpdv * sd->v + uv[2];
-
-		float3 a, dads, dadt;
-
-		if(desc.element == ATTR_ELEMENT_CORNER_BYTE) {
-			a = patch_eval_uchar4(kg, sd, desc.offset, patch, p.x, p.y, 0, &dads, &dadt);
-		}
-		else {
-			a = patch_eval_float3(kg, sd, desc.offset, patch, p.x, p.y, 0, &dads, &dadt);
-		}
-
-#ifdef __RAY_DIFFERENTIALS__
-		if(dx || dy) {
-			float dsdu = dpdu.x;
-			float dtdu = dpdu.y;
-			float dsdv = dpdv.x;
-			float dtdv = dpdv.y;
-
-			if(dx) {
-				float dudx = sd->du.dx;
-				float dvdx = sd->dv.dx;
-
-				float dsdx = dsdu*dudx + dsdv*dvdx;
-				float dtdx = dtdu*dudx + dtdv*dvdx;
-
-				*dx = dads*dsdx + dadt*dtdx;
-			}
-			if(dy) {
-				float dudy = sd->du.dy;
-				float dvdy = sd->dv.dy;
-
-				float dsdy = dsdu*dudy + dsdv*dvdy;
-				float dtdy = dtdu*dudy + dtdv*dvdy;
-
-				*dy = dads*dsdy + dadt*dtdy;
-			}
-		}
-#endif
-
-		return a;
-	}
-	else
-#endif  /* __PATCH_EVAL__ */
-	if(desc.element == ATTR_ELEMENT_FACE) {
-		if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f);
-		if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f);
-
-		return float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + subd_triangle_patch_face(kg, patch)));
-	}
-	else if(desc.element == ATTR_ELEMENT_VERTEX || desc.element == ATTR_ELEMENT_VERTEX_MOTION) {
-		float2 uv[3];
-		subd_triangle_patch_uv(kg, sd, uv);
-
-		uint4 v = subd_triangle_patch_indices(kg, patch);
-
-		float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + v.x));
-		float3 f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + v.y));
-		float3 f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + v.z));
-		float3 f3 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + v.w));
-
-		if(subd_triangle_patch_num_corners(kg, patch) != 4) {
-			f1 = (f1+f0)*0.5f;
-			f3 = (f3+f0)*0.5f;
-		}
-
-		float3 a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y);
-		float3 b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y);
-		float3 c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y);
+  if (desc.flags & ATTR_SUBDIVIDED) {
+    float2 uv[3];
+    subd_triangle_patch_uv(kg, sd, uv);
+
+    float2 dpdu = uv[0] - uv[2];
+    float2 dpdv = uv[1] - uv[2];
+
+    /* p is [s, t] */
+    float2 p = dpdu * sd->u + dpdv * sd->v + uv[2];
+
+    float3 a, dads, dadt;
+
+    if (desc.element == ATTR_ELEMENT_CORNER_BYTE) {
+      a = patch_eval_uchar4(kg, sd, desc.offset, patch, p.x, p.y, 0, &dads, &dadt);
+    }
+    else {
+      a = patch_eval_float3(kg, sd, desc.offset, patch, p.x, p.y, 0, &dads, &dadt);
+    }
+
+#  ifdef __RAY_DIFFERENTIALS__
+    if (dx || dy) {
+      float dsdu = dpdu.x;
+      float dtdu = dpdu.y;
+      float dsdv = dpdv.x;
+      float dtdv = dpdv.y;
+
+      if (dx) {
+        float dudx = sd->du.dx;
+        float dvdx = sd->dv.dx;
+
+        float dsdx = dsdu * dudx + dsdv * dvdx;
+        float dtdx = dtdu * dudx + dtdv * dvdx;
+
+        *dx = dads * dsdx + dadt * dtdx;
+      }
+      if (dy) {
+        float dudy = sd->du.dy;
+        float dvdy = sd->dv.dy;
+
+        float dsdy = dsdu * dudy + dsdv * dvdy;
+        float dtdy = dtdu * dudy + dtdv * dvdy;
+
+        *dy = dads * dsdy + dadt * dtdy;
+      }
+    }
+#  endif
+
+    return a;
+  }
+  else
+#endif /* __PATCH_EVAL__ */
+      if (desc.element == ATTR_ELEMENT_FACE) {
+    if (dx)
+      *dx = make_float3(0.0f, 0.0f, 0.0f);
+    if (dy)
+      *dy = make_float3(0.0f, 0.0f, 0.0f);
+
+    return float4_to_float3(
+        kernel_tex_fetch(__attributes_float3, desc.offset + subd_triangle_patch_face(kg, patch)));
+  }
+  else if (desc.element == ATTR_ELEMENT_VERTEX || desc.element == ATTR_ELEMENT_VERTEX_MOTION) {
+    float2 uv[3];
+    subd_triangle_patch_uv(kg, sd, uv);
+
+    uint4 v = subd_triangle_patch_indices(kg, patch);
+
+    float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + v.x));
+    float3 f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + v.y));
+    float3 f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + v.z));
+    float3 f3 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + v.w));
+
+    if (subd_triangle_patch_num_corners(kg, patch) != 4) {
+      f1 = (f1 + f0) * 0.5f;
+      f3 = (f3 + f0) * 0.5f;
+    }
+
+    float3 a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y);
+    float3 b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y);
+    float3 c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y);
 
 #ifdef __RAY_DIFFERENTIALS__
-		if(dx) *dx = sd->du.dx*a + sd->dv.dx*b - (sd->du.dx + sd->dv.dx)*c;
-		if(dy) *dy = sd->du.dy*a + sd->dv.dy*b - (sd->du.dy + sd->dv.dy)*c;
+    if (dx)
+      *dx = sd->du.dx * a + sd->dv.dx * b - (sd->du.dx + sd->dv.dx) * c;
+    if (dy)
+      *dy = sd->du.dy * a + sd->dv.dy * b - (sd->du.dy + sd->dv.dy) * c;
 #endif
 
-		return sd->u*a + sd->v*b + (1.0f - sd->u - sd->v)*c;
-	}
-	else if(desc.element == ATTR_ELEMENT_CORNER || desc.element == ATTR_ELEMENT_CORNER_BYTE) {
-		float2 uv[3];
-		subd_triangle_patch_uv(kg, sd, uv);
-
-		int corners[4];
-		subd_triangle_patch_corners(kg, patch, corners);
-
-		float3 f0, f1, f2, f3;
-
-		if(desc.element == ATTR_ELEMENT_CORNER) {
-			f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, corners[0] + desc.offset));
-			f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, corners[1] + desc.offset));
-			f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, corners[2] + desc.offset));
-			f3 = float4_to_float3(kernel_tex_fetch(__attributes_float3, corners[3] + desc.offset));
-		}
-		else {
-			f0 = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, corners[0] + desc.offset));
-			f1 = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, corners[1] + desc.offset));
-			f2 = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, corners[2] + desc.offset));
-			f3 = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, corners[3] + desc.offset));
-		}
-
-		if(subd_triangle_patch_num_corners(kg, patch) != 4) {
-			f1 = (f1+f0)*0.5f;
-			f3 = (f3+f0)*0.5f;
-		}
-
-		float3 a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y);
-		float3 b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y);
-		float3 c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y);
+    return sd->u * a + sd->v * b + (1.0f - sd->u - sd->v) * c;
+  }
+  else if (desc.element == ATTR_ELEMENT_CORNER || desc.element == ATTR_ELEMENT_CORNER_BYTE) {
+    float2 uv[3];
+    subd_triangle_patch_uv(kg, sd, uv);
+
+    int corners[4];
+    subd_triangle_patch_corners(kg, patch, corners);
+
+    float3 f0, f1, f2, f3;
+
+    if (desc.element == ATTR_ELEMENT_CORNER) {
+      f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, corners[0] + desc.offset));
+      f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, corners[1] + desc.offset));
+      f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, corners[2] + desc.offset));
+      f3 = float4_to_float3(kernel_tex_fetch(__attributes_float3, corners[3] + desc.offset));
+    }
+    else {
+      f0 = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, corners[0] + desc.offset));
+      f1 = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, corners[1] + desc.offset));
+      f2 = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, corners[2] + desc.offset));
+      f3 = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, corners[3] + desc.offset));
+    }
+
+    if (subd_triangle_patch_num_corners(kg, patch) != 4) {
+      f1 = (f1 + f0) * 0.5f;
+      f3 = (f3 + f0) * 0.5f;
+    }
+
+    float3 a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y);
+    float3 b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y);
+    float3 c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y);
 
 #ifdef __RAY_DIFFERENTIALS__
-		if(dx) *dx = sd->du.dx*a + sd->dv.dx*b - (sd->du.dx + sd->dv.dx)*c;
-		if(dy) *dy = sd->du.dy*a + sd->dv.dy*b - (sd->du.dy + sd->dv.dy)*c;
+    if (dx)
+      *dx = sd->du.dx * a + sd->dv.dx * b - (sd->du.dx + sd->dv.dx) * c;
+    if (dy)
+      *dy = sd->du.dy * a + sd->dv.dy * b - (sd->du.dy + sd->dv.dy) * c;
 #endif
 
-		return sd->u*a + sd->v*b + (1.0f - sd->u - sd->v)*c;
-	}
-	else {
-		if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f);
-		if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f);
+    return sd->u * a + sd->v * b + (1.0f - sd->u - sd->v) * c;
+  }
+  else {
+    if (dx)
+      *dx = make_float3(0.0f, 0.0f, 0.0f);
+    if (dy)
+      *dy = make_float3(0.0f, 0.0f, 0.0f);
 
-		return make_float3(0.0f, 0.0f, 0.0f);
-	}
+    return make_float3(0.0f, 0.0f, 0.0f);
+  }
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/geom/geom_triangle.h b/intern/cycles/kernel/geom/geom_triangle.h
index 300227c38e6..9938c0ba2c3 100644
--- a/intern/cycles/kernel/geom/geom_triangle.h
+++ b/intern/cycles/kernel/geom/geom_triangle.h
@@ -25,227 +25,268 @@ CCL_NAMESPACE_BEGIN
 /* normal on triangle  */
 ccl_device_inline float3 triangle_normal(KernelGlobals *kg, ShaderData *sd)
 {
-	/* load triangle vertices */
-	const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim);
-	const float3 v0 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+0));
-	const float3 v1 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+1));
-	const float3 v2 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+2));
-
-	/* return normal */
-	if(sd->object_flag & SD_OBJECT_NEGATIVE_SCALE_APPLIED) {
-		return normalize(cross(v2 - v0, v1 - v0));
-	}
-	else {
-		return normalize(cross(v1 - v0, v2 - v0));
-	}
+  /* load triangle vertices */
+  const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim);
+  const float3 v0 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 0));
+  const float3 v1 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 1));
+  const float3 v2 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 2));
+
+  /* return normal */
+  if (sd->object_flag & SD_OBJECT_NEGATIVE_SCALE_APPLIED) {
+    return normalize(cross(v2 - v0, v1 - v0));
+  }
+  else {
+    return normalize(cross(v1 - v0, v2 - v0));
+  }
 }
 
 /* point and normal on triangle  */
-ccl_device_inline void triangle_point_normal(KernelGlobals *kg, int object, int prim, float u, float v, float3 *P, float3 *Ng, int *shader)
+ccl_device_inline void triangle_point_normal(
+    KernelGlobals *kg, int object, int prim, float u, float v, float3 *P, float3 *Ng, int *shader)
 {
-	/* load triangle vertices */
-	const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim);
-	float3 v0 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+0));
-	float3 v1 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+1));
-	float3 v2 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+2));
-	/* compute point */
-	float t = 1.0f - u - v;
-	*P = (u*v0 + v*v1 + t*v2);
-	/* get object flags */
-	int object_flag = kernel_tex_fetch(__object_flag, object);
-	/* compute normal */
-	if(object_flag & SD_OBJECT_NEGATIVE_SCALE_APPLIED) {
-		*Ng = normalize(cross(v2 - v0, v1 - v0));
-	}
-	else {
-		*Ng = normalize(cross(v1 - v0, v2 - v0));
-	}
-	/* shader`*/
-	*shader = kernel_tex_fetch(__tri_shader, prim);
+  /* load triangle vertices */
+  const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim);
+  float3 v0 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 0));
+  float3 v1 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 1));
+  float3 v2 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 2));
+  /* compute point */
+  float t = 1.0f - u - v;
+  *P = (u * v0 + v * v1 + t * v2);
+  /* get object flags */
+  int object_flag = kernel_tex_fetch(__object_flag, object);
+  /* compute normal */
+  if (object_flag & SD_OBJECT_NEGATIVE_SCALE_APPLIED) {
+    *Ng = normalize(cross(v2 - v0, v1 - v0));
+  }
+  else {
+    *Ng = normalize(cross(v1 - v0, v2 - v0));
+  }
+  /* shader`*/
+  *shader = kernel_tex_fetch(__tri_shader, prim);
 }
 
 /* Triangle vertex locations */
 
 ccl_device_inline void triangle_vertices(KernelGlobals *kg, int prim, float3 P[3])
 {
-	const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim);
-	P[0] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+0));
-	P[1] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+1));
-	P[2] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+2));
+  const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim);
+  P[0] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 0));
+  P[1] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 1));
+  P[2] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 2));
 }
 
 /* Interpolate smooth vertex normal from vertices */
 
-ccl_device_inline float3 triangle_smooth_normal(KernelGlobals *kg, float3 Ng, int prim, float u, float v)
+ccl_device_inline float3
+triangle_smooth_normal(KernelGlobals *kg, float3 Ng, int prim, float u, float v)
 {
-	/* load triangle vertices */
-	const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim);
-	float3 n0 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.x));
-	float3 n1 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.y));
-	float3 n2 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.z));
+  /* load triangle vertices */
+  const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim);
+  float3 n0 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.x));
+  float3 n1 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.y));
+  float3 n2 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.z));
 
-	float3 N = safe_normalize((1.0f - u - v)*n2 + u*n0 + v*n1);
+  float3 N = safe_normalize((1.0f - u - v) * n2 + u * n0 + v * n1);
 
-	return is_zero(N)? Ng: N;
+  return is_zero(N) ? Ng : N;
 }
 
 /* Ray differentials on triangle */
 
-ccl_device_inline void triangle_dPdudv(KernelGlobals *kg, int prim, ccl_addr_space float3 *dPdu, ccl_addr_space float3 *dPdv)
+ccl_device_inline void triangle_dPdudv(KernelGlobals *kg,
+                                       int prim,
+                                       ccl_addr_space float3 *dPdu,
+                                       ccl_addr_space float3 *dPdv)
 {
-	/* fetch triangle vertex coordinates */
-	const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim);
-	const float3 p0 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+0));
-	const float3 p1 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+1));
-	const float3 p2 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+2));
-
-	/* compute derivatives of P w.r.t. uv */
-	*dPdu = (p0 - p2);
-	*dPdv = (p1 - p2);
+  /* fetch triangle vertex coordinates */
+  const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim);
+  const float3 p0 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 0));
+  const float3 p1 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 1));
+  const float3 p2 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 2));
+
+  /* compute derivatives of P w.r.t. uv */
+  *dPdu = (p0 - p2);
+  *dPdv = (p1 - p2);
 }
 
 /* Reading attributes on various triangle elements */
 
-ccl_device float triangle_attribute_float(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float *dx, float *dy)
+ccl_device float triangle_attribute_float(
+    KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float *dx, float *dy)
 {
-	if(desc.element == ATTR_ELEMENT_FACE) {
-		if(dx) *dx = 0.0f;
-		if(dy) *dy = 0.0f;
+  if (desc.element == ATTR_ELEMENT_FACE) {
+    if (dx)
+      *dx = 0.0f;
+    if (dy)
+      *dy = 0.0f;
 
-		return kernel_tex_fetch(__attributes_float, desc.offset + sd->prim);
-	}
-	else if(desc.element == ATTR_ELEMENT_VERTEX || desc.element == ATTR_ELEMENT_VERTEX_MOTION) {
-		uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim);
+    return kernel_tex_fetch(__attributes_float, desc.offset + sd->prim);
+  }
+  else if (desc.element == ATTR_ELEMENT_VERTEX || desc.element == ATTR_ELEMENT_VERTEX_MOTION) {
+    uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim);
 
-		float f0 = kernel_tex_fetch(__attributes_float, desc.offset + tri_vindex.x);
-		float f1 = kernel_tex_fetch(__attributes_float, desc.offset + tri_vindex.y);
-		float f2 = kernel_tex_fetch(__attributes_float, desc.offset + tri_vindex.z);
+    float f0 = kernel_tex_fetch(__attributes_float, desc.offset + tri_vindex.x);
+    float f1 = kernel_tex_fetch(__attributes_float, desc.offset + tri_vindex.y);
+    float f2 = kernel_tex_fetch(__attributes_float, desc.offset + tri_vindex.z);
 
 #ifdef __RAY_DIFFERENTIALS__
-		if(dx) *dx = sd->du.dx*f0 + sd->dv.dx*f1 - (sd->du.dx + sd->dv.dx)*f2;
-		if(dy) *dy = sd->du.dy*f0 + sd->dv.dy*f1 - (sd->du.dy + sd->dv.dy)*f2;
+    if (dx)
+      *dx = sd->du.dx * f0 + sd->dv.dx * f1 - (sd->du.dx + sd->dv.dx) * f2;
+    if (dy)
+      *dy = sd->du.dy * f0 + sd->dv.dy * f1 - (sd->du.dy + sd->dv.dy) * f2;
 #endif
 
-		return sd->u*f0 + sd->v*f1 + (1.0f - sd->u - sd->v)*f2;
-	}
-	else if(desc.element == ATTR_ELEMENT_CORNER) {
-		int tri = desc.offset + sd->prim*3;
-		float f0 = kernel_tex_fetch(__attributes_float, tri + 0);
-		float f1 = kernel_tex_fetch(__attributes_float, tri + 1);
-		float f2 = kernel_tex_fetch(__attributes_float, tri + 2);
+    return sd->u * f0 + sd->v * f1 + (1.0f - sd->u - sd->v) * f2;
+  }
+  else if (desc.element == ATTR_ELEMENT_CORNER) {
+    int tri = desc.offset + sd->prim * 3;
+    float f0 = kernel_tex_fetch(__attributes_float, tri + 0);
+    float f1 = kernel_tex_fetch(__attributes_float, tri + 1);
+    float f2 = kernel_tex_fetch(__attributes_float, tri + 2);
 
 #ifdef __RAY_DIFFERENTIALS__
-		if(dx) *dx = sd->du.dx*f0 + sd->dv.dx*f1 - (sd->du.dx + sd->dv.dx)*f2;
-		if(dy) *dy = sd->du.dy*f0 + sd->dv.dy*f1 - (sd->du.dy + sd->dv.dy)*f2;
+    if (dx)
+      *dx = sd->du.dx * f0 + sd->dv.dx * f1 - (sd->du.dx + sd->dv.dx) * f2;
+    if (dy)
+      *dy = sd->du.dy * f0 + sd->dv.dy * f1 - (sd->du.dy + sd->dv.dy) * f2;
 #endif
 
-		return sd->u*f0 + sd->v*f1 + (1.0f - sd->u - sd->v)*f2;
-	}
-	else {
-		if(dx) *dx = 0.0f;
-		if(dy) *dy = 0.0f;
+    return sd->u * f0 + sd->v * f1 + (1.0f - sd->u - sd->v) * f2;
+  }
+  else {
+    if (dx)
+      *dx = 0.0f;
+    if (dy)
+      *dy = 0.0f;
 
-		return 0.0f;
-	}
+    return 0.0f;
+  }
 }
 
-ccl_device float2 triangle_attribute_float2(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float2 *dx, float2 *dy)
+ccl_device float2 triangle_attribute_float2(KernelGlobals *kg,
+                                            const ShaderData *sd,
+                                            const AttributeDescriptor desc,
+                                            float2 *dx,
+                                            float2 *dy)
 {
-	if(desc.element == ATTR_ELEMENT_FACE) {
-		if(dx) *dx = make_float2(0.0f, 0.0f);
-		if(dy) *dy = make_float2(0.0f, 0.0f);
+  if (desc.element == ATTR_ELEMENT_FACE) {
+    if (dx)
+      *dx = make_float2(0.0f, 0.0f);
+    if (dy)
+      *dy = make_float2(0.0f, 0.0f);
 
-		return kernel_tex_fetch(__attributes_float2, desc.offset + sd->prim);
-	}
-	else if(desc.element == ATTR_ELEMENT_VERTEX || desc.element == ATTR_ELEMENT_VERTEX_MOTION) {
-		uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim);
+    return kernel_tex_fetch(__attributes_float2, desc.offset + sd->prim);
+  }
+  else if (desc.element == ATTR_ELEMENT_VERTEX || desc.element == ATTR_ELEMENT_VERTEX_MOTION) {
+    uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim);
 
-		float2 f0 = kernel_tex_fetch(__attributes_float2, desc.offset + tri_vindex.x);
-		float2 f1 = kernel_tex_fetch(__attributes_float2, desc.offset + tri_vindex.y);
-		float2 f2 = kernel_tex_fetch(__attributes_float2, desc.offset + tri_vindex.z);
+    float2 f0 = kernel_tex_fetch(__attributes_float2, desc.offset + tri_vindex.x);
+    float2 f1 = kernel_tex_fetch(__attributes_float2, desc.offset + tri_vindex.y);
+    float2 f2 = kernel_tex_fetch(__attributes_float2, desc.offset + tri_vindex.z);
 
 #ifdef __RAY_DIFFERENTIALS__
-		if(dx) *dx = sd->du.dx*f0 + sd->dv.dx*f1 - (sd->du.dx + sd->dv.dx)*f2;
-		if(dy) *dy = sd->du.dy*f0 + sd->dv.dy*f1 - (sd->du.dy + sd->dv.dy)*f2;
+    if (dx)
+      *dx = sd->du.dx * f0 + sd->dv.dx * f1 - (sd->du.dx + sd->dv.dx) * f2;
+    if (dy)
+      *dy = sd->du.dy * f0 + sd->dv.dy * f1 - (sd->du.dy + sd->dv.dy) * f2;
 #endif
 
-		return sd->u*f0 + sd->v*f1 + (1.0f - sd->u - sd->v)*f2;
-	}
-	else if(desc.element == ATTR_ELEMENT_CORNER) {
-		int tri = desc.offset + sd->prim*3;
-		float2 f0, f1, f2;
+    return sd->u * f0 + sd->v * f1 + (1.0f - sd->u - sd->v) * f2;
+  }
+  else if (desc.element == ATTR_ELEMENT_CORNER) {
+    int tri = desc.offset + sd->prim * 3;
+    float2 f0, f1, f2;
 
-		if(desc.element == ATTR_ELEMENT_CORNER) {
-			f0 = kernel_tex_fetch(__attributes_float2, tri + 0);
-			f1 = kernel_tex_fetch(__attributes_float2, tri + 1);
-			f2 = kernel_tex_fetch(__attributes_float2, tri + 2);
-		}
+    if (desc.element == ATTR_ELEMENT_CORNER) {
+      f0 = kernel_tex_fetch(__attributes_float2, tri + 0);
+      f1 = kernel_tex_fetch(__attributes_float2, tri + 1);
+      f2 = kernel_tex_fetch(__attributes_float2, tri + 2);
+    }
 
 #ifdef __RAY_DIFFERENTIALS__
-		if(dx) *dx = sd->du.dx*f0 + sd->dv.dx*f1 - (sd->du.dx + sd->dv.dx)*f2;
-		if(dy) *dy = sd->du.dy*f0 + sd->dv.dy*f1 - (sd->du.dy + sd->dv.dy)*f2;
+    if (dx)
+      *dx = sd->du.dx * f0 + sd->dv.dx * f1 - (sd->du.dx + sd->dv.dx) * f2;
+    if (dy)
+      *dy = sd->du.dy * f0 + sd->dv.dy * f1 - (sd->du.dy + sd->dv.dy) * f2;
 #endif
 
-		return sd->u*f0 + sd->v*f1 + (1.0f - sd->u - sd->v)*f2;
-	}
-	else {
-		if(dx) *dx = make_float2(0.0f, 0.0f);
-		if(dy) *dy = make_float2(0.0f, 0.0f);
+    return sd->u * f0 + sd->v * f1 + (1.0f - sd->u - sd->v) * f2;
+  }
+  else {
+    if (dx)
+      *dx = make_float2(0.0f, 0.0f);
+    if (dy)
+      *dy = make_float2(0.0f, 0.0f);
 
-		return make_float2(0.0f, 0.0f);
-	}
+    return make_float2(0.0f, 0.0f);
+  }
 }
 
-ccl_device float3 triangle_attribute_float3(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float3 *dx, float3 *dy)
+ccl_device float3 triangle_attribute_float3(KernelGlobals *kg,
+                                            const ShaderData *sd,
+                                            const AttributeDescriptor desc,
+                                            float3 *dx,
+                                            float3 *dy)
 {
-	if(desc.element == ATTR_ELEMENT_FACE) {
-		if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f);
-		if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f);
-
-		return float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + sd->prim));
-	}
-	else if(desc.element == ATTR_ELEMENT_VERTEX || desc.element == ATTR_ELEMENT_VERTEX_MOTION) {
-		uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim);
-
-		float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + tri_vindex.x));
-		float3 f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + tri_vindex.y));
-		float3 f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + tri_vindex.z));
+  if (desc.element == ATTR_ELEMENT_FACE) {
+    if (dx)
+      *dx = make_float3(0.0f, 0.0f, 0.0f);
+    if (dy)
+      *dy = make_float3(0.0f, 0.0f, 0.0f);
+
+    return float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + sd->prim));
+  }
+  else if (desc.element == ATTR_ELEMENT_VERTEX || desc.element == ATTR_ELEMENT_VERTEX_MOTION) {
+    uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim);
+
+    float3 f0 = float4_to_float3(
+        kernel_tex_fetch(__attributes_float3, desc.offset + tri_vindex.x));
+    float3 f1 = float4_to_float3(
+        kernel_tex_fetch(__attributes_float3, desc.offset + tri_vindex.y));
+    float3 f2 = float4_to_float3(
+        kernel_tex_fetch(__attributes_float3, desc.offset + tri_vindex.z));
 
 #ifdef __RAY_DIFFERENTIALS__
-		if(dx) *dx = sd->du.dx*f0 + sd->dv.dx*f1 - (sd->du.dx + sd->dv.dx)*f2;
-		if(dy) *dy = sd->du.dy*f0 + sd->dv.dy*f1 - (sd->du.dy + sd->dv.dy)*f2;
+    if (dx)
+      *dx = sd->du.dx * f0 + sd->dv.dx * f1 - (sd->du.dx + sd->dv.dx) * f2;
+    if (dy)
+      *dy = sd->du.dy * f0 + sd->dv.dy * f1 - (sd->du.dy + sd->dv.dy) * f2;
 #endif
 
-		return sd->u*f0 + sd->v*f1 + (1.0f - sd->u - sd->v)*f2;
-	}
-	else if(desc.element == ATTR_ELEMENT_CORNER || desc.element == ATTR_ELEMENT_CORNER_BYTE) {
-		int tri = desc.offset + sd->prim*3;
-		float3 f0, f1, f2;
-
-		if(desc.element == ATTR_ELEMENT_CORNER) {
-			f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, tri + 0));
-			f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, tri + 1));
-			f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, tri + 2));
-		}
-		else {
-			f0 = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, tri + 0));
-			f1 = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, tri + 1));
-			f2 = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, tri + 2));
-		}
+    return sd->u * f0 + sd->v * f1 + (1.0f - sd->u - sd->v) * f2;
+  }
+  else if (desc.element == ATTR_ELEMENT_CORNER || desc.element == ATTR_ELEMENT_CORNER_BYTE) {
+    int tri = desc.offset + sd->prim * 3;
+    float3 f0, f1, f2;
+
+    if (desc.element == ATTR_ELEMENT_CORNER) {
+      f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, tri + 0));
+      f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, tri + 1));
+      f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, tri + 2));
+    }
+    else {
+      f0 = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, tri + 0));
+      f1 = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, tri + 1));
+      f2 = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, tri + 2));
+    }
 
 #ifdef __RAY_DIFFERENTIALS__
-		if(dx) *dx = sd->du.dx*f0 + sd->dv.dx*f1 - (sd->du.dx + sd->dv.dx)*f2;
-		if(dy) *dy = sd->du.dy*f0 + sd->dv.dy*f1 - (sd->du.dy + sd->dv.dy)*f2;
+    if (dx)
+      *dx = sd->du.dx * f0 + sd->dv.dx * f1 - (sd->du.dx + sd->dv.dx) * f2;
+    if (dy)
+      *dy = sd->du.dy * f0 + sd->dv.dy * f1 - (sd->du.dy + sd->dv.dy) * f2;
 #endif
 
-		return sd->u*f0 + sd->v*f1 + (1.0f - sd->u - sd->v)*f2;
-	}
-	else {
-		if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f);
-		if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f);
+    return sd->u * f0 + sd->v * f1 + (1.0f - sd->u - sd->v) * f2;
+  }
+  else {
+    if (dx)
+      *dx = make_float3(0.0f, 0.0f, 0.0f);
+    if (dy)
+      *dy = make_float3(0.0f, 0.0f, 0.0f);
 
-		return make_float3(0.0f, 0.0f, 0.0f);
-	}
+    return make_float3(0.0f, 0.0f, 0.0f);
+  }
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/geom/geom_triangle_intersect.h b/intern/cycles/kernel/geom/geom_triangle_intersect.h
index 56dbc4473fa..bcad03102d2 100644
--- a/intern/cycles/kernel/geom/geom_triangle_intersect.h
+++ b/intern/cycles/kernel/geom/geom_triangle_intersect.h
@@ -1,4 +1,4 @@
-			/*
+/*
  * Copyright 2014, Blender Foundation.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
@@ -30,447 +30,464 @@ ccl_device_inline bool triangle_intersect(KernelGlobals *kg,
                                           int object,
                                           int prim_addr)
 {
-	const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, prim_addr);
+  const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, prim_addr);
 #if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__)
-	const ssef *ssef_verts = (ssef*)&kg->__prim_tri_verts.data[tri_vindex];
+  const ssef *ssef_verts = (ssef *)&kg->__prim_tri_verts.data[tri_vindex];
 #else
-	const float4 tri_a = kernel_tex_fetch(__prim_tri_verts, tri_vindex+0),
-	             tri_b = kernel_tex_fetch(__prim_tri_verts, tri_vindex+1),
-	             tri_c = kernel_tex_fetch(__prim_tri_verts, tri_vindex+2);
+  const float4 tri_a = kernel_tex_fetch(__prim_tri_verts, tri_vindex + 0),
+               tri_b = kernel_tex_fetch(__prim_tri_verts, tri_vindex + 1),
+               tri_c = kernel_tex_fetch(__prim_tri_verts, tri_vindex + 2);
 #endif
-	float t, u, v;
-	if(ray_triangle_intersect(P,
-	                          dir,
-	                          isect->t,
+  float t, u, v;
+  if (ray_triangle_intersect(P,
+                             dir,
+                             isect->t,
 #if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__)
-	                          ssef_verts,
+                             ssef_verts,
 #else
-	                          float4_to_float3(tri_a),
-	                          float4_to_float3(tri_b),
-	                          float4_to_float3(tri_c),
+                             float4_to_float3(tri_a),
+                             float4_to_float3(tri_b),
+                             float4_to_float3(tri_c),
 #endif
-	                          &u, &v, &t))
-	{
+                             &u,
+                             &v,
+                             &t)) {
 #ifdef __VISIBILITY_FLAG__
-		/* Visibility flag test. we do it here under the assumption
-		 * that most triangles are culled by node flags.
-		 */
-		if(kernel_tex_fetch(__prim_visibility, prim_addr) & visibility)
+    /* Visibility flag test. we do it here under the assumption
+     * that most triangles are culled by node flags.
+     */
+    if (kernel_tex_fetch(__prim_visibility, prim_addr) & visibility)
 #endif
-		{
-			isect->prim = prim_addr;
-			isect->object = object;
-			isect->type = PRIMITIVE_TRIANGLE;
-			isect->u = u;
-			isect->v = v;
-			isect->t = t;
-			return true;
-		}
-	}
-	return false;
+    {
+      isect->prim = prim_addr;
+      isect->object = object;
+      isect->type = PRIMITIVE_TRIANGLE;
+      isect->u = u;
+      isect->v = v;
+      isect->t = t;
+      return true;
+    }
+  }
+  return false;
 }
 
 #ifdef __KERNEL_AVX2__
-#define	cross256(A,B, C,D) _mm256_fmsub_ps(A,B, _mm256_mul_ps(C,D))
-ccl_device_inline int ray_triangle_intersect8(
-            KernelGlobals *kg,
-            float3 ray_P,
-            float3 ray_dir,
-            Intersection **isect,
-            uint visibility,
-            int object,
-            __m256 *triA,
-            __m256 *triB,
-            __m256 *triC,
-            int prim_addr,
-            int prim_num,
-            uint *num_hits,
-            uint max_hits,
-            int *num_hits_in_instance,
-            float isect_t)
+#  define cross256(A, B, C, D) _mm256_fmsub_ps(A, B, _mm256_mul_ps(C, D))
+ccl_device_inline int ray_triangle_intersect8(KernelGlobals *kg,
+                                              float3 ray_P,
+                                              float3 ray_dir,
+                                              Intersection **isect,
+                                              uint visibility,
+                                              int object,
+                                              __m256 *triA,
+                                              __m256 *triB,
+                                              __m256 *triC,
+                                              int prim_addr,
+                                              int prim_num,
+                                              uint *num_hits,
+                                              uint max_hits,
+                                              int *num_hits_in_instance,
+                                              float isect_t)
 {
 
-	const unsigned char prim_num_mask = (1 << prim_num) - 1;
-
-	const __m256i zero256 = _mm256_setzero_si256();
-
-	const __m256 Px256 = _mm256_set1_ps(ray_P.x);
-	const __m256 Py256 = _mm256_set1_ps(ray_P.y);
-	const __m256 Pz256 = _mm256_set1_ps(ray_P.z);
-
-	const __m256 dirx256 = _mm256_set1_ps(ray_dir.x);
-	const __m256 diry256 = _mm256_set1_ps(ray_dir.y);
-	const __m256 dirz256 = _mm256_set1_ps(ray_dir.z);
-
-	/* Calculate vertices relative to ray origin. */
-	__m256 v0_x_256 = _mm256_sub_ps(triC[0], Px256);
-	__m256 v0_y_256 = _mm256_sub_ps(triC[1], Py256);
-	__m256 v0_z_256 = _mm256_sub_ps(triC[2], Pz256);
-
-	__m256 v1_x_256 = _mm256_sub_ps(triA[0], Px256);
-	__m256 v1_y_256 = _mm256_sub_ps(triA[1], Py256);
-	__m256 v1_z_256 = _mm256_sub_ps(triA[2], Pz256);
-
-	__m256 v2_x_256 = _mm256_sub_ps(triB[0], Px256);
-	__m256 v2_y_256 = _mm256_sub_ps(triB[1], Py256);
-	__m256 v2_z_256 = _mm256_sub_ps(triB[2], Pz256);
-
-	__m256 v0_v1_x_256 = _mm256_add_ps(v0_x_256, v1_x_256);
-	__m256 v0_v1_y_256 = _mm256_add_ps(v0_y_256, v1_y_256);
-	__m256 v0_v1_z_256 = _mm256_add_ps(v0_z_256, v1_z_256);
-
-	__m256 v0_v2_x_256 = _mm256_add_ps(v0_x_256, v2_x_256);
-	__m256 v0_v2_y_256 = _mm256_add_ps(v0_y_256, v2_y_256);
-	__m256 v0_v2_z_256 = _mm256_add_ps(v0_z_256, v2_z_256);
-
-	__m256 v1_v2_x_256 = _mm256_add_ps(v1_x_256, v2_x_256);
-	__m256 v1_v2_y_256 = _mm256_add_ps(v1_y_256, v2_y_256);
-	__m256 v1_v2_z_256 = _mm256_add_ps(v1_z_256, v2_z_256);
-
-	/* Calculate triangle edges. */
-	__m256 e0_x_256 = _mm256_sub_ps(v2_x_256, v0_x_256);
-	__m256 e0_y_256 = _mm256_sub_ps(v2_y_256, v0_y_256);
-	__m256 e0_z_256 = _mm256_sub_ps(v2_z_256, v0_z_256);
-
-	__m256 e1_x_256 = _mm256_sub_ps(v0_x_256, v1_x_256);
-	__m256 e1_y_256 = _mm256_sub_ps(v0_y_256, v1_y_256);
-	__m256 e1_z_256 = _mm256_sub_ps(v0_z_256, v1_z_256);
-
-	__m256 e2_x_256 = _mm256_sub_ps(v1_x_256, v2_x_256);
-	__m256 e2_y_256 = _mm256_sub_ps(v1_y_256, v2_y_256);
-	__m256 e2_z_256 = _mm256_sub_ps(v1_z_256, v2_z_256);
-
-	/* Perform edge tests. */
-	/* cross (AyBz - AzBy, AzBx -AxBz,  AxBy - AyBx) */
-	__m256 U_x_256 = cross256(v0_v2_y_256, e0_z_256, v0_v2_z_256, e0_y_256);
-	__m256 U_y_256 = cross256(v0_v2_z_256, e0_x_256, v0_v2_x_256, e0_z_256);
-	__m256 U_z_256 = cross256(v0_v2_x_256, e0_y_256, v0_v2_y_256, e0_x_256);
-	/* vertical dot */
-	__m256 U_256 = _mm256_mul_ps(U_x_256, dirx256);
-	U_256 = _mm256_fmadd_ps(U_y_256, diry256, U_256);
-	U_256 = _mm256_fmadd_ps(U_z_256, dirz256, U_256);
-
-	__m256 V_x_256 = cross256(v0_v1_y_256, e1_z_256, v0_v1_z_256, e1_y_256);
-	__m256 V_y_256 = cross256(v0_v1_z_256, e1_x_256, v0_v1_x_256, e1_z_256);
-	__m256 V_z_256 = cross256(v0_v1_x_256, e1_y_256, v0_v1_y_256, e1_x_256);
-	/* vertical dot */
-	__m256 V_256 = _mm256_mul_ps(V_x_256, dirx256);
-	V_256 = _mm256_fmadd_ps(V_y_256, diry256, V_256);
-	V_256 = _mm256_fmadd_ps(V_z_256, dirz256, V_256);
-
-	__m256 W_x_256 = cross256(v1_v2_y_256, e2_z_256, v1_v2_z_256, e2_y_256);
-	__m256 W_y_256 = cross256(v1_v2_z_256, e2_x_256, v1_v2_x_256, e2_z_256);
-	__m256 W_z_256 = cross256(v1_v2_x_256, e2_y_256, v1_v2_y_256, e2_x_256);
-	/* vertical dot */
-	__m256 W_256 = _mm256_mul_ps(W_x_256, dirx256);
-	W_256 = _mm256_fmadd_ps(W_y_256, diry256,W_256);
-	W_256 = _mm256_fmadd_ps(W_z_256, dirz256,W_256);
-
-	__m256i U_256_1 = _mm256_srli_epi32(_mm256_castps_si256(U_256), 31);
-	__m256i V_256_1 = _mm256_srli_epi32(_mm256_castps_si256(V_256), 31);
-	__m256i W_256_1 = _mm256_srli_epi32(_mm256_castps_si256(W_256), 31);
-	__m256i UVW_256_1 = _mm256_add_epi32(_mm256_add_epi32(U_256_1, V_256_1), W_256_1);
-
-	const __m256i one256 = _mm256_set1_epi32(1);
-	const __m256i two256 = _mm256_set1_epi32(2);
-
-	__m256i mask_minmaxUVW_256 = _mm256_or_si256(
-	        _mm256_cmpeq_epi32(one256, UVW_256_1),
-	        _mm256_cmpeq_epi32(two256, UVW_256_1));
-
-	unsigned char mask_minmaxUVW_pos = _mm256_movemask_ps(_mm256_castsi256_ps(mask_minmaxUVW_256));
-	if((mask_minmaxUVW_pos & prim_num_mask) == prim_num_mask) { //all bits set
-		return false;
-	}
-
-	/* Calculate geometry normal and denominator. */
-	__m256 Ng1_x_256 = cross256(e1_y_256, e0_z_256, e1_z_256, e0_y_256);
-	__m256 Ng1_y_256 = cross256(e1_z_256, e0_x_256, e1_x_256, e0_z_256);
-	__m256 Ng1_z_256 = cross256(e1_x_256, e0_y_256, e1_y_256, e0_x_256);
-
-	Ng1_x_256 = _mm256_add_ps(Ng1_x_256, Ng1_x_256);
-	Ng1_y_256 = _mm256_add_ps(Ng1_y_256, Ng1_y_256);
-	Ng1_z_256 = _mm256_add_ps(Ng1_z_256, Ng1_z_256);
-
-	/* vertical dot */
-	__m256 den_256 = _mm256_mul_ps(Ng1_x_256, dirx256);
-	den_256 = _mm256_fmadd_ps(Ng1_y_256, diry256,den_256);
-	den_256 = _mm256_fmadd_ps(Ng1_z_256, dirz256,den_256);
-
-	/* Perform depth test. */
-	__m256 T_256 = _mm256_mul_ps(Ng1_x_256, v0_x_256);
-	T_256 = _mm256_fmadd_ps(Ng1_y_256, v0_y_256,T_256);
-	T_256 = _mm256_fmadd_ps(Ng1_z_256, v0_z_256,T_256);
-
-	const __m256i c0x80000000 = _mm256_set1_epi32(0x80000000);
-	__m256i sign_den_256 = _mm256_and_si256(_mm256_castps_si256(den_256), c0x80000000);
-
-	__m256 sign_T_256 = _mm256_castsi256_ps(_mm256_xor_si256(_mm256_castps_si256(T_256), sign_den_256));
-
-	unsigned char mask_sign_T = _mm256_movemask_ps(sign_T_256);
-	if(((mask_minmaxUVW_pos | mask_sign_T) & prim_num_mask) == prim_num_mask) {
-		return false;
-	}
-
-	__m256 xor_signmask_256 = _mm256_castsi256_ps(_mm256_xor_si256(_mm256_castps_si256(den_256), sign_den_256));
-
-	ccl_align(32) float den8[8], U8[8], V8[8], T8[8], sign_T8[8], xor_signmask8[8];
-	ccl_align(32) unsigned int mask_minmaxUVW8[8];
-
-	if(visibility == PATH_RAY_SHADOW_OPAQUE) {
-		__m256i mask_final_256 = _mm256_cmpeq_epi32(mask_minmaxUVW_256, zero256);
-		__m256i maskden256 = _mm256_cmpeq_epi32(_mm256_castps_si256(den_256), zero256);
-		__m256i mask0 = _mm256_cmpgt_epi32(zero256, _mm256_castps_si256(sign_T_256));
-		__m256 rayt_256 = _mm256_set1_ps((*isect)->t);
-		__m256i mask1 = _mm256_cmpgt_epi32(_mm256_castps_si256(sign_T_256),
-			_mm256_castps_si256(
-				_mm256_mul_ps(_mm256_castsi256_ps(_mm256_xor_si256(_mm256_castps_si256(den_256), sign_den_256)), rayt_256)
-			)
-		);
-		mask0 = _mm256_or_si256(mask1, mask0);
-		mask_final_256 = _mm256_andnot_si256(mask0, mask_final_256); //(~mask_minmaxUVW_pos) &(~mask)
-		mask_final_256 = _mm256_andnot_si256(maskden256, mask_final_256); //(~mask_minmaxUVW_pos) &(~mask) & (~maskden)
-		unsigned char mask_final = _mm256_movemask_ps(_mm256_castsi256_ps(mask_final_256));
-		if((mask_final & prim_num_mask) == 0) {
-			return false;
-		}
-		const int i = __bsf(mask_final);
-		__m256 inv_den_256 = _mm256_rcp_ps(den_256);
-		U_256 = _mm256_mul_ps(U_256, inv_den_256);
-		V_256 = _mm256_mul_ps(V_256, inv_den_256);
-		T_256 = _mm256_mul_ps(T_256, inv_den_256);
-		_mm256_store_ps(U8, U_256);
-		_mm256_store_ps(V8, V_256);
-		_mm256_store_ps(T8, T_256);
-		/* NOTE: Here we assume visibility for all triangles in the node is
-		 * the same. */
-		(*isect)->u = U8[i];
-		(*isect)->v = V8[i];
-		(*isect)->t = T8[i];
-		(*isect)->prim = (prim_addr + i);
-		(*isect)->object = object;
-		(*isect)->type = PRIMITIVE_TRIANGLE;
-		return true;
-	}
-	else {
-		_mm256_store_ps(den8, den_256);
-		_mm256_store_ps(U8, U_256);
-		_mm256_store_ps(V8, V_256);
-		_mm256_store_ps(T8, T_256);
-
-		_mm256_store_ps(sign_T8, sign_T_256);
-		_mm256_store_ps(xor_signmask8, xor_signmask_256);
-		_mm256_store_si256((__m256i*)mask_minmaxUVW8, mask_minmaxUVW_256);
-
-		int ret = false;
-
-		if(visibility == PATH_RAY_SHADOW) {
-			for(int i = 0; i < prim_num; i++) {
-				if(mask_minmaxUVW8[i]) {
-					continue;
-				}
-#ifdef __VISIBILITY_FLAG__
-				if((kernel_tex_fetch(__prim_visibility, (prim_addr + i)) & visibility) == 0) {
-					continue;
-				}
-#endif
-				if((sign_T8[i] < 0.0f) ||
-				   (sign_T8[i] > (*isect)->t * xor_signmask8[i]))
-				{
-					continue;
-				}
-				if(!den8[i]) {
-					continue;
-				}
-				const float inv_den = 1.0f / den8[i];
-				(*isect)->u = U8[i] * inv_den;
-				(*isect)->v = V8[i] * inv_den;
-				(*isect)->t = T8[i] * inv_den;
-				(*isect)->prim = (prim_addr + i);
-				(*isect)->object = object;
-				(*isect)->type = PRIMITIVE_TRIANGLE;
-				const int prim = kernel_tex_fetch(__prim_index, (*isect)->prim);
-				int shader = 0;
-#ifdef __HAIR__
-				if(kernel_tex_fetch(__prim_type, (*isect)->prim) & PRIMITIVE_ALL_TRIANGLE)
-#endif
-				{
-					shader = kernel_tex_fetch(__tri_shader, prim);
-				}
-#ifdef __HAIR__
-				else {
-					float4 str = kernel_tex_fetch(__curves, prim);
-					shader = __float_as_int(str.z);
-				}
-#endif
-				const int flag = kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags;
-				/* If no transparent shadows, all light is blocked. */
-				if(!(flag & SD_HAS_TRANSPARENT_SHADOW)) {
-					return 2;
-				}
-				/* If maximum number of hits reached, block all light. */
-				else if(num_hits == NULL || *num_hits == max_hits) {
-					return 2;
-				}
-				/* Move on to next entry in intersections array. */
-				ret = true;
-				(*isect)++;
-				(*num_hits)++;
-				(*num_hits_in_instance)++;
-				(*isect)->t = isect_t;
-			}
-		}
-		else {
-			for(int i = 0; i < prim_num; i++) {
-				if(mask_minmaxUVW8[i]) {
-					continue;
-				}
-#ifdef __VISIBILITY_FLAG__
-				if((kernel_tex_fetch(__prim_visibility, (prim_addr + i)) & visibility) == 0) {
-					continue;
-				}
-#endif
-				if((sign_T8[i] < 0.0f) ||
-				   (sign_T8[i] > (*isect)->t * xor_signmask8[i]))
-				{
-					continue;
-				}
-				if(!den8[i]) {
-					continue;
-				}
-				const float inv_den = 1.0f / den8[i];
-				(*isect)->u = U8[i] * inv_den;
-				(*isect)->v = V8[i] * inv_den;
-				(*isect)->t = T8[i] * inv_den;
-				(*isect)->prim = (prim_addr + i);
-				(*isect)->object = object;
-				(*isect)->type = PRIMITIVE_TRIANGLE;
-				ret = true;
-			}
-		}
-		return ret;
-	}
+  const unsigned char prim_num_mask = (1 << prim_num) - 1;
+
+  const __m256i zero256 = _mm256_setzero_si256();
+
+  const __m256 Px256 = _mm256_set1_ps(ray_P.x);
+  const __m256 Py256 = _mm256_set1_ps(ray_P.y);
+  const __m256 Pz256 = _mm256_set1_ps(ray_P.z);
+
+  const __m256 dirx256 = _mm256_set1_ps(ray_dir.x);
+  const __m256 diry256 = _mm256_set1_ps(ray_dir.y);
+  const __m256 dirz256 = _mm256_set1_ps(ray_dir.z);
+
+  /* Calculate vertices relative to ray origin. */
+  __m256 v0_x_256 = _mm256_sub_ps(triC[0], Px256);
+  __m256 v0_y_256 = _mm256_sub_ps(triC[1], Py256);
+  __m256 v0_z_256 = _mm256_sub_ps(triC[2], Pz256);
+
+  __m256 v1_x_256 = _mm256_sub_ps(triA[0], Px256);
+  __m256 v1_y_256 = _mm256_sub_ps(triA[1], Py256);
+  __m256 v1_z_256 = _mm256_sub_ps(triA[2], Pz256);
+
+  __m256 v2_x_256 = _mm256_sub_ps(triB[0], Px256);
+  __m256 v2_y_256 = _mm256_sub_ps(triB[1], Py256);
+  __m256 v2_z_256 = _mm256_sub_ps(triB[2], Pz256);
+
+  __m256 v0_v1_x_256 = _mm256_add_ps(v0_x_256, v1_x_256);
+  __m256 v0_v1_y_256 = _mm256_add_ps(v0_y_256, v1_y_256);
+  __m256 v0_v1_z_256 = _mm256_add_ps(v0_z_256, v1_z_256);
+
+  __m256 v0_v2_x_256 = _mm256_add_ps(v0_x_256, v2_x_256);
+  __m256 v0_v2_y_256 = _mm256_add_ps(v0_y_256, v2_y_256);
+  __m256 v0_v2_z_256 = _mm256_add_ps(v0_z_256, v2_z_256);
+
+  __m256 v1_v2_x_256 = _mm256_add_ps(v1_x_256, v2_x_256);
+  __m256 v1_v2_y_256 = _mm256_add_ps(v1_y_256, v2_y_256);
+  __m256 v1_v2_z_256 = _mm256_add_ps(v1_z_256, v2_z_256);
+
+  /* Calculate triangle edges. */
+  __m256 e0_x_256 = _mm256_sub_ps(v2_x_256, v0_x_256);
+  __m256 e0_y_256 = _mm256_sub_ps(v2_y_256, v0_y_256);
+  __m256 e0_z_256 = _mm256_sub_ps(v2_z_256, v0_z_256);
+
+  __m256 e1_x_256 = _mm256_sub_ps(v0_x_256, v1_x_256);
+  __m256 e1_y_256 = _mm256_sub_ps(v0_y_256, v1_y_256);
+  __m256 e1_z_256 = _mm256_sub_ps(v0_z_256, v1_z_256);
+
+  __m256 e2_x_256 = _mm256_sub_ps(v1_x_256, v2_x_256);
+  __m256 e2_y_256 = _mm256_sub_ps(v1_y_256, v2_y_256);
+  __m256 e2_z_256 = _mm256_sub_ps(v1_z_256, v2_z_256);
+
+  /* Perform edge tests. */
+  /* cross (AyBz - AzBy, AzBx -AxBz,  AxBy - AyBx) */
+  __m256 U_x_256 = cross256(v0_v2_y_256, e0_z_256, v0_v2_z_256, e0_y_256);
+  __m256 U_y_256 = cross256(v0_v2_z_256, e0_x_256, v0_v2_x_256, e0_z_256);
+  __m256 U_z_256 = cross256(v0_v2_x_256, e0_y_256, v0_v2_y_256, e0_x_256);
+  /* vertical dot */
+  __m256 U_256 = _mm256_mul_ps(U_x_256, dirx256);
+  U_256 = _mm256_fmadd_ps(U_y_256, diry256, U_256);
+  U_256 = _mm256_fmadd_ps(U_z_256, dirz256, U_256);
+
+  __m256 V_x_256 = cross256(v0_v1_y_256, e1_z_256, v0_v1_z_256, e1_y_256);
+  __m256 V_y_256 = cross256(v0_v1_z_256, e1_x_256, v0_v1_x_256, e1_z_256);
+  __m256 V_z_256 = cross256(v0_v1_x_256, e1_y_256, v0_v1_y_256, e1_x_256);
+  /* vertical dot */
+  __m256 V_256 = _mm256_mul_ps(V_x_256, dirx256);
+  V_256 = _mm256_fmadd_ps(V_y_256, diry256, V_256);
+  V_256 = _mm256_fmadd_ps(V_z_256, dirz256, V_256);
+
+  __m256 W_x_256 = cross256(v1_v2_y_256, e2_z_256, v1_v2_z_256, e2_y_256);
+  __m256 W_y_256 = cross256(v1_v2_z_256, e2_x_256, v1_v2_x_256, e2_z_256);
+  __m256 W_z_256 = cross256(v1_v2_x_256, e2_y_256, v1_v2_y_256, e2_x_256);
+  /* vertical dot */
+  __m256 W_256 = _mm256_mul_ps(W_x_256, dirx256);
+  W_256 = _mm256_fmadd_ps(W_y_256, diry256, W_256);
+  W_256 = _mm256_fmadd_ps(W_z_256, dirz256, W_256);
+
+  __m256i U_256_1 = _mm256_srli_epi32(_mm256_castps_si256(U_256), 31);
+  __m256i V_256_1 = _mm256_srli_epi32(_mm256_castps_si256(V_256), 31);
+  __m256i W_256_1 = _mm256_srli_epi32(_mm256_castps_si256(W_256), 31);
+  __m256i UVW_256_1 = _mm256_add_epi32(_mm256_add_epi32(U_256_1, V_256_1), W_256_1);
+
+  const __m256i one256 = _mm256_set1_epi32(1);
+  const __m256i two256 = _mm256_set1_epi32(2);
+
+  __m256i mask_minmaxUVW_256 = _mm256_or_si256(_mm256_cmpeq_epi32(one256, UVW_256_1),
+                                               _mm256_cmpeq_epi32(two256, UVW_256_1));
+
+  unsigned char mask_minmaxUVW_pos = _mm256_movemask_ps(_mm256_castsi256_ps(mask_minmaxUVW_256));
+  if ((mask_minmaxUVW_pos & prim_num_mask) == prim_num_mask) {  //all bits set
+    return false;
+  }
+
+  /* Calculate geometry normal and denominator. */
+  __m256 Ng1_x_256 = cross256(e1_y_256, e0_z_256, e1_z_256, e0_y_256);
+  __m256 Ng1_y_256 = cross256(e1_z_256, e0_x_256, e1_x_256, e0_z_256);
+  __m256 Ng1_z_256 = cross256(e1_x_256, e0_y_256, e1_y_256, e0_x_256);
+
+  Ng1_x_256 = _mm256_add_ps(Ng1_x_256, Ng1_x_256);
+  Ng1_y_256 = _mm256_add_ps(Ng1_y_256, Ng1_y_256);
+  Ng1_z_256 = _mm256_add_ps(Ng1_z_256, Ng1_z_256);
+
+  /* vertical dot */
+  __m256 den_256 = _mm256_mul_ps(Ng1_x_256, dirx256);
+  den_256 = _mm256_fmadd_ps(Ng1_y_256, diry256, den_256);
+  den_256 = _mm256_fmadd_ps(Ng1_z_256, dirz256, den_256);
+
+  /* Perform depth test. */
+  __m256 T_256 = _mm256_mul_ps(Ng1_x_256, v0_x_256);
+  T_256 = _mm256_fmadd_ps(Ng1_y_256, v0_y_256, T_256);
+  T_256 = _mm256_fmadd_ps(Ng1_z_256, v0_z_256, T_256);
+
+  const __m256i c0x80000000 = _mm256_set1_epi32(0x80000000);
+  __m256i sign_den_256 = _mm256_and_si256(_mm256_castps_si256(den_256), c0x80000000);
+
+  __m256 sign_T_256 = _mm256_castsi256_ps(
+      _mm256_xor_si256(_mm256_castps_si256(T_256), sign_den_256));
+
+  unsigned char mask_sign_T = _mm256_movemask_ps(sign_T_256);
+  if (((mask_minmaxUVW_pos | mask_sign_T) & prim_num_mask) == prim_num_mask) {
+    return false;
+  }
+
+  __m256 xor_signmask_256 = _mm256_castsi256_ps(
+      _mm256_xor_si256(_mm256_castps_si256(den_256), sign_den_256));
+
+  ccl_align(32) float den8[8], U8[8], V8[8], T8[8], sign_T8[8], xor_signmask8[8];
+  ccl_align(32) unsigned int mask_minmaxUVW8[8];
+
+  if (visibility == PATH_RAY_SHADOW_OPAQUE) {
+    __m256i mask_final_256 = _mm256_cmpeq_epi32(mask_minmaxUVW_256, zero256);
+    __m256i maskden256 = _mm256_cmpeq_epi32(_mm256_castps_si256(den_256), zero256);
+    __m256i mask0 = _mm256_cmpgt_epi32(zero256, _mm256_castps_si256(sign_T_256));
+    __m256 rayt_256 = _mm256_set1_ps((*isect)->t);
+    __m256i mask1 = _mm256_cmpgt_epi32(
+        _mm256_castps_si256(sign_T_256),
+        _mm256_castps_si256(_mm256_mul_ps(
+            _mm256_castsi256_ps(_mm256_xor_si256(_mm256_castps_si256(den_256), sign_den_256)),
+            rayt_256)));
+    mask0 = _mm256_or_si256(mask1, mask0);
+    mask_final_256 = _mm256_andnot_si256(mask0, mask_final_256);  //(~mask_minmaxUVW_pos) &(~mask)
+    mask_final_256 = _mm256_andnot_si256(
+        maskden256, mask_final_256);  //(~mask_minmaxUVW_pos) &(~mask) & (~maskden)
+    unsigned char mask_final = _mm256_movemask_ps(_mm256_castsi256_ps(mask_final_256));
+    if ((mask_final & prim_num_mask) == 0) {
+      return false;
+    }
+    const int i = __bsf(mask_final);
+    __m256 inv_den_256 = _mm256_rcp_ps(den_256);
+    U_256 = _mm256_mul_ps(U_256, inv_den_256);
+    V_256 = _mm256_mul_ps(V_256, inv_den_256);
+    T_256 = _mm256_mul_ps(T_256, inv_den_256);
+    _mm256_store_ps(U8, U_256);
+    _mm256_store_ps(V8, V_256);
+    _mm256_store_ps(T8, T_256);
+    /* NOTE: Here we assume visibility for all triangles in the node is
+     * the same. */
+    (*isect)->u = U8[i];
+    (*isect)->v = V8[i];
+    (*isect)->t = T8[i];
+    (*isect)->prim = (prim_addr + i);
+    (*isect)->object = object;
+    (*isect)->type = PRIMITIVE_TRIANGLE;
+    return true;
+  }
+  else {
+    _mm256_store_ps(den8, den_256);
+    _mm256_store_ps(U8, U_256);
+    _mm256_store_ps(V8, V_256);
+    _mm256_store_ps(T8, T_256);
+
+    _mm256_store_ps(sign_T8, sign_T_256);
+    _mm256_store_ps(xor_signmask8, xor_signmask_256);
+    _mm256_store_si256((__m256i *)mask_minmaxUVW8, mask_minmaxUVW_256);
+
+    int ret = false;
+
+    if (visibility == PATH_RAY_SHADOW) {
+      for (int i = 0; i < prim_num; i++) {
+        if (mask_minmaxUVW8[i]) {
+          continue;
+        }
+#  ifdef __VISIBILITY_FLAG__
+        if ((kernel_tex_fetch(__prim_visibility, (prim_addr + i)) & visibility) == 0) {
+          continue;
+        }
+#  endif
+        if ((sign_T8[i] < 0.0f) || (sign_T8[i] > (*isect)->t * xor_signmask8[i])) {
+          continue;
+        }
+        if (!den8[i]) {
+          continue;
+        }
+        const float inv_den = 1.0f / den8[i];
+        (*isect)->u = U8[i] * inv_den;
+        (*isect)->v = V8[i] * inv_den;
+        (*isect)->t = T8[i] * inv_den;
+        (*isect)->prim = (prim_addr + i);
+        (*isect)->object = object;
+        (*isect)->type = PRIMITIVE_TRIANGLE;
+        const int prim = kernel_tex_fetch(__prim_index, (*isect)->prim);
+        int shader = 0;
+#  ifdef __HAIR__
+        if (kernel_tex_fetch(__prim_type, (*isect)->prim) & PRIMITIVE_ALL_TRIANGLE)
+#  endif
+        {
+          shader = kernel_tex_fetch(__tri_shader, prim);
+        }
+#  ifdef __HAIR__
+        else {
+          float4 str = kernel_tex_fetch(__curves, prim);
+          shader = __float_as_int(str.z);
+        }
+#  endif
+        const int flag = kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags;
+        /* If no transparent shadows, all light is blocked. */
+        if (!(flag & SD_HAS_TRANSPARENT_SHADOW)) {
+          return 2;
+        }
+        /* If maximum number of hits reached, block all light. */
+        else if (num_hits == NULL || *num_hits == max_hits) {
+          return 2;
+        }
+        /* Move on to next entry in intersections array. */
+        ret = true;
+        (*isect)++;
+        (*num_hits)++;
+        (*num_hits_in_instance)++;
+        (*isect)->t = isect_t;
+      }
+    }
+    else {
+      for (int i = 0; i < prim_num; i++) {
+        if (mask_minmaxUVW8[i]) {
+          continue;
+        }
+#  ifdef __VISIBILITY_FLAG__
+        if ((kernel_tex_fetch(__prim_visibility, (prim_addr + i)) & visibility) == 0) {
+          continue;
+        }
+#  endif
+        if ((sign_T8[i] < 0.0f) || (sign_T8[i] > (*isect)->t * xor_signmask8[i])) {
+          continue;
+        }
+        if (!den8[i]) {
+          continue;
+        }
+        const float inv_den = 1.0f / den8[i];
+        (*isect)->u = U8[i] * inv_den;
+        (*isect)->v = V8[i] * inv_den;
+        (*isect)->t = T8[i] * inv_den;
+        (*isect)->prim = (prim_addr + i);
+        (*isect)->object = object;
+        (*isect)->type = PRIMITIVE_TRIANGLE;
+        ret = true;
+      }
+    }
+    return ret;
+  }
 }
 
-ccl_device_inline int triangle_intersect8(
-        KernelGlobals *kg,
-        Intersection **isect,
-        float3 P,
-        float3 dir,
-        uint visibility,
-        int object,
-        int prim_addr,
-        int prim_num,
-        uint *num_hits,
-        uint max_hits,
-        int *num_hits_in_instance,
-        float isect_t)
- {
-	__m128 tri_a[8], tri_b[8], tri_c[8];
-	__m256  tritmp[12], tri[12];
-	__m256  triA[3], triB[3], triC[3];
-
-	int i, r;
-
-	uint tri_vindex = kernel_tex_fetch(__prim_tri_index, prim_addr);
-	for(i = 0; i < prim_num; i++) {
-		tri_a[i] = *(__m128*)&kg->__prim_tri_verts.data[tri_vindex++];
-		tri_b[i] = *(__m128*)&kg->__prim_tri_verts.data[tri_vindex++];
-		tri_c[i] = *(__m128*)&kg->__prim_tri_verts.data[tri_vindex++];
-	 }
-	//create 9 or  12 placeholders
-	tri[0] = _mm256_castps128_ps256(tri_a[0]);    //_mm256_zextps128_ps256
-	tri[1] = _mm256_castps128_ps256(tri_b[0]);//_mm256_zextps128_ps256
-	tri[2] = _mm256_castps128_ps256(tri_c[0]);//_mm256_zextps128_ps256
-
-	tri[3] = _mm256_castps128_ps256(tri_a[1]);    //_mm256_zextps128_ps256
-	tri[4] = _mm256_castps128_ps256(tri_b[1]);//_mm256_zextps128_ps256
-	tri[5] = _mm256_castps128_ps256(tri_c[1]);//_mm256_zextps128_ps256
-
-	tri[6] = _mm256_castps128_ps256(tri_a[2]);    //_mm256_zextps128_ps256
-	tri[7] = _mm256_castps128_ps256(tri_b[2]);//_mm256_zextps128_ps256
-	tri[8] = _mm256_castps128_ps256(tri_c[2]);//_mm256_zextps128_ps256
-
-	if(prim_num > 3) {
-		tri[9] =  _mm256_castps128_ps256(tri_a[3]);    //_mm256_zextps128_ps256
-		tri[10] = _mm256_castps128_ps256(tri_b[3]);//_mm256_zextps128_ps256
-		tri[11] = _mm256_castps128_ps256(tri_c[3]);//_mm256_zextps128_ps256
-	}
-
-	for(i = 4, r = 0; i < prim_num; i ++, r += 3) {
-		tri[r] =     _mm256_insertf128_ps(tri[r] , tri_a[i], 1);
-		tri[r + 1] = _mm256_insertf128_ps(tri[r + 1], tri_b[i], 1);
-		tri[r + 2] = _mm256_insertf128_ps(tri[r + 2], tri_c[i], 1);
-	 }
-
-	//------------------------------------------------
-	//0!  Xa0 Ya0 Za0 1 Xa4 Ya4 Za4  1
-	//1!  Xb0 Yb0 Zb0 1 Xb4 Yb4 Zb4 1
-	//2!  Xc0 Yc0 Zc0 1 Xc4 Yc4 Zc4 1
-
-	//3!  Xa1 Ya1 Za1 1 Xa5 Ya5 Za5 1
-	//4!  Xb1 Yb1 Zb1 1 Xb5 Yb5 Zb5  1
-	//5!  Xc1 Yc1 Zc1 1 Xc5 Yc5 Zc5 1
-
-	//6!  Xa2 Ya2 Za2 1 Xa6 Ya6 Za6 1
-	//7!  Xb2 Yb2 Zb2 1 Xb6 Yb6 Zb6  1
-	//8!  Xc2 Yc2 Zc2 1 Xc6 Yc6 Zc6 1
-
-	//9!  Xa3 Ya3 Za3 1 Xa7 Ya7 Za7  1
-	//10! Xb3 Yb3 Zb3 1 Xb7 Yb7 Zb7  1
-	//11! Xc3 Yc3 Zc3 1 Xc7 Yc7 Zc7  1
-
-	//"transpose"
-	tritmp[0] = _mm256_unpacklo_ps(tri[0], tri[3]);   //0!  Xa0 Xa1 Ya0 Ya1 Xa4 Xa5 Ya4 Ya5
-	tritmp[1] = _mm256_unpackhi_ps(tri[0], tri[3]);   //1!  Za0 Za1 1   1   Za4 Za5  1   1
-
-	tritmp[2] = _mm256_unpacklo_ps(tri[6], tri[9]);   //2!  Xa2 Xa3 Ya2 Ya3 Xa6 Xa7 Ya6 Ya7
-	tritmp[3] = _mm256_unpackhi_ps(tri[6], tri[9]);   //3!  Za2 Za3  1   1  Za6 Za7  1   1
-
-	tritmp[4] = _mm256_unpacklo_ps(tri[1], tri[4]);   //4!  Xb0 Xb1 Yb0 Yb1 Xb4 Xb5 Yb4 Yb5
-	tritmp[5] = _mm256_unpackhi_ps(tri[1], tri[4]);   //5!  Zb0 Zb1  1  1   Zb4 Zb5  1   1
-
-	tritmp[6] = _mm256_unpacklo_ps(tri[7], tri[10]);  //6!  Xb2 Xb3 Yb2 Yb3 Xb6 Xb7 Yb6 Yb7
-	tritmp[7] = _mm256_unpackhi_ps(tri[7], tri[10]);  //7!  Zb2 Zb3  1    1 Zb6 Zb7  1   1
-
-	tritmp[8] = _mm256_unpacklo_ps(tri[2], tri[5]);   //8!  Xc0 Xc1 Yc0 Yc1 Xc4 Xc5 Yc4 Yc5
-	tritmp[9] = _mm256_unpackhi_ps(tri[2], tri[5]);   //9!  Zc0 Zc1  1   1  Zc4 Zc5  1   1
-
-	tritmp[10] = _mm256_unpacklo_ps(tri[8], tri[11]); //10! Xc2 Xc3 Yc2 Yc3 Xc6 Xc7 Yc6 Yc7
-	tritmp[11] = _mm256_unpackhi_ps(tri[8], tri[11]); //11! Zc2 Zc3  1   1  Zc6 Zc7  1   1
-
-				/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
-	triA[0] = _mm256_castpd_ps(_mm256_unpacklo_pd(_mm256_castps_pd(tritmp[0]), _mm256_castps_pd(tritmp[2]))); 	//  Xa0 Xa1 Xa2 Xa3 Xa4 Xa5 Xa6 Xa7
-	triA[1] = _mm256_castpd_ps(_mm256_unpackhi_pd(_mm256_castps_pd(tritmp[0]), _mm256_castps_pd(tritmp[2])));   //  Ya0 Ya1 Ya2 Ya3 Ya4 Ya5 Ya6 Ya7
-	triA[2] = _mm256_castpd_ps(_mm256_unpacklo_pd(_mm256_castps_pd(tritmp[1]), _mm256_castps_pd(tritmp[3])));   //  Za0 Za1 Za2 Za3 Za4 Za5 Za6 Za7
-
-	triB[0] = _mm256_castpd_ps(_mm256_unpacklo_pd(_mm256_castps_pd(tritmp[4]), _mm256_castps_pd(tritmp[6]))); 	//  Xb0 Xb1  Xb2 Xb3 Xb4 Xb5 Xb5 Xb7
-	triB[1] = _mm256_castpd_ps(_mm256_unpackhi_pd(_mm256_castps_pd(tritmp[4]), _mm256_castps_pd(tritmp[6])));   //  Yb0 Yb1  Yb2 Yb3 Yb4 Yb5 Yb5 Yb7
-	triB[2] = _mm256_castpd_ps(_mm256_unpacklo_pd(_mm256_castps_pd(tritmp[5]), _mm256_castps_pd(tritmp[7]))); //    Zb0 Zb1  Zb2 Zb3 Zb4 Zb5 Zb5 Zb7
-
-	triC[0] = _mm256_castpd_ps(_mm256_unpacklo_pd(_mm256_castps_pd(tritmp[8]), _mm256_castps_pd(tritmp[10])));     //Xc0 Xc1 Xc2 Xc3 Xc4 Xc5 Xc6 Xc7
-	triC[1] = _mm256_castpd_ps(_mm256_unpackhi_pd(_mm256_castps_pd(tritmp[8]), _mm256_castps_pd(tritmp[10])));     //Yc0 Yc1 Yc2 Yc3 Yc4 Yc5 Yc6 Yc7
-	triC[2] = _mm256_castpd_ps(_mm256_unpacklo_pd(_mm256_castps_pd(tritmp[9]), _mm256_castps_pd(tritmp[11])));     //Zc0 Zc1 Zc2 Zc3 Zc4 Zc5 Zc6 Zc7
-
-			  /*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
-
-	int result = ray_triangle_intersect8(kg, P,
-	                                     dir,
-	                                     isect,
-	                                     visibility, object,
-	                                     triA,
-	                                     triB,
-	                                     triC,
-	                                     prim_addr,
-	                                     prim_num,
-	                                     num_hits,
-	                                     max_hits,
-	                                     num_hits_in_instance,
-	                                     isect_t);
-	return result;
+ccl_device_inline int triangle_intersect8(KernelGlobals *kg,
+                                          Intersection **isect,
+                                          float3 P,
+                                          float3 dir,
+                                          uint visibility,
+                                          int object,
+                                          int prim_addr,
+                                          int prim_num,
+                                          uint *num_hits,
+                                          uint max_hits,
+                                          int *num_hits_in_instance,
+                                          float isect_t)
+{
+  __m128 tri_a[8], tri_b[8], tri_c[8];
+  __m256 tritmp[12], tri[12];
+  __m256 triA[3], triB[3], triC[3];
+
+  int i, r;
+
+  uint tri_vindex = kernel_tex_fetch(__prim_tri_index, prim_addr);
+  for (i = 0; i < prim_num; i++) {
+    tri_a[i] = *(__m128 *)&kg->__prim_tri_verts.data[tri_vindex++];
+    tri_b[i] = *(__m128 *)&kg->__prim_tri_verts.data[tri_vindex++];
+    tri_c[i] = *(__m128 *)&kg->__prim_tri_verts.data[tri_vindex++];
+  }
+  //create 9 or  12 placeholders
+  tri[0] = _mm256_castps128_ps256(tri_a[0]);  //_mm256_zextps128_ps256
+  tri[1] = _mm256_castps128_ps256(tri_b[0]);  //_mm256_zextps128_ps256
+  tri[2] = _mm256_castps128_ps256(tri_c[0]);  //_mm256_zextps128_ps256
+
+  tri[3] = _mm256_castps128_ps256(tri_a[1]);  //_mm256_zextps128_ps256
+  tri[4] = _mm256_castps128_ps256(tri_b[1]);  //_mm256_zextps128_ps256
+  tri[5] = _mm256_castps128_ps256(tri_c[1]);  //_mm256_zextps128_ps256
+
+  tri[6] = _mm256_castps128_ps256(tri_a[2]);  //_mm256_zextps128_ps256
+  tri[7] = _mm256_castps128_ps256(tri_b[2]);  //_mm256_zextps128_ps256
+  tri[8] = _mm256_castps128_ps256(tri_c[2]);  //_mm256_zextps128_ps256
+
+  if (prim_num > 3) {
+    tri[9] = _mm256_castps128_ps256(tri_a[3]);   //_mm256_zextps128_ps256
+    tri[10] = _mm256_castps128_ps256(tri_b[3]);  //_mm256_zextps128_ps256
+    tri[11] = _mm256_castps128_ps256(tri_c[3]);  //_mm256_zextps128_ps256
+  }
+
+  for (i = 4, r = 0; i < prim_num; i++, r += 3) {
+    tri[r] = _mm256_insertf128_ps(tri[r], tri_a[i], 1);
+    tri[r + 1] = _mm256_insertf128_ps(tri[r + 1], tri_b[i], 1);
+    tri[r + 2] = _mm256_insertf128_ps(tri[r + 2], tri_c[i], 1);
+  }
+
+  //------------------------------------------------
+  //0!  Xa0 Ya0 Za0 1 Xa4 Ya4 Za4  1
+  //1!  Xb0 Yb0 Zb0 1 Xb4 Yb4 Zb4 1
+  //2!  Xc0 Yc0 Zc0 1 Xc4 Yc4 Zc4 1
+
+  //3!  Xa1 Ya1 Za1 1 Xa5 Ya5 Za5 1
+  //4!  Xb1 Yb1 Zb1 1 Xb5 Yb5 Zb5  1
+  //5!  Xc1 Yc1 Zc1 1 Xc5 Yc5 Zc5 1
+
+  //6!  Xa2 Ya2 Za2 1 Xa6 Ya6 Za6 1
+  //7!  Xb2 Yb2 Zb2 1 Xb6 Yb6 Zb6  1
+  //8!  Xc2 Yc2 Zc2 1 Xc6 Yc6 Zc6 1
+
+  //9!  Xa3 Ya3 Za3 1 Xa7 Ya7 Za7  1
+  //10! Xb3 Yb3 Zb3 1 Xb7 Yb7 Zb7  1
+  //11! Xc3 Yc3 Zc3 1 Xc7 Yc7 Zc7  1
+
+  //"transpose"
+  tritmp[0] = _mm256_unpacklo_ps(tri[0], tri[3]);  //0!  Xa0 Xa1 Ya0 Ya1 Xa4 Xa5 Ya4 Ya5
+  tritmp[1] = _mm256_unpackhi_ps(tri[0], tri[3]);  //1!  Za0 Za1 1   1   Za4 Za5  1   1
+
+  tritmp[2] = _mm256_unpacklo_ps(tri[6], tri[9]);  //2!  Xa2 Xa3 Ya2 Ya3 Xa6 Xa7 Ya6 Ya7
+  tritmp[3] = _mm256_unpackhi_ps(tri[6], tri[9]);  //3!  Za2 Za3  1   1  Za6 Za7  1   1
+
+  tritmp[4] = _mm256_unpacklo_ps(tri[1], tri[4]);  //4!  Xb0 Xb1 Yb0 Yb1 Xb4 Xb5 Yb4 Yb5
+  tritmp[5] = _mm256_unpackhi_ps(tri[1], tri[4]);  //5!  Zb0 Zb1  1  1   Zb4 Zb5  1   1
+
+  tritmp[6] = _mm256_unpacklo_ps(tri[7], tri[10]);  //6!  Xb2 Xb3 Yb2 Yb3 Xb6 Xb7 Yb6 Yb7
+  tritmp[7] = _mm256_unpackhi_ps(tri[7], tri[10]);  //7!  Zb2 Zb3  1    1 Zb6 Zb7  1   1
+
+  tritmp[8] = _mm256_unpacklo_ps(tri[2], tri[5]);  //8!  Xc0 Xc1 Yc0 Yc1 Xc4 Xc5 Yc4 Yc5
+  tritmp[9] = _mm256_unpackhi_ps(tri[2], tri[5]);  //9!  Zc0 Zc1  1   1  Zc4 Zc5  1   1
+
+  tritmp[10] = _mm256_unpacklo_ps(tri[8], tri[11]);  //10! Xc2 Xc3 Yc2 Yc3 Xc6 Xc7 Yc6 Yc7
+  tritmp[11] = _mm256_unpackhi_ps(tri[8], tri[11]);  //11! Zc2 Zc3  1   1  Zc6 Zc7  1   1
+
+  /*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
+  triA[0] = _mm256_castpd_ps(
+      _mm256_unpacklo_pd(_mm256_castps_pd(tritmp[0]),
+                         _mm256_castps_pd(tritmp[2])));  //  Xa0 Xa1 Xa2 Xa3 Xa4 Xa5 Xa6 Xa7
+  triA[1] = _mm256_castpd_ps(
+      _mm256_unpackhi_pd(_mm256_castps_pd(tritmp[0]),
+                         _mm256_castps_pd(tritmp[2])));  //  Ya0 Ya1 Ya2 Ya3 Ya4 Ya5 Ya6 Ya7
+  triA[2] = _mm256_castpd_ps(
+      _mm256_unpacklo_pd(_mm256_castps_pd(tritmp[1]),
+                         _mm256_castps_pd(tritmp[3])));  //  Za0 Za1 Za2 Za3 Za4 Za5 Za6 Za7
+
+  triB[0] = _mm256_castpd_ps(
+      _mm256_unpacklo_pd(_mm256_castps_pd(tritmp[4]),
+                         _mm256_castps_pd(tritmp[6])));  //  Xb0 Xb1  Xb2 Xb3 Xb4 Xb5 Xb6 Xb7
+  triB[1] = _mm256_castpd_ps(
+      _mm256_unpackhi_pd(_mm256_castps_pd(tritmp[4]),
+                         _mm256_castps_pd(tritmp[6])));  //  Yb0 Yb1  Yb2 Yb3 Yb4 Yb5 Yb6 Yb7
+  triB[2] = _mm256_castpd_ps(
+      _mm256_unpacklo_pd(_mm256_castps_pd(tritmp[5]),
+                         _mm256_castps_pd(tritmp[7])));  //    Zb0 Zb1  Zb2 Zb3 Zb4 Zb5 Zb6 Zb7
+
+  triC[0] = _mm256_castpd_ps(
+      _mm256_unpacklo_pd(_mm256_castps_pd(tritmp[8]),
+                         _mm256_castps_pd(tritmp[10])));  //Xc0 Xc1 Xc2 Xc3 Xc4 Xc5 Xc6 Xc7
+  triC[1] = _mm256_castpd_ps(
+      _mm256_unpackhi_pd(_mm256_castps_pd(tritmp[8]),
+                         _mm256_castps_pd(tritmp[10])));  //Yc0 Yc1 Yc2 Yc3 Yc4 Yc5 Yc6 Yc7
+  triC[2] = _mm256_castpd_ps(
+      _mm256_unpacklo_pd(_mm256_castps_pd(tritmp[9]),
+                         _mm256_castps_pd(tritmp[11])));  //Zc0 Zc1 Zc2 Zc3 Zc4 Zc5 Zc6 Zc7
+
+  /*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
+
+  int result = ray_triangle_intersect8(kg,
+                                       P,
+                                       dir,
+                                       isect,
+                                       visibility,
+                                       object,
+                                       triA,
+                                       triB,
+                                       triC,
+                                       prim_addr,
+                                       prim_num,
+                                       num_hits,
+                                       max_hits,
+                                       num_hits_in_instance,
+                                       isect_t);
+  return result;
 }
 
-#endif  /* __KERNEL_AVX2__ */
+#endif /* __KERNEL_AVX2__ */
 
 /* Special ray intersection routines for subsurface scattering. In that case we
  * only want to intersect with primitives in the same object, and if case of
@@ -479,106 +496,108 @@ ccl_device_inline int triangle_intersect8(
  */
 
 #ifdef __BVH_LOCAL__
-ccl_device_inline bool triangle_intersect_local(
-        KernelGlobals *kg,
-        LocalIntersection *local_isect,
-        float3 P,
-        float3 dir,
-        int object,
-        int local_object,
-        int prim_addr,
-        float tmax,
-        uint *lcg_state,
-        int max_hits)
+ccl_device_inline bool triangle_intersect_local(KernelGlobals *kg,
+                                                LocalIntersection *local_isect,
+                                                float3 P,
+                                                float3 dir,
+                                                int object,
+                                                int local_object,
+                                                int prim_addr,
+                                                float tmax,
+                                                uint *lcg_state,
+                                                int max_hits)
 {
-	/* Only intersect with matching object, for instanced objects we
-	 * already know we are only intersecting the right object. */
-	if(object == OBJECT_NONE) {
-		if(kernel_tex_fetch(__prim_object, prim_addr) != local_object) {
-			return false;
-		}
-	}
-
-	const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, prim_addr);
-#if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__)
-	const ssef *ssef_verts = (ssef*)&kg->__prim_tri_verts.data[tri_vindex];
-#else
-	const float3 tri_a = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex+0)),
-	             tri_b = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex+1)),
-	             tri_c = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex+2));
-#endif
-	float t, u, v;
-	if(!ray_triangle_intersect(P,
-	                           dir,
-	                           tmax,
-#if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__)
-	                           ssef_verts,
-#else
-	                           tri_a, tri_b, tri_c,
-#endif
-	                           &u, &v, &t))
-	{
-		return false;
-	}
-
-	/* If no actual hit information is requested, just return here. */
-	if(max_hits == 0) {
-		return true;
-	}
-
-	int hit;
-	if(lcg_state) {
-		/* Record up to max_hits intersections. */
-		for(int i = min(max_hits, local_isect->num_hits) - 1; i >= 0; --i) {
-			if(local_isect->hits[i].t == t) {
-				return false;
-			}
-		}
-
-		local_isect->num_hits++;
-
-		if(local_isect->num_hits <= max_hits) {
-			hit = local_isect->num_hits - 1;
-		}
-		else {
-			/* reservoir sampling: if we are at the maximum number of
-			 * hits, randomly replace element or skip it */
-			hit = lcg_step_uint(lcg_state) % local_isect->num_hits;
-
-			if(hit >= max_hits)
-				return false;
-		}
-	}
-	else {
-		/* Record closest intersection only. */
-		if(local_isect->num_hits && t > local_isect->hits[0].t) {
-			return false;
-		}
-
-		hit = 0;
-		local_isect->num_hits = 1;
-	}
-
-	/* Record intersection. */
-	Intersection *isect = &local_isect->hits[hit];
-	isect->prim = prim_addr;
-	isect->object = object;
-	isect->type = PRIMITIVE_TRIANGLE;
-	isect->u = u;
-	isect->v = v;
-	isect->t = t;
-
-	/* Record geometric normal. */
-#if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__)
-	const float3 tri_a = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex+0)),
-	             tri_b = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex+1)),
-	             tri_c = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex+2));
-#endif
-	local_isect->Ng[hit] = normalize(cross(tri_b - tri_a, tri_c - tri_a));
+  /* Only intersect with matching object, for instanced objects we
+   * already know we are only intersecting the right object. */
+  if (object == OBJECT_NONE) {
+    if (kernel_tex_fetch(__prim_object, prim_addr) != local_object) {
+      return false;
+    }
+  }
+
+  const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, prim_addr);
+#  if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__)
+  const ssef *ssef_verts = (ssef *)&kg->__prim_tri_verts.data[tri_vindex];
+#  else
+  const float3 tri_a = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 0)),
+               tri_b = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 1)),
+               tri_c = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 2));
+#  endif
+  float t, u, v;
+  if (!ray_triangle_intersect(P,
+                              dir,
+                              tmax,
+#  if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__)
+                              ssef_verts,
+#  else
+                              tri_a,
+                              tri_b,
+                              tri_c,
+#  endif
+                              &u,
+                              &v,
+                              &t)) {
+    return false;
+  }
+
+  /* If no actual hit information is requested, just return here. */
+  if (max_hits == 0) {
+    return true;
+  }
+
+  int hit;
+  if (lcg_state) {
+    /* Record up to max_hits intersections. */
+    for (int i = min(max_hits, local_isect->num_hits) - 1; i >= 0; --i) {
+      if (local_isect->hits[i].t == t) {
+        return false;
+      }
+    }
+
+    local_isect->num_hits++;
+
+    if (local_isect->num_hits <= max_hits) {
+      hit = local_isect->num_hits - 1;
+    }
+    else {
+      /* reservoir sampling: if we are at the maximum number of
+       * hits, randomly replace element or skip it */
+      hit = lcg_step_uint(lcg_state) % local_isect->num_hits;
+
+      if (hit >= max_hits)
+        return false;
+    }
+  }
+  else {
+    /* Record closest intersection only. */
+    if (local_isect->num_hits && t > local_isect->hits[0].t) {
+      return false;
+    }
+
+    hit = 0;
+    local_isect->num_hits = 1;
+  }
+
+  /* Record intersection. */
+  Intersection *isect = &local_isect->hits[hit];
+  isect->prim = prim_addr;
+  isect->object = object;
+  isect->type = PRIMITIVE_TRIANGLE;
+  isect->u = u;
+  isect->v = v;
+  isect->t = t;
+
+  /* Record geometric normal. */
+#  if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__)
+  const float3 tri_a = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 0)),
+               tri_b = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 1)),
+               tri_c = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 2));
+#  endif
+  local_isect->Ng[hit] = normalize(cross(tri_b - tri_a, tri_c - tri_a));
 
-	return false;
+  return false;
 }
-#endif  /* __BVH_LOCAL__ */
+#endif /* __BVH_LOCAL__ */
 
 /* Refine triangle intersection to more precise hit point. For rays that travel
  * far the precision is often not so good, this reintersects the primitive from
@@ -596,61 +615,61 @@ ccl_device_inline float3 triangle_refine(KernelGlobals *kg,
                                          const Intersection *isect,
                                          const Ray *ray)
 {
-	float3 P = ray->P;
-	float3 D = ray->D;
-	float t = isect->t;
+  float3 P = ray->P;
+  float3 D = ray->D;
+  float t = isect->t;
 
 #ifdef __INTERSECTION_REFINE__
-	if(isect->object != OBJECT_NONE) {
-		if(UNLIKELY(t == 0.0f)) {
-			return P;
-		}
+  if (isect->object != OBJECT_NONE) {
+    if (UNLIKELY(t == 0.0f)) {
+      return P;
+    }
 #  ifdef __OBJECT_MOTION__
-		Transform tfm = sd->ob_itfm;
+    Transform tfm = sd->ob_itfm;
 #  else
-		Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM);
+    Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM);
 #  endif
 
-		P = transform_point(&tfm, P);
-		D = transform_direction(&tfm, D*t);
-		D = normalize_len(D, &t);
-	}
-
-	P = P + D*t;
-
-	const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, isect->prim);
-	const float4 tri_a = kernel_tex_fetch(__prim_tri_verts, tri_vindex+0),
-	             tri_b = kernel_tex_fetch(__prim_tri_verts, tri_vindex+1),
-	             tri_c = kernel_tex_fetch(__prim_tri_verts, tri_vindex+2);
-	float3 edge1 = make_float3(tri_a.x - tri_c.x, tri_a.y - tri_c.y, tri_a.z - tri_c.z);
-	float3 edge2 = make_float3(tri_b.x - tri_c.x, tri_b.y - tri_c.y, tri_b.z - tri_c.z);
-	float3 tvec = make_float3(P.x - tri_c.x, P.y - tri_c.y, P.z - tri_c.z);
-	float3 qvec = cross(tvec, edge1);
-	float3 pvec = cross(D, edge2);
-	float det = dot(edge1, pvec);
-	if(det != 0.0f) {
-		/* If determinant is zero it means ray lies in the plane of
-		 * the triangle. It is possible in theory due to watertight
-		 * nature of triangle intersection. For such cases we simply
-		 * don't refine intersection hoping it'll go all fine.
-		 */
-		float rt = dot(edge2, qvec) / det;
-		P = P + D*rt;
-	}
-
-	if(isect->object != OBJECT_NONE) {
+    P = transform_point(&tfm, P);
+    D = transform_direction(&tfm, D * t);
+    D = normalize_len(D, &t);
+  }
+
+  P = P + D * t;
+
+  const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, isect->prim);
+  const float4 tri_a = kernel_tex_fetch(__prim_tri_verts, tri_vindex + 0),
+               tri_b = kernel_tex_fetch(__prim_tri_verts, tri_vindex + 1),
+               tri_c = kernel_tex_fetch(__prim_tri_verts, tri_vindex + 2);
+  float3 edge1 = make_float3(tri_a.x - tri_c.x, tri_a.y - tri_c.y, tri_a.z - tri_c.z);
+  float3 edge2 = make_float3(tri_b.x - tri_c.x, tri_b.y - tri_c.y, tri_b.z - tri_c.z);
+  float3 tvec = make_float3(P.x - tri_c.x, P.y - tri_c.y, P.z - tri_c.z);
+  float3 qvec = cross(tvec, edge1);
+  float3 pvec = cross(D, edge2);
+  float det = dot(edge1, pvec);
+  if (det != 0.0f) {
+    /* If determinant is zero it means ray lies in the plane of
+     * the triangle. It is possible in theory due to watertight
+     * nature of triangle intersection. For such cases we simply
+     * don't refine intersection hoping it'll go all fine.
+     */
+    float rt = dot(edge2, qvec) / det;
+    P = P + D * rt;
+  }
+
+  if (isect->object != OBJECT_NONE) {
 #  ifdef __OBJECT_MOTION__
-		Transform tfm = sd->ob_tfm;
+    Transform tfm = sd->ob_tfm;
 #  else
-		Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM);
+    Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM);
 #  endif
 
-		P = transform_point(&tfm, P);
-	}
+    P = transform_point(&tfm, P);
+  }
 
-	return P;
+  return P;
 #else
-	return P + D*t;
+  return P + D * t;
 #endif
 }
 
@@ -662,61 +681,57 @@ ccl_device_inline float3 triangle_refine_local(KernelGlobals *kg,
                                                const Intersection *isect,
                                                const Ray *ray)
 {
-	float3 P = ray->P;
-	float3 D = ray->D;
-	float t = isect->t;
+  float3 P = ray->P;
+  float3 D = ray->D;
+  float t = isect->t;
 
-	if(isect->object != OBJECT_NONE) {
+  if (isect->object != OBJECT_NONE) {
 #ifdef __OBJECT_MOTION__
-		Transform tfm = sd->ob_itfm;
+    Transform tfm = sd->ob_itfm;
 #else
-		Transform tfm = object_fetch_transform(kg,
-		                                       isect->object,
-		                                       OBJECT_INVERSE_TRANSFORM);
+    Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM);
 #endif
 
-		P = transform_point(&tfm, P);
-		D = transform_direction(&tfm, D);
-		D = normalize(D);
-	}
+    P = transform_point(&tfm, P);
+    D = transform_direction(&tfm, D);
+    D = normalize(D);
+  }
 
-	P = P + D*t;
+  P = P + D * t;
 
 #ifdef __INTERSECTION_REFINE__
-	const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, isect->prim);
-	const float4 tri_a = kernel_tex_fetch(__prim_tri_verts, tri_vindex+0),
-	             tri_b = kernel_tex_fetch(__prim_tri_verts, tri_vindex+1),
-	             tri_c = kernel_tex_fetch(__prim_tri_verts, tri_vindex+2);
-	float3 edge1 = make_float3(tri_a.x - tri_c.x, tri_a.y - tri_c.y, tri_a.z - tri_c.z);
-	float3 edge2 = make_float3(tri_b.x - tri_c.x, tri_b.y - tri_c.y, tri_b.z - tri_c.z);
-	float3 tvec = make_float3(P.x - tri_c.x, P.y - tri_c.y, P.z - tri_c.z);
-	float3 qvec = cross(tvec, edge1);
-	float3 pvec = cross(D, edge2);
-	float det = dot(edge1, pvec);
-	if(det != 0.0f) {
-		/* If determinant is zero it means ray lies in the plane of
-		 * the triangle. It is possible in theory due to watertight
-		 * nature of triangle intersection. For such cases we simply
-		 * don't refine intersection hoping it'll go all fine.
-		 */
-		float rt = dot(edge2, qvec) / det;
-		P = P + D*rt;
-	}
-#endif  /* __INTERSECTION_REFINE__ */
-
-	if(isect->object != OBJECT_NONE) {
+  const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, isect->prim);
+  const float4 tri_a = kernel_tex_fetch(__prim_tri_verts, tri_vindex + 0),
+               tri_b = kernel_tex_fetch(__prim_tri_verts, tri_vindex + 1),
+               tri_c = kernel_tex_fetch(__prim_tri_verts, tri_vindex + 2);
+  float3 edge1 = make_float3(tri_a.x - tri_c.x, tri_a.y - tri_c.y, tri_a.z - tri_c.z);
+  float3 edge2 = make_float3(tri_b.x - tri_c.x, tri_b.y - tri_c.y, tri_b.z - tri_c.z);
+  float3 tvec = make_float3(P.x - tri_c.x, P.y - tri_c.y, P.z - tri_c.z);
+  float3 qvec = cross(tvec, edge1);
+  float3 pvec = cross(D, edge2);
+  float det = dot(edge1, pvec);
+  if (det != 0.0f) {
+    /* If determinant is zero it means ray lies in the plane of
+     * the triangle. It is possible in theory due to watertight
+     * nature of triangle intersection. For such cases we simply
+     * don't refine intersection hoping it'll go all fine.
+     */
+    float rt = dot(edge2, qvec) / det;
+    P = P + D * rt;
+  }
+#endif /* __INTERSECTION_REFINE__ */
+
+  if (isect->object != OBJECT_NONE) {
 #ifdef __OBJECT_MOTION__
-		Transform tfm = sd->ob_tfm;
+    Transform tfm = sd->ob_tfm;
 #else
-		Transform tfm = object_fetch_transform(kg,
-		                                       isect->object,
-		                                       OBJECT_TRANSFORM);
+    Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM);
 #endif
 
-		P = transform_point(&tfm, P);
-	}
+    P = transform_point(&tfm, P);
+  }
 
-	return P;
+  return P;
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/geom/geom_volume.h b/intern/cycles/kernel/geom/geom_volume.h
index 1977d263ece..96cf35a40dc 100644
--- a/intern/cycles/kernel/geom/geom_volume.h
+++ b/intern/cycles/kernel/geom/geom_volume.h
@@ -33,41 +33,47 @@ ccl_device_inline float3 volume_normalized_position(KernelGlobals *kg,
                                                     const ShaderData *sd,
                                                     float3 P)
 {
-	/* todo: optimize this so it's just a single matrix multiplication when
-	 * possible (not motion blur), or perhaps even just translation + scale */
-	const AttributeDescriptor desc = find_attribute(kg, sd, ATTR_STD_GENERATED_TRANSFORM);
+  /* todo: optimize this so it's just a single matrix multiplication when
+   * possible (not motion blur), or perhaps even just translation + scale */
+  const AttributeDescriptor desc = find_attribute(kg, sd, ATTR_STD_GENERATED_TRANSFORM);
 
-	object_inverse_position_transform(kg, sd, &P);
+  object_inverse_position_transform(kg, sd, &P);
 
-	if(desc.offset != ATTR_STD_NOT_FOUND) {
-		Transform tfm = primitive_attribute_matrix(kg, sd, desc);
-		P = transform_point(&tfm, P);
-	}
+  if (desc.offset != ATTR_STD_NOT_FOUND) {
+    Transform tfm = primitive_attribute_matrix(kg, sd, desc);
+    P = transform_point(&tfm, P);
+  }
 
-	return P;
+  return P;
 }
 
-ccl_device float volume_attribute_float(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc)
+ccl_device float volume_attribute_float(KernelGlobals *kg,
+                                        const ShaderData *sd,
+                                        const AttributeDescriptor desc)
 {
-	float3 P = volume_normalized_position(kg, sd, sd->P);
-	InterpolationType interp = (sd->flag & SD_VOLUME_CUBIC)? INTERPOLATION_CUBIC: INTERPOLATION_NONE;
-	float4 r = kernel_tex_image_interp_3d(kg, desc.offset, P.x, P.y, P.z, interp);
-	return average(float4_to_float3(r));
+  float3 P = volume_normalized_position(kg, sd, sd->P);
+  InterpolationType interp = (sd->flag & SD_VOLUME_CUBIC) ? INTERPOLATION_CUBIC :
+                                                            INTERPOLATION_NONE;
+  float4 r = kernel_tex_image_interp_3d(kg, desc.offset, P.x, P.y, P.z, interp);
+  return average(float4_to_float3(r));
 }
 
-ccl_device float3 volume_attribute_float3(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc)
+ccl_device float3 volume_attribute_float3(KernelGlobals *kg,
+                                          const ShaderData *sd,
+                                          const AttributeDescriptor desc)
 {
-	float3 P = volume_normalized_position(kg, sd, sd->P);
-	InterpolationType interp = (sd->flag & SD_VOLUME_CUBIC)? INTERPOLATION_CUBIC: INTERPOLATION_NONE;
-	float4 r = kernel_tex_image_interp_3d(kg, desc.offset, P.x, P.y, P.z, interp);
+  float3 P = volume_normalized_position(kg, sd, sd->P);
+  InterpolationType interp = (sd->flag & SD_VOLUME_CUBIC) ? INTERPOLATION_CUBIC :
+                                                            INTERPOLATION_NONE;
+  float4 r = kernel_tex_image_interp_3d(kg, desc.offset, P.x, P.y, P.z, interp);
 
-	if(r.w > 1e-6f && r.w != 1.0f) {
-		/* For RGBA colors, unpremultiply after interpolation. */
-		return float4_to_float3(r) / r.w;
-	}
-	else {
-		return float4_to_float3(r);
-	}
+  if (r.w > 1e-6f && r.w != 1.0f) {
+    /* For RGBA colors, unpremultiply after interpolation. */
+    return float4_to_float3(r) / r.w;
+  }
+  else {
+    return float4_to_float3(r);
+  }
 }
 
 #endif
diff --git a/intern/cycles/kernel/kernel.h b/intern/cycles/kernel/kernel.h
index 1c8c91d15e6..dfdd8843f29 100644
--- a/intern/cycles/kernel/kernel.h
+++ b/intern/cycles/kernel/kernel.h
@@ -24,8 +24,8 @@
 
 CCL_NAMESPACE_BEGIN
 
-#define KERNEL_NAME_JOIN(x, y, z) x ## _ ## y ## _ ## z
-#define KERNEL_NAME_EVAL(arch, name)  KERNEL_NAME_JOIN(kernel, arch, name)
+#define KERNEL_NAME_JOIN(x, y, z) x##_##y##_##z
+#define KERNEL_NAME_EVAL(arch, name) KERNEL_NAME_JOIN(kernel, arch, name)
 #define KERNEL_FUNCTION_FULL_NAME(name) KERNEL_NAME_EVAL(KERNEL_ARCH, name)
 
 struct KernelGlobals;
@@ -38,10 +38,7 @@ void *kernel_osl_memory(KernelGlobals *kg);
 bool kernel_osl_use(KernelGlobals *kg);
 
 void kernel_const_copy(KernelGlobals *kg, const char *name, void *host, size_t size);
-void kernel_tex_copy(KernelGlobals *kg,
-                     const char *name,
-                     void *mem,
-                     size_t size);
+void kernel_tex_copy(KernelGlobals *kg, const char *name, void *mem, size_t size);
 
 #define KERNEL_ARCH cpu
 #include "kernel/kernels/cpu/kernel_cpu.h"
@@ -63,4 +60,4 @@ void kernel_tex_copy(KernelGlobals *kg,
 
 CCL_NAMESPACE_END
 
-#endif  /* __KERNEL_H__ */
+#endif /* __KERNEL_H__ */
diff --git a/intern/cycles/kernel/kernel_accumulate.h b/intern/cycles/kernel/kernel_accumulate.h
index 86ad6e1a061..b9d723222a1 100644
--- a/intern/cycles/kernel/kernel_accumulate.h
+++ b/intern/cycles/kernel/kernel_accumulate.h
@@ -21,149 +21,150 @@ CCL_NAMESPACE_BEGIN
  * BSDF evaluation result, split per BSDF type. This is used to accumulate
  * render passes separately. */
 
-ccl_device float3 shader_bsdf_transparency(KernelGlobals *kg,
-                                           const ShaderData *sd);
+ccl_device float3 shader_bsdf_transparency(KernelGlobals *kg, const ShaderData *sd);
 
-ccl_device_inline void bsdf_eval_init(BsdfEval *eval, ClosureType type, float3 value, int use_light_pass)
+ccl_device_inline void bsdf_eval_init(BsdfEval *eval,
+                                      ClosureType type,
+                                      float3 value,
+                                      int use_light_pass)
 {
 #ifdef __PASSES__
-	eval->use_light_pass = use_light_pass;
-
-	if(eval->use_light_pass) {
-		eval->diffuse = make_float3(0.0f, 0.0f, 0.0f);
-		eval->glossy = make_float3(0.0f, 0.0f, 0.0f);
-		eval->transmission = make_float3(0.0f, 0.0f, 0.0f);
-		eval->transparent = make_float3(0.0f, 0.0f, 0.0f);
-		eval->subsurface = make_float3(0.0f, 0.0f, 0.0f);
-		eval->scatter = make_float3(0.0f, 0.0f, 0.0f);
-
-		if(type == CLOSURE_BSDF_TRANSPARENT_ID)
-			eval->transparent = value;
-		else if(CLOSURE_IS_BSDF_DIFFUSE(type))
-			eval->diffuse = value;
-		else if(CLOSURE_IS_BSDF_GLOSSY(type))
-			eval->glossy = value;
-		else if(CLOSURE_IS_BSDF_TRANSMISSION(type))
-			eval->transmission = value;
-		else if(CLOSURE_IS_BSDF_BSSRDF(type))
-			eval->subsurface = value;
-		else if(CLOSURE_IS_PHASE(type))
-			eval->scatter = value;
-	}
-	else
-#endif
-	{
-		eval->diffuse = value;
-	}
+  eval->use_light_pass = use_light_pass;
+
+  if (eval->use_light_pass) {
+    eval->diffuse = make_float3(0.0f, 0.0f, 0.0f);
+    eval->glossy = make_float3(0.0f, 0.0f, 0.0f);
+    eval->transmission = make_float3(0.0f, 0.0f, 0.0f);
+    eval->transparent = make_float3(0.0f, 0.0f, 0.0f);
+    eval->subsurface = make_float3(0.0f, 0.0f, 0.0f);
+    eval->scatter = make_float3(0.0f, 0.0f, 0.0f);
+
+    if (type == CLOSURE_BSDF_TRANSPARENT_ID)
+      eval->transparent = value;
+    else if (CLOSURE_IS_BSDF_DIFFUSE(type))
+      eval->diffuse = value;
+    else if (CLOSURE_IS_BSDF_GLOSSY(type))
+      eval->glossy = value;
+    else if (CLOSURE_IS_BSDF_TRANSMISSION(type))
+      eval->transmission = value;
+    else if (CLOSURE_IS_BSDF_BSSRDF(type))
+      eval->subsurface = value;
+    else if (CLOSURE_IS_PHASE(type))
+      eval->scatter = value;
+  }
+  else
+#endif
+  {
+    eval->diffuse = value;
+  }
 #ifdef __SHADOW_TRICKS__
-	eval->sum_no_mis = make_float3(0.0f, 0.0f, 0.0f);
+  eval->sum_no_mis = make_float3(0.0f, 0.0f, 0.0f);
 #endif
 }
 
-ccl_device_inline void bsdf_eval_accum(BsdfEval *eval, ClosureType type, float3 value, float mis_weight)
+ccl_device_inline void bsdf_eval_accum(BsdfEval *eval,
+                                       ClosureType type,
+                                       float3 value,
+                                       float mis_weight)
 {
 #ifdef __SHADOW_TRICKS__
-	eval->sum_no_mis += value;
+  eval->sum_no_mis += value;
 #endif
-	value *= mis_weight;
+  value *= mis_weight;
 #ifdef __PASSES__
-	if(eval->use_light_pass) {
-		if(CLOSURE_IS_BSDF_DIFFUSE(type))
-			eval->diffuse += value;
-		else if(CLOSURE_IS_BSDF_GLOSSY(type))
-			eval->glossy += value;
-		else if(CLOSURE_IS_BSDF_TRANSMISSION(type))
-			eval->transmission += value;
-		else if(CLOSURE_IS_BSDF_BSSRDF(type))
-			eval->subsurface += value;
-		else if(CLOSURE_IS_PHASE(type))
-			eval->scatter += value;
-
-		/* skipping transparent, this function is used by for eval(), will be zero then */
-	}
-	else
-#endif
-	{
-		eval->diffuse += value;
-	}
+  if (eval->use_light_pass) {
+    if (CLOSURE_IS_BSDF_DIFFUSE(type))
+      eval->diffuse += value;
+    else if (CLOSURE_IS_BSDF_GLOSSY(type))
+      eval->glossy += value;
+    else if (CLOSURE_IS_BSDF_TRANSMISSION(type))
+      eval->transmission += value;
+    else if (CLOSURE_IS_BSDF_BSSRDF(type))
+      eval->subsurface += value;
+    else if (CLOSURE_IS_PHASE(type))
+      eval->scatter += value;
+
+    /* skipping transparent, this function is used for eval(), will be zero then */
+  }
+  else
+#endif
+  {
+    eval->diffuse += value;
+  }
 }
 
 ccl_device_inline bool bsdf_eval_is_zero(BsdfEval *eval)
 {
 #ifdef __PASSES__
-	if(eval->use_light_pass) {
-		return is_zero(eval->diffuse)
-			&& is_zero(eval->glossy)
-			&& is_zero(eval->transmission)
-			&& is_zero(eval->transparent)
-			&& is_zero(eval->subsurface)
-			&& is_zero(eval->scatter);
-	}
-	else
-#endif
-	{
-		return is_zero(eval->diffuse);
-	}
+  if (eval->use_light_pass) {
+    return is_zero(eval->diffuse) && is_zero(eval->glossy) && is_zero(eval->transmission) &&
+           is_zero(eval->transparent) && is_zero(eval->subsurface) && is_zero(eval->scatter);
+  }
+  else
+#endif
+  {
+    return is_zero(eval->diffuse);
+  }
 }
 
 ccl_device_inline void bsdf_eval_mis(BsdfEval *eval, float value)
 {
 #ifdef __PASSES__
-	if(eval->use_light_pass) {
-		eval->diffuse *= value;
-		eval->glossy *= value;
-		eval->transmission *= value;
-		eval->subsurface *= value;
-		eval->scatter *= value;
-
-		/* skipping transparent, this function is used by for eval(), will be zero then */
-	}
-	else
-#endif
-	{
-		eval->diffuse *= value;
-	}
+  if (eval->use_light_pass) {
+    eval->diffuse *= value;
+    eval->glossy *= value;
+    eval->transmission *= value;
+    eval->subsurface *= value;
+    eval->scatter *= value;
+
+    /* skipping transparent, this function is used for eval(), will be zero then */
+  }
+  else
+#endif
+  {
+    eval->diffuse *= value;
+  }
 }
 
 ccl_device_inline void bsdf_eval_mul(BsdfEval *eval, float value)
 {
 #ifdef __SHADOW_TRICKS__
-	eval->sum_no_mis *= value;
+  eval->sum_no_mis *= value;
 #endif
-	bsdf_eval_mis(eval, value);
+  bsdf_eval_mis(eval, value);
 }
 
 ccl_device_inline void bsdf_eval_mul3(BsdfEval *eval, float3 value)
 {
 #ifdef __SHADOW_TRICKS__
-	eval->sum_no_mis *= value;
+  eval->sum_no_mis *= value;
 #endif
 #ifdef __PASSES__
-	if(eval->use_light_pass) {
-		eval->diffuse *= value;
-		eval->glossy *= value;
-		eval->transmission *= value;
-		eval->subsurface *= value;
-		eval->scatter *= value;
-
-		/* skipping transparent, this function is used by for eval(), will be zero then */
-	}
-	else
-		eval->diffuse *= value;
+  if (eval->use_light_pass) {
+    eval->diffuse *= value;
+    eval->glossy *= value;
+    eval->transmission *= value;
+    eval->subsurface *= value;
+    eval->scatter *= value;
+
+    /* skipping transparent, this function is used for eval(), will be zero then */
+  }
+  else
+    eval->diffuse *= value;
 #else
-	eval->diffuse *= value;
+  eval->diffuse *= value;
 #endif
 }
 
 ccl_device_inline float3 bsdf_eval_sum(const BsdfEval *eval)
 {
 #ifdef __PASSES__
-	if(eval->use_light_pass) {
-		return eval->diffuse + eval->glossy + eval->transmission + eval->subsurface + eval->scatter;
-	}
-	else
+  if (eval->use_light_pass) {
+    return eval->diffuse + eval->glossy + eval->transmission + eval->subsurface + eval->scatter;
+  }
+  else
 #endif
-	return eval->diffuse;
+    return eval->diffuse;
 }
 
 /* Path Radiance
@@ -175,115 +176,113 @@ ccl_device_inline float3 bsdf_eval_sum(const BsdfEval *eval)
 
 ccl_device_inline void path_radiance_init(PathRadiance *L, int use_light_pass)
 {
-	/* clear all */
+  /* clear all */
 #ifdef __PASSES__
-	L->use_light_pass = use_light_pass;
-
-	if(use_light_pass) {
-		L->indirect = make_float3(0.0f, 0.0f, 0.0f);
-		L->direct_emission = make_float3(0.0f, 0.0f, 0.0f);
-
-		L->color_diffuse = make_float3(0.0f, 0.0f, 0.0f);
-		L->color_glossy = make_float3(0.0f, 0.0f, 0.0f);
-		L->color_transmission = make_float3(0.0f, 0.0f, 0.0f);
-		L->color_subsurface = make_float3(0.0f, 0.0f, 0.0f);
-
-		L->direct_diffuse = make_float3(0.0f, 0.0f, 0.0f);
-		L->direct_glossy = make_float3(0.0f, 0.0f, 0.0f);
-		L->direct_transmission = make_float3(0.0f, 0.0f, 0.0f);
-		L->direct_subsurface = make_float3(0.0f, 0.0f, 0.0f);
-		L->direct_scatter = make_float3(0.0f, 0.0f, 0.0f);
-
-		L->indirect_diffuse = make_float3(0.0f, 0.0f, 0.0f);
-		L->indirect_glossy = make_float3(0.0f, 0.0f, 0.0f);
-		L->indirect_transmission = make_float3(0.0f, 0.0f, 0.0f);
-		L->indirect_subsurface = make_float3(0.0f, 0.0f, 0.0f);
-		L->indirect_scatter = make_float3(0.0f, 0.0f, 0.0f);
-
-		L->transparent = 0.0f;
-		L->emission = make_float3(0.0f, 0.0f, 0.0f);
-		L->background = make_float3(0.0f, 0.0f, 0.0f);
-		L->ao = make_float3(0.0f, 0.0f, 0.0f);
-		L->shadow = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-		L->mist = 0.0f;
-
-		L->state.diffuse = make_float3(0.0f, 0.0f, 0.0f);
-		L->state.glossy = make_float3(0.0f, 0.0f, 0.0f);
-		L->state.transmission = make_float3(0.0f, 0.0f, 0.0f);
-		L->state.subsurface = make_float3(0.0f, 0.0f, 0.0f);
-		L->state.scatter = make_float3(0.0f, 0.0f, 0.0f);
-		L->state.direct = make_float3(0.0f, 0.0f, 0.0f);
-	}
-	else
-#endif
-	{
-		L->transparent = 0.0f;
-		L->emission = make_float3(0.0f, 0.0f, 0.0f);
-	}
+  L->use_light_pass = use_light_pass;
+
+  if (use_light_pass) {
+    L->indirect = make_float3(0.0f, 0.0f, 0.0f);
+    L->direct_emission = make_float3(0.0f, 0.0f, 0.0f);
+
+    L->color_diffuse = make_float3(0.0f, 0.0f, 0.0f);
+    L->color_glossy = make_float3(0.0f, 0.0f, 0.0f);
+    L->color_transmission = make_float3(0.0f, 0.0f, 0.0f);
+    L->color_subsurface = make_float3(0.0f, 0.0f, 0.0f);
+
+    L->direct_diffuse = make_float3(0.0f, 0.0f, 0.0f);
+    L->direct_glossy = make_float3(0.0f, 0.0f, 0.0f);
+    L->direct_transmission = make_float3(0.0f, 0.0f, 0.0f);
+    L->direct_subsurface = make_float3(0.0f, 0.0f, 0.0f);
+    L->direct_scatter = make_float3(0.0f, 0.0f, 0.0f);
+
+    L->indirect_diffuse = make_float3(0.0f, 0.0f, 0.0f);
+    L->indirect_glossy = make_float3(0.0f, 0.0f, 0.0f);
+    L->indirect_transmission = make_float3(0.0f, 0.0f, 0.0f);
+    L->indirect_subsurface = make_float3(0.0f, 0.0f, 0.0f);
+    L->indirect_scatter = make_float3(0.0f, 0.0f, 0.0f);
+
+    L->transparent = 0.0f;
+    L->emission = make_float3(0.0f, 0.0f, 0.0f);
+    L->background = make_float3(0.0f, 0.0f, 0.0f);
+    L->ao = make_float3(0.0f, 0.0f, 0.0f);
+    L->shadow = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+    L->mist = 0.0f;
+
+    L->state.diffuse = make_float3(0.0f, 0.0f, 0.0f);
+    L->state.glossy = make_float3(0.0f, 0.0f, 0.0f);
+    L->state.transmission = make_float3(0.0f, 0.0f, 0.0f);
+    L->state.subsurface = make_float3(0.0f, 0.0f, 0.0f);
+    L->state.scatter = make_float3(0.0f, 0.0f, 0.0f);
+    L->state.direct = make_float3(0.0f, 0.0f, 0.0f);
+  }
+  else
+#endif
+  {
+    L->transparent = 0.0f;
+    L->emission = make_float3(0.0f, 0.0f, 0.0f);
+  }
 
 #ifdef __SHADOW_TRICKS__
-	L->path_total = make_float3(0.0f, 0.0f, 0.0f);
-	L->path_total_shaded = make_float3(0.0f, 0.0f, 0.0f);
-	L->shadow_background_color = make_float3(0.0f, 0.0f, 0.0f);
-	L->shadow_throughput = 0.0f;
-	L->shadow_transparency = 1.0f;
-	L->has_shadow_catcher = 0;
+  L->path_total = make_float3(0.0f, 0.0f, 0.0f);
+  L->path_total_shaded = make_float3(0.0f, 0.0f, 0.0f);
+  L->shadow_background_color = make_float3(0.0f, 0.0f, 0.0f);
+  L->shadow_throughput = 0.0f;
+  L->shadow_transparency = 1.0f;
+  L->has_shadow_catcher = 0;
 #endif
 
 #ifdef __DENOISING_FEATURES__
-	L->denoising_normal = make_float3(0.0f, 0.0f, 0.0f);
-	L->denoising_albedo = make_float3(0.0f, 0.0f, 0.0f);
-	L->denoising_depth = 0.0f;
+  L->denoising_normal = make_float3(0.0f, 0.0f, 0.0f);
+  L->denoising_albedo = make_float3(0.0f, 0.0f, 0.0f);
+  L->denoising_depth = 0.0f;
 #endif
 
 #ifdef __KERNEL_DEBUG__
-	L->debug_data.num_bvh_traversed_nodes = 0;
-	L->debug_data.num_bvh_traversed_instances = 0;
-	L->debug_data.num_bvh_intersections = 0;
-	L->debug_data.num_ray_bounces = 0;
+  L->debug_data.num_bvh_traversed_nodes = 0;
+  L->debug_data.num_bvh_traversed_instances = 0;
+  L->debug_data.num_bvh_intersections = 0;
+  L->debug_data.num_ray_bounces = 0;
 #endif
 }
 
-ccl_device_inline void path_radiance_bsdf_bounce(
-	KernelGlobals *kg,
-	PathRadianceState *L_state,
-	ccl_addr_space float3 *throughput,
-	BsdfEval *bsdf_eval,
-	float bsdf_pdf, int bounce, int bsdf_label)
+ccl_device_inline void path_radiance_bsdf_bounce(KernelGlobals *kg,
+                                                 PathRadianceState *L_state,
+                                                 ccl_addr_space float3 *throughput,
+                                                 BsdfEval *bsdf_eval,
+                                                 float bsdf_pdf,
+                                                 int bounce,
+                                                 int bsdf_label)
 {
-	float inverse_pdf = 1.0f/bsdf_pdf;
+  float inverse_pdf = 1.0f / bsdf_pdf;
 
 #ifdef __PASSES__
-	if(kernel_data.film.use_light_pass) {
-		if(bounce == 0 && !(bsdf_label & LABEL_TRANSPARENT)) {
-			/* first on directly visible surface */
-			float3 value = *throughput*inverse_pdf;
-
-			L_state->diffuse = bsdf_eval->diffuse*value;
-			L_state->glossy = bsdf_eval->glossy*value;
-			L_state->transmission = bsdf_eval->transmission*value;
-			L_state->subsurface = bsdf_eval->subsurface*value;
-			L_state->scatter = bsdf_eval->scatter*value;
-
-			*throughput = L_state->diffuse +
-			              L_state->glossy +
-			              L_state->transmission +
-			              L_state->subsurface +
-			              L_state->scatter;
-
-			L_state->direct = *throughput;
-		}
-		else {
-			/* transparent bounce before first hit, or indirectly visible through BSDF */
-			float3 sum = (bsdf_eval_sum(bsdf_eval) + bsdf_eval->transparent) * inverse_pdf;
-			*throughput *= sum;
-		}
-	}
-	else
-#endif
-	{
-		*throughput *= bsdf_eval->diffuse*inverse_pdf;
-	}
+  if (kernel_data.film.use_light_pass) {
+    if (bounce == 0 && !(bsdf_label & LABEL_TRANSPARENT)) {
+      /* first on directly visible surface */
+      float3 value = *throughput * inverse_pdf;
+
+      L_state->diffuse = bsdf_eval->diffuse * value;
+      L_state->glossy = bsdf_eval->glossy * value;
+      L_state->transmission = bsdf_eval->transmission * value;
+      L_state->subsurface = bsdf_eval->subsurface * value;
+      L_state->scatter = bsdf_eval->scatter * value;
+
+      *throughput = L_state->diffuse + L_state->glossy + L_state->transmission +
+                    L_state->subsurface + L_state->scatter;
+
+      L_state->direct = *throughput;
+    }
+    else {
+      /* transparent bounce before first hit, or indirectly visible through BSDF */
+      float3 sum = (bsdf_eval_sum(bsdf_eval) + bsdf_eval->transparent) * inverse_pdf;
+      *throughput *= sum;
+    }
+  }
+  else
+#endif
+  {
+    *throughput *= bsdf_eval->diffuse * inverse_pdf;
+  }
 }
 
 ccl_device_inline void path_radiance_accum_emission(PathRadiance *L,
@@ -292,25 +291,25 @@ ccl_device_inline void path_radiance_accum_emission(PathRadiance *L,
                                                     float3 value)
 {
 #ifdef __SHADOW_TRICKS__
-	if(state->flag & PATH_RAY_SHADOW_CATCHER) {
-		return;
-	}
+  if (state->flag & PATH_RAY_SHADOW_CATCHER) {
+    return;
+  }
 #endif
 
 #ifdef __PASSES__
-	if(L->use_light_pass) {
-		if(state->bounce == 0)
-			L->emission += throughput*value;
-		else if(state->bounce == 1)
-			L->direct_emission += throughput*value;
-		else
-			L->indirect += throughput*value;
-	}
-	else
-#endif
-	{
-		L->emission += throughput*value;
-	}
+  if (L->use_light_pass) {
+    if (state->bounce == 0)
+      L->emission += throughput * value;
+    else if (state->bounce == 1)
+      L->direct_emission += throughput * value;
+    else
+      L->indirect += throughput * value;
+  }
+  else
+#endif
+  {
+    L->emission += throughput * value;
+  }
 }
 
 ccl_device_inline void path_radiance_accum_ao(PathRadiance *L,
@@ -320,57 +319,56 @@ ccl_device_inline void path_radiance_accum_ao(PathRadiance *L,
                                               float3 bsdf,
                                               float3 ao)
 {
-	/* Store AO pass. */
-	if(L->use_light_pass && state->bounce == 0) {
-		L->ao += alpha*throughput*ao;
-	}
+  /* Store AO pass. */
+  if (L->use_light_pass && state->bounce == 0) {
+    L->ao += alpha * throughput * ao;
+  }
 
 #ifdef __SHADOW_TRICKS__
-	/* For shadow catcher, accumulate ratio. */
-	if(state->flag & PATH_RAY_STORE_SHADOW_INFO) {
-		float3 light = throughput * bsdf;
-		L->path_total += light;
-		L->path_total_shaded += ao * light;
+  /* For shadow catcher, accumulate ratio. */
+  if (state->flag & PATH_RAY_STORE_SHADOW_INFO) {
+    float3 light = throughput * bsdf;
+    L->path_total += light;
+    L->path_total_shaded += ao * light;
 
-		if(state->flag & PATH_RAY_SHADOW_CATCHER) {
-			return;
-		}
-	}
+    if (state->flag & PATH_RAY_SHADOW_CATCHER) {
+      return;
+    }
+  }
 #endif
 
 #ifdef __PASSES__
-	if(L->use_light_pass) {
-		if(state->bounce == 0) {
-			/* Directly visible lighting. */
-			L->direct_diffuse += throughput*bsdf*ao;
-		}
-		else {
-			/* Indirectly visible lighting after BSDF bounce. */
-			L->indirect += throughput*bsdf*ao;
-		}
-	}
-	else
-#endif
-	{
-		L->emission += throughput*bsdf*ao;
-	}
+  if (L->use_light_pass) {
+    if (state->bounce == 0) {
+      /* Directly visible lighting. */
+      L->direct_diffuse += throughput * bsdf * ao;
+    }
+    else {
+      /* Indirectly visible lighting after BSDF bounce. */
+      L->indirect += throughput * bsdf * ao;
+    }
+  }
+  else
+#endif
+  {
+    L->emission += throughput * bsdf * ao;
+  }
 }
 
-ccl_device_inline void path_radiance_accum_total_ao(
-        PathRadiance *L,
-        ccl_addr_space PathState *state,
-        float3 throughput,
-        float3 bsdf)
+ccl_device_inline void path_radiance_accum_total_ao(PathRadiance *L,
+                                                    ccl_addr_space PathState *state,
+                                                    float3 throughput,
+                                                    float3 bsdf)
 {
 #ifdef __SHADOW_TRICKS__
-	if(state->flag & PATH_RAY_STORE_SHADOW_INFO) {
-		L->path_total += throughput * bsdf;
-	}
+  if (state->flag & PATH_RAY_STORE_SHADOW_INFO) {
+    L->path_total += throughput * bsdf;
+  }
 #else
-	(void) L;
-	(void) state;
-	(void) throughput;
-	(void) bsdf;
+  (void)L;
+  (void)state;
+  (void)throughput;
+  (void)bsdf;
 #endif
 }
 
@@ -383,171 +381,166 @@ ccl_device_inline void path_radiance_accum_light(PathRadiance *L,
                                                  bool is_lamp)
 {
 #ifdef __SHADOW_TRICKS__
-	if(state->flag & PATH_RAY_STORE_SHADOW_INFO) {
-		float3 light = throughput * bsdf_eval->sum_no_mis;
-		L->path_total += light;
-		L->path_total_shaded += shadow * light;
+  if (state->flag & PATH_RAY_STORE_SHADOW_INFO) {
+    float3 light = throughput * bsdf_eval->sum_no_mis;
+    L->path_total += light;
+    L->path_total_shaded += shadow * light;
 
-		if(state->flag & PATH_RAY_SHADOW_CATCHER) {
-			return;
-		}
-	}
+    if (state->flag & PATH_RAY_SHADOW_CATCHER) {
+      return;
+    }
+  }
 #endif
 
 #ifdef __PASSES__
-	if(L->use_light_pass) {
-		if(state->bounce == 0) {
-			/* directly visible lighting */
-			L->direct_diffuse += throughput*bsdf_eval->diffuse*shadow;
-			L->direct_glossy += throughput*bsdf_eval->glossy*shadow;
-			L->direct_transmission += throughput*bsdf_eval->transmission*shadow;
-			L->direct_subsurface += throughput*bsdf_eval->subsurface*shadow;
-			L->direct_scatter += throughput*bsdf_eval->scatter*shadow;
-
-			if(is_lamp) {
-				L->shadow.x += shadow.x*shadow_fac;
-				L->shadow.y += shadow.y*shadow_fac;
-				L->shadow.z += shadow.z*shadow_fac;
-			}
-		}
-		else {
-			/* indirectly visible lighting after BSDF bounce */
-			L->indirect += throughput*bsdf_eval_sum(bsdf_eval)*shadow;
-		}
-	}
-	else
-#endif
-	{
-		L->emission += throughput*bsdf_eval->diffuse*shadow;
-	}
+  if (L->use_light_pass) {
+    if (state->bounce == 0) {
+      /* directly visible lighting */
+      L->direct_diffuse += throughput * bsdf_eval->diffuse * shadow;
+      L->direct_glossy += throughput * bsdf_eval->glossy * shadow;
+      L->direct_transmission += throughput * bsdf_eval->transmission * shadow;
+      L->direct_subsurface += throughput * bsdf_eval->subsurface * shadow;
+      L->direct_scatter += throughput * bsdf_eval->scatter * shadow;
+
+      if (is_lamp) {
+        L->shadow.x += shadow.x * shadow_fac;
+        L->shadow.y += shadow.y * shadow_fac;
+        L->shadow.z += shadow.z * shadow_fac;
+      }
+    }
+    else {
+      /* indirectly visible lighting after BSDF bounce */
+      L->indirect += throughput * bsdf_eval_sum(bsdf_eval) * shadow;
+    }
+  }
+  else
+#endif
+  {
+    L->emission += throughput * bsdf_eval->diffuse * shadow;
+  }
 }
 
-ccl_device_inline void path_radiance_accum_total_light(
-        PathRadiance *L,
-        ccl_addr_space PathState *state,
-        float3 throughput,
-        const BsdfEval *bsdf_eval)
+ccl_device_inline void path_radiance_accum_total_light(PathRadiance *L,
+                                                       ccl_addr_space PathState *state,
+                                                       float3 throughput,
+                                                       const BsdfEval *bsdf_eval)
 {
 #ifdef __SHADOW_TRICKS__
-	if(state->flag & PATH_RAY_STORE_SHADOW_INFO) {
-		L->path_total += throughput * bsdf_eval->sum_no_mis;
-	}
+  if (state->flag & PATH_RAY_STORE_SHADOW_INFO) {
+    L->path_total += throughput * bsdf_eval->sum_no_mis;
+  }
 #else
-	(void) L;
-	(void) state;
-	(void) throughput;
-	(void) bsdf_eval;
+  (void)L;
+  (void)state;
+  (void)throughput;
+  (void)bsdf_eval;
 #endif
 }
 
-ccl_device_inline void path_radiance_accum_background(
-        PathRadiance *L,
-        ccl_addr_space PathState *state,
-        float3 throughput,
-        float3 value)
+ccl_device_inline void path_radiance_accum_background(PathRadiance *L,
+                                                      ccl_addr_space PathState *state,
+                                                      float3 throughput,
+                                                      float3 value)
 {
 
 #ifdef __SHADOW_TRICKS__
-	if(state->flag & PATH_RAY_STORE_SHADOW_INFO) {
-		L->path_total += throughput * value;
-		L->path_total_shaded += throughput * value * L->shadow_transparency;
+  if (state->flag & PATH_RAY_STORE_SHADOW_INFO) {
+    L->path_total += throughput * value;
+    L->path_total_shaded += throughput * value * L->shadow_transparency;
 
-		if(state->flag & PATH_RAY_SHADOW_CATCHER) {
-			return;
-		}
-	}
+    if (state->flag & PATH_RAY_SHADOW_CATCHER) {
+      return;
+    }
+  }
 #endif
 
 #ifdef __PASSES__
-	if(L->use_light_pass) {
-		if(state->flag & PATH_RAY_TRANSPARENT_BACKGROUND)
-			L->background += throughput*value;
-		else if(state->bounce == 1)
-			L->direct_emission += throughput*value;
-		else
-			L->indirect += throughput*value;
-	}
-	else
-#endif
-	{
-		L->emission += throughput*value;
-	}
+  if (L->use_light_pass) {
+    if (state->flag & PATH_RAY_TRANSPARENT_BACKGROUND)
+      L->background += throughput * value;
+    else if (state->bounce == 1)
+      L->direct_emission += throughput * value;
+    else
+      L->indirect += throughput * value;
+  }
+  else
+#endif
+  {
+    L->emission += throughput * value;
+  }
 
 #ifdef __DENOISING_FEATURES__
-	L->denoising_albedo += state->denoising_feature_weight * value;
-#endif  /* __DENOISING_FEATURES__ */
+  L->denoising_albedo += state->denoising_feature_weight * value;
+#endif /* __DENOISING_FEATURES__ */
 }
 
-ccl_device_inline void path_radiance_accum_transparent(
-        PathRadiance *L,
-        ccl_addr_space PathState *state,
-        float3 throughput)
+ccl_device_inline void path_radiance_accum_transparent(PathRadiance *L,
+                                                       ccl_addr_space PathState *state,
+                                                       float3 throughput)
 {
-	L->transparent += average(throughput);
+  L->transparent += average(throughput);
 }
 
 #ifdef __SHADOW_TRICKS__
-ccl_device_inline void path_radiance_accum_shadowcatcher(
-        PathRadiance *L,
-        float3 throughput,
-        float3 background)
+ccl_device_inline void path_radiance_accum_shadowcatcher(PathRadiance *L,
+                                                         float3 throughput,
+                                                         float3 background)
 {
-	L->shadow_throughput += average(throughput);
-	L->shadow_background_color += throughput * background;
-	L->has_shadow_catcher = 1;
+  L->shadow_throughput += average(throughput);
+  L->shadow_background_color += throughput * background;
+  L->has_shadow_catcher = 1;
 }
 #endif
 
 ccl_device_inline void path_radiance_sum_indirect(PathRadiance *L)
 {
 #ifdef __PASSES__
-	/* this division is a bit ugly, but means we only have to keep track of
-	 * only a single throughput further along the path, here we recover just
-	 * the indirect path that is not influenced by any particular BSDF type */
-	if(L->use_light_pass) {
-		L->direct_emission = safe_divide_color(L->direct_emission, L->state.direct);
-		L->direct_diffuse += L->state.diffuse*L->direct_emission;
-		L->direct_glossy += L->state.glossy*L->direct_emission;
-		L->direct_transmission += L->state.transmission*L->direct_emission;
-		L->direct_subsurface += L->state.subsurface*L->direct_emission;
-		L->direct_scatter += L->state.scatter*L->direct_emission;
-
-		L->indirect = safe_divide_color(L->indirect, L->state.direct);
-		L->indirect_diffuse += L->state.diffuse*L->indirect;
-		L->indirect_glossy += L->state.glossy*L->indirect;
-		L->indirect_transmission += L->state.transmission*L->indirect;
-		L->indirect_subsurface += L->state.subsurface*L->indirect;
-		L->indirect_scatter += L->state.scatter*L->indirect;
-	}
+  /* this division is a bit ugly, but means we only have to keep track of
+   * only a single throughput further along the path, here we recover just
+   * the indirect path that is not influenced by any particular BSDF type */
+  if (L->use_light_pass) {
+    L->direct_emission = safe_divide_color(L->direct_emission, L->state.direct);
+    L->direct_diffuse += L->state.diffuse * L->direct_emission;
+    L->direct_glossy += L->state.glossy * L->direct_emission;
+    L->direct_transmission += L->state.transmission * L->direct_emission;
+    L->direct_subsurface += L->state.subsurface * L->direct_emission;
+    L->direct_scatter += L->state.scatter * L->direct_emission;
+
+    L->indirect = safe_divide_color(L->indirect, L->state.direct);
+    L->indirect_diffuse += L->state.diffuse * L->indirect;
+    L->indirect_glossy += L->state.glossy * L->indirect;
+    L->indirect_transmission += L->state.transmission * L->indirect;
+    L->indirect_subsurface += L->state.subsurface * L->indirect;
+    L->indirect_scatter += L->state.scatter * L->indirect;
+  }
 #endif
 }
 
 ccl_device_inline void path_radiance_reset_indirect(PathRadiance *L)
 {
 #ifdef __PASSES__
-	if(L->use_light_pass) {
-		L->state.diffuse = make_float3(0.0f, 0.0f, 0.0f);
-		L->state.glossy = make_float3(0.0f, 0.0f, 0.0f);
-		L->state.transmission = make_float3(0.0f, 0.0f, 0.0f);
-		L->state.subsurface = make_float3(0.0f, 0.0f, 0.0f);
-		L->state.scatter = make_float3(0.0f, 0.0f, 0.0f);
+  if (L->use_light_pass) {
+    L->state.diffuse = make_float3(0.0f, 0.0f, 0.0f);
+    L->state.glossy = make_float3(0.0f, 0.0f, 0.0f);
+    L->state.transmission = make_float3(0.0f, 0.0f, 0.0f);
+    L->state.subsurface = make_float3(0.0f, 0.0f, 0.0f);
+    L->state.scatter = make_float3(0.0f, 0.0f, 0.0f);
 
-		L->direct_emission = make_float3(0.0f, 0.0f, 0.0f);
-		L->indirect = make_float3(0.0f, 0.0f, 0.0f);
-	}
+    L->direct_emission = make_float3(0.0f, 0.0f, 0.0f);
+    L->indirect = make_float3(0.0f, 0.0f, 0.0f);
+  }
 #endif
 }
 
-ccl_device_inline void path_radiance_copy_indirect(PathRadiance *L,
-                                                   const PathRadiance *L_src)
+ccl_device_inline void path_radiance_copy_indirect(PathRadiance *L, const PathRadiance *L_src)
 {
 #ifdef __PASSES__
-	if(L->use_light_pass) {
-		L->state = L_src->state;
+  if (L->use_light_pass) {
+    L->state = L_src->state;
 
-		L->direct_emission = L_src->direct_emission;
-		L->indirect = L_src->indirect;
-	}
+    L->direct_emission = L_src->direct_emission;
+    L->indirect = L_src->indirect;
+  }
 #endif
 }
 
@@ -557,213 +550,219 @@ ccl_device_inline void path_radiance_sum_shadowcatcher(KernelGlobals *kg,
                                                        float3 *L_sum,
                                                        float *alpha)
 {
-	/* Calculate current shadow of the path. */
-	float path_total = average(L->path_total);
-	float shadow;
-
-	if(UNLIKELY(!isfinite_safe(path_total))) {
-		kernel_assert(!"Non-finite total radiance along the path");
-		shadow = 0.0f;
-	}
-	else if(path_total == 0.0f) {
-		shadow = L->shadow_transparency;
-	}
-	else {
-		float path_total_shaded = average(L->path_total_shaded);
-		shadow = path_total_shaded / path_total;
-	}
-
-	/* Calculate final light sum and transparency for shadow catcher object. */
-	if(kernel_data.background.transparent) {
-		*alpha -= L->shadow_throughput * shadow;
-	}
-	else {
-		L->shadow_background_color *= shadow;
-		*L_sum += L->shadow_background_color;
-	}
+  /* Calculate current shadow of the path. */
+  float path_total = average(L->path_total);
+  float shadow;
+
+  if (UNLIKELY(!isfinite_safe(path_total))) {
+    kernel_assert(!"Non-finite total radiance along the path");
+    shadow = 0.0f;
+  }
+  else if (path_total == 0.0f) {
+    shadow = L->shadow_transparency;
+  }
+  else {
+    float path_total_shaded = average(L->path_total_shaded);
+    shadow = path_total_shaded / path_total;
+  }
+
+  /* Calculate final light sum and transparency for shadow catcher object. */
+  if (kernel_data.background.transparent) {
+    *alpha -= L->shadow_throughput * shadow;
+  }
+  else {
+    L->shadow_background_color *= shadow;
+    *L_sum += L->shadow_background_color;
+  }
 }
 #endif
 
-ccl_device_inline float3 path_radiance_clamp_and_sum(KernelGlobals *kg, PathRadiance *L, float *alpha)
+ccl_device_inline float3 path_radiance_clamp_and_sum(KernelGlobals *kg,
+                                                     PathRadiance *L,
+                                                     float *alpha)
 {
-	float3 L_sum;
-	/* Light Passes are used */
+  float3 L_sum;
+  /* Light Passes are used */
 #ifdef __PASSES__
-	float3 L_direct, L_indirect;
-	float clamp_direct = kernel_data.integrator.sample_clamp_direct;
-	float clamp_indirect = kernel_data.integrator.sample_clamp_indirect;
-	if(L->use_light_pass) {
-		path_radiance_sum_indirect(L);
-
-		L_direct = L->direct_diffuse + L->direct_glossy + L->direct_transmission + L->direct_subsurface + L->direct_scatter + L->emission;
-		L_indirect = L->indirect_diffuse + L->indirect_glossy + L->indirect_transmission + L->indirect_subsurface + L->indirect_scatter;
-
-		if(!kernel_data.background.transparent)
-			L_direct += L->background;
-
-		L_sum = L_direct + L_indirect;
-		float sum = fabsf((L_sum).x) + fabsf((L_sum).y) + fabsf((L_sum).z);
-
-		/* Reject invalid value */
-		if(!isfinite_safe(sum)) {
-			kernel_assert(!"Non-finite sum in path_radiance_clamp_and_sum!");
-			L_sum = make_float3(0.0f, 0.0f, 0.0f);
-
-			L->direct_diffuse = make_float3(0.0f, 0.0f, 0.0f);
-			L->direct_glossy = make_float3(0.0f, 0.0f, 0.0f);
-			L->direct_transmission = make_float3(0.0f, 0.0f, 0.0f);
-			L->direct_subsurface = make_float3(0.0f, 0.0f, 0.0f);
-			L->direct_scatter = make_float3(0.0f, 0.0f, 0.0f);
-
-			L->indirect_diffuse = make_float3(0.0f, 0.0f, 0.0f);
-			L->indirect_glossy = make_float3(0.0f, 0.0f, 0.0f);
-			L->indirect_transmission = make_float3(0.0f, 0.0f, 0.0f);
-			L->indirect_subsurface = make_float3(0.0f, 0.0f, 0.0f);
-			L->indirect_scatter = make_float3(0.0f, 0.0f, 0.0f);
-
-			L->emission = make_float3(0.0f, 0.0f, 0.0f);
-		}
-
-		/* Clamp direct and indirect samples */
-#ifdef __CLAMP_SAMPLE__
-		else if(sum > clamp_direct || sum > clamp_indirect) {
-			float scale;
-
-			/* Direct */
-			float sum_direct = fabsf(L_direct.x) + fabsf(L_direct.y) + fabsf(L_direct.z);
-			if(sum_direct > clamp_direct) {
-				scale = clamp_direct/sum_direct;
-				L_direct *= scale;
-
-				L->direct_diffuse *= scale;
-				L->direct_glossy *= scale;
-				L->direct_transmission *= scale;
-				L->direct_subsurface *= scale;
-				L->direct_scatter *= scale;
-				L->emission *= scale;
-				L->background *= scale;
-			}
-
-			/* Indirect */
-			float sum_indirect = fabsf(L_indirect.x) + fabsf(L_indirect.y) + fabsf(L_indirect.z);
-			if(sum_indirect > clamp_indirect) {
-				scale = clamp_indirect/sum_indirect;
-				L_indirect *= scale;
-
-				L->indirect_diffuse *= scale;
-				L->indirect_glossy *= scale;
-				L->indirect_transmission *= scale;
-				L->indirect_subsurface *= scale;
-				L->indirect_scatter *= scale;
-			}
-
-			/* Sum again, after clamping */
-			L_sum = L_direct + L_indirect;
-		}
-#endif
-	}
-
-	/* No Light Passes */
-	else
-#endif
-	{
-		L_sum = L->emission;
-
-		/* Reject invalid value */
-		float sum = fabsf((L_sum).x) + fabsf((L_sum).y) + fabsf((L_sum).z);
-		if(!isfinite_safe(sum)) {
-			kernel_assert(!"Non-finite final sum in path_radiance_clamp_and_sum!");
-			L_sum = make_float3(0.0f, 0.0f, 0.0f);
-		}
-	}
-
-	/* Compute alpha. */
-	*alpha = 1.0f - L->transparent;
-
-	/* Add shadow catcher contributions. */
+  float3 L_direct, L_indirect;
+  float clamp_direct = kernel_data.integrator.sample_clamp_direct;
+  float clamp_indirect = kernel_data.integrator.sample_clamp_indirect;
+  if (L->use_light_pass) {
+    path_radiance_sum_indirect(L);
+
+    L_direct = L->direct_diffuse + L->direct_glossy + L->direct_transmission +
+               L->direct_subsurface + L->direct_scatter + L->emission;
+    L_indirect = L->indirect_diffuse + L->indirect_glossy + L->indirect_transmission +
+                 L->indirect_subsurface + L->indirect_scatter;
+
+    if (!kernel_data.background.transparent)
+      L_direct += L->background;
+
+    L_sum = L_direct + L_indirect;
+    float sum = fabsf((L_sum).x) + fabsf((L_sum).y) + fabsf((L_sum).z);
+
+    /* Reject invalid value */
+    if (!isfinite_safe(sum)) {
+      kernel_assert(!"Non-finite sum in path_radiance_clamp_and_sum!");
+      L_sum = make_float3(0.0f, 0.0f, 0.0f);
+
+      L->direct_diffuse = make_float3(0.0f, 0.0f, 0.0f);
+      L->direct_glossy = make_float3(0.0f, 0.0f, 0.0f);
+      L->direct_transmission = make_float3(0.0f, 0.0f, 0.0f);
+      L->direct_subsurface = make_float3(0.0f, 0.0f, 0.0f);
+      L->direct_scatter = make_float3(0.0f, 0.0f, 0.0f);
+
+      L->indirect_diffuse = make_float3(0.0f, 0.0f, 0.0f);
+      L->indirect_glossy = make_float3(0.0f, 0.0f, 0.0f);
+      L->indirect_transmission = make_float3(0.0f, 0.0f, 0.0f);
+      L->indirect_subsurface = make_float3(0.0f, 0.0f, 0.0f);
+      L->indirect_scatter = make_float3(0.0f, 0.0f, 0.0f);
+
+      L->emission = make_float3(0.0f, 0.0f, 0.0f);
+    }
+
+    /* Clamp direct and indirect samples */
+#  ifdef __CLAMP_SAMPLE__
+    else if (sum > clamp_direct || sum > clamp_indirect) {
+      float scale;
+
+      /* Direct */
+      float sum_direct = fabsf(L_direct.x) + fabsf(L_direct.y) + fabsf(L_direct.z);
+      if (sum_direct > clamp_direct) {
+        scale = clamp_direct / sum_direct;
+        L_direct *= scale;
+
+        L->direct_diffuse *= scale;
+        L->direct_glossy *= scale;
+        L->direct_transmission *= scale;
+        L->direct_subsurface *= scale;
+        L->direct_scatter *= scale;
+        L->emission *= scale;
+        L->background *= scale;
+      }
+
+      /* Indirect */
+      float sum_indirect = fabsf(L_indirect.x) + fabsf(L_indirect.y) + fabsf(L_indirect.z);
+      if (sum_indirect > clamp_indirect) {
+        scale = clamp_indirect / sum_indirect;
+        L_indirect *= scale;
+
+        L->indirect_diffuse *= scale;
+        L->indirect_glossy *= scale;
+        L->indirect_transmission *= scale;
+        L->indirect_subsurface *= scale;
+        L->indirect_scatter *= scale;
+      }
+
+      /* Sum again, after clamping */
+      L_sum = L_direct + L_indirect;
+    }
+#  endif
+  }
+
+  /* No Light Passes */
+  else
+#endif
+  {
+    L_sum = L->emission;
+
+    /* Reject invalid value */
+    float sum = fabsf((L_sum).x) + fabsf((L_sum).y) + fabsf((L_sum).z);
+    if (!isfinite_safe(sum)) {
+      kernel_assert(!"Non-finite final sum in path_radiance_clamp_and_sum!");
+      L_sum = make_float3(0.0f, 0.0f, 0.0f);
+    }
+  }
+
+  /* Compute alpha. */
+  *alpha = 1.0f - L->transparent;
+
+  /* Add shadow catcher contributions. */
 #ifdef __SHADOW_TRICKS__
-	if(L->has_shadow_catcher) {
-		path_radiance_sum_shadowcatcher(kg, L, &L_sum, alpha);
-	}
-#endif  /* __SHADOW_TRICKS__ */
+  if (L->has_shadow_catcher) {
+    path_radiance_sum_shadowcatcher(kg, L, &L_sum, alpha);
+  }
+#endif /* __SHADOW_TRICKS__ */
 
-	return L_sum;
+  return L_sum;
 }
 
-ccl_device_inline void path_radiance_split_denoising(KernelGlobals *kg, PathRadiance *L, float3 *noisy, float3 *clean)
+ccl_device_inline void path_radiance_split_denoising(KernelGlobals *kg,
+                                                     PathRadiance *L,
+                                                     float3 *noisy,
+                                                     float3 *clean)
 {
 #ifdef __PASSES__
-	kernel_assert(L->use_light_pass);
-
-	*clean = L->emission + L->background;
-	*noisy = L->direct_scatter + L->indirect_scatter;
-
-#  define ADD_COMPONENT(flag, component)     \
-	if(kernel_data.film.denoising_flags & flag) \
-		*clean += component;                 \
-	else                                     \
-		*noisy += component;
-
-	ADD_COMPONENT(DENOISING_CLEAN_DIFFUSE_DIR,      L->direct_diffuse);
-	ADD_COMPONENT(DENOISING_CLEAN_DIFFUSE_IND,      L->indirect_diffuse);
-	ADD_COMPONENT(DENOISING_CLEAN_GLOSSY_DIR,       L->direct_glossy);
-	ADD_COMPONENT(DENOISING_CLEAN_GLOSSY_IND,       L->indirect_glossy);
-	ADD_COMPONENT(DENOISING_CLEAN_TRANSMISSION_DIR, L->direct_transmission);
-	ADD_COMPONENT(DENOISING_CLEAN_TRANSMISSION_IND, L->indirect_transmission);
-	ADD_COMPONENT(DENOISING_CLEAN_SUBSURFACE_DIR,   L->direct_subsurface);
-	ADD_COMPONENT(DENOISING_CLEAN_SUBSURFACE_IND,   L->indirect_subsurface);
+  kernel_assert(L->use_light_pass);
+
+  *clean = L->emission + L->background;
+  *noisy = L->direct_scatter + L->indirect_scatter;
+
+#  define ADD_COMPONENT(flag, component) \
+    if (kernel_data.film.denoising_flags & flag) \
+      *clean += component; \
+    else \
+      *noisy += component;
+
+  ADD_COMPONENT(DENOISING_CLEAN_DIFFUSE_DIR, L->direct_diffuse);
+  ADD_COMPONENT(DENOISING_CLEAN_DIFFUSE_IND, L->indirect_diffuse);
+  ADD_COMPONENT(DENOISING_CLEAN_GLOSSY_DIR, L->direct_glossy);
+  ADD_COMPONENT(DENOISING_CLEAN_GLOSSY_IND, L->indirect_glossy);
+  ADD_COMPONENT(DENOISING_CLEAN_TRANSMISSION_DIR, L->direct_transmission);
+  ADD_COMPONENT(DENOISING_CLEAN_TRANSMISSION_IND, L->indirect_transmission);
+  ADD_COMPONENT(DENOISING_CLEAN_SUBSURFACE_DIR, L->direct_subsurface);
+  ADD_COMPONENT(DENOISING_CLEAN_SUBSURFACE_IND, L->indirect_subsurface);
 #  undef ADD_COMPONENT
 #else
-	*noisy = L->emission;
-	*clean = make_float3(0.0f, 0.0f, 0.0f);
+  *noisy = L->emission;
+  *clean = make_float3(0.0f, 0.0f, 0.0f);
 #endif
 
 #ifdef __SHADOW_TRICKS__
-	if(L->has_shadow_catcher) {
-		*noisy += L->shadow_background_color;
-	}
+  if (L->has_shadow_catcher) {
+    *noisy += L->shadow_background_color;
+  }
 #endif
 
-	*noisy = ensure_finite3(*noisy);
-	*clean = ensure_finite3(*clean);
+  *noisy = ensure_finite3(*noisy);
+  *clean = ensure_finite3(*clean);
 }
 
 ccl_device_inline void path_radiance_accum_sample(PathRadiance *L, PathRadiance *L_sample)
 {
 #ifdef __SPLIT_KERNEL__
 #  define safe_float3_add(f, v) \
-	do { \
-		ccl_global float *p = (ccl_global float*)(&(f)); \
-		atomic_add_and_fetch_float(p+0, (v).x); \
-		atomic_add_and_fetch_float(p+1, (v).y); \
-		atomic_add_and_fetch_float(p+2, (v).z); \
-	} while(0)
-#  define safe_float_add(f, v) \
-		atomic_add_and_fetch_float(&(f), (v))
+    do { \
+      ccl_global float *p = (ccl_global float *)(&(f)); \
+      atomic_add_and_fetch_float(p + 0, (v).x); \
+      atomic_add_and_fetch_float(p + 1, (v).y); \
+      atomic_add_and_fetch_float(p + 2, (v).z); \
+    } while (0)
+#  define safe_float_add(f, v) atomic_add_and_fetch_float(&(f), (v))
 #else
 #  define safe_float3_add(f, v) (f) += (v)
 #  define safe_float_add(f, v) (f) += (v)
-#endif  /* __SPLIT_KERNEL__ */
+#endif /* __SPLIT_KERNEL__ */
 
 #ifdef __PASSES__
-	safe_float3_add(L->direct_diffuse, L_sample->direct_diffuse);
-	safe_float3_add(L->direct_glossy, L_sample->direct_glossy);
-	safe_float3_add(L->direct_transmission, L_sample->direct_transmission);
-	safe_float3_add(L->direct_subsurface, L_sample->direct_subsurface);
-	safe_float3_add(L->direct_scatter, L_sample->direct_scatter);
-
-	safe_float3_add(L->indirect_diffuse, L_sample->indirect_diffuse);
-	safe_float3_add(L->indirect_glossy, L_sample->indirect_glossy);
-	safe_float3_add(L->indirect_transmission, L_sample->indirect_transmission);
-	safe_float3_add(L->indirect_subsurface, L_sample->indirect_subsurface);
-	safe_float3_add(L->indirect_scatter, L_sample->indirect_scatter);
-
-	safe_float3_add(L->background, L_sample->background);
-	safe_float3_add(L->ao, L_sample->ao);
-	safe_float3_add(L->shadow, L_sample->shadow);
-	safe_float_add(L->mist, L_sample->mist);
-#endif  /* __PASSES__ */
-	safe_float3_add(L->emission, L_sample->emission);
+  safe_float3_add(L->direct_diffuse, L_sample->direct_diffuse);
+  safe_float3_add(L->direct_glossy, L_sample->direct_glossy);
+  safe_float3_add(L->direct_transmission, L_sample->direct_transmission);
+  safe_float3_add(L->direct_subsurface, L_sample->direct_subsurface);
+  safe_float3_add(L->direct_scatter, L_sample->direct_scatter);
+
+  safe_float3_add(L->indirect_diffuse, L_sample->indirect_diffuse);
+  safe_float3_add(L->indirect_glossy, L_sample->indirect_glossy);
+  safe_float3_add(L->indirect_transmission, L_sample->indirect_transmission);
+  safe_float3_add(L->indirect_subsurface, L_sample->indirect_subsurface);
+  safe_float3_add(L->indirect_scatter, L_sample->indirect_scatter);
+
+  safe_float3_add(L->background, L_sample->background);
+  safe_float3_add(L->ao, L_sample->ao);
+  safe_float3_add(L->shadow, L_sample->shadow);
+  safe_float_add(L->mist, L_sample->mist);
+#endif /* __PASSES__ */
+  safe_float3_add(L->emission, L_sample->emission);
 
 #undef safe_float_add
 #undef safe_float3_add
diff --git a/intern/cycles/kernel/kernel_bake.h b/intern/cycles/kernel/kernel_bake.h
index 37c163f2538..10b71bc6bdf 100644
--- a/intern/cycles/kernel/kernel_bake.h
+++ b/intern/cycles/kernel/kernel_bake.h
@@ -18,191 +18,172 @@ CCL_NAMESPACE_BEGIN
 
 #ifdef __BAKING__
 
-ccl_device_inline void compute_light_pass(KernelGlobals *kg,
-                                          ShaderData *sd,
-                                          PathRadiance *L,
-                                          uint rng_hash,
-                                          int pass_filter,
-                                          int sample)
+ccl_device_inline void compute_light_pass(
+    KernelGlobals *kg, ShaderData *sd, PathRadiance *L, uint rng_hash, int pass_filter, int sample)
 {
-	kernel_assert(kernel_data.film.use_light_pass);
-
-	PathRadiance L_sample;
-	PathState state;
-	Ray ray;
-	float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
-
-	/* emission and indirect shader data memory used by various functions */
-	ShaderData emission_sd, indirect_sd;
-
-	ray.P = sd->P + sd->Ng;
-	ray.D = -sd->Ng;
-	ray.t = FLT_MAX;
-#ifdef __CAMERA_MOTION__
-	ray.time = 0.5f;
-#endif
-
-	/* init radiance */
-	path_radiance_init(&L_sample, kernel_data.film.use_light_pass);
-
-	/* init path state */
-	path_state_init(kg, &emission_sd, &state, rng_hash, sample, NULL);
-
-	/* evaluate surface shader */
-	shader_eval_surface(kg, sd, &state, state.flag);
-
-	/* TODO, disable more closures we don't need besides transparent */
-	shader_bsdf_disable_transparency(kg, sd);
-
-#ifdef __BRANCHED_PATH__
-	if(!kernel_data.integrator.branched) {
-		/* regular path tracer */
-#endif
-
-		/* sample ambient occlusion */
-		if(pass_filter & BAKE_FILTER_AO) {
-			kernel_path_ao(kg, sd, &emission_sd, &L_sample, &state, throughput, shader_bsdf_alpha(kg, sd));
-		}
-
-		/* sample emission */
-		if((pass_filter & BAKE_FILTER_EMISSION) && (sd->flag & SD_EMISSION)) {
-			float3 emission = indirect_primitive_emission(kg, sd, 0.0f, state.flag, state.ray_pdf);
-			path_radiance_accum_emission(&L_sample, &state, throughput, emission);
-		}
-
-		bool is_sss_sample = false;
-
-#ifdef __SUBSURFACE__
-		/* sample subsurface scattering */
-		if((pass_filter & BAKE_FILTER_SUBSURFACE) && (sd->flag & SD_BSSRDF)) {
-			/* when mixing BSSRDF and BSDF closures we should skip BSDF lighting if scattering was successful */
-			SubsurfaceIndirectRays ss_indirect;
-			kernel_path_subsurface_init_indirect(&ss_indirect);
-			if(kernel_path_subsurface_scatter(kg,
-			                                  sd,
-			                                  &emission_sd,
-			                                  &L_sample,
-			                                  &state,
-			                                  &ray,
-			                                  &throughput,
-			                                  &ss_indirect))
-			{
-				while(ss_indirect.num_rays) {
-					kernel_path_subsurface_setup_indirect(kg,
-					                                      &ss_indirect,
-					                                      &state,
-					                                      &ray,
-					                                      &L_sample,
-					                                      &throughput);
-					kernel_path_indirect(kg,
-					                     &indirect_sd,
-					                     &emission_sd,
-					                     &ray,
-					                     throughput,
-					                     &state,
-					                     &L_sample);
-				}
-				is_sss_sample = true;
-			}
-		}
-#endif
-
-		/* sample light and BSDF */
-		if(!is_sss_sample && (pass_filter & (BAKE_FILTER_DIRECT | BAKE_FILTER_INDIRECT))) {
-			kernel_path_surface_connect_light(kg, sd, &emission_sd, throughput, &state, &L_sample);
-
-			if(kernel_path_surface_bounce(kg, sd, &throughput, &state, &L_sample.state, &ray)) {
-#ifdef __LAMP_MIS__
-				state.ray_t = 0.0f;
-#endif
-				/* compute indirect light */
-				kernel_path_indirect(kg, &indirect_sd, &emission_sd, &ray, throughput, &state, &L_sample);
-
-				/* sum and reset indirect light pass variables for the next samples */
-				path_radiance_sum_indirect(&L_sample);
-				path_radiance_reset_indirect(&L_sample);
-			}
-		}
-#ifdef __BRANCHED_PATH__
-	}
-	else {
-		/* branched path tracer */
-
-		/* sample ambient occlusion */
-		if(pass_filter & BAKE_FILTER_AO) {
-			kernel_branched_path_ao(kg, sd, &emission_sd, &L_sample, &state, throughput);
-		}
-
-		/* sample emission */
-		if((pass_filter & BAKE_FILTER_EMISSION) && (sd->flag & SD_EMISSION)) {
-			float3 emission = indirect_primitive_emission(kg, sd, 0.0f, state.flag, state.ray_pdf);
-			path_radiance_accum_emission(&L_sample, &state, throughput, emission);
-		}
-
-#ifdef __SUBSURFACE__
-		/* sample subsurface scattering */
-		if((pass_filter & BAKE_FILTER_SUBSURFACE) && (sd->flag & SD_BSSRDF)) {
-			/* when mixing BSSRDF and BSDF closures we should skip BSDF lighting if scattering was successful */
-			kernel_branched_path_subsurface_scatter(kg, sd, &indirect_sd,
-				&emission_sd, &L_sample, &state, &ray, throughput);
-		}
-#endif
-
-		/* sample light and BSDF */
-		if(pass_filter & (BAKE_FILTER_DIRECT | BAKE_FILTER_INDIRECT)) {
-#if defined(__EMISSION__)
-			/* direct light */
-			if(kernel_data.integrator.use_direct_light) {
-				int all = kernel_data.integrator.sample_all_lights_direct;
-				kernel_branched_path_surface_connect_light(kg,
-					sd, &emission_sd, &state, throughput, 1.0f, &L_sample, all);
-			}
-#endif
-
-			/* indirect light */
-			kernel_branched_path_surface_indirect_light(kg,
-				sd, &indirect_sd, &emission_sd, throughput, 1.0f, &state, &L_sample);
-		}
-	}
-#endif
-
-	/* accumulate into master L */
-	path_radiance_accum_sample(L, &L_sample);
+  kernel_assert(kernel_data.film.use_light_pass);
+
+  PathRadiance L_sample;
+  PathState state;
+  Ray ray;
+  float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
+
+  /* emission and indirect shader data memory used by various functions */
+  ShaderData emission_sd, indirect_sd;
+
+  ray.P = sd->P + sd->Ng;
+  ray.D = -sd->Ng;
+  ray.t = FLT_MAX;
+#  ifdef __CAMERA_MOTION__
+  ray.time = 0.5f;
+#  endif
+
+  /* init radiance */
+  path_radiance_init(&L_sample, kernel_data.film.use_light_pass);
+
+  /* init path state */
+  path_state_init(kg, &emission_sd, &state, rng_hash, sample, NULL);
+
+  /* evaluate surface shader */
+  shader_eval_surface(kg, sd, &state, state.flag);
+
+  /* TODO, disable more closures we don't need besides transparent */
+  shader_bsdf_disable_transparency(kg, sd);
+
+#  ifdef __BRANCHED_PATH__
+  if (!kernel_data.integrator.branched) {
+    /* regular path tracer */
+#  endif
+
+    /* sample ambient occlusion */
+    if (pass_filter & BAKE_FILTER_AO) {
+      kernel_path_ao(
+          kg, sd, &emission_sd, &L_sample, &state, throughput, shader_bsdf_alpha(kg, sd));
+    }
+
+    /* sample emission */
+    if ((pass_filter & BAKE_FILTER_EMISSION) && (sd->flag & SD_EMISSION)) {
+      float3 emission = indirect_primitive_emission(kg, sd, 0.0f, state.flag, state.ray_pdf);
+      path_radiance_accum_emission(&L_sample, &state, throughput, emission);
+    }
+
+    bool is_sss_sample = false;
+
+#  ifdef __SUBSURFACE__
+    /* sample subsurface scattering */
+    if ((pass_filter & BAKE_FILTER_SUBSURFACE) && (sd->flag & SD_BSSRDF)) {
+      /* when mixing BSSRDF and BSDF closures we should skip BSDF lighting if scattering was successful */
+      SubsurfaceIndirectRays ss_indirect;
+      kernel_path_subsurface_init_indirect(&ss_indirect);
+      if (kernel_path_subsurface_scatter(
+              kg, sd, &emission_sd, &L_sample, &state, &ray, &throughput, &ss_indirect)) {
+        while (ss_indirect.num_rays) {
+          kernel_path_subsurface_setup_indirect(
+              kg, &ss_indirect, &state, &ray, &L_sample, &throughput);
+          kernel_path_indirect(
+              kg, &indirect_sd, &emission_sd, &ray, throughput, &state, &L_sample);
+        }
+        is_sss_sample = true;
+      }
+    }
+#  endif
+
+    /* sample light and BSDF */
+    if (!is_sss_sample && (pass_filter & (BAKE_FILTER_DIRECT | BAKE_FILTER_INDIRECT))) {
+      kernel_path_surface_connect_light(kg, sd, &emission_sd, throughput, &state, &L_sample);
+
+      if (kernel_path_surface_bounce(kg, sd, &throughput, &state, &L_sample.state, &ray)) {
+#  ifdef __LAMP_MIS__
+        state.ray_t = 0.0f;
+#  endif
+        /* compute indirect light */
+        kernel_path_indirect(kg, &indirect_sd, &emission_sd, &ray, throughput, &state, &L_sample);
+
+        /* sum and reset indirect light pass variables for the next samples */
+        path_radiance_sum_indirect(&L_sample);
+        path_radiance_reset_indirect(&L_sample);
+      }
+    }
+#  ifdef __BRANCHED_PATH__
+  }
+  else {
+    /* branched path tracer */
+
+    /* sample ambient occlusion */
+    if (pass_filter & BAKE_FILTER_AO) {
+      kernel_branched_path_ao(kg, sd, &emission_sd, &L_sample, &state, throughput);
+    }
+
+    /* sample emission */
+    if ((pass_filter & BAKE_FILTER_EMISSION) && (sd->flag & SD_EMISSION)) {
+      float3 emission = indirect_primitive_emission(kg, sd, 0.0f, state.flag, state.ray_pdf);
+      path_radiance_accum_emission(&L_sample, &state, throughput, emission);
+    }
+
+#    ifdef __SUBSURFACE__
+    /* sample subsurface scattering */
+    if ((pass_filter & BAKE_FILTER_SUBSURFACE) && (sd->flag & SD_BSSRDF)) {
+      /* when mixing BSSRDF and BSDF closures we should skip BSDF lighting if scattering was successful */
+      kernel_branched_path_subsurface_scatter(
+          kg, sd, &indirect_sd, &emission_sd, &L_sample, &state, &ray, throughput);
+    }
+#    endif
+
+    /* sample light and BSDF */
+    if (pass_filter & (BAKE_FILTER_DIRECT | BAKE_FILTER_INDIRECT)) {
+#    if defined(__EMISSION__)
+      /* direct light */
+      if (kernel_data.integrator.use_direct_light) {
+        int all = kernel_data.integrator.sample_all_lights_direct;
+        kernel_branched_path_surface_connect_light(
+            kg, sd, &emission_sd, &state, throughput, 1.0f, &L_sample, all);
+      }
+#    endif
+
+      /* indirect light */
+      kernel_branched_path_surface_indirect_light(
+          kg, sd, &indirect_sd, &emission_sd, throughput, 1.0f, &state, &L_sample);
+    }
+  }
+#  endif
+
+  /* accumulate into master L */
+  path_radiance_accum_sample(L, &L_sample);
 }
 
 /* this helps with AA but it's not the real solution as it does not AA the geometry
  *  but it's better than nothing, thus committed */
 ccl_device_inline float bake_clamp_mirror_repeat(float u, float max)
 {
-	/* use mirror repeat (like opengl texture) so that if the barycentric
-	 * coordinate goes past the end of the triangle it is not always clamped
-	 * to the same value, gives ugly patterns */
-	u /= max;
-	float fu = floorf(u);
-	u = u - fu;
-
-	return ((((int)fu) & 1)? 1.0f - u: u) * max;
+  /* use mirror repeat (like opengl texture) so that if the barycentric
+   * coordinate goes past the end of the triangle it is not always clamped
+   * to the same value, gives ugly patterns */
+  u /= max;
+  float fu = floorf(u);
+  u = u - fu;
+
+  return ((((int)fu) & 1) ? 1.0f - u : u) * max;
 }
 
 ccl_device_inline float3 kernel_bake_shader_bsdf(KernelGlobals *kg,
                                                  ShaderData *sd,
                                                  const ShaderEvalType type)
 {
-	switch(type) {
-		case SHADER_EVAL_DIFFUSE:
-			return shader_bsdf_diffuse(kg, sd);
-		case SHADER_EVAL_GLOSSY:
-			return shader_bsdf_glossy(kg, sd);
-		case SHADER_EVAL_TRANSMISSION:
-			return shader_bsdf_transmission(kg, sd);
-#ifdef __SUBSURFACE__
-		case SHADER_EVAL_SUBSURFACE:
-			return shader_bsdf_subsurface(kg, sd);
-#endif
-		default:
-			kernel_assert(!"Unknown bake type passed to BSDF evaluate");
-			return make_float3(0.0f, 0.0f, 0.0f);
-	}
+  switch (type) {
+    case SHADER_EVAL_DIFFUSE:
+      return shader_bsdf_diffuse(kg, sd);
+    case SHADER_EVAL_GLOSSY:
+      return shader_bsdf_glossy(kg, sd);
+    case SHADER_EVAL_TRANSMISSION:
+      return shader_bsdf_transmission(kg, sd);
+#  ifdef __SUBSURFACE__
+    case SHADER_EVAL_SUBSURFACE:
+      return shader_bsdf_subsurface(kg, sd);
+#  endif
+    default:
+      kernel_assert(!"Unknown bake type passed to BSDF evaluate");
+      return make_float3(0.0f, 0.0f, 0.0f);
+  }
 }
 
 ccl_device float3 kernel_bake_evaluate_direct_indirect(KernelGlobals *kg,
@@ -213,316 +194,301 @@ ccl_device float3 kernel_bake_evaluate_direct_indirect(KernelGlobals *kg,
                                                        const ShaderEvalType type,
                                                        const int pass_filter)
 {
-	float3 color;
-	const bool is_color = (pass_filter & BAKE_FILTER_COLOR) != 0;
-	const bool is_direct = (pass_filter & BAKE_FILTER_DIRECT) != 0;
-	const bool is_indirect = (pass_filter & BAKE_FILTER_INDIRECT) != 0;
-	float3 out = make_float3(0.0f, 0.0f, 0.0f);
-
-	if(is_color) {
-		if(is_direct || is_indirect) {
-			/* Leave direct and diffuse channel colored. */
-			color = make_float3(1.0f, 1.0f, 1.0f);
-		}
-		else {
-			/* surface color of the pass only */
-			shader_eval_surface(kg, sd, state, 0);
-			return kernel_bake_shader_bsdf(kg, sd, type);
-		}
-	}
-	else {
-		shader_eval_surface(kg, sd, state, 0);
-		color = kernel_bake_shader_bsdf(kg, sd, type);
-	}
-
-	if(is_direct) {
-		out += safe_divide_even_color(direct, color);
-	}
-
-	if(is_indirect) {
-		out += safe_divide_even_color(indirect, color);
-	}
-
-	return out;
+  float3 color;
+  const bool is_color = (pass_filter & BAKE_FILTER_COLOR) != 0;
+  const bool is_direct = (pass_filter & BAKE_FILTER_DIRECT) != 0;
+  const bool is_indirect = (pass_filter & BAKE_FILTER_INDIRECT) != 0;
+  float3 out = make_float3(0.0f, 0.0f, 0.0f);
+
+  if (is_color) {
+    if (is_direct || is_indirect) {
+      /* Leave direct and diffuse channel colored. */
+      color = make_float3(1.0f, 1.0f, 1.0f);
+    }
+    else {
+      /* surface color of the pass only */
+      shader_eval_surface(kg, sd, state, 0);
+      return kernel_bake_shader_bsdf(kg, sd, type);
+    }
+  }
+  else {
+    shader_eval_surface(kg, sd, state, 0);
+    color = kernel_bake_shader_bsdf(kg, sd, type);
+  }
+
+  if (is_direct) {
+    out += safe_divide_even_color(direct, color);
+  }
+
+  if (is_indirect) {
+    out += safe_divide_even_color(indirect, color);
+  }
+
+  return out;
 }
 
-ccl_device void kernel_bake_evaluate(KernelGlobals *kg, ccl_global uint4 *input, ccl_global float4 *output,
-                                     ShaderEvalType type, int pass_filter, int i, int offset, int sample)
+ccl_device void kernel_bake_evaluate(KernelGlobals *kg,
+                                     ccl_global uint4 *input,
+                                     ccl_global float4 *output,
+                                     ShaderEvalType type,
+                                     int pass_filter,
+                                     int i,
+                                     int offset,
+                                     int sample)
 {
-	ShaderData sd;
-	PathState state = {0};
-	uint4 in = input[i * 2];
-	uint4 diff = input[i * 2 + 1];
-
-	float3 out = make_float3(0.0f, 0.0f, 0.0f);
-
-	int object = in.x;
-	int prim = in.y;
-
-	if(prim == -1)
-		return;
-
-	float u = __uint_as_float(in.z);
-	float v = __uint_as_float(in.w);
-
-	float dudx = __uint_as_float(diff.x);
-	float dudy = __uint_as_float(diff.y);
-	float dvdx = __uint_as_float(diff.z);
-	float dvdy = __uint_as_float(diff.w);
-
-	int num_samples = kernel_data.integrator.aa_samples;
-
-	/* random number generator */
-	uint rng_hash = cmj_hash(offset + i, kernel_data.integrator.seed);
-
-	float filter_x, filter_y;
-	if(sample == 0) {
-		filter_x = filter_y = 0.5f;
-	}
-	else {
-		path_rng_2D(kg, rng_hash, sample, num_samples, PRNG_FILTER_U, &filter_x, &filter_y);
-	}
-
-	/* subpixel u/v offset */
-	if(sample > 0) {
-		u = bake_clamp_mirror_repeat(u + dudx*(filter_x - 0.5f) + dudy*(filter_y - 0.5f), 1.0f);
-		v = bake_clamp_mirror_repeat(v + dvdx*(filter_x - 0.5f) + dvdy*(filter_y - 0.5f), 1.0f - u);
-	}
-
-	/* triangle */
-	int shader;
-	float3 P, Ng;
-
-	triangle_point_normal(kg, object, prim, u, v, &P, &Ng, &shader);
-
-	/* light passes */
-	PathRadiance L;
-	path_radiance_init(&L, kernel_data.film.use_light_pass);
-
-	shader_setup_from_sample(kg, &sd,
-	                         P, Ng, Ng,
-	                         shader, object, prim,
-	                         u, v, 1.0f, 0.5f,
-	                         !(kernel_tex_fetch(__object_flag, object) & SD_OBJECT_TRANSFORM_APPLIED),
-	                         LAMP_NONE);
-	sd.I = sd.N;
-
-	/* update differentials */
-	sd.dP.dx = sd.dPdu * dudx + sd.dPdv * dvdx;
-	sd.dP.dy = sd.dPdu * dudy + sd.dPdv * dvdy;
-	sd.du.dx = dudx;
-	sd.du.dy = dudy;
-	sd.dv.dx = dvdx;
-	sd.dv.dy = dvdy;
-
-	/* set RNG state for shaders that use sampling */
-	state.rng_hash = rng_hash;
-	state.rng_offset = 0;
-	state.sample = sample;
-	state.num_samples = num_samples;
-	state.min_ray_pdf = FLT_MAX;
-
-	/* light passes if we need more than color */
-	if(pass_filter & ~BAKE_FILTER_COLOR)
-		compute_light_pass(kg, &sd, &L, rng_hash, pass_filter, sample);
-
-	switch(type) {
-		/* data passes */
-		case SHADER_EVAL_NORMAL:
-		case SHADER_EVAL_ROUGHNESS:
-		case SHADER_EVAL_EMISSION:
-		{
-			if(type != SHADER_EVAL_NORMAL || (sd.flag & SD_HAS_BUMP)) {
-				int path_flag = (type == SHADER_EVAL_EMISSION) ? PATH_RAY_EMISSION : 0;
-				shader_eval_surface(kg, &sd, &state, path_flag);
-			}
-
-			if(type == SHADER_EVAL_NORMAL) {
-				float3 N = sd.N;
-				if(sd.flag & SD_HAS_BUMP) {
-					N = shader_bsdf_average_normal(kg, &sd);
-				}
-
-				/* encoding: normal = (2 * color) - 1 */
-				out = N * 0.5f + make_float3(0.5f, 0.5f, 0.5f);
-			}
-			else if(type == SHADER_EVAL_ROUGHNESS) {
-				float roughness = shader_bsdf_average_roughness(&sd);
-				out = make_float3(roughness, roughness, roughness);
-			}
-			else {
-				out = shader_emissive_eval(&sd);
-			}
-			break;
-		}
-		case SHADER_EVAL_UV:
-		{
-			out = primitive_uv(kg, &sd);
-			break;
-		}
-#ifdef __PASSES__
-		/* light passes */
-		case SHADER_EVAL_AO:
-		{
-			out = L.ao;
-			break;
-		}
-		case SHADER_EVAL_COMBINED:
-		{
-			if((pass_filter & BAKE_FILTER_COMBINED) == BAKE_FILTER_COMBINED) {
-				float alpha;
-				out = path_radiance_clamp_and_sum(kg, &L, &alpha);
-				break;
-			}
-
-			if((pass_filter & BAKE_FILTER_DIFFUSE_DIRECT) == BAKE_FILTER_DIFFUSE_DIRECT)
-				out += L.direct_diffuse;
-			if((pass_filter & BAKE_FILTER_DIFFUSE_INDIRECT) == BAKE_FILTER_DIFFUSE_INDIRECT)
-				out += L.indirect_diffuse;
-
-			if((pass_filter & BAKE_FILTER_GLOSSY_DIRECT) == BAKE_FILTER_GLOSSY_DIRECT)
-				out += L.direct_glossy;
-			if((pass_filter & BAKE_FILTER_GLOSSY_INDIRECT) == BAKE_FILTER_GLOSSY_INDIRECT)
-				out += L.indirect_glossy;
-
-			if((pass_filter & BAKE_FILTER_TRANSMISSION_DIRECT) == BAKE_FILTER_TRANSMISSION_DIRECT)
-				out += L.direct_transmission;
-			if((pass_filter & BAKE_FILTER_TRANSMISSION_INDIRECT) == BAKE_FILTER_TRANSMISSION_INDIRECT)
-				out += L.indirect_transmission;
-
-			if((pass_filter & BAKE_FILTER_SUBSURFACE_DIRECT) == BAKE_FILTER_SUBSURFACE_DIRECT)
-				out += L.direct_subsurface;
-			if((pass_filter & BAKE_FILTER_SUBSURFACE_INDIRECT) == BAKE_FILTER_SUBSURFACE_INDIRECT)
-				out += L.indirect_subsurface;
-
-			if((pass_filter & BAKE_FILTER_EMISSION) != 0)
-				out += L.emission;
-
-			break;
-		}
-		case SHADER_EVAL_SHADOW:
-		{
-			out = make_float3(L.shadow.x, L.shadow.y, L.shadow.z);
-			break;
-		}
-		case SHADER_EVAL_DIFFUSE:
-		{
-			out = kernel_bake_evaluate_direct_indirect(kg,
-			                                           &sd,
-			                                           &state,
-			                                           L.direct_diffuse,
-			                                           L.indirect_diffuse,
-			                                           type,
-			                                           pass_filter);
-			break;
-		}
-		case SHADER_EVAL_GLOSSY:
-		{
-			out = kernel_bake_evaluate_direct_indirect(kg,
-			                                           &sd,
-			                                           &state,
-			                                           L.direct_glossy,
-			                                           L.indirect_glossy,
-			                                           type,
-			                                           pass_filter);
-			break;
-		}
-		case SHADER_EVAL_TRANSMISSION:
-		{
-			out = kernel_bake_evaluate_direct_indirect(kg,
-			                                           &sd,
-			                                           &state,
-			                                           L.direct_transmission,
-			                                           L.indirect_transmission,
-			                                           type,
-			                                           pass_filter);
-			break;
-		}
-		case SHADER_EVAL_SUBSURFACE:
-		{
-#ifdef __SUBSURFACE__
-			out = kernel_bake_evaluate_direct_indirect(kg,
-			                                           &sd,
-			                                           &state,
-			                                           L.direct_subsurface,
-			                                           L.indirect_subsurface,
-			                                           type,
-			                                           pass_filter);
-#endif
-			break;
-		}
-#endif
-
-		/* extra */
-		case SHADER_EVAL_ENVIRONMENT:
-		{
-			/* setup ray */
-			Ray ray;
-
-			ray.P = make_float3(0.0f, 0.0f, 0.0f);
-			ray.D = normalize(P);
-			ray.t = 0.0f;
-#ifdef __CAMERA_MOTION__
-			ray.time = 0.5f;
-#endif
-
-#ifdef __RAY_DIFFERENTIALS__
-			ray.dD = differential3_zero();
-			ray.dP = differential3_zero();
-#endif
-
-			/* setup shader data */
-			shader_setup_from_background(kg, &sd, &ray);
-
-			/* evaluate */
-			int path_flag = 0; /* we can't know which type of BSDF this is for */
-			shader_eval_surface(kg, &sd, &state, path_flag | PATH_RAY_EMISSION);
-			out = shader_background_eval(&sd);
-			break;
-		}
-		default:
-		{
-			/* no real shader, returning the position of the verts for debugging */
-			out = normalize(P);
-			break;
-		}
-	}
-
-	/* write output */
-	const float output_fac = 1.0f/num_samples;
-	const float4 scaled_result = make_float4(out.x, out.y, out.z, 1.0f) * output_fac;
-
-	output[i] = (sample == 0)? scaled_result: output[i] + scaled_result;
+  ShaderData sd;
+  PathState state = {0};
+  uint4 in = input[i * 2];
+  uint4 diff = input[i * 2 + 1];
+
+  float3 out = make_float3(0.0f, 0.0f, 0.0f);
+
+  int object = in.x;
+  int prim = in.y;
+
+  if (prim == -1)
+    return;
+
+  float u = __uint_as_float(in.z);
+  float v = __uint_as_float(in.w);
+
+  float dudx = __uint_as_float(diff.x);
+  float dudy = __uint_as_float(diff.y);
+  float dvdx = __uint_as_float(diff.z);
+  float dvdy = __uint_as_float(diff.w);
+
+  int num_samples = kernel_data.integrator.aa_samples;
+
+  /* random number generator */
+  uint rng_hash = cmj_hash(offset + i, kernel_data.integrator.seed);
+
+  float filter_x, filter_y;
+  if (sample == 0) {
+    filter_x = filter_y = 0.5f;
+  }
+  else {
+    path_rng_2D(kg, rng_hash, sample, num_samples, PRNG_FILTER_U, &filter_x, &filter_y);
+  }
+
+  /* subpixel u/v offset */
+  if (sample > 0) {
+    u = bake_clamp_mirror_repeat(u + dudx * (filter_x - 0.5f) + dudy * (filter_y - 0.5f), 1.0f);
+    v = bake_clamp_mirror_repeat(v + dvdx * (filter_x - 0.5f) + dvdy * (filter_y - 0.5f),
+                                 1.0f - u);
+  }
+
+  /* triangle */
+  int shader;
+  float3 P, Ng;
+
+  triangle_point_normal(kg, object, prim, u, v, &P, &Ng, &shader);
+
+  /* light passes */
+  PathRadiance L;
+  path_radiance_init(&L, kernel_data.film.use_light_pass);
+
+  shader_setup_from_sample(
+      kg,
+      &sd,
+      P,
+      Ng,
+      Ng,
+      shader,
+      object,
+      prim,
+      u,
+      v,
+      1.0f,
+      0.5f,
+      !(kernel_tex_fetch(__object_flag, object) & SD_OBJECT_TRANSFORM_APPLIED),
+      LAMP_NONE);
+  sd.I = sd.N;
+
+  /* update differentials */
+  sd.dP.dx = sd.dPdu * dudx + sd.dPdv * dvdx;
+  sd.dP.dy = sd.dPdu * dudy + sd.dPdv * dvdy;
+  sd.du.dx = dudx;
+  sd.du.dy = dudy;
+  sd.dv.dx = dvdx;
+  sd.dv.dy = dvdy;
+
+  /* set RNG state for shaders that use sampling */
+  state.rng_hash = rng_hash;
+  state.rng_offset = 0;
+  state.sample = sample;
+  state.num_samples = num_samples;
+  state.min_ray_pdf = FLT_MAX;
+
+  /* light passes if we need more than color */
+  if (pass_filter & ~BAKE_FILTER_COLOR)
+    compute_light_pass(kg, &sd, &L, rng_hash, pass_filter, sample);
+
+  switch (type) {
+    /* data passes */
+    case SHADER_EVAL_NORMAL:
+    case SHADER_EVAL_ROUGHNESS:
+    case SHADER_EVAL_EMISSION: {
+      if (type != SHADER_EVAL_NORMAL || (sd.flag & SD_HAS_BUMP)) {
+        int path_flag = (type == SHADER_EVAL_EMISSION) ? PATH_RAY_EMISSION : 0;
+        shader_eval_surface(kg, &sd, &state, path_flag);
+      }
+
+      if (type == SHADER_EVAL_NORMAL) {
+        float3 N = sd.N;
+        if (sd.flag & SD_HAS_BUMP) {
+          N = shader_bsdf_average_normal(kg, &sd);
+        }
+
+        /* encoding: normal = (2 * color) - 1 */
+        out = N * 0.5f + make_float3(0.5f, 0.5f, 0.5f);
+      }
+      else if (type == SHADER_EVAL_ROUGHNESS) {
+        float roughness = shader_bsdf_average_roughness(&sd);
+        out = make_float3(roughness, roughness, roughness);
+      }
+      else {
+        out = shader_emissive_eval(&sd);
+      }
+      break;
+    }
+    case SHADER_EVAL_UV: {
+      out = primitive_uv(kg, &sd);
+      break;
+    }
+#  ifdef __PASSES__
+    /* light passes */
+    case SHADER_EVAL_AO: {
+      out = L.ao;
+      break;
+    }
+    case SHADER_EVAL_COMBINED: {
+      if ((pass_filter & BAKE_FILTER_COMBINED) == BAKE_FILTER_COMBINED) {
+        float alpha;
+        out = path_radiance_clamp_and_sum(kg, &L, &alpha);
+        break;
+      }
+
+      if ((pass_filter & BAKE_FILTER_DIFFUSE_DIRECT) == BAKE_FILTER_DIFFUSE_DIRECT)
+        out += L.direct_diffuse;
+      if ((pass_filter & BAKE_FILTER_DIFFUSE_INDIRECT) == BAKE_FILTER_DIFFUSE_INDIRECT)
+        out += L.indirect_diffuse;
+
+      if ((pass_filter & BAKE_FILTER_GLOSSY_DIRECT) == BAKE_FILTER_GLOSSY_DIRECT)
+        out += L.direct_glossy;
+      if ((pass_filter & BAKE_FILTER_GLOSSY_INDIRECT) == BAKE_FILTER_GLOSSY_INDIRECT)
+        out += L.indirect_glossy;
+
+      if ((pass_filter & BAKE_FILTER_TRANSMISSION_DIRECT) == BAKE_FILTER_TRANSMISSION_DIRECT)
+        out += L.direct_transmission;
+      if ((pass_filter & BAKE_FILTER_TRANSMISSION_INDIRECT) == BAKE_FILTER_TRANSMISSION_INDIRECT)
+        out += L.indirect_transmission;
+
+      if ((pass_filter & BAKE_FILTER_SUBSURFACE_DIRECT) == BAKE_FILTER_SUBSURFACE_DIRECT)
+        out += L.direct_subsurface;
+      if ((pass_filter & BAKE_FILTER_SUBSURFACE_INDIRECT) == BAKE_FILTER_SUBSURFACE_INDIRECT)
+        out += L.indirect_subsurface;
+
+      if ((pass_filter & BAKE_FILTER_EMISSION) != 0)
+        out += L.emission;
+
+      break;
+    }
+    case SHADER_EVAL_SHADOW: {
+      out = make_float3(L.shadow.x, L.shadow.y, L.shadow.z);
+      break;
+    }
+    case SHADER_EVAL_DIFFUSE: {
+      out = kernel_bake_evaluate_direct_indirect(
+          kg, &sd, &state, L.direct_diffuse, L.indirect_diffuse, type, pass_filter);
+      break;
+    }
+    case SHADER_EVAL_GLOSSY: {
+      out = kernel_bake_evaluate_direct_indirect(
+          kg, &sd, &state, L.direct_glossy, L.indirect_glossy, type, pass_filter);
+      break;
+    }
+    case SHADER_EVAL_TRANSMISSION: {
+      out = kernel_bake_evaluate_direct_indirect(
+          kg, &sd, &state, L.direct_transmission, L.indirect_transmission, type, pass_filter);
+      break;
+    }
+    case SHADER_EVAL_SUBSURFACE: {
+#    ifdef __SUBSURFACE__
+      out = kernel_bake_evaluate_direct_indirect(
+          kg, &sd, &state, L.direct_subsurface, L.indirect_subsurface, type, pass_filter);
+#    endif
+      break;
+    }
+#  endif
+
+    /* extra */
+    case SHADER_EVAL_ENVIRONMENT: {
+      /* setup ray */
+      Ray ray;
+
+      ray.P = make_float3(0.0f, 0.0f, 0.0f);
+      ray.D = normalize(P);
+      ray.t = 0.0f;
+#  ifdef __CAMERA_MOTION__
+      ray.time = 0.5f;
+#  endif
+
+#  ifdef __RAY_DIFFERENTIALS__
+      ray.dD = differential3_zero();
+      ray.dP = differential3_zero();
+#  endif
+
+      /* setup shader data */
+      shader_setup_from_background(kg, &sd, &ray);
+
+      /* evaluate */
+      int path_flag = 0; /* we can't know which type of BSDF this is for */
+      shader_eval_surface(kg, &sd, &state, path_flag | PATH_RAY_EMISSION);
+      out = shader_background_eval(&sd);
+      break;
+    }
+    default: {
+      /* no real shader, returning the position of the verts for debugging */
+      out = normalize(P);
+      break;
+    }
+  }
+
+  /* write output */
+  const float output_fac = 1.0f / num_samples;
+  const float4 scaled_result = make_float4(out.x, out.y, out.z, 1.0f) * output_fac;
+
+  output[i] = (sample == 0) ? scaled_result : output[i] + scaled_result;
 }
 
-#endif  /* __BAKING__ */
+#endif /* __BAKING__ */
 
 ccl_device void kernel_displace_evaluate(KernelGlobals *kg,
                                          ccl_global uint4 *input,
                                          ccl_global float4 *output,
                                          int i)
 {
-	ShaderData sd;
-	PathState state = {0};
-	uint4 in = input[i];
+  ShaderData sd;
+  PathState state = {0};
+  uint4 in = input[i];
 
-	/* setup shader data */
-	int object = in.x;
-	int prim = in.y;
-	float u = __uint_as_float(in.z);
-	float v = __uint_as_float(in.w);
+  /* setup shader data */
+  int object = in.x;
+  int prim = in.y;
+  float u = __uint_as_float(in.z);
+  float v = __uint_as_float(in.w);
 
-	shader_setup_from_displace(kg, &sd, object, prim, u, v);
+  shader_setup_from_displace(kg, &sd, object, prim, u, v);
 
-	/* evaluate */
-	float3 P = sd.P;
-	shader_eval_displacement(kg, &sd, &state);
-	float3 D = sd.P - P;
+  /* evaluate */
+  float3 P = sd.P;
+  shader_eval_displacement(kg, &sd, &state);
+  float3 D = sd.P - P;
 
-	object_inverse_dir_transform(kg, &sd, &D);
+  object_inverse_dir_transform(kg, &sd, &D);
 
-	/* write output */
-	output[i] += make_float4(D.x, D.y, D.z, 0.0f);
+  /* write output */
+  output[i] += make_float4(D.x, D.y, D.z, 0.0f);
 }
 
 ccl_device void kernel_background_evaluate(KernelGlobals *kg,
@@ -530,37 +496,37 @@ ccl_device void kernel_background_evaluate(KernelGlobals *kg,
                                            ccl_global float4 *output,
                                            int i)
 {
-	ShaderData sd;
-	PathState state = {0};
-	uint4 in = input[i];
-
-	/* setup ray */
-	Ray ray;
-	float u = __uint_as_float(in.x);
-	float v = __uint_as_float(in.y);
-
-	ray.P = make_float3(0.0f, 0.0f, 0.0f);
-	ray.D = equirectangular_to_direction(u, v);
-	ray.t = 0.0f;
+  ShaderData sd;
+  PathState state = {0};
+  uint4 in = input[i];
+
+  /* setup ray */
+  Ray ray;
+  float u = __uint_as_float(in.x);
+  float v = __uint_as_float(in.y);
+
+  ray.P = make_float3(0.0f, 0.0f, 0.0f);
+  ray.D = equirectangular_to_direction(u, v);
+  ray.t = 0.0f;
 #ifdef __CAMERA_MOTION__
-	ray.time = 0.5f;
+  ray.time = 0.5f;
 #endif
 
 #ifdef __RAY_DIFFERENTIALS__
-	ray.dD = differential3_zero();
-	ray.dP = differential3_zero();
+  ray.dD = differential3_zero();
+  ray.dP = differential3_zero();
 #endif
 
-	/* setup shader data */
-	shader_setup_from_background(kg, &sd, &ray);
+  /* setup shader data */
+  shader_setup_from_background(kg, &sd, &ray);
 
-	/* evaluate */
-	int path_flag = 0; /* we can't know which type of BSDF this is for */
-	shader_eval_surface(kg, &sd, &state, path_flag | PATH_RAY_EMISSION);
-	float3 color = shader_background_eval(&sd);
+  /* evaluate */
+  int path_flag = 0; /* we can't know which type of BSDF this is for */
+  shader_eval_surface(kg, &sd, &state, path_flag | PATH_RAY_EMISSION);
+  float3 color = shader_background_eval(&sd);
 
-	/* write output */
-	output[i] += make_float4(color.x, color.y, color.z, 0.0f);
+  /* write output */
+  output[i] += make_float4(color.x, color.y, color.z, 0.0f);
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_camera.h b/intern/cycles/kernel/kernel_camera.h
index b73ad47dad3..1085930c33a 100644
--- a/intern/cycles/kernel/kernel_camera.h
+++ b/intern/cycles/kernel/kernel_camera.h
@@ -20,209 +20,217 @@ CCL_NAMESPACE_BEGIN
 
 ccl_device float2 camera_sample_aperture(ccl_constant KernelCamera *cam, float u, float v)
 {
-	float blades = cam->blades;
-	float2 bokeh;
-
-	if(blades == 0.0f) {
-		/* sample disk */
-		bokeh = concentric_sample_disk(u, v);
-	}
-	else {
-		/* sample polygon */
-		float rotation = cam->bladesrotation;
-		bokeh = regular_polygon_sample(blades, rotation, u, v);
-	}
-
-	/* anamorphic lens bokeh */
-	bokeh.x *= cam->inv_aperture_ratio;
-
-	return bokeh;
+  float blades = cam->blades;
+  float2 bokeh;
+
+  if (blades == 0.0f) {
+    /* sample disk */
+    bokeh = concentric_sample_disk(u, v);
+  }
+  else {
+    /* sample polygon */
+    float rotation = cam->bladesrotation;
+    bokeh = regular_polygon_sample(blades, rotation, u, v);
+  }
+
+  /* anamorphic lens bokeh */
+  bokeh.x *= cam->inv_aperture_ratio;
+
+  return bokeh;
 }
 
-ccl_device void camera_sample_perspective(KernelGlobals *kg, float raster_x, float raster_y, float lens_u, float lens_v, ccl_addr_space Ray *ray)
+ccl_device void camera_sample_perspective(KernelGlobals *kg,
+                                          float raster_x,
+                                          float raster_y,
+                                          float lens_u,
+                                          float lens_v,
+                                          ccl_addr_space Ray *ray)
 {
-	/* create ray form raster position */
-	ProjectionTransform rastertocamera = kernel_data.cam.rastertocamera;
-	float3 raster = make_float3(raster_x, raster_y, 0.0f);
-	float3 Pcamera = transform_perspective(&rastertocamera, raster);
+  /* create ray from raster position */
+  ProjectionTransform rastertocamera = kernel_data.cam.rastertocamera;
+  float3 raster = make_float3(raster_x, raster_y, 0.0f);
+  float3 Pcamera = transform_perspective(&rastertocamera, raster);
 
 #ifdef __CAMERA_MOTION__
-	if(kernel_data.cam.have_perspective_motion) {
-		/* TODO(sergey): Currently we interpolate projected coordinate which
-		 * gives nice looking result and which is simple, but is in fact a bit
-		 * different comparing to constructing projective matrix from an
-		 * interpolated field of view.
-		 */
-		if(ray->time < 0.5f) {
-			ProjectionTransform rastertocamera_pre = kernel_data.cam.perspective_pre;
-			float3 Pcamera_pre =
-			        transform_perspective(&rastertocamera_pre, raster);
-			Pcamera = interp(Pcamera_pre, Pcamera, ray->time * 2.0f);
-		}
-		else {
-			ProjectionTransform rastertocamera_post = kernel_data.cam.perspective_post;
-			float3 Pcamera_post =
-			        transform_perspective(&rastertocamera_post, raster);
-			Pcamera = interp(Pcamera, Pcamera_post, (ray->time - 0.5f) * 2.0f);
-		}
-	}
+  if (kernel_data.cam.have_perspective_motion) {
+    /* TODO(sergey): Currently we interpolate projected coordinate which
+     * gives nice looking result and which is simple, but is in fact a bit
+     * different compared to constructing projective matrix from an
+     * interpolated field of view.
+     */
+    if (ray->time < 0.5f) {
+      ProjectionTransform rastertocamera_pre = kernel_data.cam.perspective_pre;
+      float3 Pcamera_pre = transform_perspective(&rastertocamera_pre, raster);
+      Pcamera = interp(Pcamera_pre, Pcamera, ray->time * 2.0f);
+    }
+    else {
+      ProjectionTransform rastertocamera_post = kernel_data.cam.perspective_post;
+      float3 Pcamera_post = transform_perspective(&rastertocamera_post, raster);
+      Pcamera = interp(Pcamera, Pcamera_post, (ray->time - 0.5f) * 2.0f);
+    }
+  }
 #endif
 
-	float3 P = make_float3(0.0f, 0.0f, 0.0f);
-	float3 D = Pcamera;
+  float3 P = make_float3(0.0f, 0.0f, 0.0f);
+  float3 D = Pcamera;
 
-	/* modify ray for depth of field */
-	float aperturesize = kernel_data.cam.aperturesize;
+  /* modify ray for depth of field */
+  float aperturesize = kernel_data.cam.aperturesize;
 
-	if(aperturesize > 0.0f) {
-		/* sample point on aperture */
-		float2 lensuv = camera_sample_aperture(&kernel_data.cam, lens_u, lens_v)*aperturesize;
+  if (aperturesize > 0.0f) {
+    /* sample point on aperture */
+    float2 lensuv = camera_sample_aperture(&kernel_data.cam, lens_u, lens_v) * aperturesize;
 
-		/* compute point on plane of focus */
-		float ft = kernel_data.cam.focaldistance/D.z;
-		float3 Pfocus = D*ft;
+    /* compute point on plane of focus */
+    float ft = kernel_data.cam.focaldistance / D.z;
+    float3 Pfocus = D * ft;
 
-		/* update ray for effect of lens */
-		P = make_float3(lensuv.x, lensuv.y, 0.0f);
-		D = normalize(Pfocus - P);
-	}
+    /* update ray for effect of lens */
+    P = make_float3(lensuv.x, lensuv.y, 0.0f);
+    D = normalize(Pfocus - P);
+  }
 
-	/* transform ray from camera to world */
-	Transform cameratoworld = kernel_data.cam.cameratoworld;
+  /* transform ray from camera to world */
+  Transform cameratoworld = kernel_data.cam.cameratoworld;
 
 #ifdef __CAMERA_MOTION__
-	if(kernel_data.cam.num_motion_steps) {
-		transform_motion_array_interpolate(
-			&cameratoworld,
-			kernel_tex_array(__camera_motion),
-			kernel_data.cam.num_motion_steps,
-			ray->time);
-	}
+  if (kernel_data.cam.num_motion_steps) {
+    transform_motion_array_interpolate(&cameratoworld,
+                                       kernel_tex_array(__camera_motion),
+                                       kernel_data.cam.num_motion_steps,
+                                       ray->time);
+  }
 #endif
 
-	P = transform_point(&cameratoworld, P);
-	D = normalize(transform_direction(&cameratoworld, D));
+  P = transform_point(&cameratoworld, P);
+  D = normalize(transform_direction(&cameratoworld, D));
 
-	bool use_stereo = kernel_data.cam.interocular_offset != 0.0f;
-	if(!use_stereo) {
-		/* No stereo */
-		ray->P = P;
-		ray->D = D;
+  bool use_stereo = kernel_data.cam.interocular_offset != 0.0f;
+  if (!use_stereo) {
+    /* No stereo */
+    ray->P = P;
+    ray->D = D;
 
 #ifdef __RAY_DIFFERENTIALS__
-		float3 Dcenter = transform_direction(&cameratoworld, Pcamera);
+    float3 Dcenter = transform_direction(&cameratoworld, Pcamera);
 
-		ray->dP = differential3_zero();
-		ray->dD.dx = normalize(Dcenter + float4_to_float3(kernel_data.cam.dx)) - normalize(Dcenter);
-		ray->dD.dy = normalize(Dcenter + float4_to_float3(kernel_data.cam.dy)) - normalize(Dcenter);
+    ray->dP = differential3_zero();
+    ray->dD.dx = normalize(Dcenter + float4_to_float3(kernel_data.cam.dx)) - normalize(Dcenter);
+    ray->dD.dy = normalize(Dcenter + float4_to_float3(kernel_data.cam.dy)) - normalize(Dcenter);
 #endif
-	}
-	else {
-		/* Spherical stereo */
-		spherical_stereo_transform(&kernel_data.cam, &P, &D);
-		ray->P = P;
-		ray->D = D;
+  }
+  else {
+    /* Spherical stereo */
+    spherical_stereo_transform(&kernel_data.cam, &P, &D);
+    ray->P = P;
+    ray->D = D;
 
 #ifdef __RAY_DIFFERENTIALS__
-		/* Ray differentials, computed from scratch using the raster coordinates
-		 * because we don't want to be affected by depth of field. We compute
-		 * ray origin and direction for the center and two neighbouring pixels
-		 * and simply take their differences. */
-		float3 Pnostereo = transform_point(&cameratoworld, make_float3(0.0f, 0.0f, 0.0f));
-
-		float3 Pcenter = Pnostereo;
-		float3 Dcenter = Pcamera;
-		Dcenter = normalize(transform_direction(&cameratoworld, Dcenter));
-		spherical_stereo_transform(&kernel_data.cam, &Pcenter, &Dcenter);
-
-		float3 Px = Pnostereo;
-		float3 Dx = transform_perspective(&rastertocamera, make_float3(raster_x + 1.0f, raster_y, 0.0f));
-		Dx = normalize(transform_direction(&cameratoworld, Dx));
-		spherical_stereo_transform(&kernel_data.cam, &Px, &Dx);
-
-		ray->dP.dx = Px - Pcenter;
-		ray->dD.dx = Dx - Dcenter;
-
-		float3 Py = Pnostereo;
-		float3 Dy = transform_perspective(&rastertocamera, make_float3(raster_x, raster_y + 1.0f, 0.0f));
-		Dy = normalize(transform_direction(&cameratoworld, Dy));
-		spherical_stereo_transform(&kernel_data.cam, &Py, &Dy);
-
-		ray->dP.dy = Py - Pcenter;
-		ray->dD.dy = Dy - Dcenter;
+    /* Ray differentials, computed from scratch using the raster coordinates
+     * because we don't want to be affected by depth of field. We compute
+     * ray origin and direction for the center and two neighbouring pixels
+     * and simply take their differences. */
+    float3 Pnostereo = transform_point(&cameratoworld, make_float3(0.0f, 0.0f, 0.0f));
+
+    float3 Pcenter = Pnostereo;
+    float3 Dcenter = Pcamera;
+    Dcenter = normalize(transform_direction(&cameratoworld, Dcenter));
+    spherical_stereo_transform(&kernel_data.cam, &Pcenter, &Dcenter);
+
+    float3 Px = Pnostereo;
+    float3 Dx = transform_perspective(&rastertocamera,
+                                      make_float3(raster_x + 1.0f, raster_y, 0.0f));
+    Dx = normalize(transform_direction(&cameratoworld, Dx));
+    spherical_stereo_transform(&kernel_data.cam, &Px, &Dx);
+
+    ray->dP.dx = Px - Pcenter;
+    ray->dD.dx = Dx - Dcenter;
+
+    float3 Py = Pnostereo;
+    float3 Dy = transform_perspective(&rastertocamera,
+                                      make_float3(raster_x, raster_y + 1.0f, 0.0f));
+    Dy = normalize(transform_direction(&cameratoworld, Dy));
+    spherical_stereo_transform(&kernel_data.cam, &Py, &Dy);
+
+    ray->dP.dy = Py - Pcenter;
+    ray->dD.dy = Dy - Dcenter;
 #endif
-	}
+  }
 
 #ifdef __CAMERA_CLIPPING__
-	/* clipping */
-	float z_inv = 1.0f / normalize(Pcamera).z;
-	float nearclip = kernel_data.cam.nearclip * z_inv;
-	ray->P += nearclip * ray->D;
-	ray->dP.dx += nearclip * ray->dD.dx;
-	ray->dP.dy += nearclip * ray->dD.dy;
-	ray->t = kernel_data.cam.cliplength * z_inv;
+  /* clipping */
+  float z_inv = 1.0f / normalize(Pcamera).z;
+  float nearclip = kernel_data.cam.nearclip * z_inv;
+  ray->P += nearclip * ray->D;
+  ray->dP.dx += nearclip * ray->dD.dx;
+  ray->dP.dy += nearclip * ray->dD.dy;
+  ray->t = kernel_data.cam.cliplength * z_inv;
 #else
-	ray->t = FLT_MAX;
+  ray->t = FLT_MAX;
 #endif
 }
 
 /* Orthographic Camera */
-ccl_device void camera_sample_orthographic(KernelGlobals *kg, float raster_x, float raster_y, float lens_u, float lens_v, ccl_addr_space Ray *ray)
+ccl_device void camera_sample_orthographic(KernelGlobals *kg,
+                                           float raster_x,
+                                           float raster_y,
+                                           float lens_u,
+                                           float lens_v,
+                                           ccl_addr_space Ray *ray)
 {
-	/* create ray form raster position */
-	ProjectionTransform rastertocamera = kernel_data.cam.rastertocamera;
-	float3 Pcamera = transform_perspective(&rastertocamera, make_float3(raster_x, raster_y, 0.0f));
-
-	float3 P;
-	float3 D = make_float3(0.0f, 0.0f, 1.0f);
-
-	/* modify ray for depth of field */
-	float aperturesize = kernel_data.cam.aperturesize;
-
-	if(aperturesize > 0.0f) {
-		/* sample point on aperture */
-		float2 lensuv = camera_sample_aperture(&kernel_data.cam, lens_u, lens_v)*aperturesize;
-
-		/* compute point on plane of focus */
-		float3 Pfocus = D * kernel_data.cam.focaldistance;
-
-		/* update ray for effect of lens */
-		float3 lensuvw = make_float3(lensuv.x, lensuv.y, 0.0f);
-		P = Pcamera + lensuvw;
-		D = normalize(Pfocus - lensuvw);
-	}
-	else {
-		P = Pcamera;
-	}
-	/* transform ray from camera to world */
-	Transform cameratoworld = kernel_data.cam.cameratoworld;
+  /* create ray from raster position */
+  ProjectionTransform rastertocamera = kernel_data.cam.rastertocamera;
+  float3 Pcamera = transform_perspective(&rastertocamera, make_float3(raster_x, raster_y, 0.0f));
+
+  float3 P;
+  float3 D = make_float3(0.0f, 0.0f, 1.0f);
+
+  /* modify ray for depth of field */
+  float aperturesize = kernel_data.cam.aperturesize;
+
+  if (aperturesize > 0.0f) {
+    /* sample point on aperture */
+    float2 lensuv = camera_sample_aperture(&kernel_data.cam, lens_u, lens_v) * aperturesize;
+
+    /* compute point on plane of focus */
+    float3 Pfocus = D * kernel_data.cam.focaldistance;
+
+    /* update ray for effect of lens */
+    float3 lensuvw = make_float3(lensuv.x, lensuv.y, 0.0f);
+    P = Pcamera + lensuvw;
+    D = normalize(Pfocus - lensuvw);
+  }
+  else {
+    P = Pcamera;
+  }
+  /* transform ray from camera to world */
+  Transform cameratoworld = kernel_data.cam.cameratoworld;
 
 #ifdef __CAMERA_MOTION__
-	if(kernel_data.cam.num_motion_steps) {
-		transform_motion_array_interpolate(
-			&cameratoworld,
-			kernel_tex_array(__camera_motion),
-			kernel_data.cam.num_motion_steps,
-			ray->time);
-	}
+  if (kernel_data.cam.num_motion_steps) {
+    transform_motion_array_interpolate(&cameratoworld,
+                                       kernel_tex_array(__camera_motion),
+                                       kernel_data.cam.num_motion_steps,
+                                       ray->time);
+  }
 #endif
 
-	ray->P = transform_point(&cameratoworld, P);
-	ray->D = normalize(transform_direction(&cameratoworld, D));
+  ray->P = transform_point(&cameratoworld, P);
+  ray->D = normalize(transform_direction(&cameratoworld, D));
 
 #ifdef __RAY_DIFFERENTIALS__
-	/* ray differential */
-	ray->dP.dx = float4_to_float3(kernel_data.cam.dx);
-	ray->dP.dy = float4_to_float3(kernel_data.cam.dy);
+  /* ray differential */
+  ray->dP.dx = float4_to_float3(kernel_data.cam.dx);
+  ray->dP.dy = float4_to_float3(kernel_data.cam.dy);
 
-	ray->dD = differential3_zero();
+  ray->dD = differential3_zero();
 #endif
 
 #ifdef __CAMERA_CLIPPING__
-	/* clipping */
-	ray->t = kernel_data.cam.cliplength;
+  /* clipping */
+  ray->t = kernel_data.cam.cliplength;
 #else
-	ray->t = FLT_MAX;
+  ray->t = FLT_MAX;
 #endif
 }
 
@@ -230,242 +238,244 @@ ccl_device void camera_sample_orthographic(KernelGlobals *kg, float raster_x, fl
 
 ccl_device_inline void camera_sample_panorama(ccl_constant KernelCamera *cam,
                                               const ccl_global DecomposedTransform *cam_motion,
-                                              float raster_x, float raster_y,
-                                              float lens_u, float lens_v,
+                                              float raster_x,
+                                              float raster_y,
+                                              float lens_u,
+                                              float lens_v,
                                               ccl_addr_space Ray *ray)
 {
-	ProjectionTransform rastertocamera = cam->rastertocamera;
-	float3 Pcamera = transform_perspective(&rastertocamera, make_float3(raster_x, raster_y, 0.0f));
+  ProjectionTransform rastertocamera = cam->rastertocamera;
+  float3 Pcamera = transform_perspective(&rastertocamera, make_float3(raster_x, raster_y, 0.0f));
 
-	/* create ray form raster position */
-	float3 P = make_float3(0.0f, 0.0f, 0.0f);
-	float3 D = panorama_to_direction(cam, Pcamera.x, Pcamera.y);
+  /* create ray from raster position */
+  float3 P = make_float3(0.0f, 0.0f, 0.0f);
+  float3 D = panorama_to_direction(cam, Pcamera.x, Pcamera.y);
 
-	/* indicates ray should not receive any light, outside of the lens */
-	if(is_zero(D)) {
-		ray->t = 0.0f;
-		return;
-	}
+  /* indicates ray should not receive any light, outside of the lens */
+  if (is_zero(D)) {
+    ray->t = 0.0f;
+    return;
+  }
 
-	/* modify ray for depth of field */
-	float aperturesize = cam->aperturesize;
+  /* modify ray for depth of field */
+  float aperturesize = cam->aperturesize;
 
-	if(aperturesize > 0.0f) {
-		/* sample point on aperture */
-		float2 lensuv = camera_sample_aperture(cam, lens_u, lens_v)*aperturesize;
+  if (aperturesize > 0.0f) {
+    /* sample point on aperture */
+    float2 lensuv = camera_sample_aperture(cam, lens_u, lens_v) * aperturesize;
 
-		/* compute point on plane of focus */
-		float3 Dfocus = normalize(D);
-		float3 Pfocus = Dfocus * cam->focaldistance;
+    /* compute point on plane of focus */
+    float3 Dfocus = normalize(D);
+    float3 Pfocus = Dfocus * cam->focaldistance;
 
-		/* calculate orthonormal coordinates perpendicular to Dfocus */
-		float3 U, V;
-		U = normalize(make_float3(1.0f, 0.0f, 0.0f) -  Dfocus.x * Dfocus);
-		V = normalize(cross(Dfocus, U));
+    /* calculate orthonormal coordinates perpendicular to Dfocus */
+    float3 U, V;
+    U = normalize(make_float3(1.0f, 0.0f, 0.0f) - Dfocus.x * Dfocus);
+    V = normalize(cross(Dfocus, U));
 
-		/* update ray for effect of lens */
-		P = U * lensuv.x + V * lensuv.y;
-		D = normalize(Pfocus - P);
-	}
+    /* update ray for effect of lens */
+    P = U * lensuv.x + V * lensuv.y;
+    D = normalize(Pfocus - P);
+  }
 
-	/* transform ray from camera to world */
-	Transform cameratoworld = cam->cameratoworld;
+  /* transform ray from camera to world */
+  Transform cameratoworld = cam->cameratoworld;
 
 #ifdef __CAMERA_MOTION__
-	if(cam->num_motion_steps) {
-		transform_motion_array_interpolate(
-			&cameratoworld,
-			cam_motion,
-			cam->num_motion_steps,
-			ray->time);
-	}
+  if (cam->num_motion_steps) {
+    transform_motion_array_interpolate(
+        &cameratoworld, cam_motion, cam->num_motion_steps, ray->time);
+  }
 #endif
 
-	P = transform_point(&cameratoworld, P);
-	D = normalize(transform_direction(&cameratoworld, D));
+  P = transform_point(&cameratoworld, P);
+  D = normalize(transform_direction(&cameratoworld, D));
 
-	/* Stereo transform */
-	bool use_stereo = cam->interocular_offset != 0.0f;
-	if(use_stereo) {
-		spherical_stereo_transform(cam, &P, &D);
-	}
+  /* Stereo transform */
+  bool use_stereo = cam->interocular_offset != 0.0f;
+  if (use_stereo) {
+    spherical_stereo_transform(cam, &P, &D);
+  }
 
-	ray->P = P;
-	ray->D = D;
+  ray->P = P;
+  ray->D = D;
 
 #ifdef __RAY_DIFFERENTIALS__
-	/* Ray differentials, computed from scratch using the raster coordinates
-	 * because we don't want to be affected by depth of field. We compute
-	 * ray origin and direction for the center and two neighbouring pixels
-	 * and simply take their differences. */
-	float3 Pcenter = Pcamera;
-	float3 Dcenter = panorama_to_direction(cam, Pcenter.x, Pcenter.y);
-	Pcenter = transform_point(&cameratoworld, Pcenter);
-	Dcenter = normalize(transform_direction(&cameratoworld, Dcenter));
-	if(use_stereo) {
-		spherical_stereo_transform(cam, &Pcenter, &Dcenter);
-	}
-
-	float3 Px = transform_perspective(&rastertocamera, make_float3(raster_x + 1.0f, raster_y, 0.0f));
-	float3 Dx = panorama_to_direction(cam, Px.x, Px.y);
-	Px = transform_point(&cameratoworld, Px);
-	Dx = normalize(transform_direction(&cameratoworld, Dx));
-	if(use_stereo) {
-		spherical_stereo_transform(cam, &Px, &Dx);
-	}
-
-	ray->dP.dx = Px - Pcenter;
-	ray->dD.dx = Dx - Dcenter;
-
-	float3 Py = transform_perspective(&rastertocamera, make_float3(raster_x, raster_y + 1.0f, 0.0f));
-	float3 Dy = panorama_to_direction(cam, Py.x, Py.y);
-	Py = transform_point(&cameratoworld, Py);
-	Dy = normalize(transform_direction(&cameratoworld, Dy));
-	if(use_stereo) {
-		spherical_stereo_transform(cam, &Py, &Dy);
-	}
-
-	ray->dP.dy = Py - Pcenter;
-	ray->dD.dy = Dy - Dcenter;
+  /* Ray differentials, computed from scratch using the raster coordinates
+   * because we don't want to be affected by depth of field. We compute
+   * ray origin and direction for the center and two neighbouring pixels
+   * and simply take their differences. */
+  float3 Pcenter = Pcamera;
+  float3 Dcenter = panorama_to_direction(cam, Pcenter.x, Pcenter.y);
+  Pcenter = transform_point(&cameratoworld, Pcenter);
+  Dcenter = normalize(transform_direction(&cameratoworld, Dcenter));
+  if (use_stereo) {
+    spherical_stereo_transform(cam, &Pcenter, &Dcenter);
+  }
+
+  float3 Px = transform_perspective(&rastertocamera, make_float3(raster_x + 1.0f, raster_y, 0.0f));
+  float3 Dx = panorama_to_direction(cam, Px.x, Px.y);
+  Px = transform_point(&cameratoworld, Px);
+  Dx = normalize(transform_direction(&cameratoworld, Dx));
+  if (use_stereo) {
+    spherical_stereo_transform(cam, &Px, &Dx);
+  }
+
+  ray->dP.dx = Px - Pcenter;
+  ray->dD.dx = Dx - Dcenter;
+
+  float3 Py = transform_perspective(&rastertocamera, make_float3(raster_x, raster_y + 1.0f, 0.0f));
+  float3 Dy = panorama_to_direction(cam, Py.x, Py.y);
+  Py = transform_point(&cameratoworld, Py);
+  Dy = normalize(transform_direction(&cameratoworld, Dy));
+  if (use_stereo) {
+    spherical_stereo_transform(cam, &Py, &Dy);
+  }
+
+  ray->dP.dy = Py - Pcenter;
+  ray->dD.dy = Dy - Dcenter;
 #endif
 
 #ifdef __CAMERA_CLIPPING__
-	/* clipping */
-	float nearclip = cam->nearclip;
-	ray->P += nearclip * ray->D;
-	ray->dP.dx += nearclip * ray->dD.dx;
-	ray->dP.dy += nearclip * ray->dD.dy;
-	ray->t = cam->cliplength;
+  /* clipping */
+  float nearclip = cam->nearclip;
+  ray->P += nearclip * ray->D;
+  ray->dP.dx += nearclip * ray->dD.dx;
+  ray->dP.dy += nearclip * ray->dD.dy;
+  ray->t = cam->cliplength;
 #else
-	ray->t = FLT_MAX;
+  ray->t = FLT_MAX;
 #endif
 }
 
 /* Common */
 
 ccl_device_inline void camera_sample(KernelGlobals *kg,
-                                     int x, int y,
-                                     float filter_u, float filter_v,
-                                     float lens_u, float lens_v,
+                                     int x,
+                                     int y,
+                                     float filter_u,
+                                     float filter_v,
+                                     float lens_u,
+                                     float lens_v,
                                      float time,
                                      ccl_addr_space Ray *ray)
 {
-	/* pixel filter */
-	int filter_table_offset = kernel_data.film.filter_table_offset;
-	float raster_x = x + lookup_table_read(kg, filter_u, filter_table_offset, FILTER_TABLE_SIZE);
-	float raster_y = y + lookup_table_read(kg, filter_v, filter_table_offset, FILTER_TABLE_SIZE);
+  /* pixel filter */
+  int filter_table_offset = kernel_data.film.filter_table_offset;
+  float raster_x = x + lookup_table_read(kg, filter_u, filter_table_offset, FILTER_TABLE_SIZE);
+  float raster_y = y + lookup_table_read(kg, filter_v, filter_table_offset, FILTER_TABLE_SIZE);
 
 #ifdef __CAMERA_MOTION__
-	/* motion blur */
-	if(kernel_data.cam.shuttertime == -1.0f) {
-		ray->time = 0.5f;
-	}
-	else {
-		/* TODO(sergey): Such lookup is unneeded when there's rolling shutter
-		 * effect in use but rolling shutter duration is set to 0.0.
-		 */
-		const int shutter_table_offset = kernel_data.cam.shutter_table_offset;
-		ray->time = lookup_table_read(kg, time, shutter_table_offset, SHUTTER_TABLE_SIZE);
-		/* TODO(sergey): Currently single rolling shutter effect type only
-		 * where scanlines are acquired from top to bottom and whole scanline
-		 * is acquired at once (no delay in acquisition happens between pixels
-		 * of single scanline).
-		 *
-		 * Might want to support more models in the future.
-		 */
-		if(kernel_data.cam.rolling_shutter_type) {
-			/* Time corresponding to a fully rolling shutter only effect:
-			 * top of the frame is time 0.0, bottom of the frame is time 1.0.
-			 */
-			const float time = 1.0f - (float)y / kernel_data.cam.height;
-			const float duration = kernel_data.cam.rolling_shutter_duration;
-			if(duration != 0.0f) {
-				/* This isn't fully physical correct, but lets us to have simple
-				 * controls in the interface. The idea here is basically sort of
-				 * linear interpolation between how much rolling shutter effect
-				 * exist on the frame and how much of it is a motion blur effect.
-				 */
-				ray->time = (ray->time - 0.5f) * duration;
-				ray->time += (time - 0.5f) * (1.0f - duration) + 0.5f;
-			}
-			else {
-				ray->time = time;
-			}
-		}
-	}
+  /* motion blur */
+  if (kernel_data.cam.shuttertime == -1.0f) {
+    ray->time = 0.5f;
+  }
+  else {
+    /* TODO(sergey): Such lookup is unneeded when there's rolling shutter
+     * effect in use but rolling shutter duration is set to 0.0.
+     */
+    const int shutter_table_offset = kernel_data.cam.shutter_table_offset;
+    ray->time = lookup_table_read(kg, time, shutter_table_offset, SHUTTER_TABLE_SIZE);
+    /* TODO(sergey): Currently single rolling shutter effect type only
+     * where scanlines are acquired from top to bottom and whole scanline
+     * is acquired at once (no delay in acquisition happens between pixels
+     * of single scanline).
+     *
+     * Might want to support more models in the future.
+     */
+    if (kernel_data.cam.rolling_shutter_type) {
+      /* Time corresponding to a fully rolling shutter only effect:
+       * top of the frame is time 0.0, bottom of the frame is time 1.0.
+       */
+      const float time = 1.0f - (float)y / kernel_data.cam.height;
+      const float duration = kernel_data.cam.rolling_shutter_duration;
+      if (duration != 0.0f) {
+        /* This isn't fully physically correct, but lets us have simple
+         * controls in the interface. The idea here is basically sort of
+         * linear interpolation between how much rolling shutter effect
+         * exists on the frame and how much of it is a motion blur effect.
+         */
+        ray->time = (ray->time - 0.5f) * duration;
+        ray->time += (time - 0.5f) * (1.0f - duration) + 0.5f;
+      }
+      else {
+        ray->time = time;
+      }
+    }
+  }
 #endif
 
-	/* sample */
-	if(kernel_data.cam.type == CAMERA_PERSPECTIVE) {
-		camera_sample_perspective(kg, raster_x, raster_y, lens_u, lens_v, ray);
-	}
-	else if(kernel_data.cam.type == CAMERA_ORTHOGRAPHIC) {
-		camera_sample_orthographic(kg, raster_x, raster_y, lens_u, lens_v, ray);
-	}
-	else {
-		const ccl_global DecomposedTransform *cam_motion = kernel_tex_array(__camera_motion);
-		camera_sample_panorama(&kernel_data.cam, cam_motion, raster_x, raster_y, lens_u, lens_v, ray);
-	}
+  /* sample */
+  if (kernel_data.cam.type == CAMERA_PERSPECTIVE) {
+    camera_sample_perspective(kg, raster_x, raster_y, lens_u, lens_v, ray);
+  }
+  else if (kernel_data.cam.type == CAMERA_ORTHOGRAPHIC) {
+    camera_sample_orthographic(kg, raster_x, raster_y, lens_u, lens_v, ray);
+  }
+  else {
+    const ccl_global DecomposedTransform *cam_motion = kernel_tex_array(__camera_motion);
+    camera_sample_panorama(&kernel_data.cam, cam_motion, raster_x, raster_y, lens_u, lens_v, ray);
+  }
 }
 
 /* Utilities */
 
 ccl_device_inline float3 camera_position(KernelGlobals *kg)
 {
-	Transform cameratoworld = kernel_data.cam.cameratoworld;
-	return make_float3(cameratoworld.x.w, cameratoworld.y.w, cameratoworld.z.w);
+  Transform cameratoworld = kernel_data.cam.cameratoworld;
+  return make_float3(cameratoworld.x.w, cameratoworld.y.w, cameratoworld.z.w);
 }
 
 ccl_device_inline float camera_distance(KernelGlobals *kg, float3 P)
 {
-	Transform cameratoworld = kernel_data.cam.cameratoworld;
-	float3 camP = make_float3(cameratoworld.x.w, cameratoworld.y.w, cameratoworld.z.w);
-
-	if(kernel_data.cam.type == CAMERA_ORTHOGRAPHIC) {
-		float3 camD = make_float3(cameratoworld.x.z, cameratoworld.y.z, cameratoworld.z.z);
-		return fabsf(dot((P - camP), camD));
-	}
-	else
-		return len(P - camP);
+  Transform cameratoworld = kernel_data.cam.cameratoworld;
+  float3 camP = make_float3(cameratoworld.x.w, cameratoworld.y.w, cameratoworld.z.w);
+
+  if (kernel_data.cam.type == CAMERA_ORTHOGRAPHIC) {
+    float3 camD = make_float3(cameratoworld.x.z, cameratoworld.y.z, cameratoworld.z.z);
+    return fabsf(dot((P - camP), camD));
+  }
+  else
+    return len(P - camP);
 }
 
 ccl_device_inline float3 camera_direction_from_point(KernelGlobals *kg, float3 P)
 {
-	Transform cameratoworld = kernel_data.cam.cameratoworld;
-
-	if(kernel_data.cam.type == CAMERA_ORTHOGRAPHIC) {
-		float3 camD = make_float3(cameratoworld.x.z, cameratoworld.y.z, cameratoworld.z.z);
-		return -camD;
-	}
-	else {
-		float3 camP = make_float3(cameratoworld.x.w, cameratoworld.y.w, cameratoworld.z.w);
-		return normalize(camP - P);
-	}
+  Transform cameratoworld = kernel_data.cam.cameratoworld;
+
+  if (kernel_data.cam.type == CAMERA_ORTHOGRAPHIC) {
+    float3 camD = make_float3(cameratoworld.x.z, cameratoworld.y.z, cameratoworld.z.z);
+    return -camD;
+  }
+  else {
+    float3 camP = make_float3(cameratoworld.x.w, cameratoworld.y.w, cameratoworld.z.w);
+    return normalize(camP - P);
+  }
 }
 
 ccl_device_inline float3 camera_world_to_ndc(KernelGlobals *kg, ShaderData *sd, float3 P)
 {
-	if(kernel_data.cam.type != CAMERA_PANORAMA) {
-		/* perspective / ortho */
-		if(sd->object == PRIM_NONE && kernel_data.cam.type == CAMERA_PERSPECTIVE)
-			P += camera_position(kg);
-
-		ProjectionTransform tfm = kernel_data.cam.worldtondc;
-		return transform_perspective(&tfm, P);
-	}
-	else {
-		/* panorama */
-		Transform tfm = kernel_data.cam.worldtocamera;
-
-		if(sd->object != OBJECT_NONE)
-			P = normalize(transform_point(&tfm, P));
-		else
-			P = normalize(transform_direction(&tfm, P));
-
-		float2 uv = direction_to_panorama(&kernel_data.cam, P);
-
-		return make_float3(uv.x, uv.y, 0.0f);
-	}
+  if (kernel_data.cam.type != CAMERA_PANORAMA) {
+    /* perspective / ortho */
+    if (sd->object == PRIM_NONE && kernel_data.cam.type == CAMERA_PERSPECTIVE)
+      P += camera_position(kg);
+
+    ProjectionTransform tfm = kernel_data.cam.worldtondc;
+    return transform_perspective(&tfm, P);
+  }
+  else {
+    /* panorama */
+    Transform tfm = kernel_data.cam.worldtocamera;
+
+    if (sd->object != OBJECT_NONE)
+      P = normalize(transform_point(&tfm, P));
+    else
+      P = normalize(transform_direction(&tfm, P));
+
+    float2 uv = direction_to_panorama(&kernel_data.cam, P);
+
+    return make_float3(uv.x, uv.y, 0.0f);
+  }
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_color.h b/intern/cycles/kernel/kernel_color.h
index ea478a8a5d3..5eb1bdad02e 100644
--- a/intern/cycles/kernel/kernel_color.h
+++ b/intern/cycles/kernel/kernel_color.h
@@ -23,16 +23,16 @@ CCL_NAMESPACE_BEGIN
 
 ccl_device float3 xyz_to_rgb(KernelGlobals *kg, float3 xyz)
 {
-    return make_float3(dot(float4_to_float3(kernel_data.film.xyz_to_r), xyz),
-                       dot(float4_to_float3(kernel_data.film.xyz_to_g), xyz),
-                       dot(float4_to_float3(kernel_data.film.xyz_to_b), xyz));
+  return make_float3(dot(float4_to_float3(kernel_data.film.xyz_to_r), xyz),
+                     dot(float4_to_float3(kernel_data.film.xyz_to_g), xyz),
+                     dot(float4_to_float3(kernel_data.film.xyz_to_b), xyz));
 }
 
 ccl_device float linear_rgb_to_gray(KernelGlobals *kg, float3 c)
 {
-    return dot(c, float4_to_float3(kernel_data.film.rgb_to_y));
+  return dot(c, float4_to_float3(kernel_data.film.rgb_to_y));
 }
 
 CCL_NAMESPACE_END
 
-#endif  /* __KERNEL_COLOR_H__ */
+#endif /* __KERNEL_COLOR_H__ */
diff --git a/intern/cycles/kernel/kernel_compat_cpu.h b/intern/cycles/kernel/kernel_compat_cpu.h
index 4ee80850402..e8fedca4489 100644
--- a/intern/cycles/kernel/kernel_compat_cpu.h
+++ b/intern/cycles/kernel/kernel_compat_cpu.h
@@ -55,9 +55,9 @@
 /* On x86_64, versions of glibc < 2.16 have an issue where expf is
  * much slower than the double version.  This was fixed in glibc 2.16.
  */
-#if !defined(__KERNEL_GPU__)  && defined(__x86_64__) && defined(__x86_64__) && \
-     defined(__GNU_LIBRARY__) && defined(__GLIBC__ ) && defined(__GLIBC_MINOR__) && \
-     (__GLIBC__ <= 2 && __GLIBC_MINOR__ < 16)
+#if !defined(__KERNEL_GPU__) && defined(__x86_64__) && defined(__x86_64__) && \
+    defined(__GNU_LIBRARY__) && defined(__GLIBC__) && defined(__GLIBC_MINOR__) && \
+    (__GLIBC__ <= 2 && __GLIBC_MINOR__ < 16)
 #  define expf(x) ((float)exp((double)(x)))
 #endif
 
@@ -71,41 +71,41 @@ CCL_NAMESPACE_BEGIN
 /* Texture types to be compatible with CUDA textures. These are really just
  * simple arrays and after inlining fetch hopefully revert to being a simple
  * pointer lookup. */
-template<typename T> struct texture  {
-	ccl_always_inline const T& fetch(int index)
-	{
-		kernel_assert(index >= 0 && index < width);
-		return data[index];
-	}
+template<typename T> struct texture {
+  ccl_always_inline const T &fetch(int index)
+  {
+    kernel_assert(index >= 0 && index < width);
+    return data[index];
+  }
 #if defined(__KERNEL_AVX__) || defined(__KERNEL_AVX2__)
-	/* Reads 256 bytes but indexes in blocks of 128 bytes to maintain
-	 * compatibility with existing indicies and data structures.
-	 */
-	ccl_always_inline avxf fetch_avxf(const int index)
-	{
-		kernel_assert(index >= 0 && (index+1) < width);
-		ssef *ssef_data = (ssef*)data;
-		ssef *ssef_node_data = &ssef_data[index];
-		return _mm256_loadu_ps((float *)ssef_node_data);
-	}
+  /* Reads 256 bits but indexes in blocks of 128 bits to maintain
+   * compatibility with existing indices and data structures.
+   */
+  ccl_always_inline avxf fetch_avxf(const int index)
+  {
+    kernel_assert(index >= 0 && (index + 1) < width);
+    ssef *ssef_data = (ssef *)data;
+    ssef *ssef_node_data = &ssef_data[index];
+    return _mm256_loadu_ps((float *)ssef_node_data);
+  }
 #endif
 
 #ifdef __KERNEL_SSE2__
-	ccl_always_inline ssef fetch_ssef(int index)
-	{
-		kernel_assert(index >= 0 && index < width);
-		return ((ssef*)data)[index];
-	}
-
-	ccl_always_inline ssei fetch_ssei(int index)
-	{
-		kernel_assert(index >= 0 && index < width);
-		return ((ssei*)data)[index];
-	}
+  ccl_always_inline ssef fetch_ssef(int index)
+  {
+    kernel_assert(index >= 0 && index < width);
+    return ((ssef *)data)[index];
+  }
+
+  ccl_always_inline ssei fetch_ssei(int index)
+  {
+    kernel_assert(index >= 0 && index < width);
+    return ((ssei *)data)[index];
+  }
 #endif
 
-	T *data;
-	int width;
+  T *data;
+  int width;
 };
 
 /* Macros to handle different memory storage on different devices */
@@ -124,33 +124,33 @@ typedef vector3<sseb> sse3b;
 typedef vector3<ssef> sse3f;
 typedef vector3<ssei> sse3i;
 
-ccl_device_inline void print_sse3b(const char *label, sse3b& a)
+ccl_device_inline void print_sse3b(const char *label, sse3b &a)
 {
-	print_sseb(label, a.x);
-	print_sseb(label, a.y);
-	print_sseb(label, a.z);
+  print_sseb(label, a.x);
+  print_sseb(label, a.y);
+  print_sseb(label, a.z);
 }
 
-ccl_device_inline void print_sse3f(const char *label, sse3f& a)
+ccl_device_inline void print_sse3f(const char *label, sse3f &a)
 {
-	print_ssef(label, a.x);
-	print_ssef(label, a.y);
-	print_ssef(label, a.z);
+  print_ssef(label, a.x);
+  print_ssef(label, a.y);
+  print_ssef(label, a.z);
 }
 
-ccl_device_inline void print_sse3i(const char *label, sse3i& a)
+ccl_device_inline void print_sse3i(const char *label, sse3i &a)
 {
-	print_ssei(label, a.x);
-	print_ssei(label, a.y);
-	print_ssei(label, a.z);
+  print_ssei(label, a.x);
+  print_ssei(label, a.y);
+  print_ssei(label, a.z);
 }
 
-#if defined(__KERNEL_AVX__) || defined(__KERNEL_AVX2__)
+#  if defined(__KERNEL_AVX__) || defined(__KERNEL_AVX2__)
 typedef vector3<avxf> avx3f;
-#endif
+#  endif
 
 #endif
 
 CCL_NAMESPACE_END
 
-#endif  /* __KERNEL_COMPAT_CPU_H__ */
+#endif /* __KERNEL_COMPAT_CPU_H__ */
diff --git a/intern/cycles/kernel/kernel_compat_cuda.h b/intern/cycles/kernel/kernel_compat_cuda.h
index 8ed96bbae64..469b81d120b 100644
--- a/intern/cycles/kernel/kernel_compat_cuda.h
+++ b/intern/cycles/kernel/kernel_compat_cuda.h
@@ -42,22 +42,22 @@ typedef unsigned long long CUtexObject;
 
 __device__ half __float2half(const float f)
 {
-       half val;
-       asm("{  cvt.rn.f16.f32 %0, %1;}\n" : "=h"(val) : "f"(f));
-       return val;
+  half val;
+  asm("{  cvt.rn.f16.f32 %0, %1;}\n" : "=h"(val) : "f"(f));
+  return val;
 }
 
 /* Qualifier wrappers for different names on different devices */
 
-#define ccl_device  __device__ __inline__
+#define ccl_device __device__ __inline__
 #if __CUDA_ARCH__ < 500
-#  define ccl_device_inline  __device__ __forceinline__
-#  define ccl_device_forceinline  __device__ __forceinline__
+#  define ccl_device_inline __device__ __forceinline__
+#  define ccl_device_forceinline __device__ __forceinline__
 #else
-#  define ccl_device_inline  __device__ __inline__
-#  define ccl_device_forceinline  __device__ __forceinline__
+#  define ccl_device_inline __device__ __inline__
+#  define ccl_device_forceinline __device__ __forceinline__
 #endif
-#define ccl_device_noinline  __device__ __noinline__
+#define ccl_device_noinline __device__ __noinline__
 #define ccl_global
 #define ccl_static_constant __constant__
 #define ccl_constant const
@@ -75,8 +75,7 @@ __device__ half __float2half(const float f)
 
 #define ATTR_FALLTHROUGH
 
-#define CCL_MAX_LOCAL_SIZE (CUDA_THREADS_BLOCK_WIDTH*CUDA_THREADS_BLOCK_WIDTH)
-
+#define CCL_MAX_LOCAL_SIZE (CUDA_THREADS_BLOCK_WIDTH * CUDA_THREADS_BLOCK_WIDTH)
 
 /* No assert supported for CUDA */
 
@@ -91,46 +90,62 @@ __device__ half __float2half(const float f)
 
 ccl_device_inline uint ccl_local_id(uint d)
 {
-	switch(d) {
-		case 0: return threadIdx.x;
-		case 1: return threadIdx.y;
-		case 2: return threadIdx.z;
-		default: return 0;
-	}
+  switch (d) {
+    case 0:
+      return threadIdx.x;
+    case 1:
+      return threadIdx.y;
+    case 2:
+      return threadIdx.z;
+    default:
+      return 0;
+  }
 }
 
 #define ccl_global_id(d) (ccl_group_id(d) * ccl_local_size(d) + ccl_local_id(d))
 
 ccl_device_inline uint ccl_local_size(uint d)
 {
-	switch(d) {
-		case 0: return blockDim.x;
-		case 1: return blockDim.y;
-		case 2: return blockDim.z;
-		default: return 0;
-	}
+  switch (d) {
+    case 0:
+      return blockDim.x;
+    case 1:
+      return blockDim.y;
+    case 2:
+      return blockDim.z;
+    default:
+      return 0;
+  }
 }
 
 #define ccl_global_size(d) (ccl_num_groups(d) * ccl_local_size(d))
 
 ccl_device_inline uint ccl_group_id(uint d)
 {
-	switch(d) {
-		case 0: return blockIdx.x;
-		case 1: return blockIdx.y;
-		case 2: return blockIdx.z;
-		default: return 0;
-	}
+  switch (d) {
+    case 0:
+      return blockIdx.x;
+    case 1:
+      return blockIdx.y;
+    case 2:
+      return blockIdx.z;
+    default:
+      return 0;
+  }
 }
 
 ccl_device_inline uint ccl_num_groups(uint d)
 {
-	switch(d) {
-		case 0: return gridDim.x;
-		case 1: return gridDim.y;
-		case 2: return gridDim.z;
-		default: return 0;
-	}
+  switch (d) {
+    case 0:
+      return gridDim.x;
+    case 1:
+      return gridDim.y;
+    case 2:
+      return gridDim.z;
+    default:
+      return 0;
+  }
 }
 
 /* Textures */
@@ -150,4 +165,4 @@ ccl_device_inline uint ccl_num_groups(uint d)
 #define logf(x) __logf(((float)(x)))
 #define expf(x) __expf(((float)(x)))
 
-#endif  /* __KERNEL_COMPAT_CUDA_H__ */
+#endif /* __KERNEL_COMPAT_CUDA_H__ */
diff --git a/intern/cycles/kernel/kernel_compat_opencl.h b/intern/cycles/kernel/kernel_compat_opencl.h
index d3d0934a626..e040ea88d7c 100644
--- a/intern/cycles/kernel/kernel_compat_opencl.h
+++ b/intern/cycles/kernel/kernel_compat_opencl.h
@@ -134,7 +134,7 @@
 #  define expf(x) native_exp(((float)(x)))
 #  define sqrtf(x) native_sqrt(((float)(x)))
 #  define logf(x) native_log(((float)(x)))
-#  define rcp(x)  native_recip(x)
+#  define rcp(x) native_recip(x)
 #else
 #  define sinf(x) sin(((float)(x)))
 #  define cosf(x) cos(((float)(x)))
@@ -142,12 +142,13 @@
 #  define expf(x) exp(((float)(x)))
 #  define sqrtf(x) sqrt(((float)(x)))
 #  define logf(x) log(((float)(x)))
-#  define rcp(x)  recip(x)
+#  define rcp(x) recip(x)
 #endif
 
 /* data lookup defines */
 #define kernel_data (*kg->data)
-#define kernel_tex_array(tex) ((const ccl_global tex##_t*)(kg->buffers[kg->tex.cl_buffer] + kg->tex.data))
+#define kernel_tex_array(tex) \
+  ((const ccl_global tex##_t *)(kg->buffers[kg->tex.cl_buffer] + kg->tex.data))
 #define kernel_tex_fetch(tex, index) kernel_tex_array(tex)[(index)]
 
 /* define NULL */
@@ -155,10 +156,10 @@
 
 /* enable extensions */
 #ifdef __KERNEL_CL_KHR_FP16__
-#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+#  pragma OPENCL EXTENSION cl_khr_fp16 : enable
 #endif
 
 #include "util/util_half.h"
 #include "util/util_types.h"
 
-#endif  /* __KERNEL_COMPAT_OPENCL_H__ */
+#endif /* __KERNEL_COMPAT_OPENCL_H__ */
diff --git a/intern/cycles/kernel/kernel_differential.h b/intern/cycles/kernel/kernel_differential.h
index 924d96c72e5..8513de0d843 100644
--- a/intern/cycles/kernel/kernel_differential.h
+++ b/intern/cycles/kernel/kernel_differential.h
@@ -18,88 +18,98 @@ CCL_NAMESPACE_BEGIN
 
 /* See "Tracing Ray Differentials", Homan Igehy, 1999. */
 
-ccl_device void differential_transfer(ccl_addr_space differential3 *dP_, const differential3 dP, float3 D, const differential3 dD, float3 Ng, float t)
+ccl_device void differential_transfer(ccl_addr_space differential3 *dP_,
+                                      const differential3 dP,
+                                      float3 D,
+                                      const differential3 dD,
+                                      float3 Ng,
+                                      float t)
 {
-	/* ray differential transfer through homogeneous medium, to
-	 * compute dPdx/dy at a shading point from the incoming ray */
+  /* ray differential transfer through homogeneous medium, to
+   * compute dPdx/dy at a shading point from the incoming ray */
 
-	float3 tmp = D/dot(D, Ng);
-	float3 tmpx = dP.dx + t*dD.dx;
-	float3 tmpy = dP.dy + t*dD.dy;
+  float3 tmp = D / dot(D, Ng);
+  float3 tmpx = dP.dx + t * dD.dx;
+  float3 tmpy = dP.dy + t * dD.dy;
 
-	dP_->dx = tmpx - dot(tmpx, Ng)*tmp;
-	dP_->dy = tmpy - dot(tmpy, Ng)*tmp;
+  dP_->dx = tmpx - dot(tmpx, Ng) * tmp;
+  dP_->dy = tmpy - dot(tmpy, Ng) * tmp;
 }
 
 ccl_device void differential_incoming(ccl_addr_space differential3 *dI, const differential3 dD)
 {
-	/* compute dIdx/dy at a shading point, we just need to negate the
-	 * differential of the ray direction */
+  /* compute dIdx/dy at a shading point, we just need to negate the
+   * differential of the ray direction */
 
-	dI->dx = -dD.dx;
-	dI->dy = -dD.dy;
+  dI->dx = -dD.dx;
+  dI->dy = -dD.dy;
 }
 
-ccl_device void differential_dudv(ccl_addr_space differential *du, ccl_addr_space differential *dv, float3 dPdu, float3 dPdv, differential3 dP, float3 Ng)
+ccl_device void differential_dudv(ccl_addr_space differential *du,
+                                  ccl_addr_space differential *dv,
+                                  float3 dPdu,
+                                  float3 dPdv,
+                                  differential3 dP,
+                                  float3 Ng)
 {
-	/* now we have dPdx/dy from the ray differential transfer, and dPdu/dv
-	 * from the primitive, we can compute dudx/dy and dvdx/dy. these are
-	 * mainly used for differentials of arbitrary mesh attributes. */
-
-	/* find most stable axis to project to 2D */
-	float xn = fabsf(Ng.x);
-	float yn = fabsf(Ng.y);
-	float zn = fabsf(Ng.z);
-
-	if(zn < xn || zn < yn) {
-		if(yn < xn || yn < zn) {
-			dPdu.x = dPdu.y;
-			dPdv.x = dPdv.y;
-			dP.dx.x = dP.dx.y;
-			dP.dy.x = dP.dy.y;
-		}
-
-		dPdu.y = dPdu.z;
-		dPdv.y = dPdv.z;
-		dP.dx.y = dP.dx.z;
-		dP.dy.y = dP.dy.z;
-	}
-
-	/* using Cramer's rule, we solve for dudx and dvdx in a 2x2 linear system,
-	 * and the same for dudy and dvdy. the denominator is the same for both
-	 * solutions, so we compute it only once.
-	 *
-	 * dP.dx = dPdu * dudx + dPdv * dvdx;
-	 * dP.dy = dPdu * dudy + dPdv * dvdy; */
-
-	float det = (dPdu.x*dPdv.y - dPdv.x*dPdu.y);
-
-	if(det != 0.0f)
-		det = 1.0f/det;
-
-	du->dx = (dP.dx.x*dPdv.y - dP.dx.y*dPdv.x)*det;
-	dv->dx = (dP.dx.y*dPdu.x - dP.dx.x*dPdu.y)*det;
-
-	du->dy = (dP.dy.x*dPdv.y - dP.dy.y*dPdv.x)*det;
-	dv->dy = (dP.dy.y*dPdu.x - dP.dy.x*dPdu.y)*det;
+  /* now we have dPdx/dy from the ray differential transfer, and dPdu/dv
+   * from the primitive, we can compute dudx/dy and dvdx/dy. these are
+   * mainly used for differentials of arbitrary mesh attributes. */
+
+  /* find most stable axis to project to 2D */
+  float xn = fabsf(Ng.x);
+  float yn = fabsf(Ng.y);
+  float zn = fabsf(Ng.z);
+
+  if (zn < xn || zn < yn) {
+    if (yn < xn || yn < zn) {
+      dPdu.x = dPdu.y;
+      dPdv.x = dPdv.y;
+      dP.dx.x = dP.dx.y;
+      dP.dy.x = dP.dy.y;
+    }
+
+    dPdu.y = dPdu.z;
+    dPdv.y = dPdv.z;
+    dP.dx.y = dP.dx.z;
+    dP.dy.y = dP.dy.z;
+  }
+
+  /* using Cramer's rule, we solve for dudx and dvdx in a 2x2 linear system,
+   * and the same for dudy and dvdy. the denominator is the same for both
+   * solutions, so we compute it only once.
+   *
+   * dP.dx = dPdu * dudx + dPdv * dvdx;
+   * dP.dy = dPdu * dudy + dPdv * dvdy; */
+
+  float det = (dPdu.x * dPdv.y - dPdv.x * dPdu.y);
+
+  if (det != 0.0f)
+    det = 1.0f / det;
+
+  du->dx = (dP.dx.x * dPdv.y - dP.dx.y * dPdv.x) * det;
+  dv->dx = (dP.dx.y * dPdu.x - dP.dx.x * dPdu.y) * det;
+
+  du->dy = (dP.dy.x * dPdv.y - dP.dy.y * dPdv.x) * det;
+  dv->dy = (dP.dy.y * dPdu.x - dP.dy.x * dPdu.y) * det;
 }
 
 ccl_device differential differential_zero()
 {
-	differential d;
-	d.dx = 0.0f;
-	d.dy = 0.0f;
+  differential d;
+  d.dx = 0.0f;
+  d.dy = 0.0f;
 
-	return d;
+  return d;
 }
 
 ccl_device differential3 differential3_zero()
 {
-	differential3 d;
-	d.dx = make_float3(0.0f, 0.0f, 0.0f);
-	d.dy = make_float3(0.0f, 0.0f, 0.0f);
+  differential3 d;
+  d.dx = make_float3(0.0f, 0.0f, 0.0f);
+  d.dy = make_float3(0.0f, 0.0f, 0.0f);
 
-	return d;
+  return d;
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_emission.h b/intern/cycles/kernel/kernel_emission.h
index 80bb8d48caf..f2eaa7b50a5 100644
--- a/intern/cycles/kernel/kernel_emission.h
+++ b/intern/cycles/kernel/kernel_emission.h
@@ -26,61 +26,71 @@ ccl_device_noinline float3 direct_emissive_eval(KernelGlobals *kg,
                                                 float t,
                                                 float time)
 {
-	/* setup shading at emitter */
-	float3 eval;
-
-	if(shader_constant_emission_eval(kg, ls->shader, &eval)) {
-		if((ls->prim != PRIM_NONE) && dot(ls->Ng, I) < 0.0f) {
-			ls->Ng = -ls->Ng;
-		}
-	}
-	else {
-		/* Setup shader data and call shader_eval_surface once, better
-		 * for GPU coherence and compile times. */
+  /* setup shading at emitter */
+  float3 eval;
+
+  if (shader_constant_emission_eval(kg, ls->shader, &eval)) {
+    if ((ls->prim != PRIM_NONE) && dot(ls->Ng, I) < 0.0f) {
+      ls->Ng = -ls->Ng;
+    }
+  }
+  else {
+    /* Setup shader data and call shader_eval_surface once, better
+     * for GPU coherence and compile times. */
 #ifdef __BACKGROUND_MIS__
-		if(ls->type == LIGHT_BACKGROUND) {
-			Ray ray;
-			ray.D = ls->D;
-			ray.P = ls->P;
-			ray.t = 1.0f;
-			ray.time = time;
-			ray.dP = differential3_zero();
-			ray.dD = dI;
-
-			shader_setup_from_background(kg, emission_sd, &ray);
-		}
-		else
+    if (ls->type == LIGHT_BACKGROUND) {
+      Ray ray;
+      ray.D = ls->D;
+      ray.P = ls->P;
+      ray.t = 1.0f;
+      ray.time = time;
+      ray.dP = differential3_zero();
+      ray.dD = dI;
+
+      shader_setup_from_background(kg, emission_sd, &ray);
+    }
+    else
 #endif
-		{
-			shader_setup_from_sample(kg, emission_sd,
-			                         ls->P, ls->Ng, I,
-			                         ls->shader, ls->object, ls->prim,
-			                         ls->u, ls->v, t, time, false, ls->lamp);
-
-			ls->Ng = emission_sd->Ng;
-		}
-
-		/* No proper path flag, we're evaluating this for all closures. that's
-		 * weak but we'd have to do multiple evaluations otherwise. */
-		path_state_modify_bounce(state, true);
-		shader_eval_surface(kg, emission_sd, state, PATH_RAY_EMISSION);
-		path_state_modify_bounce(state, false);
-
-		/* Evaluate closures. */
+    {
+      shader_setup_from_sample(kg,
+                               emission_sd,
+                               ls->P,
+                               ls->Ng,
+                               I,
+                               ls->shader,
+                               ls->object,
+                               ls->prim,
+                               ls->u,
+                               ls->v,
+                               t,
+                               time,
+                               false,
+                               ls->lamp);
+
+      ls->Ng = emission_sd->Ng;
+    }
+
+    /* No proper path flag, we're evaluating this for all closures. that's
+     * weak but we'd have to do multiple evaluations otherwise. */
+    path_state_modify_bounce(state, true);
+    shader_eval_surface(kg, emission_sd, state, PATH_RAY_EMISSION);
+    path_state_modify_bounce(state, false);
+
+    /* Evaluate closures. */
 #ifdef __BACKGROUND_MIS__
-		if (ls->type == LIGHT_BACKGROUND) {
-			eval = shader_background_eval(emission_sd);
-		}
-		else
+    if (ls->type == LIGHT_BACKGROUND) {
+      eval = shader_background_eval(emission_sd);
+    }
+    else
 #endif
-		{
-			eval = shader_emissive_eval(emission_sd);
-		}
-	}
+    {
+      eval = shader_emissive_eval(emission_sd);
+    }
+  }
 
-	eval *= ls->eval_fac;
+  eval *= ls->eval_fac;
 
-	return eval;
+  return eval;
 }
 
 ccl_device_noinline bool direct_emission(KernelGlobals *kg,
@@ -93,132 +103,128 @@ ccl_device_noinline bool direct_emission(KernelGlobals *kg,
                                          bool *is_lamp,
                                          float rand_terminate)
 {
-	if(ls->pdf == 0.0f)
-		return false;
+  if (ls->pdf == 0.0f)
+    return false;
 
-	/* todo: implement */
-	differential3 dD = differential3_zero();
+  /* todo: implement */
+  differential3 dD = differential3_zero();
 
-	/* evaluate closure */
+  /* evaluate closure */
 
-	float3 light_eval = direct_emissive_eval(kg,
-	                                         emission_sd,
-	                                         ls,
-	                                         state,
-	                                         -ls->D,
-	                                         dD,
-	                                         ls->t,
-	                                         sd->time);
+  float3 light_eval = direct_emissive_eval(
+      kg, emission_sd, ls, state, -ls->D, dD, ls->t, sd->time);
 
-	if(is_zero(light_eval))
-		return false;
+  if (is_zero(light_eval))
+    return false;
 
-	/* evaluate BSDF at shading point */
+    /* evaluate BSDF at shading point */
 
 #ifdef __VOLUME__
-	if(sd->prim != PRIM_NONE)
-		shader_bsdf_eval(kg, sd, ls->D, eval, ls->pdf, ls->shader & SHADER_USE_MIS);
-	else {
-		float bsdf_pdf;
-		shader_volume_phase_eval(kg, sd, ls->D, eval, &bsdf_pdf);
-		if(ls->shader & SHADER_USE_MIS) {
-			/* Multiple importance sampling. */
-			float mis_weight = power_heuristic(ls->pdf, bsdf_pdf);
-			light_eval *= mis_weight;
-		}
-	}
+  if (sd->prim != PRIM_NONE)
+    shader_bsdf_eval(kg, sd, ls->D, eval, ls->pdf, ls->shader & SHADER_USE_MIS);
+  else {
+    float bsdf_pdf;
+    shader_volume_phase_eval(kg, sd, ls->D, eval, &bsdf_pdf);
+    if (ls->shader & SHADER_USE_MIS) {
+      /* Multiple importance sampling. */
+      float mis_weight = power_heuristic(ls->pdf, bsdf_pdf);
+      light_eval *= mis_weight;
+    }
+  }
 #else
-	shader_bsdf_eval(kg, sd, ls->D, eval, ls->pdf, ls->shader & SHADER_USE_MIS);
+  shader_bsdf_eval(kg, sd, ls->D, eval, ls->pdf, ls->shader & SHADER_USE_MIS);
 #endif
 
-	bsdf_eval_mul3(eval, light_eval/ls->pdf);
+  bsdf_eval_mul3(eval, light_eval / ls->pdf);
 
 #ifdef __PASSES__
-	/* use visibility flag to skip lights */
-	if(ls->shader & SHADER_EXCLUDE_ANY) {
-		if(ls->shader & SHADER_EXCLUDE_DIFFUSE) {
-			eval->diffuse = make_float3(0.0f, 0.0f, 0.0f);
-			eval->subsurface = make_float3(0.0f, 0.0f, 0.0f);
-		}
-		if(ls->shader & SHADER_EXCLUDE_GLOSSY)
-			eval->glossy = make_float3(0.0f, 0.0f, 0.0f);
-		if(ls->shader & SHADER_EXCLUDE_TRANSMIT)
-			eval->transmission = make_float3(0.0f, 0.0f, 0.0f);
-		if(ls->shader & SHADER_EXCLUDE_SCATTER)
-			eval->scatter = make_float3(0.0f, 0.0f, 0.0f);
-	}
+  /* use visibility flag to skip lights */
+  if (ls->shader & SHADER_EXCLUDE_ANY) {
+    if (ls->shader & SHADER_EXCLUDE_DIFFUSE) {
+      eval->diffuse = make_float3(0.0f, 0.0f, 0.0f);
+      eval->subsurface = make_float3(0.0f, 0.0f, 0.0f);
+    }
+    if (ls->shader & SHADER_EXCLUDE_GLOSSY)
+      eval->glossy = make_float3(0.0f, 0.0f, 0.0f);
+    if (ls->shader & SHADER_EXCLUDE_TRANSMIT)
+      eval->transmission = make_float3(0.0f, 0.0f, 0.0f);
+    if (ls->shader & SHADER_EXCLUDE_SCATTER)
+      eval->scatter = make_float3(0.0f, 0.0f, 0.0f);
+  }
 #endif
 
-	if(bsdf_eval_is_zero(eval))
-		return false;
+  if (bsdf_eval_is_zero(eval))
+    return false;
 
-	if(kernel_data.integrator.light_inv_rr_threshold > 0.0f
+  if (kernel_data.integrator.light_inv_rr_threshold > 0.0f
 #ifdef __SHADOW_TRICKS__
-	   && (state->flag & PATH_RAY_SHADOW_CATCHER) == 0
+      && (state->flag & PATH_RAY_SHADOW_CATCHER) == 0
 #endif
-	  )
-	{
-		float probability = max3(fabs(bsdf_eval_sum(eval))) * kernel_data.integrator.light_inv_rr_threshold;
-		if(probability < 1.0f) {
-			if(rand_terminate >= probability) {
-				return false;
-			}
-			bsdf_eval_mul(eval, 1.0f / probability);
-		}
-	}
-
-	if(ls->shader & SHADER_CAST_SHADOW) {
-		/* setup ray */
-		bool transmit = (dot(sd->Ng, ls->D) < 0.0f);
-		ray->P = ray_offset(sd->P, (transmit)? -sd->Ng: sd->Ng);
-
-		if(ls->t == FLT_MAX) {
-			/* distant light */
-			ray->D = ls->D;
-			ray->t = ls->t;
-		}
-		else {
-			/* other lights, avoid self-intersection */
-			ray->D = ray_offset(ls->P, ls->Ng) - ray->P;
-			ray->D = normalize_len(ray->D, &ray->t);
-		}
-
-		ray->dP = sd->dP;
-		ray->dD = differential3_zero();
-	}
-	else {
-		/* signal to not cast shadow ray */
-		ray->t = 0.0f;
-	}
-
-	/* return if it's a lamp for shadow pass */
-	*is_lamp = (ls->prim == PRIM_NONE && ls->type != LIGHT_BACKGROUND);
-
-	return true;
+  ) {
+    float probability = max3(fabs(bsdf_eval_sum(eval))) *
+                        kernel_data.integrator.light_inv_rr_threshold;
+    if (probability < 1.0f) {
+      if (rand_terminate >= probability) {
+        return false;
+      }
+      bsdf_eval_mul(eval, 1.0f / probability);
+    }
+  }
+
+  if (ls->shader & SHADER_CAST_SHADOW) {
+    /* setup ray */
+    bool transmit = (dot(sd->Ng, ls->D) < 0.0f);
+    ray->P = ray_offset(sd->P, (transmit) ? -sd->Ng : sd->Ng);
+
+    if (ls->t == FLT_MAX) {
+      /* distant light */
+      ray->D = ls->D;
+      ray->t = ls->t;
+    }
+    else {
+      /* other lights, avoid self-intersection */
+      ray->D = ray_offset(ls->P, ls->Ng) - ray->P;
+      ray->D = normalize_len(ray->D, &ray->t);
+    }
+
+    ray->dP = sd->dP;
+    ray->dD = differential3_zero();
+  }
+  else {
+    /* signal to not cast shadow ray */
+    ray->t = 0.0f;
+  }
+
+  /* return if it's a lamp for shadow pass */
+  *is_lamp = (ls->prim == PRIM_NONE && ls->type != LIGHT_BACKGROUND);
+
+  return true;
 }
 
 /* Indirect Primitive Emission */
 
-ccl_device_noinline float3 indirect_primitive_emission(KernelGlobals *kg, ShaderData *sd, float t, int path_flag, float bsdf_pdf)
+ccl_device_noinline float3 indirect_primitive_emission(
+    KernelGlobals *kg, ShaderData *sd, float t, int path_flag, float bsdf_pdf)
 {
-	/* evaluate emissive closure */
-	float3 L = shader_emissive_eval(sd);
+  /* evaluate emissive closure */
+  float3 L = shader_emissive_eval(sd);
 
 #ifdef __HAIR__
-	if(!(path_flag & PATH_RAY_MIS_SKIP) && (sd->flag & SD_USE_MIS) && (sd->type & PRIMITIVE_ALL_TRIANGLE))
+  if (!(path_flag & PATH_RAY_MIS_SKIP) && (sd->flag & SD_USE_MIS) &&
+      (sd->type & PRIMITIVE_ALL_TRIANGLE))
 #else
-	if(!(path_flag & PATH_RAY_MIS_SKIP) && (sd->flag & SD_USE_MIS))
+  if (!(path_flag & PATH_RAY_MIS_SKIP) && (sd->flag & SD_USE_MIS))
 #endif
-	{
-		/* multiple importance sampling, get triangle light pdf,
-		 * and compute weight with respect to BSDF pdf */
-		float pdf = triangle_light_pdf(kg, sd, t);
-		float mis_weight = power_heuristic(bsdf_pdf, pdf);
+  {
+    /* multiple importance sampling, get triangle light pdf,
+     * and compute weight with respect to BSDF pdf */
+    float pdf = triangle_light_pdf(kg, sd, t);
+    float mis_weight = power_heuristic(bsdf_pdf, pdf);
 
-		return L*mis_weight;
-	}
+    return L * mis_weight;
+  }
 
-	return L;
+  return L;
 }
 
 /* Indirect Lamp Emission */
@@ -229,60 +235,55 @@ ccl_device_noinline bool indirect_lamp_emission(KernelGlobals *kg,
                                                 Ray *ray,
                                                 float3 *emission)
 {
-	bool hit_lamp = false;
+  bool hit_lamp = false;
 
-	*emission = make_float3(0.0f, 0.0f, 0.0f);
+  *emission = make_float3(0.0f, 0.0f, 0.0f);
 
-	for(int lamp = 0; lamp < kernel_data.integrator.num_all_lights; lamp++) {
-		LightSample ls;
+  for (int lamp = 0; lamp < kernel_data.integrator.num_all_lights; lamp++) {
+    LightSample ls;
 
-		if(!lamp_light_eval(kg, lamp, ray->P, ray->D, ray->t, &ls))
-			continue;
+    if (!lamp_light_eval(kg, lamp, ray->P, ray->D, ray->t, &ls))
+      continue;
 
 #ifdef __PASSES__
-		/* use visibility flag to skip lights */
-		if(ls.shader & SHADER_EXCLUDE_ANY) {
-			if(((ls.shader & SHADER_EXCLUDE_DIFFUSE) && (state->flag & PATH_RAY_DIFFUSE)) ||
-			   ((ls.shader & SHADER_EXCLUDE_GLOSSY) &&
-			    ((state->flag & (PATH_RAY_GLOSSY|PATH_RAY_REFLECT)) == (PATH_RAY_GLOSSY|PATH_RAY_REFLECT))) ||
-			   ((ls.shader & SHADER_EXCLUDE_TRANSMIT) && (state->flag & PATH_RAY_TRANSMIT)) ||
-			   ((ls.shader & SHADER_EXCLUDE_SCATTER) && (state->flag & PATH_RAY_VOLUME_SCATTER)))
-				continue;
-		}
+    /* use visibility flag to skip lights */
+    if (ls.shader & SHADER_EXCLUDE_ANY) {
+      if (((ls.shader & SHADER_EXCLUDE_DIFFUSE) && (state->flag & PATH_RAY_DIFFUSE)) ||
+          ((ls.shader & SHADER_EXCLUDE_GLOSSY) &&
+           ((state->flag & (PATH_RAY_GLOSSY | PATH_RAY_REFLECT)) ==
+            (PATH_RAY_GLOSSY | PATH_RAY_REFLECT))) ||
+          ((ls.shader & SHADER_EXCLUDE_TRANSMIT) && (state->flag & PATH_RAY_TRANSMIT)) ||
+          ((ls.shader & SHADER_EXCLUDE_SCATTER) && (state->flag & PATH_RAY_VOLUME_SCATTER)))
+        continue;
+    }
 #endif
 
-		float3 L = direct_emissive_eval(kg,
-		                                emission_sd,
-		                                &ls,
-		                                state,
-		                                -ray->D,
-		                                ray->dD,
-		                                ls.t,
-		                                ray->time);
+    float3 L = direct_emissive_eval(
+        kg, emission_sd, &ls, state, -ray->D, ray->dD, ls.t, ray->time);
 
 #ifdef __VOLUME__
-		if(state->volume_stack[0].shader != SHADER_NONE) {
-			/* shadow attenuation */
-			Ray volume_ray = *ray;
-			volume_ray.t = ls.t;
-			float3 volume_tp = make_float3(1.0f, 1.0f, 1.0f);
-			kernel_volume_shadow(kg, emission_sd, state, &volume_ray, &volume_tp);
-			L *= volume_tp;
-		}
+    if (state->volume_stack[0].shader != SHADER_NONE) {
+      /* shadow attenuation */
+      Ray volume_ray = *ray;
+      volume_ray.t = ls.t;
+      float3 volume_tp = make_float3(1.0f, 1.0f, 1.0f);
+      kernel_volume_shadow(kg, emission_sd, state, &volume_ray, &volume_tp);
+      L *= volume_tp;
+    }
 #endif
 
-		if(!(state->flag & PATH_RAY_MIS_SKIP)) {
-			/* multiple importance sampling, get regular light pdf,
-			 * and compute weight with respect to BSDF pdf */
-			float mis_weight = power_heuristic(state->ray_pdf, ls.pdf);
-			L *= mis_weight;
-		}
+    if (!(state->flag & PATH_RAY_MIS_SKIP)) {
+      /* multiple importance sampling, get regular light pdf,
+       * and compute weight with respect to BSDF pdf */
+      float mis_weight = power_heuristic(state->ray_pdf, ls.pdf);
+      L *= mis_weight;
+    }
 
-		*emission += L;
-		hit_lamp = true;
-	}
+    *emission += L;
+    hit_lamp = true;
+  }
 
-	return hit_lamp;
+  return hit_lamp;
 }
 
 /* Indirect Background */
@@ -293,55 +294,55 @@ ccl_device_noinline float3 indirect_background(KernelGlobals *kg,
                                                ccl_addr_space Ray *ray)
 {
 #ifdef __BACKGROUND__
-	int shader = kernel_data.background.surface_shader;
-
-	/* Use visibility flag to skip lights. */
-	if(shader & SHADER_EXCLUDE_ANY) {
-		if(((shader & SHADER_EXCLUDE_DIFFUSE) && (state->flag & PATH_RAY_DIFFUSE)) ||
-		   ((shader & SHADER_EXCLUDE_GLOSSY) &&
-		    ((state->flag & (PATH_RAY_GLOSSY|PATH_RAY_REFLECT)) == (PATH_RAY_GLOSSY|PATH_RAY_REFLECT))) ||
-		   ((shader & SHADER_EXCLUDE_TRANSMIT) && (state->flag & PATH_RAY_TRANSMIT)) ||
-		   ((shader & SHADER_EXCLUDE_CAMERA) && (state->flag & PATH_RAY_CAMERA)) ||
-		   ((shader & SHADER_EXCLUDE_SCATTER) && (state->flag & PATH_RAY_VOLUME_SCATTER)))
-			return make_float3(0.0f, 0.0f, 0.0f);
-	}
-
-
-	/* Evaluate background shader. */
-	float3 L;
-	if(!shader_constant_emission_eval(kg, shader, &L)) {
+  int shader = kernel_data.background.surface_shader;
+
+  /* Use visibility flag to skip lights. */
+  if (shader & SHADER_EXCLUDE_ANY) {
+    if (((shader & SHADER_EXCLUDE_DIFFUSE) && (state->flag & PATH_RAY_DIFFUSE)) ||
+        ((shader & SHADER_EXCLUDE_GLOSSY) &&
+         ((state->flag & (PATH_RAY_GLOSSY | PATH_RAY_REFLECT)) ==
+          (PATH_RAY_GLOSSY | PATH_RAY_REFLECT))) ||
+        ((shader & SHADER_EXCLUDE_TRANSMIT) && (state->flag & PATH_RAY_TRANSMIT)) ||
+        ((shader & SHADER_EXCLUDE_CAMERA) && (state->flag & PATH_RAY_CAMERA)) ||
+        ((shader & SHADER_EXCLUDE_SCATTER) && (state->flag & PATH_RAY_VOLUME_SCATTER)))
+      return make_float3(0.0f, 0.0f, 0.0f);
+  }
+
+  /* Evaluate background shader. */
+  float3 L;
+  if (!shader_constant_emission_eval(kg, shader, &L)) {
 #  ifdef __SPLIT_KERNEL__
-		Ray priv_ray = *ray;
-		shader_setup_from_background(kg, emission_sd, &priv_ray);
+    Ray priv_ray = *ray;
+    shader_setup_from_background(kg, emission_sd, &priv_ray);
 #  else
-		shader_setup_from_background(kg, emission_sd, ray);
+    shader_setup_from_background(kg, emission_sd, ray);
 #  endif
 
-		path_state_modify_bounce(state, true);
-		shader_eval_surface(kg, emission_sd, state, state->flag | PATH_RAY_EMISSION);
-		path_state_modify_bounce(state, false);
+    path_state_modify_bounce(state, true);
+    shader_eval_surface(kg, emission_sd, state, state->flag | PATH_RAY_EMISSION);
+    path_state_modify_bounce(state, false);
 
-		L = shader_background_eval(emission_sd);
-	}
+    L = shader_background_eval(emission_sd);
+  }
 
-	/* Background MIS weights. */
-#ifdef __BACKGROUND_MIS__
-	/* Check if background light exists or if we should skip pdf. */
-	int res_x = kernel_data.integrator.pdf_background_res_x;
+  /* Background MIS weights. */
+#  ifdef __BACKGROUND_MIS__
+  /* Check if background light exists or if we should skip pdf. */
+  int res_x = kernel_data.integrator.pdf_background_res_x;
 
-	if(!(state->flag & PATH_RAY_MIS_SKIP) && res_x) {
-		/* multiple importance sampling, get background light pdf for ray
-		 * direction, and compute weight with respect to BSDF pdf */
-		float pdf = background_light_pdf(kg, ray->P, ray->D);
-		float mis_weight = power_heuristic(state->ray_pdf, pdf);
+  if (!(state->flag & PATH_RAY_MIS_SKIP) && res_x) {
+    /* multiple importance sampling, get background light pdf for ray
+     * direction, and compute weight with respect to BSDF pdf */
+    float pdf = background_light_pdf(kg, ray->P, ray->D);
+    float mis_weight = power_heuristic(state->ray_pdf, pdf);
 
-		return L*mis_weight;
-	}
-#endif
+    return L * mis_weight;
+  }
+#  endif
 
-	return L;
+  return L;
 #else
-	return make_float3(0.8f, 0.8f, 0.8f);
+  return make_float3(0.8f, 0.8f, 0.8f);
 #endif
 }
 
diff --git a/intern/cycles/kernel/kernel_film.h b/intern/cycles/kernel/kernel_film.h
index b5f151d8663..d20f1adf663 100644
--- a/intern/cycles/kernel/kernel_film.h
+++ b/intern/cycles/kernel/kernel_film.h
@@ -18,72 +18,82 @@ CCL_NAMESPACE_BEGIN
 
 ccl_device float4 film_map(KernelGlobals *kg, float4 irradiance, float scale)
 {
-	float exposure = kernel_data.film.exposure;
-	float4 result = irradiance*scale;
+  float exposure = kernel_data.film.exposure;
+  float4 result = irradiance * scale;
 
-	/* conversion to srgb */
-	result.x = color_linear_to_srgb(result.x*exposure);
-	result.y = color_linear_to_srgb(result.y*exposure);
-	result.z = color_linear_to_srgb(result.z*exposure);
+  /* conversion to srgb */
+  result.x = color_linear_to_srgb(result.x * exposure);
+  result.y = color_linear_to_srgb(result.y * exposure);
+  result.z = color_linear_to_srgb(result.z * exposure);
 
-	/* clamp since alpha might be > 1.0 due to russian roulette */
-	result.w = saturate(result.w);
+  /* clamp since alpha might be > 1.0 due to russian roulette */
+  result.w = saturate(result.w);
 
-	return result;
+  return result;
 }
 
 ccl_device uchar4 film_float_to_byte(float4 color)
 {
-	uchar4 result;
+  uchar4 result;
 
-	/* simple float to byte conversion */
-	result.x = (uchar)(saturate(color.x)*255.0f);
-	result.y = (uchar)(saturate(color.y)*255.0f);
-	result.z = (uchar)(saturate(color.z)*255.0f);
-	result.w = (uchar)(saturate(color.w)*255.0f);
+  /* simple float to byte conversion */
+  result.x = (uchar)(saturate(color.x) * 255.0f);
+  result.y = (uchar)(saturate(color.y) * 255.0f);
+  result.z = (uchar)(saturate(color.z) * 255.0f);
+  result.w = (uchar)(saturate(color.w) * 255.0f);
 
-	return result;
+  return result;
 }
 
 ccl_device void kernel_film_convert_to_byte(KernelGlobals *kg,
-	ccl_global uchar4 *rgba, ccl_global float *buffer,
-	float sample_scale, int x, int y, int offset, int stride)
+                                            ccl_global uchar4 *rgba,
+                                            ccl_global float *buffer,
+                                            float sample_scale,
+                                            int x,
+                                            int y,
+                                            int offset,
+                                            int stride)
 {
-	/* buffer offset */
-	int index = offset + x + y*stride;
+  /* buffer offset */
+  int index = offset + x + y * stride;
 
-	rgba += index;
-	buffer += index*kernel_data.film.pass_stride;
+  rgba += index;
+  buffer += index * kernel_data.film.pass_stride;
 
-	/* map colors */
-	float4 irradiance = *((ccl_global float4*)buffer);
-	float4 float_result = film_map(kg, irradiance, sample_scale);
-	uchar4 byte_result = film_float_to_byte(float_result);
+  /* map colors */
+  float4 irradiance = *((ccl_global float4 *)buffer);
+  float4 float_result = film_map(kg, irradiance, sample_scale);
+  uchar4 byte_result = film_float_to_byte(float_result);
 
-	*rgba = byte_result;
+  *rgba = byte_result;
 }
 
 ccl_device void kernel_film_convert_to_half_float(KernelGlobals *kg,
-	ccl_global uchar4 *rgba, ccl_global float *buffer,
-	float sample_scale, int x, int y, int offset, int stride)
+                                                  ccl_global uchar4 *rgba,
+                                                  ccl_global float *buffer,
+                                                  float sample_scale,
+                                                  int x,
+                                                  int y,
+                                                  int offset,
+                                                  int stride)
 {
-	/* buffer offset */
-	int index = offset + x + y*stride;
+  /* buffer offset */
+  int index = offset + x + y * stride;
 
-	ccl_global float4 *in = (ccl_global float4*)(buffer + index*kernel_data.film.pass_stride);
-	ccl_global half *out = (ccl_global half*)rgba + index*4;
+  ccl_global float4 *in = (ccl_global float4 *)(buffer + index * kernel_data.film.pass_stride);
+  ccl_global half *out = (ccl_global half *)rgba + index * 4;
 
-	float exposure = kernel_data.film.exposure;
+  float exposure = kernel_data.film.exposure;
 
-	float4 rgba_in = *in;
+  float4 rgba_in = *in;
 
-	if(exposure != 1.0f) {
-		rgba_in.x *= exposure;
-		rgba_in.y *= exposure;
-		rgba_in.z *= exposure;
-	}
+  if (exposure != 1.0f) {
+    rgba_in.x *= exposure;
+    rgba_in.y *= exposure;
+    rgba_in.z *= exposure;
+  }
 
-	float4_store_half(out, rgba_in, sample_scale);
+  float4_store_half(out, rgba_in, sample_scale);
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_globals.h b/intern/cycles/kernel/kernel_globals.h
index 59f1e252d21..9dbf3b7ea2e 100644
--- a/intern/cycles/kernel/kernel_globals.h
+++ b/intern/cycles/kernel/kernel_globals.h
@@ -54,41 +54,41 @@ typedef struct KernelGlobals {
 #  define KERNEL_TEX(type, name) texture<type> name;
 #  include "kernel/kernel_textures.h"
 
-	KernelData __data;
+  KernelData __data;
 
 #  ifdef __OSL__
-	/* On the CPU, we also have the OSL globals here. Most data structures are shared
-	 * with SVM, the difference is in the shaders and object/mesh attributes. */
-	OSLGlobals *osl;
-	OSLShadingSystem *osl_ss;
-	OSLThreadData *osl_tdata;
+  /* On the CPU, we also have the OSL globals here. Most data structures are shared
+   * with SVM, the difference is in the shaders and object/mesh attributes. */
+  OSLGlobals *osl;
+  OSLShadingSystem *osl_ss;
+  OSLThreadData *osl_tdata;
 #  endif
 
-	/* **** Run-time data ****  */
+  /* **** Run-time data ****  */
 
-	/* Heap-allocated storage for transparent shadows intersections. */
-	Intersection *transparent_shadow_intersections;
+  /* Heap-allocated storage for transparent shadows intersections. */
+  Intersection *transparent_shadow_intersections;
 
-	/* Storage for decoupled volume steps. */
-	VolumeStep *decoupled_volume_steps[2];
-	int decoupled_volume_steps_index;
+  /* Storage for decoupled volume steps. */
+  VolumeStep *decoupled_volume_steps[2];
+  int decoupled_volume_steps_index;
 
-	/* A buffer for storing per-pixel coverage for Cryptomatte. */
-	CoverageMap *coverage_object;
-	CoverageMap *coverage_material;
-	CoverageMap *coverage_asset;
+  /* A buffer for storing per-pixel coverage for Cryptomatte. */
+  CoverageMap *coverage_object;
+  CoverageMap *coverage_material;
+  CoverageMap *coverage_asset;
 
-	/* split kernel */
-	SplitData split_data;
-	SplitParams split_param_data;
+  /* split kernel */
+  SplitData split_data;
+  SplitParams split_param_data;
 
-	int2 global_size;
-	int2 global_id;
+  int2 global_size;
+  int2 global_id;
 
-	ProfilingState profiler;
+  ProfilingState profiler;
 } KernelGlobals;
 
-#endif  /* __KERNEL_CPU__ */
+#endif /* __KERNEL_CPU__ */
 
 /* For CUDA, constant memory textures must be globals, so we can't put them
  * into a struct. As a result we don't actually use this struct and use actual
@@ -99,124 +99,117 @@ typedef struct KernelGlobals {
 
 __constant__ KernelData __data;
 typedef struct KernelGlobals {
-	/* NOTE: Keep the size in sync with SHADOW_STACK_MAX_HITS. */
-	Intersection hits_stack[64];
+  /* NOTE: Keep the size in sync with SHADOW_STACK_MAX_HITS. */
+  Intersection hits_stack[64];
 } KernelGlobals;
 
 #  define KERNEL_TEX(type, name) const __constant__ __device__ type *name;
 #  include "kernel/kernel_textures.h"
 
-#endif  /* __KERNEL_CUDA__ */
+#endif /* __KERNEL_CUDA__ */
 
 /* OpenCL */
 
 #ifdef __KERNEL_OPENCL__
 
-#  define KERNEL_TEX(type, name) \
-typedef type name##_t;
+#  define KERNEL_TEX(type, name) typedef type name##_t;
 #  include "kernel/kernel_textures.h"
 
 typedef ccl_addr_space struct KernelGlobals {
-	ccl_constant KernelData *data;
-	ccl_global char *buffers[8];
+  ccl_constant KernelData *data;
+  ccl_global char *buffers[8];
 
-#  define KERNEL_TEX(type, name) \
-	TextureInfo name;
+#  define KERNEL_TEX(type, name) TextureInfo name;
 #  include "kernel/kernel_textures.h"
 
 #  ifdef __SPLIT_KERNEL__
-	SplitData split_data;
-	SplitParams split_param_data;
+  SplitData split_data;
+  SplitParams split_param_data;
 #  endif
 } KernelGlobals;
 
-#define KERNEL_BUFFER_PARAMS \
-	ccl_global char *buffer0, \
-	ccl_global char *buffer1, \
-	ccl_global char *buffer2, \
-	ccl_global char *buffer3, \
-	ccl_global char *buffer4, \
-	ccl_global char *buffer5, \
-	ccl_global char *buffer6, \
-	ccl_global char *buffer7
+#  define KERNEL_BUFFER_PARAMS \
+    ccl_global char *buffer0, ccl_global char *buffer1, ccl_global char *buffer2, \
+        ccl_global char *buffer3, ccl_global char *buffer4, ccl_global char *buffer5, \
+        ccl_global char *buffer6, ccl_global char *buffer7
 
-#define KERNEL_BUFFER_ARGS buffer0, buffer1, buffer2, buffer3, buffer4, buffer5, buffer6, buffer7
+#  define KERNEL_BUFFER_ARGS buffer0, buffer1, buffer2, buffer3, buffer4, buffer5, buffer6, buffer7
 
 ccl_device_inline void kernel_set_buffer_pointers(KernelGlobals *kg, KERNEL_BUFFER_PARAMS)
 {
-#ifdef __SPLIT_KERNEL__
-	if(ccl_local_id(0) + ccl_local_id(1) == 0)
-#endif
-	{
-		kg->buffers[0] = buffer0;
-		kg->buffers[1] = buffer1;
-		kg->buffers[2] = buffer2;
-		kg->buffers[3] = buffer3;
-		kg->buffers[4] = buffer4;
-		kg->buffers[5] = buffer5;
-		kg->buffers[6] = buffer6;
-		kg->buffers[7] = buffer7;
-	}
+#  ifdef __SPLIT_KERNEL__
+  if (ccl_local_id(0) + ccl_local_id(1) == 0)
+#  endif
+  {
+    kg->buffers[0] = buffer0;
+    kg->buffers[1] = buffer1;
+    kg->buffers[2] = buffer2;
+    kg->buffers[3] = buffer3;
+    kg->buffers[4] = buffer4;
+    kg->buffers[5] = buffer5;
+    kg->buffers[6] = buffer6;
+    kg->buffers[7] = buffer7;
+  }
 
 #  ifdef __SPLIT_KERNEL__
-	ccl_barrier(CCL_LOCAL_MEM_FENCE);
+  ccl_barrier(CCL_LOCAL_MEM_FENCE);
 #  endif
 }
 
 ccl_device_inline void kernel_set_buffer_info(KernelGlobals *kg)
 {
 #  ifdef __SPLIT_KERNEL__
-	if(ccl_local_id(0) + ccl_local_id(1) == 0)
+  if (ccl_local_id(0) + ccl_local_id(1) == 0)
 #  endif
-	{
-		ccl_global TextureInfo *info = (ccl_global TextureInfo*)kg->buffers[0];
+  {
+    ccl_global TextureInfo *info = (ccl_global TextureInfo *)kg->buffers[0];
 
-#  define KERNEL_TEX(type, name) \
-		kg->name = *(info++);
+#  define KERNEL_TEX(type, name) kg->name = *(info++);
 #  include "kernel/kernel_textures.h"
-	}
+  }
 
 #  ifdef __SPLIT_KERNEL__
-	ccl_barrier(CCL_LOCAL_MEM_FENCE);
+  ccl_barrier(CCL_LOCAL_MEM_FENCE);
 #  endif
 }
 
-#endif  /* __KERNEL_OPENCL__ */
+#endif /* __KERNEL_OPENCL__ */
 
 /* Interpolated lookup table access */
 
 ccl_device float lookup_table_read(KernelGlobals *kg, float x, int offset, int size)
 {
-	x = saturate(x)*(size-1);
+  x = saturate(x) * (size - 1);
 
-	int index = min(float_to_int(x), size-1);
-	int nindex = min(index+1, size-1);
-	float t = x - index;
+  int index = min(float_to_int(x), size - 1);
+  int nindex = min(index + 1, size - 1);
+  float t = x - index;
 
-	float data0 = kernel_tex_fetch(__lookup_table, index + offset);
-	if(t == 0.0f)
-		return data0;
+  float data0 = kernel_tex_fetch(__lookup_table, index + offset);
+  if (t == 0.0f)
+    return data0;
 
-	float data1 = kernel_tex_fetch(__lookup_table, nindex + offset);
-	return (1.0f - t)*data0 + t*data1;
+  float data1 = kernel_tex_fetch(__lookup_table, nindex + offset);
+  return (1.0f - t) * data0 + t * data1;
 }
 
-ccl_device float lookup_table_read_2D(KernelGlobals *kg, float x, float y, int offset, int xsize, int ysize)
+ccl_device float lookup_table_read_2D(
+    KernelGlobals *kg, float x, float y, int offset, int xsize, int ysize)
 {
-	y = saturate(y)*(ysize-1);
+  y = saturate(y) * (ysize - 1);
 
-	int index = min(float_to_int(y), ysize-1);
-	int nindex = min(index+1, ysize-1);
-	float t = y - index;
+  int index = min(float_to_int(y), ysize - 1);
+  int nindex = min(index + 1, ysize - 1);
+  float t = y - index;
 
-	float data0 = lookup_table_read(kg, x, offset + xsize*index, xsize);
-	if(t == 0.0f)
-		return data0;
+  float data0 = lookup_table_read(kg, x, offset + xsize * index, xsize);
+  if (t == 0.0f)
+    return data0;
 
-	float data1 = lookup_table_read(kg, x, offset + xsize*nindex, xsize);
-	return (1.0f - t)*data0 + t*data1;
+  float data1 = lookup_table_read(kg, x, offset + xsize * nindex, xsize);
+  return (1.0f - t) * data0 + t * data1;
 }
 
 CCL_NAMESPACE_END
 
-#endif  /* __KERNEL_GLOBALS_H__ */
+#endif /* __KERNEL_GLOBALS_H__ */
diff --git a/intern/cycles/kernel/kernel_id_passes.h b/intern/cycles/kernel/kernel_id_passes.h
index 0cd65b1f2e8..c1f4e39e5e7 100644
--- a/intern/cycles/kernel/kernel_id_passes.h
+++ b/intern/cycles/kernel/kernel_id_passes.h
@@ -16,78 +16,83 @@
 
 CCL_NAMESPACE_BEGIN
 
-ccl_device_inline void kernel_write_id_slots(ccl_global float *buffer, int num_slots, float id, float weight)
+ccl_device_inline void kernel_write_id_slots(ccl_global float *buffer,
+                                             int num_slots,
+                                             float id,
+                                             float weight)
 {
-	kernel_assert(id != ID_NONE);
-	if(weight == 0.0f) {
-		return;
-	}
+  kernel_assert(id != ID_NONE);
+  if (weight == 0.0f) {
+    return;
+  }
 
-	for(int slot = 0; slot < num_slots; slot++) {
-		ccl_global float2 *id_buffer = (ccl_global float2*)buffer;
+  for (int slot = 0; slot < num_slots; slot++) {
+    ccl_global float2 *id_buffer = (ccl_global float2 *)buffer;
 #ifdef __ATOMIC_PASS_WRITE__
-		/* If the loop reaches an empty slot, the ID isn't in any slot yet - so add it! */
-		if(id_buffer[slot].x == ID_NONE) {
-			/* Use an atomic to claim this slot.
-			* If a different thread got here first, try again from this slot on. */
-			float old_id = atomic_compare_and_swap_float(buffer+slot*2, ID_NONE, id);
-			if(old_id != ID_NONE && old_id != id) {
-				continue;
-			}
-			atomic_add_and_fetch_float(buffer+slot*2+1, weight);
-			break;
-		}
-		/* If there already is a slot for that ID, add the weight.
-		 * If no slot was found, add it to the last. */
-		else if(id_buffer[slot].x == id || slot == num_slots - 1) {
-			atomic_add_and_fetch_float(buffer+slot*2+1, weight);
-			break;
-		}
+    /* If the loop reaches an empty slot, the ID isn't in any slot yet - so add it! */
+    if (id_buffer[slot].x == ID_NONE) {
+      /* Use an atomic to claim this slot.
+       * If a different thread got here first, try again from this slot on. */
+      float old_id = atomic_compare_and_swap_float(buffer + slot * 2, ID_NONE, id);
+      if (old_id != ID_NONE && old_id != id) {
+        continue;
+      }
+      atomic_add_and_fetch_float(buffer + slot * 2 + 1, weight);
+      break;
+    }
+    /* If there already is a slot for that ID, add the weight.
+     * If no slot was found, add it to the last. */
+    else if (id_buffer[slot].x == id || slot == num_slots - 1) {
+      atomic_add_and_fetch_float(buffer + slot * 2 + 1, weight);
+      break;
+    }
 #else  /* __ATOMIC_PASS_WRITE__ */
-		/* If the loop reaches an empty slot, the ID isn't in any slot yet - so add it! */
-		if(id_buffer[slot].x == ID_NONE) {
-			id_buffer[slot].x = id;
-			id_buffer[slot].y = weight;
-			break;
-		}
-		/* If there already is a slot for that ID, add the weight.
-		* If no slot was found, add it to the last. */
-		else if(id_buffer[slot].x == id || slot == num_slots - 1) {
-			id_buffer[slot].y += weight;
-			break;
-		}
-#endif  /* __ATOMIC_PASS_WRITE__ */
-	}
+    /* If the loop reaches an empty slot, the ID isn't in any slot yet - so add it! */
+    if (id_buffer[slot].x == ID_NONE) {
+      id_buffer[slot].x = id;
+      id_buffer[slot].y = weight;
+      break;
+    }
+    /* If there already is a slot for that ID, add the weight.
+     * If no slot was found, add it to the last. */
+    else if (id_buffer[slot].x == id || slot == num_slots - 1) {
+      id_buffer[slot].y += weight;
+      break;
+    }
+#endif /* __ATOMIC_PASS_WRITE__ */
+  }
 }
 
 ccl_device_inline void kernel_sort_id_slots(ccl_global float *buffer, int num_slots)
 {
-	ccl_global float2 *id_buffer = (ccl_global float2*)buffer;
-	for(int slot = 1; slot < num_slots; ++slot) {
-		if(id_buffer[slot].x == ID_NONE) {
-			return;
-		}
-		/* Since we're dealing with a tiny number of elements, insertion sort should be fine. */
-		int i = slot;
-		while(i > 0 && id_buffer[i].y > id_buffer[i - 1].y) {
-			float2 swap = id_buffer[i];
-			id_buffer[i] = id_buffer[i - 1];
-			id_buffer[i - 1] = swap;
-			--i;
-		}
-	}
+  ccl_global float2 *id_buffer = (ccl_global float2 *)buffer;
+  for (int slot = 1; slot < num_slots; ++slot) {
+    if (id_buffer[slot].x == ID_NONE) {
+      return;
+    }
+    /* Since we're dealing with a tiny number of elements, insertion sort should be fine. */
+    int i = slot;
+    while (i > 0 && id_buffer[i].y > id_buffer[i - 1].y) {
+      float2 swap = id_buffer[i];
+      id_buffer[i] = id_buffer[i - 1];
+      id_buffer[i - 1] = swap;
+      --i;
+    }
+  }
 }
 
 #ifdef __KERNEL_GPU__
 /* post-sorting for Cryptomatte */
-ccl_device void kernel_cryptomatte_post(KernelGlobals *kg, ccl_global float *buffer, uint sample, int x, int y, int offset, int stride)
+ccl_device void kernel_cryptomatte_post(
+    KernelGlobals *kg, ccl_global float *buffer, uint sample, int x, int y, int offset, int stride)
 {
-	if(sample - 1 == kernel_data.integrator.aa_samples) {
-		int index = offset + x + y * stride;
-		int pass_stride = kernel_data.film.pass_stride;
-		ccl_global float *cryptomatte_buffer = buffer + index * pass_stride + kernel_data.film.pass_cryptomatte;
-		kernel_sort_id_slots(cryptomatte_buffer, 2 * kernel_data.film.cryptomatte_depth);
-	}
+  if (sample - 1 == kernel_data.integrator.aa_samples) {
+    int index = offset + x + y * stride;
+    int pass_stride = kernel_data.film.pass_stride;
+    ccl_global float *cryptomatte_buffer = buffer + index * pass_stride +
+                                           kernel_data.film.pass_cryptomatte;
+    kernel_sort_id_slots(cryptomatte_buffer, 2 * kernel_data.film.cryptomatte_depth);
+  }
 }
 #endif
 
diff --git a/intern/cycles/kernel/kernel_jitter.h b/intern/cycles/kernel/kernel_jitter.h
index 3bde96b078c..f7270a14940 100644
--- a/intern/cycles/kernel/kernel_jitter.h
+++ b/intern/cycles/kernel/kernel_jitter.h
@@ -26,202 +26,202 @@ CCL_NAMESPACE_BEGIN
 
 ccl_device_inline bool cmj_is_pow2(int i)
 {
-	return (i > 1) && ((i & (i - 1)) == 0);
+  return (i > 1) && ((i & (i - 1)) == 0);
 }
 
 ccl_device_inline int cmj_fast_mod_pow2(int a, int b)
 {
-	return (a & (b - 1));
+  return (a & (b - 1));
 }
 
 /* b must be > 1 */
 ccl_device_inline int cmj_fast_div_pow2(int a, int b)
 {
-	kernel_assert(b > 1);
+  kernel_assert(b > 1);
 #if defined(__KERNEL_SSE2__)
 #  ifdef _MSC_VER
-	unsigned long ctz;
-	_BitScanForward(&ctz, b);
-	return a >> ctz;
+  unsigned long ctz;
+  _BitScanForward(&ctz, b);
+  return a >> ctz;
 #  else
-	return a >> __builtin_ctz(b);
+  return a >> __builtin_ctz(b);
 #  endif
 #elif defined(__KERNEL_CUDA__)
-	return a >> (__ffs(b) - 1);
+  return a >> (__ffs(b) - 1);
 #else
-	return a/b;
+  return a / b;
 #endif
 }
 
 ccl_device_inline uint cmj_w_mask(uint w)
 {
-	kernel_assert(w > 1);
+  kernel_assert(w > 1);
 #if defined(__KERNEL_SSE2__)
 #  ifdef _MSC_VER
-	unsigned long leading_zero;
-	_BitScanReverse(&leading_zero, w);
-	return ((1 << (1 + leading_zero)) - 1);
+  unsigned long leading_zero;
+  _BitScanReverse(&leading_zero, w);
+  return ((1 << (1 + leading_zero)) - 1);
 #  else
-	return ((1 << (32 - __builtin_clz(w))) - 1);
+  return ((1 << (32 - __builtin_clz(w))) - 1);
 #  endif
 #elif defined(__KERNEL_CUDA__)
-	return ((1 << (32 - __clz(w))) - 1);
+  return ((1 << (32 - __clz(w))) - 1);
 #else
-	w |= w >> 1;
-	w |= w >> 2;
-	w |= w >> 4;
-	w |= w >> 8;
-	w |= w >> 16;
+  w |= w >> 1;
+  w |= w >> 2;
+  w |= w >> 4;
+  w |= w >> 8;
+  w |= w >> 16;
 
-	return w;
+  return w;
 #endif
 }
 
 ccl_device_inline uint cmj_permute(uint i, uint l, uint p)
 {
-	uint w = l - 1;
-
-	if((l & w) == 0) {
-		/* l is a power of two (fast) */
-		i ^= p;
-		i *= 0xe170893d;
-		i ^= p >> 16;
-		i ^= (i & w) >> 4;
-		i ^= p >> 8;
-		i *= 0x0929eb3f;
-		i ^= p >> 23;
-		i ^= (i & w) >> 1;
-		i *= 1 | p >> 27;
-		i *= 0x6935fa69;
-		i ^= (i & w) >> 11;
-		i *= 0x74dcb303;
-		i ^= (i & w) >> 2;
-		i *= 0x9e501cc3;
-		i ^= (i & w) >> 2;
-		i *= 0xc860a3df;
-		i &= w;
-		i ^= i >> 5;
-
-		return (i + p) & w;
-	}
-	else {
-		/* l is not a power of two (slow) */
-		w = cmj_w_mask(w);
-
-		do {
-			i ^= p;
-			i *= 0xe170893d;
-			i ^= p >> 16;
-			i ^= (i & w) >> 4;
-			i ^= p >> 8;
-			i *= 0x0929eb3f;
-			i ^= p >> 23;
-			i ^= (i & w) >> 1;
-			i *= 1 | p >> 27;
-			i *= 0x6935fa69;
-			i ^= (i & w) >> 11;
-			i *= 0x74dcb303;
-			i ^= (i & w) >> 2;
-			i *= 0x9e501cc3;
-			i ^= (i & w) >> 2;
-			i *= 0xc860a3df;
-			i &= w;
-			i ^= i >> 5;
-		} while(i >= l);
-
-		return (i + p) % l;
-	}
+  uint w = l - 1;
+
+  if ((l & w) == 0) {
+    /* l is a power of two (fast) */
+    i ^= p;
+    i *= 0xe170893d;
+    i ^= p >> 16;
+    i ^= (i & w) >> 4;
+    i ^= p >> 8;
+    i *= 0x0929eb3f;
+    i ^= p >> 23;
+    i ^= (i & w) >> 1;
+    i *= 1 | p >> 27;
+    i *= 0x6935fa69;
+    i ^= (i & w) >> 11;
+    i *= 0x74dcb303;
+    i ^= (i & w) >> 2;
+    i *= 0x9e501cc3;
+    i ^= (i & w) >> 2;
+    i *= 0xc860a3df;
+    i &= w;
+    i ^= i >> 5;
+
+    return (i + p) & w;
+  }
+  else {
+    /* l is not a power of two (slow) */
+    w = cmj_w_mask(w);
+
+    do {
+      i ^= p;
+      i *= 0xe170893d;
+      i ^= p >> 16;
+      i ^= (i & w) >> 4;
+      i ^= p >> 8;
+      i *= 0x0929eb3f;
+      i ^= p >> 23;
+      i ^= (i & w) >> 1;
+      i *= 1 | p >> 27;
+      i *= 0x6935fa69;
+      i ^= (i & w) >> 11;
+      i *= 0x74dcb303;
+      i ^= (i & w) >> 2;
+      i *= 0x9e501cc3;
+      i ^= (i & w) >> 2;
+      i *= 0xc860a3df;
+      i &= w;
+      i ^= i >> 5;
+    } while (i >= l);
+
+    return (i + p) % l;
+  }
 }
 
 ccl_device_inline uint cmj_hash(uint i, uint p)
 {
-	i ^= p;
-	i ^= i >> 17;
-	i ^= i >> 10;
-	i *= 0xb36534e5;
-	i ^= i >> 12;
-	i ^= i >> 21;
-	i *= 0x93fc4795;
-	i ^= 0xdf6e307f;
-	i ^= i >> 17;
-	i *= 1 | p >> 18;
-
-	return i;
+  i ^= p;
+  i ^= i >> 17;
+  i ^= i >> 10;
+  i *= 0xb36534e5;
+  i ^= i >> 12;
+  i ^= i >> 21;
+  i *= 0x93fc4795;
+  i ^= 0xdf6e307f;
+  i ^= i >> 17;
+  i *= 1 | p >> 18;
+
+  return i;
 }
 
 ccl_device_inline uint cmj_hash_simple(uint i, uint p)
 {
-	i = (i ^ 61) ^ p;
-	i += i << 3;
-	i ^= i >> 4;
-	i *= 0x27d4eb2d;
-	return i;
+  i = (i ^ 61) ^ p;
+  i += i << 3;
+  i ^= i >> 4;
+  i *= 0x27d4eb2d;
+  return i;
 }
 
 ccl_device_inline float cmj_randfloat(uint i, uint p)
 {
-	return cmj_hash(i, p) * (1.0f / 4294967808.0f);
+  return cmj_hash(i, p) * (1.0f / 4294967808.0f);
 }
 
 #ifdef __CMJ__
 ccl_device float cmj_sample_1D(int s, int N, int p)
 {
-	kernel_assert(s < N);
+  kernel_assert(s < N);
 
-	uint x = cmj_permute(s, N, p * 0x68bc21eb);
-	float jx = cmj_randfloat(s, p * 0x967a889b);
+  uint x = cmj_permute(s, N, p * 0x68bc21eb);
+  float jx = cmj_randfloat(s, p * 0x967a889b);
 
-	float invN = 1.0f/N;
-	return (x + jx)*invN;
+  float invN = 1.0f / N;
+  return (x + jx) * invN;
 }
 
 /* TODO(sergey): Do some extra tests and consider moving to util_math.h. */
 ccl_device_inline int cmj_isqrt(int value)
 {
-#if defined(__KERNEL_CUDA__)
-	return float_to_int(__fsqrt_ru(value));
-#elif defined(__KERNEL_GPU__)
-	return float_to_int(sqrtf(value));
-#else
-	/* This is a work around for fast-math on CPU which might replace sqrtf()
-	 * with am approximated version.
-	 */
-	return float_to_int(sqrtf(value) + 1e-6f);
-#endif
+#  if defined(__KERNEL_CUDA__)
+  return float_to_int(__fsqrt_ru(value));
+#  elif defined(__KERNEL_GPU__)
+  return float_to_int(sqrtf(value));
+#  else
+  /* This is a work around for fast-math on CPU which might replace sqrtf()
+   * with an approximated version.
+   */
+  return float_to_int(sqrtf(value) + 1e-6f);
+#  endif
 }
 
 ccl_device void cmj_sample_2D(int s, int N, int p, float *fx, float *fy)
 {
-	kernel_assert(s < N);
+  kernel_assert(s < N);
 
-	int m = cmj_isqrt(N);
-	int n = (N - 1)/m + 1;
-	float invN = 1.0f/N;
-	float invm = 1.0f/m;
-	float invn = 1.0f/n;
+  int m = cmj_isqrt(N);
+  int n = (N - 1) / m + 1;
+  float invN = 1.0f / N;
+  float invm = 1.0f / m;
+  float invn = 1.0f / n;
 
-	s = cmj_permute(s, N, p * 0x51633e2d);
+  s = cmj_permute(s, N, p * 0x51633e2d);
 
-	int sdivm, smodm;
+  int sdivm, smodm;
 
-	if(cmj_is_pow2(m)) {
-		sdivm = cmj_fast_div_pow2(s, m);
-		smodm = cmj_fast_mod_pow2(s, m);
-	}
-	else {
-		/* Doing s*inmv gives precision issues here. */
-		sdivm = s / m;
-		smodm = s - sdivm*m;
-	}
+  if (cmj_is_pow2(m)) {
+    sdivm = cmj_fast_div_pow2(s, m);
+    smodm = cmj_fast_mod_pow2(s, m);
+  }
+  else {
+    /* Doing s*invm gives precision issues here. */
+    sdivm = s / m;
+    smodm = s - sdivm * m;
+  }
 
-	uint sx = cmj_permute(smodm, m, p * 0x68bc21eb);
-	uint sy = cmj_permute(sdivm, n, p * 0x02e5be93);
+  uint sx = cmj_permute(smodm, m, p * 0x68bc21eb);
+  uint sy = cmj_permute(sdivm, n, p * 0x02e5be93);
 
-	float jx = cmj_randfloat(s, p * 0x967a889b);
-	float jy = cmj_randfloat(s, p * 0x368cc8b7);
+  float jx = cmj_randfloat(s, p * 0x967a889b);
+  float jy = cmj_randfloat(s, p * 0x368cc8b7);
 
-	*fx = (sx + (sy + jx)*invn)*invm;
-	*fy = (s + jy)*invN;
+  *fx = (sx + (sy + jx) * invn) * invm;
+  *fy = (s + jy) * invN;
 }
 #endif
 
diff --git a/intern/cycles/kernel/kernel_light.h b/intern/cycles/kernel/kernel_light.h
index 262d7df1364..5e24f8dedaf 100644
--- a/intern/cycles/kernel/kernel_light.h
+++ b/intern/cycles/kernel/kernel_light.h
@@ -19,18 +19,18 @@ CCL_NAMESPACE_BEGIN
 /* Light Sample result */
 
 typedef struct LightSample {
-	float3 P;			/* position on light, or direction for distant light */
-	float3 Ng;			/* normal on light */
-	float3 D;			/* direction from shading point to light */
-	float t;			/* distance to light (FLT_MAX for distant light) */
-	float u, v;			/* parametric coordinate on primitive */
-	float pdf;			/* light sampling probability density function */
-	float eval_fac;		/* intensity multiplier */
-	int object;			/* object id for triangle/curve lights */
-	int prim;			/* primitive id for triangle/curve lights */
-	int shader;			/* shader id */
-	int lamp;			/* lamp id */
-	LightType type;		/* type of light */
+  float3 P;       /* position on light, or direction for distant light */
+  float3 Ng;      /* normal on light */
+  float3 D;       /* direction from shading point to light */
+  float t;        /* distance to light (FLT_MAX for distant light) */
+  float u, v;     /* parametric coordinate on primitive */
+  float pdf;      /* light sampling probability density function */
+  float eval_fac; /* intensity multiplier */
+  int object;     /* object id for triangle/curve lights */
+  int prim;       /* primitive id for triangle/curve lights */
+  int shader;     /* shader id */
+  int lamp;       /* lamp id */
+  LightType type; /* type of light */
 } LightSample;
 
 /* Area light sampling */
@@ -46,130 +46,136 @@ typedef struct LightSample {
  */
 ccl_device_inline float rect_light_sample(float3 P,
                                           float3 *light_p,
-                                          float3 axisu, float3 axisv,
-                                          float randu, float randv,
+                                          float3 axisu,
+                                          float3 axisv,
+                                          float randu,
+                                          float randv,
                                           bool sample_coord)
 {
-	/* In our name system we're using P for the center,
-	 * which is o in the paper.
-	 */
-
-	float3 corner = *light_p - axisu * 0.5f - axisv * 0.5f;
-	float axisu_len, axisv_len;
-	/* Compute local reference system R. */
-	float3 x = normalize_len(axisu, &axisu_len);
-	float3 y = normalize_len(axisv, &axisv_len);
-	float3 z = cross(x, y);
-	/* Compute rectangle coords in local reference system. */
-	float3 dir = corner - P;
-	float z0 = dot(dir, z);
-	/* Flip 'z' to make it point against Q. */
-	if(z0 > 0.0f) {
-		z *= -1.0f;
-		z0 *= -1.0f;
-	}
-	float x0 = dot(dir, x);
-	float y0 = dot(dir, y);
-	float x1 = x0 + axisu_len;
-	float y1 = y0 + axisv_len;
-	/* Compute internal angles (gamma_i). */
-	float4 diff = make_float4(x0, y1, x1, y0) - make_float4(x1, y0, x0, y1);
-	float4 nz = make_float4(y0, x1, y1, x0) * diff;
-	nz = nz / sqrt(z0 * z0 * diff * diff + nz * nz);
-	float g0 = safe_acosf(-nz.x * nz.y);
-	float g1 = safe_acosf(-nz.y * nz.z);
-	float g2 = safe_acosf(-nz.z * nz.w);
-	float g3 = safe_acosf(-nz.w * nz.x);
-	/* Compute predefined constants. */
-	float b0 = nz.x;
-	float b1 = nz.z;
-	float b0sq = b0 * b0;
-	float k = M_2PI_F - g2 - g3;
-	/* Compute solid angle from internal angles. */
-	float S = g0 + g1 - k;
-
-	if(sample_coord) {
-		/* Compute cu. */
-		float au = randu * S + k;
-		float fu = (cosf(au) * b0 - b1) / sinf(au);
-		float cu = 1.0f / sqrtf(fu * fu + b0sq) * (fu > 0.0f ? 1.0f : -1.0f);
-		cu = clamp(cu, -1.0f, 1.0f);
-		/* Compute xu. */
-		float xu = -(cu * z0) / max(sqrtf(1.0f - cu * cu), 1e-7f);
-		xu = clamp(xu, x0, x1);
-		/* Compute yv. */
-		float z0sq = z0 * z0;
-		float y0sq = y0 * y0;
-		float y1sq = y1 * y1;
-		float d = sqrtf(xu * xu + z0sq);
-		float h0 = y0 / sqrtf(d * d + y0sq);
-		float h1 = y1 / sqrtf(d * d + y1sq);
-		float hv = h0 + randv * (h1 - h0), hv2 = hv * hv;
-		float yv = (hv2 < 1.0f - 1e-6f) ? (hv * d) / sqrtf(1.0f - hv2) : y1;
-
-		/* Transform (xu, yv, z0) to world coords. */
-		*light_p = P + xu * x + yv * y + z0 * z;
-	}
-
-	/* return pdf */
-	if(S != 0.0f)
-		return 1.0f / S;
-	else
-		return 0.0f;
+  /* In our name system we're using P for the center,
+   * which is o in the paper.
+   */
+
+  float3 corner = *light_p - axisu * 0.5f - axisv * 0.5f;
+  float axisu_len, axisv_len;
+  /* Compute local reference system R. */
+  float3 x = normalize_len(axisu, &axisu_len);
+  float3 y = normalize_len(axisv, &axisv_len);
+  float3 z = cross(x, y);
+  /* Compute rectangle coords in local reference system. */
+  float3 dir = corner - P;
+  float z0 = dot(dir, z);
+  /* Flip 'z' to make it point against Q. */
+  if (z0 > 0.0f) {
+    z *= -1.0f;
+    z0 *= -1.0f;
+  }
+  float x0 = dot(dir, x);
+  float y0 = dot(dir, y);
+  float x1 = x0 + axisu_len;
+  float y1 = y0 + axisv_len;
+  /* Compute internal angles (gamma_i). */
+  float4 diff = make_float4(x0, y1, x1, y0) - make_float4(x1, y0, x0, y1);
+  float4 nz = make_float4(y0, x1, y1, x0) * diff;
+  nz = nz / sqrt(z0 * z0 * diff * diff + nz * nz);
+  float g0 = safe_acosf(-nz.x * nz.y);
+  float g1 = safe_acosf(-nz.y * nz.z);
+  float g2 = safe_acosf(-nz.z * nz.w);
+  float g3 = safe_acosf(-nz.w * nz.x);
+  /* Compute predefined constants. */
+  float b0 = nz.x;
+  float b1 = nz.z;
+  float b0sq = b0 * b0;
+  float k = M_2PI_F - g2 - g3;
+  /* Compute solid angle from internal angles. */
+  float S = g0 + g1 - k;
+
+  if (sample_coord) {
+    /* Compute cu. */
+    float au = randu * S + k;
+    float fu = (cosf(au) * b0 - b1) / sinf(au);
+    float cu = 1.0f / sqrtf(fu * fu + b0sq) * (fu > 0.0f ? 1.0f : -1.0f);
+    cu = clamp(cu, -1.0f, 1.0f);
+    /* Compute xu. */
+    float xu = -(cu * z0) / max(sqrtf(1.0f - cu * cu), 1e-7f);
+    xu = clamp(xu, x0, x1);
+    /* Compute yv. */
+    float z0sq = z0 * z0;
+    float y0sq = y0 * y0;
+    float y1sq = y1 * y1;
+    float d = sqrtf(xu * xu + z0sq);
+    float h0 = y0 / sqrtf(d * d + y0sq);
+    float h1 = y1 / sqrtf(d * d + y1sq);
+    float hv = h0 + randv * (h1 - h0), hv2 = hv * hv;
+    float yv = (hv2 < 1.0f - 1e-6f) ? (hv * d) / sqrtf(1.0f - hv2) : y1;
+
+    /* Transform (xu, yv, z0) to world coords. */
+    *light_p = P + xu * x + yv * y + z0 * z;
+  }
+
+  /* return pdf */
+  if (S != 0.0f)
+    return 1.0f / S;
+  else
+    return 0.0f;
 }
 
 ccl_device_inline float3 ellipse_sample(float3 ru, float3 rv, float randu, float randv)
 {
-	to_unit_disk(&randu, &randv);
-	return ru*randu + rv*randv;
+  to_unit_disk(&randu, &randv);
+  return ru * randu + rv * randv;
 }
 
 ccl_device float3 disk_light_sample(float3 v, float randu, float randv)
 {
-	float3 ru, rv;
+  float3 ru, rv;
 
-	make_orthonormals(v, &ru, &rv);
+  make_orthonormals(v, &ru, &rv);
 
-	return ellipse_sample(ru, rv, randu, randv);
+  return ellipse_sample(ru, rv, randu, randv);
 }
 
 ccl_device float3 distant_light_sample(float3 D, float radius, float randu, float randv)
 {
-	return normalize(D + disk_light_sample(D, randu, randv)*radius);
+  return normalize(D + disk_light_sample(D, randu, randv) * radius);
 }
 
-ccl_device float3 sphere_light_sample(float3 P, float3 center, float radius, float randu, float randv)
+ccl_device float3
+sphere_light_sample(float3 P, float3 center, float radius, float randu, float randv)
 {
-	return disk_light_sample(normalize(P - center), randu, randv)*radius;
+  return disk_light_sample(normalize(P - center), randu, randv) * radius;
 }
 
-ccl_device float spot_light_attenuation(float3 dir, float spot_angle, float spot_smooth, LightSample *ls)
+ccl_device float spot_light_attenuation(float3 dir,
+                                        float spot_angle,
+                                        float spot_smooth,
+                                        LightSample *ls)
 {
-	float3 I = ls->Ng;
+  float3 I = ls->Ng;
 
-	float attenuation = dot(dir, I);
+  float attenuation = dot(dir, I);
 
-	if(attenuation <= spot_angle) {
-		attenuation = 0.0f;
-	}
-	else {
-		float t = attenuation - spot_angle;
+  if (attenuation <= spot_angle) {
+    attenuation = 0.0f;
+  }
+  else {
+    float t = attenuation - spot_angle;
 
-		if(t < spot_smooth && spot_smooth != 0.0f)
-			attenuation *= smoothstepf(t/spot_smooth);
-	}
+    if (t < spot_smooth && spot_smooth != 0.0f)
+      attenuation *= smoothstepf(t / spot_smooth);
+  }
 
-	return attenuation;
+  return attenuation;
 }
 
 ccl_device float lamp_light_pdf(KernelGlobals *kg, const float3 Ng, const float3 I, float t)
 {
-	float cos_pi = dot(Ng, I);
+  float cos_pi = dot(Ng, I);
 
-	if(cos_pi <= 0.0f)
-		return 0.0f;
+  if (cos_pi <= 0.0f)
+    return 0.0f;
 
-	return t*t/cos_pi;
+  return t * t / cos_pi;
 }
 
 /* Background Light */
@@ -180,203 +186,219 @@ ccl_device float lamp_light_pdf(KernelGlobals *kg, const float3 Ng, const float3
  * devices, but we're so close to the release so better not screw things
  * up for CPU at least.
  */
-#ifdef __KERNEL_GPU__
+#  ifdef __KERNEL_GPU__
 ccl_device_noinline
-#else
+#  else
 ccl_device
-#endif
-float3 background_map_sample(KernelGlobals *kg, float randu, float randv, float *pdf)
+#  endif
+    float3
+    background_map_sample(KernelGlobals *kg, float randu, float randv, float *pdf)
 {
-	/* for the following, the CDF values are actually a pair of floats, with the
-	 * function value as X and the actual CDF as Y.  The last entry's function
-	 * value is the CDF total. */
-	int res_x = kernel_data.integrator.pdf_background_res_x;
-	int res_y = kernel_data.integrator.pdf_background_res_y;
-	int cdf_width = res_x + 1;
-
-	/* this is basically std::lower_bound as used by pbrt */
-	int first = 0;
-	int count = res_y;
-
-	while(count > 0) {
-		int step = count >> 1;
-		int middle = first + step;
-
-		if(kernel_tex_fetch(__light_background_marginal_cdf, middle).y < randv) {
-			first = middle + 1;
-			count -= step + 1;
-		}
-		else
-			count = step;
-	}
-
-	int index_v = max(0, first - 1);
-	kernel_assert(index_v >= 0 && index_v < res_y);
-
-	float2 cdf_v = kernel_tex_fetch(__light_background_marginal_cdf, index_v);
-	float2 cdf_next_v = kernel_tex_fetch(__light_background_marginal_cdf, index_v + 1);
-	float2 cdf_last_v = kernel_tex_fetch(__light_background_marginal_cdf, res_y);
-
-	/* importance-sampled V direction */
-	float dv = inverse_lerp(cdf_v.y, cdf_next_v.y, randv);
-	float v = (index_v + dv) / res_y;
-
-	/* this is basically std::lower_bound as used by pbrt */
-	first = 0;
-	count = res_x;
-	while(count > 0) {
-		int step = count >> 1;
-		int middle = first + step;
-
-		if(kernel_tex_fetch(__light_background_conditional_cdf, index_v * cdf_width + middle).y < randu) {
-			first = middle + 1;
-			count -= step + 1;
-		}
-		else
-			count = step;
-	}
-
-	int index_u = max(0, first - 1);
-	kernel_assert(index_u >= 0 && index_u < res_x);
-
-	float2 cdf_u = kernel_tex_fetch(__light_background_conditional_cdf, index_v * cdf_width + index_u);
-	float2 cdf_next_u = kernel_tex_fetch(__light_background_conditional_cdf, index_v * cdf_width + index_u + 1);
-	float2 cdf_last_u = kernel_tex_fetch(__light_background_conditional_cdf, index_v * cdf_width + res_x);
-
-	/* importance-sampled U direction */
-	float du = inverse_lerp(cdf_u.y, cdf_next_u.y, randu);
-	float u = (index_u + du) / res_x;
-
-	/* compute pdf */
-	float denom = cdf_last_u.x * cdf_last_v.x;
-	float sin_theta = sinf(M_PI_F * v);
-
-	if(sin_theta == 0.0f || denom == 0.0f)
-		*pdf = 0.0f;
-	else
-		*pdf = (cdf_u.x * cdf_v.x)/(M_2PI_F * M_PI_F * sin_theta * denom);
-
-	/* compute direction */
-	return equirectangular_to_direction(u, v);
+  /* for the following, the CDF values are actually a pair of floats, with the
+   * function value as X and the actual CDF as Y.  The last entry's function
+   * value is the CDF total. */
+  int res_x = kernel_data.integrator.pdf_background_res_x;
+  int res_y = kernel_data.integrator.pdf_background_res_y;
+  int cdf_width = res_x + 1;
+
+  /* this is basically std::lower_bound as used by pbrt */
+  int first = 0;
+  int count = res_y;
+
+  while (count > 0) {
+    int step = count >> 1;
+    int middle = first + step;
+
+    if (kernel_tex_fetch(__light_background_marginal_cdf, middle).y < randv) {
+      first = middle + 1;
+      count -= step + 1;
+    }
+    else
+      count = step;
+  }
+
+  int index_v = max(0, first - 1);
+  kernel_assert(index_v >= 0 && index_v < res_y);
+
+  float2 cdf_v = kernel_tex_fetch(__light_background_marginal_cdf, index_v);
+  float2 cdf_next_v = kernel_tex_fetch(__light_background_marginal_cdf, index_v + 1);
+  float2 cdf_last_v = kernel_tex_fetch(__light_background_marginal_cdf, res_y);
+
+  /* importance-sampled V direction */
+  float dv = inverse_lerp(cdf_v.y, cdf_next_v.y, randv);
+  float v = (index_v + dv) / res_y;
+
+  /* this is basically std::lower_bound as used by pbrt */
+  first = 0;
+  count = res_x;
+  while (count > 0) {
+    int step = count >> 1;
+    int middle = first + step;
+
+    if (kernel_tex_fetch(__light_background_conditional_cdf, index_v * cdf_width + middle).y <
+        randu) {
+      first = middle + 1;
+      count -= step + 1;
+    }
+    else
+      count = step;
+  }
+
+  int index_u = max(0, first - 1);
+  kernel_assert(index_u >= 0 && index_u < res_x);
+
+  float2 cdf_u = kernel_tex_fetch(__light_background_conditional_cdf,
+                                  index_v * cdf_width + index_u);
+  float2 cdf_next_u = kernel_tex_fetch(__light_background_conditional_cdf,
+                                       index_v * cdf_width + index_u + 1);
+  float2 cdf_last_u = kernel_tex_fetch(__light_background_conditional_cdf,
+                                       index_v * cdf_width + res_x);
+
+  /* importance-sampled U direction */
+  float du = inverse_lerp(cdf_u.y, cdf_next_u.y, randu);
+  float u = (index_u + du) / res_x;
+
+  /* compute pdf */
+  float denom = cdf_last_u.x * cdf_last_v.x;
+  float sin_theta = sinf(M_PI_F * v);
+
+  if (sin_theta == 0.0f || denom == 0.0f)
+    *pdf = 0.0f;
+  else
+    *pdf = (cdf_u.x * cdf_v.x) / (M_2PI_F * M_PI_F * sin_theta * denom);
+
+  /* compute direction */
+  return equirectangular_to_direction(u, v);
 }
 
 /* TODO(sergey): Same as above, after the release we should consider using
  * 'noinline' for all devices.
  */
-#ifdef __KERNEL_GPU__
+#  ifdef __KERNEL_GPU__
 ccl_device_noinline
-#else
+#  else
 ccl_device
-#endif
-float background_map_pdf(KernelGlobals *kg, float3 direction)
+#  endif
+    float
+    background_map_pdf(KernelGlobals *kg, float3 direction)
 {
-	float2 uv = direction_to_equirectangular(direction);
-	int res_x = kernel_data.integrator.pdf_background_res_x;
-	int res_y = kernel_data.integrator.pdf_background_res_y;
-	int cdf_width = res_x + 1;
+  float2 uv = direction_to_equirectangular(direction);
+  int res_x = kernel_data.integrator.pdf_background_res_x;
+  int res_y = kernel_data.integrator.pdf_background_res_y;
+  int cdf_width = res_x + 1;
 
-	float sin_theta = sinf(uv.y * M_PI_F);
+  float sin_theta = sinf(uv.y * M_PI_F);
 
-	if(sin_theta == 0.0f)
-		return 0.0f;
+  if (sin_theta == 0.0f)
+    return 0.0f;
 
-	int index_u = clamp(float_to_int(uv.x * res_x), 0, res_x - 1);
-	int index_v = clamp(float_to_int(uv.y * res_y), 0, res_y - 1);
+  int index_u = clamp(float_to_int(uv.x * res_x), 0, res_x - 1);
+  int index_v = clamp(float_to_int(uv.y * res_y), 0, res_y - 1);
 
-	/* pdfs in V direction */
-	float2 cdf_last_u = kernel_tex_fetch(__light_background_conditional_cdf, index_v * cdf_width + res_x);
-	float2 cdf_last_v = kernel_tex_fetch(__light_background_marginal_cdf, res_y);
+  /* pdfs in V direction */
+  float2 cdf_last_u = kernel_tex_fetch(__light_background_conditional_cdf,
+                                       index_v * cdf_width + res_x);
+  float2 cdf_last_v = kernel_tex_fetch(__light_background_marginal_cdf, res_y);
 
-	float denom = cdf_last_u.x * cdf_last_v.x;
+  float denom = cdf_last_u.x * cdf_last_v.x;
 
-	if(denom == 0.0f)
-		return 0.0f;
+  if (denom == 0.0f)
+    return 0.0f;
 
-	/* pdfs in U direction */
-	float2 cdf_u = kernel_tex_fetch(__light_background_conditional_cdf, index_v * cdf_width + index_u);
-	float2 cdf_v = kernel_tex_fetch(__light_background_marginal_cdf, index_v);
+  /* pdfs in U direction */
+  float2 cdf_u = kernel_tex_fetch(__light_background_conditional_cdf,
+                                  index_v * cdf_width + index_u);
+  float2 cdf_v = kernel_tex_fetch(__light_background_marginal_cdf, index_v);
 
-	return (cdf_u.x * cdf_v.x)/(M_2PI_F * M_PI_F * sin_theta * denom);
+  return (cdf_u.x * cdf_v.x) / (M_2PI_F * M_PI_F * sin_theta * denom);
 }
 
-ccl_device_inline bool background_portal_data_fetch_and_check_side(KernelGlobals *kg,
-                                                                   float3 P,
-                                                                   int index,
-                                                                   float3 *lightpos,
-                                                                   float3 *dir)
+ccl_device_inline bool background_portal_data_fetch_and_check_side(
+    KernelGlobals *kg, float3 P, int index, float3 *lightpos, float3 *dir)
 {
-	int portal = kernel_data.integrator.portal_offset + index;
-	const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, portal);
+  int portal = kernel_data.integrator.portal_offset + index;
+  const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, portal);
 
-	*lightpos = make_float3(klight->co[0], klight->co[1], klight->co[2]);
-	*dir = make_float3(klight->area.dir[0], klight->area.dir[1], klight->area.dir[2]);
+  *lightpos = make_float3(klight->co[0], klight->co[1], klight->co[2]);
+  *dir = make_float3(klight->area.dir[0], klight->area.dir[1], klight->area.dir[2]);
 
-	/* Check whether portal is on the right side. */
-	if(dot(*dir, P - *lightpos) > 1e-4f)
-		return true;
+  /* Check whether portal is on the right side. */
+  if (dot(*dir, P - *lightpos) > 1e-4f)
+    return true;
 
-	return false;
+  return false;
 }
 
-ccl_device_inline float background_portal_pdf(KernelGlobals *kg,
-                                              float3 P,
-                                              float3 direction,
-                                              int ignore_portal,
-                                              bool *is_possible)
+ccl_device_inline float background_portal_pdf(
+    KernelGlobals *kg, float3 P, float3 direction, int ignore_portal, bool *is_possible)
 {
-	float portal_pdf = 0.0f;
-
-	int num_possible = 0;
-	for(int p = 0; p < kernel_data.integrator.num_portals; p++) {
-		if(p == ignore_portal)
-			continue;
-
-		float3 lightpos, dir;
-		if(!background_portal_data_fetch_and_check_side(kg, P, p, &lightpos, &dir))
-			continue;
-
-		/* There's a portal that could be sampled from this position. */
-		if(is_possible) {
-			*is_possible = true;
-		}
-		num_possible++;
-
-		int portal = kernel_data.integrator.portal_offset + p;
-		const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, portal);
-		float3 axisu = make_float3(klight->area.axisu[0], klight->area.axisu[1], klight->area.axisu[2]);
-		float3 axisv = make_float3(klight->area.axisv[0], klight->area.axisv[1], klight->area.axisv[2]);
-		bool is_round = (klight->area.invarea < 0.0f);
-
-		if(!ray_quad_intersect(P, direction, 1e-4f, FLT_MAX, lightpos, axisu, axisv, dir, NULL, NULL, NULL, NULL, is_round))
-			continue;
-
-		if(is_round) {
-			float t;
-			float3 D = normalize_len(lightpos - P, &t);
-			portal_pdf += fabsf(klight->area.invarea) * lamp_light_pdf(kg, dir, -D, t);
-		}
-		else {
-			portal_pdf += rect_light_sample(P, &lightpos, axisu, axisv, 0.0f, 0.0f, false);
-		}
-	}
-
-	if(ignore_portal >= 0) {
-		/* We have skipped a portal that could be sampled as well. */
-		num_possible++;
-	}
-
-	return (num_possible > 0)? portal_pdf / num_possible: 0.0f;
+  float portal_pdf = 0.0f;
+
+  int num_possible = 0;
+  for (int p = 0; p < kernel_data.integrator.num_portals; p++) {
+    if (p == ignore_portal)
+      continue;
+
+    float3 lightpos, dir;
+    if (!background_portal_data_fetch_and_check_side(kg, P, p, &lightpos, &dir))
+      continue;
+
+    /* There's a portal that could be sampled from this position. */
+    if (is_possible) {
+      *is_possible = true;
+    }
+    num_possible++;
+
+    int portal = kernel_data.integrator.portal_offset + p;
+    const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, portal);
+    float3 axisu = make_float3(
+        klight->area.axisu[0], klight->area.axisu[1], klight->area.axisu[2]);
+    float3 axisv = make_float3(
+        klight->area.axisv[0], klight->area.axisv[1], klight->area.axisv[2]);
+    bool is_round = (klight->area.invarea < 0.0f);
+
+    if (!ray_quad_intersect(P,
+                            direction,
+                            1e-4f,
+                            FLT_MAX,
+                            lightpos,
+                            axisu,
+                            axisv,
+                            dir,
+                            NULL,
+                            NULL,
+                            NULL,
+                            NULL,
+                            is_round))
+      continue;
+
+    if (is_round) {
+      float t;
+      float3 D = normalize_len(lightpos - P, &t);
+      portal_pdf += fabsf(klight->area.invarea) * lamp_light_pdf(kg, dir, -D, t);
+    }
+    else {
+      portal_pdf += rect_light_sample(P, &lightpos, axisu, axisv, 0.0f, 0.0f, false);
+    }
+  }
+
+  if (ignore_portal >= 0) {
+    /* We have skipped a portal that could be sampled as well. */
+    num_possible++;
+  }
+
+  return (num_possible > 0) ? portal_pdf / num_possible : 0.0f;
 }
 
 ccl_device int background_num_possible_portals(KernelGlobals *kg, float3 P)
 {
-	int num_possible_portals = 0;
-	for(int p = 0; p < kernel_data.integrator.num_portals; p++) {
-		float3 lightpos, dir;
-		if(background_portal_data_fetch_and_check_side(kg, P, p, &lightpos, &dir))
-			num_possible_portals++;
-	}
-	return num_possible_portals;
+  int num_possible_portals = 0;
+  for (int p = 0; p < kernel_data.integrator.num_portals; p++) {
+    float3 lightpos, dir;
+    if (background_portal_data_fetch_and_check_side(kg, P, p, &lightpos, &dir))
+      num_possible_portals++;
+  }
+  return num_possible_portals;
 }
 
 ccl_device float3 background_portal_sample(KernelGlobals *kg,
@@ -387,774 +409,754 @@ ccl_device float3 background_portal_sample(KernelGlobals *kg,
                                            int *sampled_portal,
                                            float *pdf)
 {
-	/* Pick a portal, then re-normalize randv. */
-	randv *= num_possible;
-	int portal = (int)randv;
-	randv -= portal;
-
-	/* TODO(sergey): Some smarter way of finding portal to sample
-	 * is welcome.
-	 */
-	for(int p = 0; p < kernel_data.integrator.num_portals; p++) {
-		/* Search for the sampled portal. */
-		float3 lightpos, dir;
-		if(!background_portal_data_fetch_and_check_side(kg, P, p, &lightpos, &dir))
-			continue;
-
-		if(portal == 0) {
-			/* p is the portal to be sampled. */
-			int portal = kernel_data.integrator.portal_offset + p;
-			const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, portal);
-			float3 axisu = make_float3(klight->area.axisu[0], klight->area.axisu[1], klight->area.axisu[2]);
-			float3 axisv = make_float3(klight->area.axisv[0], klight->area.axisv[1], klight->area.axisv[2]);
-			bool is_round = (klight->area.invarea < 0.0f);
-
-			float3 D;
-			if(is_round) {
-				lightpos += ellipse_sample(axisu*0.5f, axisv*0.5f, randu, randv);
-				float t;
-				D = normalize_len(lightpos - P, &t);
-				*pdf = fabsf(klight->area.invarea) * lamp_light_pdf(kg, dir, -D, t);
-			}
-			else {
-				*pdf = rect_light_sample(P, &lightpos,
-				                         axisu, axisv,
-				                         randu, randv,
-				                         true);
-				D = normalize(lightpos - P);
-			}
-
-			*pdf /= num_possible;
-			*sampled_portal = p;
-			return D;
-		}
-
-		portal--;
-	}
-
-	return make_float3(0.0f, 0.0f, 0.0f);
+  /* Pick a portal, then re-normalize randv. */
+  randv *= num_possible;
+  int portal = (int)randv;
+  randv -= portal;
+
+  /* TODO(sergey): Some smarter way of finding portal to sample
+   * is welcome.
+   */
+  for (int p = 0; p < kernel_data.integrator.num_portals; p++) {
+    /* Search for the sampled portal. */
+    float3 lightpos, dir;
+    if (!background_portal_data_fetch_and_check_side(kg, P, p, &lightpos, &dir))
+      continue;
+
+    if (portal == 0) {
+      /* p is the portal to be sampled. */
+      int portal = kernel_data.integrator.portal_offset + p;
+      const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, portal);
+      float3 axisu = make_float3(
+          klight->area.axisu[0], klight->area.axisu[1], klight->area.axisu[2]);
+      float3 axisv = make_float3(
+          klight->area.axisv[0], klight->area.axisv[1], klight->area.axisv[2]);
+      bool is_round = (klight->area.invarea < 0.0f);
+
+      float3 D;
+      if (is_round) {
+        lightpos += ellipse_sample(axisu * 0.5f, axisv * 0.5f, randu, randv);
+        float t;
+        D = normalize_len(lightpos - P, &t);
+        *pdf = fabsf(klight->area.invarea) * lamp_light_pdf(kg, dir, -D, t);
+      }
+      else {
+        *pdf = rect_light_sample(P, &lightpos, axisu, axisv, randu, randv, true);
+        D = normalize(lightpos - P);
+      }
+
+      *pdf /= num_possible;
+      *sampled_portal = p;
+      return D;
+    }
+
+    portal--;
+  }
+
+  return make_float3(0.0f, 0.0f, 0.0f);
 }
 
-ccl_device_inline float3 background_light_sample(KernelGlobals *kg,
-                                                 float3 P,
-                                                 float randu, float randv,
-                                                 float *pdf)
+ccl_device_inline float3
+background_light_sample(KernelGlobals *kg, float3 P, float randu, float randv, float *pdf)
 {
-	/* Probability of sampling portals instead of the map. */
-	float portal_sampling_pdf = kernel_data.integrator.portal_pdf;
-
-	/* Check if there are portals in the scene which we can sample. */
-	if(portal_sampling_pdf > 0.0f) {
-		int num_portals = background_num_possible_portals(kg, P);
-		if(num_portals > 0) {
-			if(portal_sampling_pdf == 1.0f || randu < portal_sampling_pdf) {
-				if(portal_sampling_pdf < 1.0f) {
-					randu /= portal_sampling_pdf;
-				}
-				int portal;
-				float3 D = background_portal_sample(kg, P, randu, randv, num_portals, &portal, pdf);
-				if(num_portals > 1) {
-					/* Ignore the chosen portal, its pdf is already included. */
-					*pdf += background_portal_pdf(kg, P, D, portal, NULL);
-				}
-				/* We could also have sampled the map, so combine with MIS. */
-				if(portal_sampling_pdf < 1.0f) {
-					float cdf_pdf = background_map_pdf(kg, D);
-					*pdf = (portal_sampling_pdf * (*pdf)
-					     + (1.0f - portal_sampling_pdf) * cdf_pdf);
-				}
-				return D;
-			}
-			else {
-				/* Sample map, but with nonzero portal_sampling_pdf for MIS. */
-				randu = (randu - portal_sampling_pdf) / (1.0f - portal_sampling_pdf);
-			}
-		}
-		else {
-			/* We can't sample a portal.
-			 * Check if we can sample the map instead.
-			 */
-			if(portal_sampling_pdf == 1.0f) {
-				/* Use uniform as a fallback if we can't sample the map. */
-				*pdf = 1.0f / M_4PI_F;
-				return sample_uniform_sphere(randu, randv);
-			}
-			else {
-				portal_sampling_pdf = 0.0f;
-			}
-		}
-	}
-
-	float3 D = background_map_sample(kg, randu, randv, pdf);
-	/* Use MIS if portals could be sampled as well. */
-	if(portal_sampling_pdf > 0.0f) {
-		float portal_pdf = background_portal_pdf(kg, P, D, -1, NULL);
-		*pdf = (portal_sampling_pdf * portal_pdf
-		     + (1.0f - portal_sampling_pdf) * (*pdf));
-	}
-	return D;
+  /* Probability of sampling portals instead of the map. */
+  float portal_sampling_pdf = kernel_data.integrator.portal_pdf;
+
+  /* Check if there are portals in the scene which we can sample. */
+  if (portal_sampling_pdf > 0.0f) {
+    int num_portals = background_num_possible_portals(kg, P);
+    if (num_portals > 0) {
+      if (portal_sampling_pdf == 1.0f || randu < portal_sampling_pdf) {
+        if (portal_sampling_pdf < 1.0f) {
+          randu /= portal_sampling_pdf;
+        }
+        int portal;
+        float3 D = background_portal_sample(kg, P, randu, randv, num_portals, &portal, pdf);
+        if (num_portals > 1) {
+          /* Ignore the chosen portal, its pdf is already included. */
+          *pdf += background_portal_pdf(kg, P, D, portal, NULL);
+        }
+        /* We could also have sampled the map, so combine with MIS. */
+        if (portal_sampling_pdf < 1.0f) {
+          float cdf_pdf = background_map_pdf(kg, D);
+          *pdf = (portal_sampling_pdf * (*pdf) + (1.0f - portal_sampling_pdf) * cdf_pdf);
+        }
+        return D;
+      }
+      else {
+        /* Sample map, but with nonzero portal_sampling_pdf for MIS. */
+        randu = (randu - portal_sampling_pdf) / (1.0f - portal_sampling_pdf);
+      }
+    }
+    else {
+      /* We can't sample a portal.
+       * Check if we can sample the map instead.
+       */
+      if (portal_sampling_pdf == 1.0f) {
+        /* Use uniform as a fallback if we can't sample the map. */
+        *pdf = 1.0f / M_4PI_F;
+        return sample_uniform_sphere(randu, randv);
+      }
+      else {
+        portal_sampling_pdf = 0.0f;
+      }
+    }
+  }
+
+  float3 D = background_map_sample(kg, randu, randv, pdf);
+  /* Use MIS if portals could be sampled as well. */
+  if (portal_sampling_pdf > 0.0f) {
+    float portal_pdf = background_portal_pdf(kg, P, D, -1, NULL);
+    *pdf = (portal_sampling_pdf * portal_pdf + (1.0f - portal_sampling_pdf) * (*pdf));
+  }
+  return D;
 }
 
 ccl_device float background_light_pdf(KernelGlobals *kg, float3 P, float3 direction)
 {
-	/* Probability of sampling portals instead of the map. */
-	float portal_sampling_pdf = kernel_data.integrator.portal_pdf;
-
-	float portal_pdf = 0.0f, map_pdf = 0.0f;
-	if(portal_sampling_pdf > 0.0f) {
-		/* Evaluate PDF of sampling this direction by portal sampling. */
-		bool is_possible = false;
-		portal_pdf = background_portal_pdf(kg, P, direction, -1, &is_possible) * portal_sampling_pdf;
-		if(!is_possible) {
-			/* Portal sampling is not possible here because all portals point to the wrong side.
-			 * If map sampling is possible, it would be used instead, otherwise fallback sampling is used. */
-			if(portal_sampling_pdf == 1.0f) {
-				return kernel_data.integrator.pdf_lights / M_4PI_F;
-			}
-			else {
-				/* Force map sampling. */
-				portal_sampling_pdf = 0.0f;
-			}
-		}
-	}
-	if(portal_sampling_pdf < 1.0f) {
-		/* Evaluate PDF of sampling this direction by map sampling. */
-		map_pdf = background_map_pdf(kg, direction) * (1.0f - portal_sampling_pdf);
-	}
-	return (portal_pdf + map_pdf) * kernel_data.integrator.pdf_lights;
+  /* Probability of sampling portals instead of the map. */
+  float portal_sampling_pdf = kernel_data.integrator.portal_pdf;
+
+  float portal_pdf = 0.0f, map_pdf = 0.0f;
+  if (portal_sampling_pdf > 0.0f) {
+    /* Evaluate PDF of sampling this direction by portal sampling. */
+    bool is_possible = false;
+    portal_pdf = background_portal_pdf(kg, P, direction, -1, &is_possible) * portal_sampling_pdf;
+    if (!is_possible) {
+      /* Portal sampling is not possible here because all portals point to the wrong side.
+       * If map sampling is possible, it would be used instead, otherwise fallback sampling is used. */
+      if (portal_sampling_pdf == 1.0f) {
+        return kernel_data.integrator.pdf_lights / M_4PI_F;
+      }
+      else {
+        /* Force map sampling. */
+        portal_sampling_pdf = 0.0f;
+      }
+    }
+  }
+  if (portal_sampling_pdf < 1.0f) {
+    /* Evaluate PDF of sampling this direction by map sampling. */
+    map_pdf = background_map_pdf(kg, direction) * (1.0f - portal_sampling_pdf);
+  }
+  return (portal_pdf + map_pdf) * kernel_data.integrator.pdf_lights;
 }
 #endif
 
 /* Regular Light */
 
-ccl_device_inline bool lamp_light_sample(KernelGlobals *kg,
-                                         int lamp,
-                                         float randu, float randv,
-                                         float3 P,
-                                         LightSample *ls)
+ccl_device_inline bool lamp_light_sample(
+    KernelGlobals *kg, int lamp, float randu, float randv, float3 P, LightSample *ls)
 {
-	const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, lamp);
-	LightType type = (LightType)klight->type;
-	ls->type = type;
-	ls->shader = klight->shader_id;
-	ls->object = PRIM_NONE;
-	ls->prim = PRIM_NONE;
-	ls->lamp = lamp;
-	ls->u = randu;
-	ls->v = randv;
-
-	if(type == LIGHT_DISTANT) {
-		/* distant light */
-		float3 lightD = make_float3(klight->co[0], klight->co[1], klight->co[2]);
-		float3 D = lightD;
-		float radius = klight->distant.radius;
-		float invarea = klight->distant.invarea;
-
-		if(radius > 0.0f)
-			D = distant_light_sample(D, radius, randu, randv);
-
-		ls->P = D;
-		ls->Ng = D;
-		ls->D = -D;
-		ls->t = FLT_MAX;
-
-		float costheta = dot(lightD, D);
-		ls->pdf = invarea/(costheta*costheta*costheta);
-		ls->eval_fac = ls->pdf;
-	}
+  const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, lamp);
+  LightType type = (LightType)klight->type;
+  ls->type = type;
+  ls->shader = klight->shader_id;
+  ls->object = PRIM_NONE;
+  ls->prim = PRIM_NONE;
+  ls->lamp = lamp;
+  ls->u = randu;
+  ls->v = randv;
+
+  if (type == LIGHT_DISTANT) {
+    /* distant light */
+    float3 lightD = make_float3(klight->co[0], klight->co[1], klight->co[2]);
+    float3 D = lightD;
+    float radius = klight->distant.radius;
+    float invarea = klight->distant.invarea;
+
+    if (radius > 0.0f)
+      D = distant_light_sample(D, radius, randu, randv);
+
+    ls->P = D;
+    ls->Ng = D;
+    ls->D = -D;
+    ls->t = FLT_MAX;
+
+    float costheta = dot(lightD, D);
+    ls->pdf = invarea / (costheta * costheta * costheta);
+    ls->eval_fac = ls->pdf;
+  }
 #ifdef __BACKGROUND_MIS__
-	else if(type == LIGHT_BACKGROUND) {
-		/* infinite area light (e.g. light dome or env light) */
-		float3 D = -background_light_sample(kg, P, randu, randv, &ls->pdf);
-
-		ls->P = D;
-		ls->Ng = D;
-		ls->D = -D;
-		ls->t = FLT_MAX;
-		ls->eval_fac = 1.0f;
-	}
+  else if (type == LIGHT_BACKGROUND) {
+    /* infinite area light (e.g. light dome or env light) */
+    float3 D = -background_light_sample(kg, P, randu, randv, &ls->pdf);
+
+    ls->P = D;
+    ls->Ng = D;
+    ls->D = -D;
+    ls->t = FLT_MAX;
+    ls->eval_fac = 1.0f;
+  }
 #endif
-	else {
-		ls->P = make_float3(klight->co[0], klight->co[1], klight->co[2]);
-
-		if(type == LIGHT_POINT || type == LIGHT_SPOT) {
-			float radius = klight->spot.radius;
-
-			if(radius > 0.0f)
-				/* sphere light */
-				ls->P += sphere_light_sample(P, ls->P, radius, randu, randv);
-
-			ls->D = normalize_len(ls->P - P, &ls->t);
-			ls->Ng = -ls->D;
-
-			float invarea = klight->spot.invarea;
-			ls->eval_fac = (0.25f*M_1_PI_F)*invarea;
-			ls->pdf = invarea;
-
-			if(type == LIGHT_SPOT) {
-				/* spot light attenuation */
-				float3 dir = make_float3(klight->spot.dir[0],
-                                         klight->spot.dir[1],
-				                         klight->spot.dir[2]);
-				ls->eval_fac *= spot_light_attenuation(dir,
-				                                       klight->spot.spot_angle,
-				                                       klight->spot.spot_smooth,
-				                                       ls);
-				if(ls->eval_fac == 0.0f) {
-					return false;
-				}
-			}
-			float2 uv = map_to_sphere(ls->Ng);
-			ls->u = uv.x;
-			ls->v = uv.y;
-
-			ls->pdf *= lamp_light_pdf(kg, ls->Ng, -ls->D, ls->t);
-		}
-		else {
-			/* area light */
-			float3 axisu = make_float3(klight->area.axisu[0],
-			                           klight->area.axisu[1],
-			                           klight->area.axisu[2]);
-			float3 axisv = make_float3(klight->area.axisv[0],
-			                           klight->area.axisv[1],
-			                           klight->area.axisv[2]);
-			float3 D = make_float3(klight->area.dir[0],
-			                       klight->area.dir[1],
-			                       klight->area.dir[2]);
-			float invarea = fabsf(klight->area.invarea);
-			bool is_round = (klight->area.invarea < 0.0f);
-
-			if(dot(ls->P - P, D) > 0.0f) {
-				return false;
-			}
-
-			float3 inplane;
-
-			if(is_round) {
-				inplane = ellipse_sample(axisu*0.5f, axisv*0.5f, randu, randv);
-				ls->P += inplane;
-				ls->pdf = invarea;
-			}
-			else {
-				inplane = ls->P;
-				ls->pdf = rect_light_sample(P, &ls->P,
-				                            axisu, axisv,
-				                            randu, randv,
-				                            true);
-				inplane = ls->P - inplane;
-			}
-
-			ls->u = dot(inplane, axisu) * (1.0f / dot(axisu, axisu)) + 0.5f;
-			ls->v = dot(inplane, axisv) * (1.0f / dot(axisv, axisv)) + 0.5f;
-
-			ls->Ng = D;
-			ls->D = normalize_len(ls->P - P, &ls->t);
-
-			ls->eval_fac = 0.25f*invarea;
-			if(is_round) {
-				ls->pdf *= lamp_light_pdf(kg, D, -ls->D, ls->t);
-			}
-		}
-	}
-
-	ls->pdf *= kernel_data.integrator.pdf_lights;
-
-	return (ls->pdf > 0.0f);
+  else {
+    ls->P = make_float3(klight->co[0], klight->co[1], klight->co[2]);
+
+    if (type == LIGHT_POINT || type == LIGHT_SPOT) {
+      float radius = klight->spot.radius;
+
+      if (radius > 0.0f)
+        /* sphere light */
+        ls->P += sphere_light_sample(P, ls->P, radius, randu, randv);
+
+      ls->D = normalize_len(ls->P - P, &ls->t);
+      ls->Ng = -ls->D;
+
+      float invarea = klight->spot.invarea;
+      ls->eval_fac = (0.25f * M_1_PI_F) * invarea;
+      ls->pdf = invarea;
+
+      if (type == LIGHT_SPOT) {
+        /* spot light attenuation */
+        float3 dir = make_float3(klight->spot.dir[0], klight->spot.dir[1], klight->spot.dir[2]);
+        ls->eval_fac *= spot_light_attenuation(
+            dir, klight->spot.spot_angle, klight->spot.spot_smooth, ls);
+        if (ls->eval_fac == 0.0f) {
+          return false;
+        }
+      }
+      float2 uv = map_to_sphere(ls->Ng);
+      ls->u = uv.x;
+      ls->v = uv.y;
+
+      ls->pdf *= lamp_light_pdf(kg, ls->Ng, -ls->D, ls->t);
+    }
+    else {
+      /* area light */
+      float3 axisu = make_float3(
+          klight->area.axisu[0], klight->area.axisu[1], klight->area.axisu[2]);
+      float3 axisv = make_float3(
+          klight->area.axisv[0], klight->area.axisv[1], klight->area.axisv[2]);
+      float3 D = make_float3(klight->area.dir[0], klight->area.dir[1], klight->area.dir[2]);
+      float invarea = fabsf(klight->area.invarea);
+      bool is_round = (klight->area.invarea < 0.0f);
+
+      if (dot(ls->P - P, D) > 0.0f) {
+        return false;
+      }
+
+      float3 inplane;
+
+      if (is_round) {
+        inplane = ellipse_sample(axisu * 0.5f, axisv * 0.5f, randu, randv);
+        ls->P += inplane;
+        ls->pdf = invarea;
+      }
+      else {
+        inplane = ls->P;
+        ls->pdf = rect_light_sample(P, &ls->P, axisu, axisv, randu, randv, true);
+        inplane = ls->P - inplane;
+      }
+
+      ls->u = dot(inplane, axisu) * (1.0f / dot(axisu, axisu)) + 0.5f;
+      ls->v = dot(inplane, axisv) * (1.0f / dot(axisv, axisv)) + 0.5f;
+
+      ls->Ng = D;
+      ls->D = normalize_len(ls->P - P, &ls->t);
+
+      ls->eval_fac = 0.25f * invarea;
+      if (is_round) {
+        ls->pdf *= lamp_light_pdf(kg, D, -ls->D, ls->t);
+      }
+    }
+  }
+
+  ls->pdf *= kernel_data.integrator.pdf_lights;
+
+  return (ls->pdf > 0.0f);
 }
 
-ccl_device bool lamp_light_eval(KernelGlobals *kg, int lamp, float3 P, float3 D, float t, LightSample *ls)
+ccl_device bool lamp_light_eval(
+    KernelGlobals *kg, int lamp, float3 P, float3 D, float t, LightSample *ls)
 {
-	const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, lamp);
-	LightType type = (LightType)klight->type;
-	ls->type = type;
-	ls->shader = klight->shader_id;
-	ls->object = PRIM_NONE;
-	ls->prim = PRIM_NONE;
-	ls->lamp = lamp;
-	/* todo: missing texture coordinates */
-	ls->u = 0.0f;
-	ls->v = 0.0f;
-
-	if(!(ls->shader & SHADER_USE_MIS))
-		return false;
-
-	if(type == LIGHT_DISTANT) {
-		/* distant light */
-		float radius = klight->distant.radius;
-
-		if(radius == 0.0f)
-			return false;
-		if(t != FLT_MAX)
-			return false;
-
-		/* a distant light is infinitely far away, but equivalent to a disk
-		 * shaped light exactly 1 unit away from the current shading point.
-		 *
-		 *     radius              t^2/cos(theta)
-		 *  <---------->           t = sqrt(1^2 + tan(theta)^2)
-		 *       tan(th)           area = radius*radius*pi
-		 *       <----->
-		 *        \    |           (1 + tan(theta)^2)/cos(theta)
-		 *         \   |           (1 + tan(acos(cos(theta)))^2)/cos(theta)
-		 *       t  \th| 1         simplifies to
-		 *           \-|           1/(cos(theta)^3)
-		 *            \|           magic!
-		 *             P
-		 */
-
-		float3 lightD = make_float3(klight->co[0], klight->co[1], klight->co[2]);
-		float costheta = dot(-lightD, D);
-		float cosangle = klight->distant.cosangle;
-
-		if(costheta < cosangle)
-			return false;
-
-		ls->P = -D;
-		ls->Ng = -D;
-		ls->D = D;
-		ls->t = FLT_MAX;
-
-		/* compute pdf */
-		float invarea = klight->distant.invarea;
-		ls->pdf = invarea/(costheta*costheta*costheta);
-		ls->eval_fac = ls->pdf;
-	}
-	else if(type == LIGHT_POINT || type == LIGHT_SPOT) {
-		float3 lightP = make_float3(klight->co[0], klight->co[1], klight->co[2]);
-
-		float radius = klight->spot.radius;
-
-		/* sphere light */
-		if(radius == 0.0f)
-			return false;
-
-		if(!ray_aligned_disk_intersect(P, D, t,
-		                               lightP, radius, &ls->P, &ls->t))
-		{
-			return false;
-		}
-
-		ls->Ng = -D;
-		ls->D = D;
-
-		float invarea = klight->spot.invarea;
-		ls->eval_fac = (0.25f*M_1_PI_F)*invarea;
-		ls->pdf = invarea;
-
-		if(type == LIGHT_SPOT) {
-			/* spot light attenuation */
-			float3 dir = make_float3(klight->spot.dir[0],
-			                         klight->spot.dir[1],
-			                         klight->spot.dir[2]);
-			ls->eval_fac *= spot_light_attenuation(dir,
-			                                       klight->spot.spot_angle,
-			                                       klight->spot.spot_smooth,
-			                                       ls);
-
-			if(ls->eval_fac == 0.0f)
-				return false;
-		}
-		float2 uv = map_to_sphere(ls->Ng);
-		ls->u = uv.x;
-		ls->v = uv.y;
-
-		/* compute pdf */
-		if(ls->t != FLT_MAX)
-			ls->pdf *= lamp_light_pdf(kg, ls->Ng, -ls->D, ls->t);
-	}
-	else if(type == LIGHT_AREA) {
-		/* area light */
-		float invarea = fabsf(klight->area.invarea);
-		bool is_round = (klight->area.invarea < 0.0f);
-		if(invarea == 0.0f)
-			return false;
-
-		float3 axisu = make_float3(klight->area.axisu[0],
-		                           klight->area.axisu[1],
-		                           klight->area.axisu[2]);
-		float3 axisv = make_float3(klight->area.axisv[0],
-		                           klight->area.axisv[1],
-		                           klight->area.axisv[2]);
-		float3 Ng = make_float3(klight->area.dir[0],
-		                        klight->area.dir[1],
-		                        klight->area.dir[2]);
-
-		/* one sided */
-		if(dot(D, Ng) >= 0.0f)
-			return false;
-
-		float3 light_P = make_float3(klight->co[0], klight->co[1], klight->co[2]);
-
-		if(!ray_quad_intersect(P, D, 0.0f, t, light_P,
-		                       axisu, axisv, Ng,
-		                       &ls->P, &ls->t,
-		                       &ls->u, &ls->v,
-		                       is_round))
-		{
-			return false;
-		}
-
-		ls->D = D;
-		ls->Ng = Ng;
-		if(is_round) {
-			ls->pdf = invarea * lamp_light_pdf(kg, Ng, -D, ls->t);
-		}
-		else {
-			ls->pdf = rect_light_sample(P, &light_P, axisu, axisv, 0, 0, false);
-		}
-		ls->eval_fac = 0.25f*invarea;
-	}
-	else {
-		return false;
-	}
-
-	ls->pdf *= kernel_data.integrator.pdf_lights;
-
-	return true;
+  const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, lamp);
+  LightType type = (LightType)klight->type;
+  ls->type = type;
+  ls->shader = klight->shader_id;
+  ls->object = PRIM_NONE;
+  ls->prim = PRIM_NONE;
+  ls->lamp = lamp;
+  /* todo: missing texture coordinates */
+  ls->u = 0.0f;
+  ls->v = 0.0f;
+
+  if (!(ls->shader & SHADER_USE_MIS))
+    return false;
+
+  if (type == LIGHT_DISTANT) {
+    /* distant light */
+    float radius = klight->distant.radius;
+
+    if (radius == 0.0f)
+      return false;
+    if (t != FLT_MAX)
+      return false;
+
+    /* a distant light is infinitely far away, but equivalent to a disk
+     * shaped light exactly 1 unit away from the current shading point.
+     *
+     *     radius              t^2/cos(theta)
+     *  <---------->           t = sqrt(1^2 + tan(theta)^2)
+     *       tan(th)           area = radius*radius*pi
+     *       <----->
+     *        \    |           (1 + tan(theta)^2)/cos(theta)
+     *         \   |           (1 + tan(acos(cos(theta)))^2)/cos(theta)
+     *       t  \th| 1         simplifies to
+     *           \-|           1/(cos(theta)^3)
+     *            \|           magic!
+     *             P
+     */
+
+    float3 lightD = make_float3(klight->co[0], klight->co[1], klight->co[2]);
+    float costheta = dot(-lightD, D);
+    float cosangle = klight->distant.cosangle;
+
+    if (costheta < cosangle)
+      return false;
+
+    ls->P = -D;
+    ls->Ng = -D;
+    ls->D = D;
+    ls->t = FLT_MAX;
+
+    /* compute pdf */
+    float invarea = klight->distant.invarea;
+    ls->pdf = invarea / (costheta * costheta * costheta);
+    ls->eval_fac = ls->pdf;
+  }
+  else if (type == LIGHT_POINT || type == LIGHT_SPOT) {
+    float3 lightP = make_float3(klight->co[0], klight->co[1], klight->co[2]);
+
+    float radius = klight->spot.radius;
+
+    /* sphere light */
+    if (radius == 0.0f)
+      return false;
+
+    if (!ray_aligned_disk_intersect(P, D, t, lightP, radius, &ls->P, &ls->t)) {
+      return false;
+    }
+
+    ls->Ng = -D;
+    ls->D = D;
+
+    float invarea = klight->spot.invarea;
+    ls->eval_fac = (0.25f * M_1_PI_F) * invarea;
+    ls->pdf = invarea;
+
+    if (type == LIGHT_SPOT) {
+      /* spot light attenuation */
+      float3 dir = make_float3(klight->spot.dir[0], klight->spot.dir[1], klight->spot.dir[2]);
+      ls->eval_fac *= spot_light_attenuation(
+          dir, klight->spot.spot_angle, klight->spot.spot_smooth, ls);
+
+      if (ls->eval_fac == 0.0f)
+        return false;
+    }
+    float2 uv = map_to_sphere(ls->Ng);
+    ls->u = uv.x;
+    ls->v = uv.y;
+
+    /* compute pdf */
+    if (ls->t != FLT_MAX)
+      ls->pdf *= lamp_light_pdf(kg, ls->Ng, -ls->D, ls->t);
+  }
+  else if (type == LIGHT_AREA) {
+    /* area light */
+    float invarea = fabsf(klight->area.invarea);
+    bool is_round = (klight->area.invarea < 0.0f);
+    if (invarea == 0.0f)
+      return false;
+
+    float3 axisu = make_float3(
+        klight->area.axisu[0], klight->area.axisu[1], klight->area.axisu[2]);
+    float3 axisv = make_float3(
+        klight->area.axisv[0], klight->area.axisv[1], klight->area.axisv[2]);
+    float3 Ng = make_float3(klight->area.dir[0], klight->area.dir[1], klight->area.dir[2]);
+
+    /* one sided */
+    if (dot(D, Ng) >= 0.0f)
+      return false;
+
+    float3 light_P = make_float3(klight->co[0], klight->co[1], klight->co[2]);
+
+    if (!ray_quad_intersect(
+            P, D, 0.0f, t, light_P, axisu, axisv, Ng, &ls->P, &ls->t, &ls->u, &ls->v, is_round)) {
+      return false;
+    }
+
+    ls->D = D;
+    ls->Ng = Ng;
+    if (is_round) {
+      ls->pdf = invarea * lamp_light_pdf(kg, Ng, -D, ls->t);
+    }
+    else {
+      ls->pdf = rect_light_sample(P, &light_P, axisu, axisv, 0, 0, false);
+    }
+    ls->eval_fac = 0.25f * invarea;
+  }
+  else {
+    return false;
+  }
+
+  ls->pdf *= kernel_data.integrator.pdf_lights;
+
+  return true;
 }
 
 /* Triangle Light */
 
 /* returns true if the triangle is has motion blur or an instancing transform applied */
-ccl_device_inline bool triangle_world_space_vertices(KernelGlobals *kg, int object, int prim, float time, float3 V[3])
+ccl_device_inline bool triangle_world_space_vertices(
+    KernelGlobals *kg, int object, int prim, float time, float3 V[3])
 {
-	bool has_motion = false;
-	const int object_flag = kernel_tex_fetch(__object_flag, object);
+  bool has_motion = false;
+  const int object_flag = kernel_tex_fetch(__object_flag, object);
 
-	if(object_flag & SD_OBJECT_HAS_VERTEX_MOTION && time >= 0.0f) {
-		motion_triangle_vertices(kg, object, prim, time, V);
-		has_motion = true;
-	}
-	else {
-		triangle_vertices(kg, prim, V);
-	}
+  if (object_flag & SD_OBJECT_HAS_VERTEX_MOTION && time >= 0.0f) {
+    motion_triangle_vertices(kg, object, prim, time, V);
+    has_motion = true;
+  }
+  else {
+    triangle_vertices(kg, prim, V);
+  }
 
 #ifdef __INSTANCING__
-	if(!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
+  if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
 #  ifdef __OBJECT_MOTION__
-		float object_time = (time >= 0.0f) ? time : 0.5f;
-		Transform tfm = object_fetch_transform_motion_test(kg, object, object_time, NULL);
+    float object_time = (time >= 0.0f) ? time : 0.5f;
+    Transform tfm = object_fetch_transform_motion_test(kg, object, object_time, NULL);
 #  else
-		Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM);
+    Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM);
 #  endif
-		V[0] = transform_point(&tfm, V[0]);
-		V[1] = transform_point(&tfm, V[1]);
-		V[2] = transform_point(&tfm, V[2]);
-		has_motion = true;
-	}
+    V[0] = transform_point(&tfm, V[0]);
+    V[1] = transform_point(&tfm, V[1]);
+    V[2] = transform_point(&tfm, V[2]);
+    has_motion = true;
+  }
 #endif
-	return has_motion;
+  return has_motion;
 }
 
-ccl_device_inline float triangle_light_pdf_area(KernelGlobals *kg, const float3 Ng, const float3 I, float t)
+ccl_device_inline float triangle_light_pdf_area(KernelGlobals *kg,
+                                                const float3 Ng,
+                                                const float3 I,
+                                                float t)
 {
-	float pdf = kernel_data.integrator.pdf_triangles;
-	float cos_pi = fabsf(dot(Ng, I));
+  float pdf = kernel_data.integrator.pdf_triangles;
+  float cos_pi = fabsf(dot(Ng, I));
 
-	if(cos_pi == 0.0f)
-		return 0.0f;
+  if (cos_pi == 0.0f)
+    return 0.0f;
 
-	return t*t*pdf/cos_pi;
+  return t * t * pdf / cos_pi;
 }
 
 ccl_device_forceinline float triangle_light_pdf(KernelGlobals *kg, ShaderData *sd, float t)
 {
-	/* A naive heuristic to decide between costly solid angle sampling
-	 * and simple area sampling, comparing the distance to the triangle plane
-	 * to the length of the edges of the triangle. */
-
-	float3 V[3];
-	bool has_motion = triangle_world_space_vertices(kg, sd->object, sd->prim, sd->time, V);
-
-	const float3 e0 = V[1] - V[0];
-	const float3 e1 = V[2] - V[0];
-	const float3 e2 = V[2] - V[1];
-	const float longest_edge_squared = max(len_squared(e0), max(len_squared(e1), len_squared(e2)));
-	const float3 N = cross(e0, e1);
-	const float distance_to_plane = fabsf(dot(N, sd->I * t))/dot(N, N);
-
-	if(longest_edge_squared > distance_to_plane*distance_to_plane) {
-		/* sd contains the point on the light source
-		 * calculate Px, the point that we're shading */
-		const float3 Px = sd->P + sd->I * t;
-		const float3 v0_p = V[0] - Px;
-		const float3 v1_p = V[1] - Px;
-		const float3 v2_p = V[2] - Px;
-
-		const float3 u01 = safe_normalize(cross(v0_p, v1_p));
-		const float3 u02 = safe_normalize(cross(v0_p, v2_p));
-		const float3 u12 = safe_normalize(cross(v1_p, v2_p));
-
-		const float alpha = fast_acosf(dot(u02, u01));
-		const float beta = fast_acosf(-dot(u01, u12));
-		const float gamma = fast_acosf(dot(u02, u12));
-		const float solid_angle =  alpha + beta + gamma - M_PI_F;
-
-		/* pdf_triangles is calculated over triangle area, but we're not sampling over its area */
-		if(UNLIKELY(solid_angle == 0.0f)) {
-			return 0.0f;
-		}
-		else {
-			float area = 1.0f;
-			if(has_motion) {
-				/* get the center frame vertices, this is what the PDF was calculated from */
-				triangle_world_space_vertices(kg, sd->object, sd->prim, -1.0f, V);
-				area = triangle_area(V[0], V[1], V[2]);
-			}
-			else {
-				area = 0.5f * len(N);
-			}
-			const float pdf = area * kernel_data.integrator.pdf_triangles;
-			return pdf / solid_angle;
-		}
-	}
-	else {
-		float pdf = triangle_light_pdf_area(kg, sd->Ng, sd->I, t);
-		if(has_motion) {
-			const float	area = 0.5f * len(N);
-			if(UNLIKELY(area == 0.0f)) {
-				return 0.0f;
-			}
-			/* scale the PDF.
-			 * area = the area the sample was taken from
-			 * area_pre = the are from which pdf_triangles was calculated from */
-			triangle_world_space_vertices(kg, sd->object, sd->prim, -1.0f, V);
-			const float area_pre = triangle_area(V[0], V[1], V[2]);
-			pdf = pdf * area_pre / area;
-		}
-		return pdf;
-	}
+  /* A naive heuristic to decide between costly solid angle sampling
+   * and simple area sampling, comparing the distance to the triangle plane
+   * to the length of the edges of the triangle. */
+
+  float3 V[3];
+  bool has_motion = triangle_world_space_vertices(kg, sd->object, sd->prim, sd->time, V);
+
+  const float3 e0 = V[1] - V[0];
+  const float3 e1 = V[2] - V[0];
+  const float3 e2 = V[2] - V[1];
+  const float longest_edge_squared = max(len_squared(e0), max(len_squared(e1), len_squared(e2)));
+  const float3 N = cross(e0, e1);
+  const float distance_to_plane = fabsf(dot(N, sd->I * t)) / dot(N, N);
+
+  if (longest_edge_squared > distance_to_plane * distance_to_plane) {
+    /* sd contains the point on the light source
+     * calculate Px, the point that we're shading */
+    const float3 Px = sd->P + sd->I * t;
+    const float3 v0_p = V[0] - Px;
+    const float3 v1_p = V[1] - Px;
+    const float3 v2_p = V[2] - Px;
+
+    const float3 u01 = safe_normalize(cross(v0_p, v1_p));
+    const float3 u02 = safe_normalize(cross(v0_p, v2_p));
+    const float3 u12 = safe_normalize(cross(v1_p, v2_p));
+
+    const float alpha = fast_acosf(dot(u02, u01));
+    const float beta = fast_acosf(-dot(u01, u12));
+    const float gamma = fast_acosf(dot(u02, u12));
+    const float solid_angle = alpha + beta + gamma - M_PI_F;
+
+    /* pdf_triangles is calculated over triangle area, but we're not sampling over its area */
+    if (UNLIKELY(solid_angle == 0.0f)) {
+      return 0.0f;
+    }
+    else {
+      float area = 1.0f;
+      if (has_motion) {
+        /* get the center frame vertices, this is what the PDF was calculated from */
+        triangle_world_space_vertices(kg, sd->object, sd->prim, -1.0f, V);
+        area = triangle_area(V[0], V[1], V[2]);
+      }
+      else {
+        area = 0.5f * len(N);
+      }
+      const float pdf = area * kernel_data.integrator.pdf_triangles;
+      return pdf / solid_angle;
+    }
+  }
+  else {
+    float pdf = triangle_light_pdf_area(kg, sd->Ng, sd->I, t);
+    if (has_motion) {
+      const float area = 0.5f * len(N);
+      if (UNLIKELY(area == 0.0f)) {
+        return 0.0f;
+      }
+      /* scale the PDF.
+       * area = the area the sample was taken from
+       * area_pre = the area from which pdf_triangles was calculated */
+      triangle_world_space_vertices(kg, sd->object, sd->prim, -1.0f, V);
+      const float area_pre = triangle_area(V[0], V[1], V[2]);
+      pdf = pdf * area_pre / area;
+    }
+    return pdf;
+  }
 }
 
-ccl_device_forceinline void triangle_light_sample(KernelGlobals *kg, int prim, int object,
-	float randu, float randv, float time, LightSample *ls, const float3 P)
+ccl_device_forceinline void triangle_light_sample(KernelGlobals *kg,
+                                                  int prim,
+                                                  int object,
+                                                  float randu,
+                                                  float randv,
+                                                  float time,
+                                                  LightSample *ls,
+                                                  const float3 P)
 {
-	/* A naive heuristic to decide between costly solid angle sampling
-	 * and simple area sampling, comparing the distance to the triangle plane
-	 * to the length of the edges of the triangle. */
-
-	float3 V[3];
-	bool has_motion = triangle_world_space_vertices(kg, object, prim, time, V);
-
-	const float3 e0 = V[1] - V[0];
-	const float3 e1 = V[2] - V[0];
-	const float3 e2 = V[2] - V[1];
-	const float longest_edge_squared = max(len_squared(e0), max(len_squared(e1), len_squared(e2)));
-	const float3 N0 = cross(e0, e1);
-	float Nl = 0.0f;
-	ls->Ng = safe_normalize_len(N0, &Nl);
-	float area = 0.5f * Nl;
-
-	/* flip normal if necessary */
-	const int object_flag = kernel_tex_fetch(__object_flag, object);
-	if(object_flag & SD_OBJECT_NEGATIVE_SCALE_APPLIED) {
-		ls->Ng = -ls->Ng;
-	}
-	ls->eval_fac = 1.0f;
-	ls->shader = kernel_tex_fetch(__tri_shader, prim);
-	ls->object = object;
-	ls->prim = prim;
-	ls->lamp = LAMP_NONE;
-	ls->shader |= SHADER_USE_MIS;
-	ls->type = LIGHT_TRIANGLE;
-
-	float distance_to_plane = fabsf(dot(N0, V[0] - P)/dot(N0, N0));
-
-	if(longest_edge_squared > distance_to_plane*distance_to_plane) {
-		/* see James Arvo, "Stratified Sampling of Spherical Triangles"
-		 * http://www.graphics.cornell.edu/pubs/1995/Arv95c.pdf */
-
-		/* project the triangle to the unit sphere
-		 * and calculate its edges and angles */
-		const float3 v0_p = V[0] - P;
-		const float3 v1_p = V[1] - P;
-		const float3 v2_p = V[2] - P;
-
-		const float3 u01 = safe_normalize(cross(v0_p, v1_p));
-		const float3 u02 = safe_normalize(cross(v0_p, v2_p));
-		const float3 u12 = safe_normalize(cross(v1_p, v2_p));
-
-		const float3 A = safe_normalize(v0_p);
-		const float3 B = safe_normalize(v1_p);
-		const float3 C = safe_normalize(v2_p);
-
-		const float cos_alpha = dot(u02, u01);
-		const float cos_beta = -dot(u01, u12);
-		const float cos_gamma = dot(u02, u12);
-
-		/* calculate dihedral angles */
-		const float alpha = fast_acosf(cos_alpha);
-		const float beta = fast_acosf(cos_beta);
-		const float gamma = fast_acosf(cos_gamma);
-		/* the area of the unit spherical triangle = solid angle */
-		const float solid_angle =  alpha + beta + gamma - M_PI_F;
-
-		/* precompute a few things
-		 * these could be re-used to take several samples
-		 * as they are independent of randu/randv */
-		const float cos_c = dot(A, B);
-		const float sin_alpha = fast_sinf(alpha);
-		const float product = sin_alpha * cos_c;
-
-		/* Select a random sub-area of the spherical triangle
-		 * and calculate the third vertex C_ of that new triangle */
-		const float phi = randu * solid_angle - alpha;
-		float s, t;
-		fast_sincosf(phi, &s, &t);
-		const float u = t - cos_alpha;
-		const float v = s + product;
-
-		const float3 U = safe_normalize(C - dot(C, A) * A);
-
-		float q = 1.0f;
-		const float det = ((v * s + u * t) * sin_alpha);
-		if(det != 0.0f) {
-			q = ((v * t - u * s) * cos_alpha - v) / det;
-		}
-		const float temp = max(1.0f - q*q, 0.0f);
-
-		const float3 C_ = safe_normalize(q * A + sqrtf(temp) * U);
-
-		/* Finally, select a random point along the edge of the new triangle
-		 * That point on the spherical triangle is the sampled ray direction */
-		const float z = 1.0f - randv * (1.0f - dot(C_, B));
-		ls->D = z * B + safe_sqrtf(1.0f - z*z) * safe_normalize(C_ - dot(C_, B) * B);
-
-		/* calculate intersection with the planar triangle */
-		if(!ray_triangle_intersect(P, ls->D, FLT_MAX,
+  /* A naive heuristic to decide between costly solid angle sampling
+   * and simple area sampling, comparing the distance to the triangle plane
+   * to the length of the edges of the triangle. */
+
+  float3 V[3];
+  bool has_motion = triangle_world_space_vertices(kg, object, prim, time, V);
+
+  const float3 e0 = V[1] - V[0];
+  const float3 e1 = V[2] - V[0];
+  const float3 e2 = V[2] - V[1];
+  const float longest_edge_squared = max(len_squared(e0), max(len_squared(e1), len_squared(e2)));
+  const float3 N0 = cross(e0, e1);
+  float Nl = 0.0f;
+  ls->Ng = safe_normalize_len(N0, &Nl);
+  float area = 0.5f * Nl;
+
+  /* flip normal if necessary */
+  const int object_flag = kernel_tex_fetch(__object_flag, object);
+  if (object_flag & SD_OBJECT_NEGATIVE_SCALE_APPLIED) {
+    ls->Ng = -ls->Ng;
+  }
+  ls->eval_fac = 1.0f;
+  ls->shader = kernel_tex_fetch(__tri_shader, prim);
+  ls->object = object;
+  ls->prim = prim;
+  ls->lamp = LAMP_NONE;
+  ls->shader |= SHADER_USE_MIS;
+  ls->type = LIGHT_TRIANGLE;
+
+  float distance_to_plane = fabsf(dot(N0, V[0] - P) / dot(N0, N0));
+
+  if (longest_edge_squared > distance_to_plane * distance_to_plane) {
+    /* see James Arvo, "Stratified Sampling of Spherical Triangles"
+     * http://www.graphics.cornell.edu/pubs/1995/Arv95c.pdf */
+
+    /* project the triangle to the unit sphere
+     * and calculate its edges and angles */
+    const float3 v0_p = V[0] - P;
+    const float3 v1_p = V[1] - P;
+    const float3 v2_p = V[2] - P;
+
+    const float3 u01 = safe_normalize(cross(v0_p, v1_p));
+    const float3 u02 = safe_normalize(cross(v0_p, v2_p));
+    const float3 u12 = safe_normalize(cross(v1_p, v2_p));
+
+    const float3 A = safe_normalize(v0_p);
+    const float3 B = safe_normalize(v1_p);
+    const float3 C = safe_normalize(v2_p);
+
+    const float cos_alpha = dot(u02, u01);
+    const float cos_beta = -dot(u01, u12);
+    const float cos_gamma = dot(u02, u12);
+
+    /* calculate dihedral angles */
+    const float alpha = fast_acosf(cos_alpha);
+    const float beta = fast_acosf(cos_beta);
+    const float gamma = fast_acosf(cos_gamma);
+    /* the area of the unit spherical triangle = solid angle */
+    const float solid_angle = alpha + beta + gamma - M_PI_F;
+
+    /* precompute a few things
+     * these could be re-used to take several samples
+     * as they are independent of randu/randv */
+    const float cos_c = dot(A, B);
+    const float sin_alpha = fast_sinf(alpha);
+    const float product = sin_alpha * cos_c;
+
+    /* Select a random sub-area of the spherical triangle
+     * and calculate the third vertex C_ of that new triangle */
+    const float phi = randu * solid_angle - alpha;
+    float s, t;
+    fast_sincosf(phi, &s, &t);
+    const float u = t - cos_alpha;
+    const float v = s + product;
+
+    const float3 U = safe_normalize(C - dot(C, A) * A);
+
+    float q = 1.0f;
+    const float det = ((v * s + u * t) * sin_alpha);
+    if (det != 0.0f) {
+      q = ((v * t - u * s) * cos_alpha - v) / det;
+    }
+    const float temp = max(1.0f - q * q, 0.0f);
+
+    const float3 C_ = safe_normalize(q * A + sqrtf(temp) * U);
+
+    /* Finally, select a random point along the edge of the new triangle
+     * That point on the spherical triangle is the sampled ray direction */
+    const float z = 1.0f - randv * (1.0f - dot(C_, B));
+    ls->D = z * B + safe_sqrtf(1.0f - z * z) * safe_normalize(C_ - dot(C_, B) * B);
+
+    /* calculate intersection with the planar triangle */
+    if (!ray_triangle_intersect(P,
+                                ls->D,
+                                FLT_MAX,
 #if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__)
-		                           (ssef*)V,
+                                (ssef *)V,
 #else
-		                           V[0], V[1], V[2],
+                                V[0],
+                                V[1],
+                                V[2],
 #endif
-		                           &ls->u, &ls->v, &ls->t)) {
-			ls->pdf = 0.0f;
-			return;
-		}
-
-		ls->P = P + ls->D * ls->t;
-
-		/* pdf_triangles is calculated over triangle area, but we're sampling over solid angle */
-		if(UNLIKELY(solid_angle == 0.0f)) {
-			ls->pdf = 0.0f;
-			return;
-		}
-		else {
-			if(has_motion) {
-				/* get the center frame vertices, this is what the PDF was calculated from */
-				triangle_world_space_vertices(kg, object, prim, -1.0f, V);
-				area = triangle_area(V[0], V[1], V[2]);
-			}
-			const float pdf = area * kernel_data.integrator.pdf_triangles;
-			ls->pdf = pdf / solid_angle;
-		}
-	}
-	else {
-		/* compute random point in triangle */
-		randu = sqrtf(randu);
-
-		const float u = 1.0f - randu;
-		const float v = randv*randu;
-		const float t = 1.0f - u - v;
-		ls->P = u * V[0] + v * V[1] + t * V[2];
-		/* compute incoming direction, distance and pdf */
-		ls->D = normalize_len(ls->P - P, &ls->t);
-		ls->pdf = triangle_light_pdf_area(kg, ls->Ng, -ls->D, ls->t);
-		if(has_motion && area != 0.0f) {
-			/* scale the PDF.
-			 * area = the area the sample was taken from
-			 * area_pre = the are from which pdf_triangles was calculated from */
-			triangle_world_space_vertices(kg, object, prim, -1.0f, V);
-			const float area_pre = triangle_area(V[0], V[1], V[2]);
-			ls->pdf = ls->pdf * area_pre / area;
-		}
-		ls->u = u;
-		ls->v = v;
-	}
+                                &ls->u,
+                                &ls->v,
+                                &ls->t)) {
+      ls->pdf = 0.0f;
+      return;
+    }
+
+    ls->P = P + ls->D * ls->t;
+
+    /* pdf_triangles is calculated over triangle area, but we're sampling over solid angle */
+    if (UNLIKELY(solid_angle == 0.0f)) {
+      ls->pdf = 0.0f;
+      return;
+    }
+    else {
+      if (has_motion) {
+        /* get the center frame vertices, this is what the PDF was calculated from */
+        triangle_world_space_vertices(kg, object, prim, -1.0f, V);
+        area = triangle_area(V[0], V[1], V[2]);
+      }
+      const float pdf = area * kernel_data.integrator.pdf_triangles;
+      ls->pdf = pdf / solid_angle;
+    }
+  }
+  else {
+    /* compute random point in triangle */
+    randu = sqrtf(randu);
+
+    const float u = 1.0f - randu;
+    const float v = randv * randu;
+    const float t = 1.0f - u - v;
+    ls->P = u * V[0] + v * V[1] + t * V[2];
+    /* compute incoming direction, distance and pdf */
+    ls->D = normalize_len(ls->P - P, &ls->t);
+    ls->pdf = triangle_light_pdf_area(kg, ls->Ng, -ls->D, ls->t);
+    if (has_motion && area != 0.0f) {
+      /* scale the PDF.
+       * area = the area the sample was taken from
+       * area_pre = the area from which pdf_triangles was calculated */
+      triangle_world_space_vertices(kg, object, prim, -1.0f, V);
+      const float area_pre = triangle_area(V[0], V[1], V[2]);
+      ls->pdf = ls->pdf * area_pre / area;
+    }
+    ls->u = u;
+    ls->v = v;
+  }
 }
 
 /* Light Distribution */
 
 ccl_device int light_distribution_sample(KernelGlobals *kg, float *randu)
 {
-	/* This is basically std::upper_bound as used by pbrt, to find a point light or
-	 * triangle to emit from, proportional to area. a good improvement would be to
-	 * also sample proportional to power, though it's not so well defined with
-	 * arbitrary shaders. */
-	int first = 0;
-	int len = kernel_data.integrator.num_distribution + 1;
-	float r = *randu;
-
-	while(len > 0) {
-		int half_len = len >> 1;
-		int middle = first + half_len;
-
-		if(r < kernel_tex_fetch(__light_distribution, middle).totarea) {
-			len = half_len;
-		}
-		else {
-			first = middle + 1;
-			len = len - half_len - 1;
-		}
-	}
-
-	/* Clamping should not be needed but float rounding errors seem to
-	 * make this fail on rare occasions. */
-	int index = clamp(first-1, 0, kernel_data.integrator.num_distribution-1);
-
-	/* Rescale to reuse random number. this helps the 2D samples within
-	 * each area light be stratified as well. */
-	float distr_min = kernel_tex_fetch(__light_distribution, index).totarea;
-	float distr_max = kernel_tex_fetch(__light_distribution, index+1).totarea;
-	*randu = (r - distr_min)/(distr_max - distr_min);
-
-	return index;
+  /* This is basically std::upper_bound as used by pbrt, to find a point light or
+   * triangle to emit from, proportional to area. a good improvement would be to
+   * also sample proportional to power, though it's not so well defined with
+   * arbitrary shaders. */
+  int first = 0;
+  int len = kernel_data.integrator.num_distribution + 1;
+  float r = *randu;
+
+  while (len > 0) {
+    int half_len = len >> 1;
+    int middle = first + half_len;
+
+    if (r < kernel_tex_fetch(__light_distribution, middle).totarea) {
+      len = half_len;
+    }
+    else {
+      first = middle + 1;
+      len = len - half_len - 1;
+    }
+  }
+
+  /* Clamping should not be needed but float rounding errors seem to
+   * make this fail on rare occasions. */
+  int index = clamp(first - 1, 0, kernel_data.integrator.num_distribution - 1);
+
+  /* Rescale to reuse random number. this helps the 2D samples within
+   * each area light be stratified as well. */
+  float distr_min = kernel_tex_fetch(__light_distribution, index).totarea;
+  float distr_max = kernel_tex_fetch(__light_distribution, index + 1).totarea;
+  *randu = (r - distr_min) / (distr_max - distr_min);
+
+  return index;
 }
 
 /* Generic Light */
 
 ccl_device bool light_select_reached_max_bounces(KernelGlobals *kg, int index, int bounce)
 {
-	return (bounce > kernel_tex_fetch(__lights, index).max_bounces);
+  return (bounce > kernel_tex_fetch(__lights, index).max_bounces);
 }
 
-ccl_device_noinline bool light_sample(KernelGlobals *kg,
-                                      float randu,
-                                      float randv,
-                                      float time,
-                                      float3 P,
-                                      int bounce,
-                                      LightSample *ls)
+ccl_device_noinline bool light_sample(
+    KernelGlobals *kg, float randu, float randv, float time, float3 P, int bounce, LightSample *ls)
 {
-	/* sample index */
-	int index = light_distribution_sample(kg, &randu);
-
-	/* fetch light data */
-	const ccl_global KernelLightDistribution *kdistribution = &kernel_tex_fetch(__light_distribution, index);
-	int prim = kdistribution->prim;
-
-	if(prim >= 0) {
-		int object = kdistribution->mesh_light.object_id;
-		int shader_flag = kdistribution->mesh_light.shader_flag;
-
-		triangle_light_sample(kg, prim, object, randu, randv, time, ls, P);
-		ls->shader |= shader_flag;
-		return (ls->pdf > 0.0f);
-	}
-	else {
-		int lamp = -prim-1;
-
-		if(UNLIKELY(light_select_reached_max_bounces(kg, lamp, bounce))) {
-			return false;
-		}
-
-		return lamp_light_sample(kg, lamp, randu, randv, P, ls);
-	}
+  /* sample index */
+  int index = light_distribution_sample(kg, &randu);
+
+  /* fetch light data */
+  const ccl_global KernelLightDistribution *kdistribution = &kernel_tex_fetch(__light_distribution,
+                                                                              index);
+  int prim = kdistribution->prim;
+
+  if (prim >= 0) {
+    int object = kdistribution->mesh_light.object_id;
+    int shader_flag = kdistribution->mesh_light.shader_flag;
+
+    triangle_light_sample(kg, prim, object, randu, randv, time, ls, P);
+    ls->shader |= shader_flag;
+    return (ls->pdf > 0.0f);
+  }
+  else {
+    int lamp = -prim - 1;
+
+    if (UNLIKELY(light_select_reached_max_bounces(kg, lamp, bounce))) {
+      return false;
+    }
+
+    return lamp_light_sample(kg, lamp, randu, randv, P, ls);
+  }
 }
 
 ccl_device int light_select_num_samples(KernelGlobals *kg, int index)
 {
-	return kernel_tex_fetch(__lights, index).samples;
+  return kernel_tex_fetch(__lights, index).samples;
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_math.h b/intern/cycles/kernel/kernel_math.h
index a8a43f3ea4a..96391db7649 100644
--- a/intern/cycles/kernel/kernel_math.h
+++ b/intern/cycles/kernel/kernel_math.h
@@ -25,4 +25,4 @@
 #include "util/util_texture.h"
 #include "util/util_transform.h"
 
-#endif  /* __KERNEL_MATH_H__ */
+#endif /* __KERNEL_MATH_H__ */
diff --git a/intern/cycles/kernel/kernel_montecarlo.h b/intern/cycles/kernel/kernel_montecarlo.h
index dde93844dd3..a933be970c2 100644
--- a/intern/cycles/kernel/kernel_montecarlo.h
+++ b/intern/cycles/kernel/kernel_montecarlo.h
@@ -38,248 +38,245 @@ CCL_NAMESPACE_BEGIN
 /* distribute uniform xy on [0,1] over unit disk [-1,1] */
 ccl_device void to_unit_disk(float *x, float *y)
 {
-	float phi = M_2PI_F * (*x);
-	float r = sqrtf(*y);
+  float phi = M_2PI_F * (*x);
+  float r = sqrtf(*y);
 
-	*x = r * cosf(phi);
-	*y = r * sinf(phi);
+  *x = r * cosf(phi);
+  *y = r * sinf(phi);
 }
 
 /* return an orthogonal tangent and bitangent given a normal and tangent that
  * may not be exactly orthogonal */
 ccl_device void make_orthonormals_tangent(const float3 N, const float3 T, float3 *a, float3 *b)
 {
-	*b = normalize(cross(N, T));
-	*a = cross(*b, N);
+  *b = normalize(cross(N, T));
+  *a = cross(*b, N);
 }
 
 /* sample direction with cosine weighted distributed in hemisphere */
-ccl_device_inline void sample_cos_hemisphere(const float3 N,
-	float randu, float randv, float3 *omega_in, float *pdf)
+ccl_device_inline void sample_cos_hemisphere(
+    const float3 N, float randu, float randv, float3 *omega_in, float *pdf)
 {
-	to_unit_disk(&randu, &randv);
-	float costheta = sqrtf(max(1.0f - randu * randu - randv * randv, 0.0f));
-	float3 T, B;
-	make_orthonormals(N, &T, &B);
-	*omega_in = randu * T + randv * B + costheta * N;
-	*pdf = costheta *M_1_PI_F;
+  to_unit_disk(&randu, &randv);
+  float costheta = sqrtf(max(1.0f - randu * randu - randv * randv, 0.0f));
+  float3 T, B;
+  make_orthonormals(N, &T, &B);
+  *omega_in = randu * T + randv * B + costheta * N;
+  *pdf = costheta * M_1_PI_F;
 }
 
 /* sample direction uniformly distributed in hemisphere */
-ccl_device_inline void sample_uniform_hemisphere(const float3 N,
-                                                 float randu, float randv,
-                                                 float3 *omega_in, float *pdf)
+ccl_device_inline void sample_uniform_hemisphere(
+    const float3 N, float randu, float randv, float3 *omega_in, float *pdf)
 {
-	float z = randu;
-	float r = sqrtf(max(0.0f, 1.0f - z*z));
-	float phi = M_2PI_F * randv;
-	float x = r * cosf(phi);
-	float y = r * sinf(phi);
-
-	float3 T, B;
-	make_orthonormals (N, &T, &B);
-	*omega_in = x * T + y * B + z * N;
-	*pdf = 0.5f * M_1_PI_F;
+  float z = randu;
+  float r = sqrtf(max(0.0f, 1.0f - z * z));
+  float phi = M_2PI_F * randv;
+  float x = r * cosf(phi);
+  float y = r * sinf(phi);
+
+  float3 T, B;
+  make_orthonormals(N, &T, &B);
+  *omega_in = x * T + y * B + z * N;
+  *pdf = 0.5f * M_1_PI_F;
 }
 
 /* sample direction uniformly distributed in cone */
-ccl_device_inline void sample_uniform_cone(const float3 N, float angle,
-                                           float randu, float randv,
-                                           float3 *omega_in, float *pdf)
+ccl_device_inline void sample_uniform_cone(
+    const float3 N, float angle, float randu, float randv, float3 *omega_in, float *pdf)
 {
-	float z = cosf(angle*randu);
-	float r = sqrtf(max(0.0f, 1.0f - z*z));
-	float phi = M_2PI_F * randv;
-	float x = r * cosf(phi);
-	float y = r * sinf(phi);
-
-	float3 T, B;
-	make_orthonormals (N, &T, &B);
-	*omega_in = x * T + y * B + z * N;
-	*pdf = 0.5f * M_1_PI_F / (1.0f - cosf(angle));
+  float z = cosf(angle * randu);
+  float r = sqrtf(max(0.0f, 1.0f - z * z));
+  float phi = M_2PI_F * randv;
+  float x = r * cosf(phi);
+  float y = r * sinf(phi);
+
+  float3 T, B;
+  make_orthonormals(N, &T, &B);
+  *omega_in = x * T + y * B + z * N;
+  *pdf = 0.5f * M_1_PI_F / (1.0f - cosf(angle));
 }
 
 /* sample uniform point on the surface of a sphere */
 ccl_device float3 sample_uniform_sphere(float u1, float u2)
 {
-	float z = 1.0f - 2.0f*u1;
-	float r = sqrtf(fmaxf(0.0f, 1.0f - z*z));
-	float phi = M_2PI_F*u2;
-	float x = r*cosf(phi);
-	float y = r*sinf(phi);
+  float z = 1.0f - 2.0f * u1;
+  float r = sqrtf(fmaxf(0.0f, 1.0f - z * z));
+  float phi = M_2PI_F * u2;
+  float x = r * cosf(phi);
+  float y = r * sinf(phi);
 
-	return make_float3(x, y, z);
+  return make_float3(x, y, z);
 }
 
 ccl_device float balance_heuristic(float a, float b)
 {
-	return (a)/(a + b);
+  return (a) / (a + b);
 }
 
 ccl_device float balance_heuristic_3(float a, float b, float c)
 {
-	return (a)/(a + b + c);
+  return (a) / (a + b + c);
 }
 
 ccl_device float power_heuristic(float a, float b)
 {
-	return (a*a)/(a*a + b*b);
+  return (a * a) / (a * a + b * b);
 }
 
 ccl_device float power_heuristic_3(float a, float b, float c)
 {
-	return (a*a)/(a*a + b*b + c*c);
+  return (a * a) / (a * a + b * b + c * c);
 }
 
 ccl_device float max_heuristic(float a, float b)
 {
-	return (a > b)? 1.0f: 0.0f;
+  return (a > b) ? 1.0f : 0.0f;
 }
 
 /* distribute uniform xy on [0,1] over unit disk [-1,1], with concentric mapping
  * to better preserve stratification for some RNG sequences */
 ccl_device float2 concentric_sample_disk(float u1, float u2)
 {
-	float phi, r;
-	float a = 2.0f*u1 - 1.0f;
-	float b = 2.0f*u2 - 1.0f;
-
-	if(a == 0.0f && b == 0.0f) {
-		return make_float2(0.0f, 0.0f);
-	}
-	else if(a*a > b*b) {
-		r = a;
-		phi = M_PI_4_F * (b/a);
-	}
-	else {
-		r = b;
-		phi = M_PI_2_F - M_PI_4_F * (a/b);
-	}
-
-	return make_float2(r*cosf(phi), r*sinf(phi));
+  float phi, r;
+  float a = 2.0f * u1 - 1.0f;
+  float b = 2.0f * u2 - 1.0f;
+
+  if (a == 0.0f && b == 0.0f) {
+    return make_float2(0.0f, 0.0f);
+  }
+  else if (a * a > b * b) {
+    r = a;
+    phi = M_PI_4_F * (b / a);
+  }
+  else {
+    r = b;
+    phi = M_PI_2_F - M_PI_4_F * (a / b);
+  }
+
+  return make_float2(r * cosf(phi), r * sinf(phi));
 }
 
 /* sample point in unit polygon with given number of corners and rotation */
 ccl_device float2 regular_polygon_sample(float corners, float rotation, float u, float v)
 {
-	/* sample corner number and reuse u */
-	float corner = floorf(u*corners);
-	u = u*corners - corner;
+  /* sample corner number and reuse u */
+  float corner = floorf(u * corners);
+  u = u * corners - corner;
 
-	/* uniform sampled triangle weights */
-	u = sqrtf(u);
-	v = v*u;
-	u = 1.0f - u;
+  /* uniform sampled triangle weights */
+  u = sqrtf(u);
+  v = v * u;
+  u = 1.0f - u;
 
-	/* point in triangle */
-	float angle = M_PI_F/corners;
-	float2 p = make_float2((u + v)*cosf(angle), (u - v)*sinf(angle));
+  /* point in triangle */
+  float angle = M_PI_F / corners;
+  float2 p = make_float2((u + v) * cosf(angle), (u - v) * sinf(angle));
 
-	/* rotate */
-	rotation += corner*2.0f*angle;
+  /* rotate */
+  rotation += corner * 2.0f * angle;
 
-	float cr = cosf(rotation);
-	float sr = sinf(rotation);
+  float cr = cosf(rotation);
+  float sr = sinf(rotation);
 
-	return make_float2(cr*p.x - sr*p.y, sr*p.x + cr*p.y);
+  return make_float2(cr * p.x - sr * p.y, sr * p.x + cr * p.y);
 }
 
 ccl_device float3 ensure_valid_reflection(float3 Ng, float3 I, float3 N)
 {
-	float3 R = 2*dot(N, I)*N - I;
-
-	/* Reflection rays may always be at least as shallow as the incoming ray. */
-	float threshold = min(0.9f*dot(Ng, I), 0.01f);
-	if(dot(Ng, R) >= threshold) {
-		return N;
-	}
-
-	/* Form coordinate system with Ng as the Z axis and N inside the X-Z-plane.
-	 * The X axis is found by normalizing the component of N that's orthogonal to Ng.
-	 * The Y axis isn't actually needed.
-	 */
-	float NdotNg = dot(N, Ng);
-	float3 X = normalize(N - NdotNg*Ng);
-
-	/* Calculate N.z and N.x in the local coordinate system.
-	 *
-	 * The goal of this computation is to find a N' that is rotated towards Ng just enough
-	 * to lift R' above the threshold (here called t), therefore dot(R', Ng) = t.
-	 *
-	 * According to the standard reflection equation, this means that we want dot(2*dot(N', I)*N' - I, Ng) = t.
-	 *
-	 * Since the Z axis of our local coordinate system is Ng, dot(x, Ng) is just x.z, so we get 2*dot(N', I)*N'.z - I.z = t.
-	 *
-	 * The rotation is simple to express in the coordinate system we formed - since N lies in the X-Z-plane, we know that
-	 * N' will also lie in the X-Z-plane, so N'.y = 0 and therefore dot(N', I) = N'.x*I.x + N'.z*I.z .
-	 *
-	 * Furthermore, we want N' to be normalized, so N'.x = sqrt(1 - N'.z^2).
-	 *
-	 * With these simplifications, we get the final equation 2*(sqrt(1 - N'.z^2)*I.x + N'.z*I.z)*N'.z - I.z = t.
-	 *
-	 * The only unknown here is N'.z, so we can solve for that.
-	 *
-	 * The equation has four solutions in general:
-	 *
-	 * N'.z = +-sqrt(0.5*(+-sqrt(I.x^2*(I.x^2 + I.z^2 - t^2)) + t*I.z + I.x^2 + I.z^2)/(I.x^2 + I.z^2))
-	 * We can simplify this expression a bit by grouping terms:
-	 *
-	 * a = I.x^2 + I.z^2
-	 * b = sqrt(I.x^2 * (a - t^2))
-	 * c = I.z*t + a
-	 * N'.z = +-sqrt(0.5*(+-b + c)/a)
-	 *
-	 * Two solutions can immediately be discarded because they're negative so N' would lie in the lower hemisphere.
-	 */
-	float Ix = dot(I, X), Iz = dot(I, Ng);
-	float Ix2 = sqr(Ix), Iz2 = sqr(Iz);
-	float a = Ix2 + Iz2;
-
-	float b = safe_sqrtf(Ix2*(a - sqr(threshold)));
-	float c = Iz*threshold + a;
-
-	/* Evaluate both solutions.
-	 * In many cases one can be immediately discarded (if N'.z would be imaginary or larger than one), so check for that first.
-	 * If no option is viable (might happen in extreme cases like N being in the wrong hemisphere), give up and return Ng. */
-	float fac = 0.5f/a;
-	float N1_z2 = fac*(b+c), N2_z2 = fac*(-b+c);
-	bool valid1 = (N1_z2 > 1e-5f) && (N1_z2 <= (1.0f + 1e-5f));
-	bool valid2 = (N2_z2 > 1e-5f) && (N2_z2 <= (1.0f + 1e-5f));
-
-	float2 N_new;
-	if(valid1 && valid2) {
-		/* If both are possible, do the expensive reflection-based check. */
-		float2 N1 = make_float2(safe_sqrtf(1.0f - N1_z2), safe_sqrtf(N1_z2));
-		float2 N2 = make_float2(safe_sqrtf(1.0f - N2_z2), safe_sqrtf(N2_z2));
-
-		float R1 = 2*(N1.x*Ix + N1.y*Iz)*N1.y - Iz;
-		float R2 = 2*(N2.x*Ix + N2.y*Iz)*N2.y - Iz;
-
-		valid1 = (R1 >= 1e-5f);
-		valid2 = (R2 >= 1e-5f);
-		if(valid1 && valid2) {
-			/* If both solutions are valid, return the one with the shallower reflection since it will be closer to the input
-			 * (if the original reflection wasn't shallow, we would not be in this part of the function). */
-			N_new = (R1 < R2)? N1 : N2;
-		}
-		else {
-			/* If only one reflection is valid (= positive), pick that one. */
-			N_new = (R1 > R2)? N1 : N2;
-		}
-
-	}
-	else if(valid1 || valid2) {
-		/* Only one solution passes the N'.z criterium, so pick that one. */
-		float Nz2 = valid1? N1_z2 : N2_z2;
-		N_new = make_float2(safe_sqrtf(1.0f - Nz2), safe_sqrtf(Nz2));
-	}
-	else {
-		return Ng;
-	}
-
-	return N_new.x*X + N_new.y*Ng;
+  float3 R = 2 * dot(N, I) * N - I;
+
+  /* Reflection rays may always be at least as shallow as the incoming ray. */
+  float threshold = min(0.9f * dot(Ng, I), 0.01f);
+  if (dot(Ng, R) >= threshold) {
+    return N;
+  }
+
+  /* Form coordinate system with Ng as the Z axis and N inside the X-Z-plane.
+   * The X axis is found by normalizing the component of N that's orthogonal to Ng.
+   * The Y axis isn't actually needed.
+   */
+  float NdotNg = dot(N, Ng);
+  float3 X = normalize(N - NdotNg * Ng);
+
+  /* Calculate N.z and N.x in the local coordinate system.
+   *
+   * The goal of this computation is to find a N' that is rotated towards Ng just enough
+   * to lift R' above the threshold (here called t), therefore dot(R', Ng) = t.
+   *
+   * According to the standard reflection equation, this means that we want dot(2*dot(N', I)*N' - I, Ng) = t.
+   *
+   * Since the Z axis of our local coordinate system is Ng, dot(x, Ng) is just x.z, so we get 2*dot(N', I)*N'.z - I.z = t.
+   *
+   * The rotation is simple to express in the coordinate system we formed - since N lies in the X-Z-plane, we know that
+   * N' will also lie in the X-Z-plane, so N'.y = 0 and therefore dot(N', I) = N'.x*I.x + N'.z*I.z .
+   *
+   * Furthermore, we want N' to be normalized, so N'.x = sqrt(1 - N'.z^2).
+   *
+   * With these simplifications, we get the final equation 2*(sqrt(1 - N'.z^2)*I.x + N'.z*I.z)*N'.z - I.z = t.
+   *
+   * The only unknown here is N'.z, so we can solve for that.
+   *
+   * The equation has four solutions in general:
+   *
+   * N'.z = +-sqrt(0.5*(+-sqrt(I.x^2*(I.x^2 + I.z^2 - t^2)) + t*I.z + I.x^2 + I.z^2)/(I.x^2 + I.z^2))
+   * We can simplify this expression a bit by grouping terms:
+   *
+   * a = I.x^2 + I.z^2
+   * b = sqrt(I.x^2 * (a - t^2))
+   * c = I.z*t + a
+   * N'.z = +-sqrt(0.5*(+-b + c)/a)
+   *
+   * Two solutions can immediately be discarded because they're negative so N' would lie in the lower hemisphere.
+   */
+  float Ix = dot(I, X), Iz = dot(I, Ng);
+  float Ix2 = sqr(Ix), Iz2 = sqr(Iz);
+  float a = Ix2 + Iz2;
+
+  float b = safe_sqrtf(Ix2 * (a - sqr(threshold)));
+  float c = Iz * threshold + a;
+
+  /* Evaluate both solutions.
+   * In many cases one can be immediately discarded (if N'.z would be imaginary or larger than one), so check for that first.
+   * If no option is viable (might happen in extreme cases like N being in the wrong hemisphere), give up and return Ng. */
+  float fac = 0.5f / a;
+  float N1_z2 = fac * (b + c), N2_z2 = fac * (-b + c);
+  bool valid1 = (N1_z2 > 1e-5f) && (N1_z2 <= (1.0f + 1e-5f));
+  bool valid2 = (N2_z2 > 1e-5f) && (N2_z2 <= (1.0f + 1e-5f));
+
+  float2 N_new;
+  if (valid1 && valid2) {
+    /* If both are possible, do the expensive reflection-based check. */
+    float2 N1 = make_float2(safe_sqrtf(1.0f - N1_z2), safe_sqrtf(N1_z2));
+    float2 N2 = make_float2(safe_sqrtf(1.0f - N2_z2), safe_sqrtf(N2_z2));
+
+    float R1 = 2 * (N1.x * Ix + N1.y * Iz) * N1.y - Iz;
+    float R2 = 2 * (N2.x * Ix + N2.y * Iz) * N2.y - Iz;
+
+    valid1 = (R1 >= 1e-5f);
+    valid2 = (R2 >= 1e-5f);
+    if (valid1 && valid2) {
+      /* If both solutions are valid, return the one with the shallower reflection since it will be closer to the input
+       * (if the original reflection wasn't shallow, we would not be in this part of the function). */
+      N_new = (R1 < R2) ? N1 : N2;
+    }
+    else {
+      /* If only one reflection is valid (= positive), pick that one. */
+      N_new = (R1 > R2) ? N1 : N2;
+    }
+  }
+  else if (valid1 || valid2) {
+    /* Only one solution passes the N'.z criterium, so pick that one. */
+    float Nz2 = valid1 ? N1_z2 : N2_z2;
+    N_new = make_float2(safe_sqrtf(1.0f - Nz2), safe_sqrtf(Nz2));
+  }
+  else {
+    return Ng;
+  }
+
+  return N_new.x * X + N_new.y * Ng;
 }
 
 CCL_NAMESPACE_END
 
-#endif  /* __KERNEL_MONTECARLO_CL__ */
+#endif /* __KERNEL_MONTECARLO_CL__ */
diff --git a/intern/cycles/kernel/kernel_passes.h b/intern/cycles/kernel/kernel_passes.h
index 08e9db05c39..462ec037ee7 100644
--- a/intern/cycles/kernel/kernel_passes.h
+++ b/intern/cycles/kernel/kernel_passes.h
@@ -15,7 +15,7 @@
  */
 
 #if defined(__SPLIT_KERNEL__) || defined(__KERNEL_CUDA__)
-#define __ATOMIC_PASS_WRITE__
+#  define __ATOMIC_PASS_WRITE__
 #endif
 
 #include "kernel/kernel_id_passes.h"
@@ -24,56 +24,56 @@ CCL_NAMESPACE_BEGIN
 
 ccl_device_inline void kernel_write_pass_float(ccl_global float *buffer, float value)
 {
-	ccl_global float *buf = buffer;
+  ccl_global float *buf = buffer;
 #ifdef __ATOMIC_PASS_WRITE__
-	atomic_add_and_fetch_float(buf, value);
+  atomic_add_and_fetch_float(buf, value);
 #else
-	*buf += value;
+  *buf += value;
 #endif
 }
 
 ccl_device_inline void kernel_write_pass_float3(ccl_global float *buffer, float3 value)
 {
 #ifdef __ATOMIC_PASS_WRITE__
-	ccl_global float *buf_x = buffer + 0;
-	ccl_global float *buf_y = buffer + 1;
-	ccl_global float *buf_z = buffer + 2;
+  ccl_global float *buf_x = buffer + 0;
+  ccl_global float *buf_y = buffer + 1;
+  ccl_global float *buf_z = buffer + 2;
 
-	atomic_add_and_fetch_float(buf_x, value.x);
-	atomic_add_and_fetch_float(buf_y, value.y);
-	atomic_add_and_fetch_float(buf_z, value.z);
+  atomic_add_and_fetch_float(buf_x, value.x);
+  atomic_add_and_fetch_float(buf_y, value.y);
+  atomic_add_and_fetch_float(buf_z, value.z);
 #else
-	ccl_global float3 *buf = (ccl_global float3*)buffer;
-	*buf += value;
+  ccl_global float3 *buf = (ccl_global float3 *)buffer;
+  *buf += value;
 #endif
 }
 
 ccl_device_inline void kernel_write_pass_float4(ccl_global float *buffer, float4 value)
 {
 #ifdef __ATOMIC_PASS_WRITE__
-	ccl_global float *buf_x = buffer + 0;
-	ccl_global float *buf_y = buffer + 1;
-	ccl_global float *buf_z = buffer + 2;
-	ccl_global float *buf_w = buffer + 3;
-
-	atomic_add_and_fetch_float(buf_x, value.x);
-	atomic_add_and_fetch_float(buf_y, value.y);
-	atomic_add_and_fetch_float(buf_z, value.z);
-	atomic_add_and_fetch_float(buf_w, value.w);
+  ccl_global float *buf_x = buffer + 0;
+  ccl_global float *buf_y = buffer + 1;
+  ccl_global float *buf_z = buffer + 2;
+  ccl_global float *buf_w = buffer + 3;
+
+  atomic_add_and_fetch_float(buf_x, value.x);
+  atomic_add_and_fetch_float(buf_y, value.y);
+  atomic_add_and_fetch_float(buf_z, value.z);
+  atomic_add_and_fetch_float(buf_w, value.w);
 #else
-	ccl_global float4 *buf = (ccl_global float4*)buffer;
-	*buf += value;
+  ccl_global float4 *buf = (ccl_global float4 *)buffer;
+  *buf += value;
 #endif
 }
 
 #ifdef __DENOISING_FEATURES__
 ccl_device_inline void kernel_write_pass_float_variance(ccl_global float *buffer, float value)
 {
-	kernel_write_pass_float(buffer, value);
+  kernel_write_pass_float(buffer, value);
 
-	/* The online one-pass variance update that's used for the megakernel can't easily be implemented
-	 * with atomics, so for the split kernel the E[x^2] - 1/N * (E[x])^2 fallback is used. */
-	kernel_write_pass_float(buffer+1, value*value);
+  /* The online one-pass variance update that's used for the megakernel can't easily be implemented
+   * with atomics, so for the split kernel the E[x^2] - 1/N * (E[x])^2 fallback is used. */
+  kernel_write_pass_float(buffer + 1, value * value);
 }
 
 #  ifdef __ATOMIC_PASS_WRITE__
@@ -81,36 +81,39 @@ ccl_device_inline void kernel_write_pass_float_variance(ccl_global float *buffer
 #  else
 ccl_device_inline void kernel_write_pass_float3_unaligned(ccl_global float *buffer, float3 value)
 {
-	buffer[0] += value.x;
-	buffer[1] += value.y;
-	buffer[2] += value.z;
+  buffer[0] += value.x;
+  buffer[1] += value.y;
+  buffer[2] += value.z;
 }
 #  endif
 
 ccl_device_inline void kernel_write_pass_float3_variance(ccl_global float *buffer, float3 value)
 {
-	kernel_write_pass_float3_unaligned(buffer, value);
-	kernel_write_pass_float3_unaligned(buffer+3, value*value);
+  kernel_write_pass_float3_unaligned(buffer, value);
+  kernel_write_pass_float3_unaligned(buffer + 3, value * value);
 }
 
-ccl_device_inline void kernel_write_denoising_shadow(KernelGlobals *kg, ccl_global float *buffer,
-	int sample, float path_total, float path_total_shaded)
+ccl_device_inline void kernel_write_denoising_shadow(KernelGlobals *kg,
+                                                     ccl_global float *buffer,
+                                                     int sample,
+                                                     float path_total,
+                                                     float path_total_shaded)
 {
-	if(kernel_data.film.pass_denoising_data == 0)
-		return;
+  if (kernel_data.film.pass_denoising_data == 0)
+    return;
 
-	buffer += (sample & 1)? DENOISING_PASS_SHADOW_B : DENOISING_PASS_SHADOW_A;
+  buffer += (sample & 1) ? DENOISING_PASS_SHADOW_B : DENOISING_PASS_SHADOW_A;
 
-	path_total = ensure_finite(path_total);
-	path_total_shaded = ensure_finite(path_total_shaded);
+  path_total = ensure_finite(path_total);
+  path_total_shaded = ensure_finite(path_total_shaded);
 
-	kernel_write_pass_float(buffer, path_total);
-	kernel_write_pass_float(buffer+1, path_total_shaded);
+  kernel_write_pass_float(buffer, path_total);
+  kernel_write_pass_float(buffer + 1, path_total_shaded);
 
-	float value = path_total_shaded / max(path_total, 1e-7f);
-	kernel_write_pass_float(buffer+2, value*value);
+  float value = path_total_shaded / max(path_total, 1e-7f);
+  kernel_write_pass_float(buffer + 2, value * value);
 }
-#endif  /* __DENOISING_FEATURES__ */
+#endif /* __DENOISING_FEATURES__ */
 
 ccl_device_inline void kernel_update_denoising_features(KernelGlobals *kg,
                                                         ShaderData *sd,
@@ -118,52 +121,52 @@ ccl_device_inline void kernel_update_denoising_features(KernelGlobals *kg,
                                                         PathRadiance *L)
 {
 #ifdef __DENOISING_FEATURES__
-	if(state->denoising_feature_weight == 0.0f) {
-		return;
-	}
-
-	L->denoising_depth += ensure_finite(state->denoising_feature_weight * sd->ray_length);
-
-	/* Skip implicitly transparent surfaces. */
-	if(sd->flag & SD_HAS_ONLY_VOLUME) {
-		return;
-	}
-
-	float3 normal = make_float3(0.0f, 0.0f, 0.0f);
-	float3 albedo = make_float3(0.0f, 0.0f, 0.0f);
-	float sum_weight = 0.0f, sum_nonspecular_weight = 0.0f;
-
-	for(int i = 0; i < sd->num_closure; i++) {
-		ShaderClosure *sc = &sd->closure[i];
-
-		if(!CLOSURE_IS_BSDF_OR_BSSRDF(sc->type))
-			continue;
-
-		/* All closures contribute to the normal feature, but only diffuse-like ones to the albedo. */
-		normal += sc->N * sc->sample_weight;
-		sum_weight += sc->sample_weight;
-		if(bsdf_get_specular_roughness_squared(sc) > sqr(0.075f)) {
-			albedo += sc->weight;
-			sum_nonspecular_weight += sc->sample_weight;
-		}
-	}
-
-	/* Wait for next bounce if 75% or more sample weight belongs to specular-like closures. */
-	if((sum_weight == 0.0f) || (sum_nonspecular_weight*4.0f > sum_weight)) {
-		if(sum_weight != 0.0f) {
-			normal /= sum_weight;
-		}
-		L->denoising_normal += ensure_finite3(state->denoising_feature_weight * normal);
-		L->denoising_albedo += ensure_finite3(state->denoising_feature_weight * albedo);
-
-		state->denoising_feature_weight = 0.0f;
-	}
+  if (state->denoising_feature_weight == 0.0f) {
+    return;
+  }
+
+  L->denoising_depth += ensure_finite(state->denoising_feature_weight * sd->ray_length);
+
+  /* Skip implicitly transparent surfaces. */
+  if (sd->flag & SD_HAS_ONLY_VOLUME) {
+    return;
+  }
+
+  float3 normal = make_float3(0.0f, 0.0f, 0.0f);
+  float3 albedo = make_float3(0.0f, 0.0f, 0.0f);
+  float sum_weight = 0.0f, sum_nonspecular_weight = 0.0f;
+
+  for (int i = 0; i < sd->num_closure; i++) {
+    ShaderClosure *sc = &sd->closure[i];
+
+    if (!CLOSURE_IS_BSDF_OR_BSSRDF(sc->type))
+      continue;
+
+    /* All closures contribute to the normal feature, but only diffuse-like ones to the albedo. */
+    normal += sc->N * sc->sample_weight;
+    sum_weight += sc->sample_weight;
+    if (bsdf_get_specular_roughness_squared(sc) > sqr(0.075f)) {
+      albedo += sc->weight;
+      sum_nonspecular_weight += sc->sample_weight;
+    }
+  }
+
+  /* Wait for next bounce if 75% or more sample weight belongs to specular-like closures. */
+  if ((sum_weight == 0.0f) || (sum_nonspecular_weight * 4.0f > sum_weight)) {
+    if (sum_weight != 0.0f) {
+      normal /= sum_weight;
+    }
+    L->denoising_normal += ensure_finite3(state->denoising_feature_weight * normal);
+    L->denoising_albedo += ensure_finite3(state->denoising_feature_weight * albedo);
+
+    state->denoising_feature_weight = 0.0f;
+  }
 #else
-	(void) kg;
-	(void) sd;
-	(void) state;
-	(void) L;
-#endif  /* __DENOISING_FEATURES__ */
+  (void)kg;
+  (void)sd;
+  (void)state;
+  (void)L;
+#endif /* __DENOISING_FEATURES__ */
 }
 
 #ifdef __KERNEL_DEBUG__
@@ -171,203 +174,221 @@ ccl_device_inline void kernel_write_debug_passes(KernelGlobals *kg,
                                                  ccl_global float *buffer,
                                                  PathRadiance *L)
 {
-	int flag = kernel_data.film.pass_flag;
-	if(flag & PASSMASK(BVH_TRAVERSED_NODES)) {
-		kernel_write_pass_float(buffer + kernel_data.film.pass_bvh_traversed_nodes,
-		                        L->debug_data.num_bvh_traversed_nodes);
-	}
-	if(flag & PASSMASK(BVH_TRAVERSED_INSTANCES)) {
-		kernel_write_pass_float(buffer + kernel_data.film.pass_bvh_traversed_instances,
-		                        L->debug_data.num_bvh_traversed_instances);
-	}
-	if(flag & PASSMASK(BVH_INTERSECTIONS)) {
-		kernel_write_pass_float(buffer + kernel_data.film.pass_bvh_intersections,
-		                        L->debug_data.num_bvh_intersections);
-	}
-	if(flag & PASSMASK(RAY_BOUNCES)) {
-		kernel_write_pass_float(buffer + kernel_data.film.pass_ray_bounces,
-		                        L->debug_data.num_ray_bounces);
-	}
+  int flag = kernel_data.film.pass_flag;
+  if (flag & PASSMASK(BVH_TRAVERSED_NODES)) {
+    kernel_write_pass_float(buffer + kernel_data.film.pass_bvh_traversed_nodes,
+                            L->debug_data.num_bvh_traversed_nodes);
+  }
+  if (flag & PASSMASK(BVH_TRAVERSED_INSTANCES)) {
+    kernel_write_pass_float(buffer + kernel_data.film.pass_bvh_traversed_instances,
+                            L->debug_data.num_bvh_traversed_instances);
+  }
+  if (flag & PASSMASK(BVH_INTERSECTIONS)) {
+    kernel_write_pass_float(buffer + kernel_data.film.pass_bvh_intersections,
+                            L->debug_data.num_bvh_intersections);
+  }
+  if (flag & PASSMASK(RAY_BOUNCES)) {
+    kernel_write_pass_float(buffer + kernel_data.film.pass_ray_bounces,
+                            L->debug_data.num_ray_bounces);
+  }
 }
-#endif  /* __KERNEL_DEBUG__ */
+#endif /* __KERNEL_DEBUG__ */
 
 #ifdef __KERNEL_CPU__
-#define WRITE_ID_SLOT(buffer, depth, id, matte_weight, name) kernel_write_id_pass_cpu(buffer, depth * 2, id, matte_weight, kg->coverage_##name)
-ccl_device_inline size_t kernel_write_id_pass_cpu(float *buffer, size_t depth, float id, float matte_weight, CoverageMap *map)
+#  define WRITE_ID_SLOT(buffer, depth, id, matte_weight, name) \
+    kernel_write_id_pass_cpu(buffer, depth * 2, id, matte_weight, kg->coverage_##name)
+ccl_device_inline size_t kernel_write_id_pass_cpu(
+    float *buffer, size_t depth, float id, float matte_weight, CoverageMap *map)
 {
-	if(map) {
-		(*map)[id] += matte_weight;
-		return 0;
-	}
-#else  /* __KERNEL_CPU__ */
-#define WRITE_ID_SLOT(buffer, depth, id, matte_weight, name) kernel_write_id_slots_gpu(buffer, depth * 2, id, matte_weight)
-ccl_device_inline size_t kernel_write_id_slots_gpu(ccl_global float *buffer, size_t depth, float id, float matte_weight)
+  if (map) {
+    (*map)[id] += matte_weight;
+    return 0;
+  }
+#else /* __KERNEL_CPU__ */
+#  define WRITE_ID_SLOT(buffer, depth, id, matte_weight, name) \
+    kernel_write_id_slots_gpu(buffer, depth * 2, id, matte_weight)
+ccl_device_inline size_t kernel_write_id_slots_gpu(ccl_global float *buffer,
+                                                   size_t depth,
+                                                   float id,
+                                                   float matte_weight)
 {
-#endif  /* __KERNEL_CPU__ */
-	kernel_write_id_slots(buffer, depth, id, matte_weight);
-	return depth * 2;
+#endif /* __KERNEL_CPU__ */
+  kernel_write_id_slots(buffer, depth, id, matte_weight);
+  return depth * 2;
 }
 
-ccl_device_inline void kernel_write_data_passes(KernelGlobals *kg, ccl_global float *buffer, PathRadiance *L,
-	ShaderData *sd, ccl_addr_space PathState *state, float3 throughput)
+ccl_device_inline void kernel_write_data_passes(KernelGlobals *kg,
+                                                ccl_global float *buffer,
+                                                PathRadiance *L,
+                                                ShaderData *sd,
+                                                ccl_addr_space PathState *state,
+                                                float3 throughput)
 {
 #ifdef __PASSES__
-	int path_flag = state->flag;
-
-	if(!(path_flag & PATH_RAY_CAMERA))
-		return;
-
-	int flag = kernel_data.film.pass_flag;
-	int light_flag = kernel_data.film.light_pass_flag;
-
-	if(!((flag | light_flag) & PASS_ANY))
-		return;
-
-	if(!(path_flag & PATH_RAY_SINGLE_PASS_DONE)) {
-		if(!(sd->flag & SD_TRANSPARENT) ||
-		   kernel_data.film.pass_alpha_threshold == 0.0f ||
-		   average(shader_bsdf_alpha(kg, sd)) >= kernel_data.film.pass_alpha_threshold)
-		{
-			if(state->sample == 0) {
-				if(flag & PASSMASK(DEPTH)) {
-					float depth = camera_distance(kg, sd->P);
-					kernel_write_pass_float(buffer + kernel_data.film.pass_depth, depth);
-				}
-				if(flag & PASSMASK(OBJECT_ID)) {
-					float id = object_pass_id(kg, sd->object);
-					kernel_write_pass_float(buffer + kernel_data.film.pass_object_id, id);
-				}
-				if(flag & PASSMASK(MATERIAL_ID)) {
-					float id = shader_pass_id(kg, sd);
-					kernel_write_pass_float(buffer + kernel_data.film.pass_material_id, id);
-				}
-			}
-
-			if(flag & PASSMASK(NORMAL)) {
-				float3 normal = shader_bsdf_average_normal(kg, sd);
-				kernel_write_pass_float3(buffer + kernel_data.film.pass_normal, normal);
-			}
-			if(flag & PASSMASK(UV)) {
-				float3 uv = primitive_uv(kg, sd);
-				kernel_write_pass_float3(buffer + kernel_data.film.pass_uv, uv);
-			}
-			if(flag & PASSMASK(MOTION)) {
-				float4 speed = primitive_motion_vector(kg, sd);
-				kernel_write_pass_float4(buffer + kernel_data.film.pass_motion, speed);
-				kernel_write_pass_float(buffer + kernel_data.film.pass_motion_weight, 1.0f);
-			}
-
-			state->flag |= PATH_RAY_SINGLE_PASS_DONE;
-		}
-	}
-
-	if(kernel_data.film.cryptomatte_passes) {
-		const float matte_weight = average(throughput) * (1.0f - average(shader_bsdf_transparency(kg, sd)));
-		if(matte_weight > 0.0f) {
-			ccl_global float *cryptomatte_buffer = buffer + kernel_data.film.pass_cryptomatte;
-			if(kernel_data.film.cryptomatte_passes & CRYPT_OBJECT) {
-				float id = object_cryptomatte_id(kg, sd->object);
-				cryptomatte_buffer += WRITE_ID_SLOT(cryptomatte_buffer, kernel_data.film.cryptomatte_depth, id, matte_weight, object);
-			}
-			if(kernel_data.film.cryptomatte_passes & CRYPT_MATERIAL) {
-				float id = shader_cryptomatte_id(kg, sd->shader);
-				cryptomatte_buffer += WRITE_ID_SLOT(cryptomatte_buffer, kernel_data.film.cryptomatte_depth, id, matte_weight, material);
-			}
-			if(kernel_data.film.cryptomatte_passes & CRYPT_ASSET) {
-				float id = object_cryptomatte_asset_id(kg, sd->object);
-				cryptomatte_buffer += WRITE_ID_SLOT(cryptomatte_buffer, kernel_data.film.cryptomatte_depth, id, matte_weight, asset);
-			}
-		}
-	}
-
-
-	if(light_flag & PASSMASK_COMPONENT(DIFFUSE))
-		L->color_diffuse += shader_bsdf_diffuse(kg, sd)*throughput;
-	if(light_flag & PASSMASK_COMPONENT(GLOSSY))
-		L->color_glossy += shader_bsdf_glossy(kg, sd)*throughput;
-	if(light_flag & PASSMASK_COMPONENT(TRANSMISSION))
-		L->color_transmission += shader_bsdf_transmission(kg, sd)*throughput;
-	if(light_flag & PASSMASK_COMPONENT(SUBSURFACE))
-		L->color_subsurface += shader_bsdf_subsurface(kg, sd)*throughput;
-
-	if(light_flag & PASSMASK(MIST)) {
-		/* bring depth into 0..1 range */
-		float mist_start = kernel_data.film.mist_start;
-		float mist_inv_depth = kernel_data.film.mist_inv_depth;
-
-		float depth = camera_distance(kg, sd->P);
-		float mist = saturate((depth - mist_start)*mist_inv_depth);
-
-		/* falloff */
-		float mist_falloff = kernel_data.film.mist_falloff;
-
-		if(mist_falloff == 1.0f)
-			;
-		else if(mist_falloff == 2.0f)
-			mist = mist*mist;
-		else if(mist_falloff == 0.5f)
-			mist = sqrtf(mist);
-		else
-			mist = powf(mist, mist_falloff);
-
-		/* modulate by transparency */
-		float3 alpha = shader_bsdf_alpha(kg, sd);
-		L->mist += (1.0f - mist)*average(throughput*alpha);
-	}
+  int path_flag = state->flag;
+
+  if (!(path_flag & PATH_RAY_CAMERA))
+    return;
+
+  int flag = kernel_data.film.pass_flag;
+  int light_flag = kernel_data.film.light_pass_flag;
+
+  if (!((flag | light_flag) & PASS_ANY))
+    return;
+
+  if (!(path_flag & PATH_RAY_SINGLE_PASS_DONE)) {
+    if (!(sd->flag & SD_TRANSPARENT) || kernel_data.film.pass_alpha_threshold == 0.0f ||
+        average(shader_bsdf_alpha(kg, sd)) >= kernel_data.film.pass_alpha_threshold) {
+      if (state->sample == 0) {
+        if (flag & PASSMASK(DEPTH)) {
+          float depth = camera_distance(kg, sd->P);
+          kernel_write_pass_float(buffer + kernel_data.film.pass_depth, depth);
+        }
+        if (flag & PASSMASK(OBJECT_ID)) {
+          float id = object_pass_id(kg, sd->object);
+          kernel_write_pass_float(buffer + kernel_data.film.pass_object_id, id);
+        }
+        if (flag & PASSMASK(MATERIAL_ID)) {
+          float id = shader_pass_id(kg, sd);
+          kernel_write_pass_float(buffer + kernel_data.film.pass_material_id, id);
+        }
+      }
+
+      if (flag & PASSMASK(NORMAL)) {
+        float3 normal = shader_bsdf_average_normal(kg, sd);
+        kernel_write_pass_float3(buffer + kernel_data.film.pass_normal, normal);
+      }
+      if (flag & PASSMASK(UV)) {
+        float3 uv = primitive_uv(kg, sd);
+        kernel_write_pass_float3(buffer + kernel_data.film.pass_uv, uv);
+      }
+      if (flag & PASSMASK(MOTION)) {
+        float4 speed = primitive_motion_vector(kg, sd);
+        kernel_write_pass_float4(buffer + kernel_data.film.pass_motion, speed);
+        kernel_write_pass_float(buffer + kernel_data.film.pass_motion_weight, 1.0f);
+      }
+
+      state->flag |= PATH_RAY_SINGLE_PASS_DONE;
+    }
+  }
+
+  if (kernel_data.film.cryptomatte_passes) {
+    const float matte_weight = average(throughput) *
+                               (1.0f - average(shader_bsdf_transparency(kg, sd)));
+    if (matte_weight > 0.0f) {
+      ccl_global float *cryptomatte_buffer = buffer + kernel_data.film.pass_cryptomatte;
+      if (kernel_data.film.cryptomatte_passes & CRYPT_OBJECT) {
+        float id = object_cryptomatte_id(kg, sd->object);
+        cryptomatte_buffer += WRITE_ID_SLOT(
+            cryptomatte_buffer, kernel_data.film.cryptomatte_depth, id, matte_weight, object);
+      }
+      if (kernel_data.film.cryptomatte_passes & CRYPT_MATERIAL) {
+        float id = shader_cryptomatte_id(kg, sd->shader);
+        cryptomatte_buffer += WRITE_ID_SLOT(
+            cryptomatte_buffer, kernel_data.film.cryptomatte_depth, id, matte_weight, material);
+      }
+      if (kernel_data.film.cryptomatte_passes & CRYPT_ASSET) {
+        float id = object_cryptomatte_asset_id(kg, sd->object);
+        cryptomatte_buffer += WRITE_ID_SLOT(
+            cryptomatte_buffer, kernel_data.film.cryptomatte_depth, id, matte_weight, asset);
+      }
+    }
+  }
+
+  if (light_flag & PASSMASK_COMPONENT(DIFFUSE))
+    L->color_diffuse += shader_bsdf_diffuse(kg, sd) * throughput;
+  if (light_flag & PASSMASK_COMPONENT(GLOSSY))
+    L->color_glossy += shader_bsdf_glossy(kg, sd) * throughput;
+  if (light_flag & PASSMASK_COMPONENT(TRANSMISSION))
+    L->color_transmission += shader_bsdf_transmission(kg, sd) * throughput;
+  if (light_flag & PASSMASK_COMPONENT(SUBSURFACE))
+    L->color_subsurface += shader_bsdf_subsurface(kg, sd) * throughput;
+
+  if (light_flag & PASSMASK(MIST)) {
+    /* bring depth into 0..1 range */
+    float mist_start = kernel_data.film.mist_start;
+    float mist_inv_depth = kernel_data.film.mist_inv_depth;
+
+    float depth = camera_distance(kg, sd->P);
+    float mist = saturate((depth - mist_start) * mist_inv_depth);
+
+    /* falloff */
+    float mist_falloff = kernel_data.film.mist_falloff;
+
+    if (mist_falloff == 1.0f)
+      ;
+    else if (mist_falloff == 2.0f)
+      mist = mist * mist;
+    else if (mist_falloff == 0.5f)
+      mist = sqrtf(mist);
+    else
+      mist = powf(mist, mist_falloff);
+
+    /* modulate by transparency */
+    float3 alpha = shader_bsdf_alpha(kg, sd);
+    L->mist += (1.0f - mist) * average(throughput * alpha);
+  }
 #endif
 }
 
-ccl_device_inline void kernel_write_light_passes(KernelGlobals *kg, ccl_global float *buffer, PathRadiance *L)
+ccl_device_inline void kernel_write_light_passes(KernelGlobals *kg,
+                                                 ccl_global float *buffer,
+                                                 PathRadiance *L)
 {
 #ifdef __PASSES__
-	int light_flag = kernel_data.film.light_pass_flag;
-
-	if(!kernel_data.film.use_light_pass)
-		return;
-
-	if(light_flag & PASSMASK(DIFFUSE_INDIRECT))
-		kernel_write_pass_float3(buffer + kernel_data.film.pass_diffuse_indirect, L->indirect_diffuse);
-	if(light_flag & PASSMASK(GLOSSY_INDIRECT))
-		kernel_write_pass_float3(buffer + kernel_data.film.pass_glossy_indirect, L->indirect_glossy);
-	if(light_flag & PASSMASK(TRANSMISSION_INDIRECT))
-		kernel_write_pass_float3(buffer + kernel_data.film.pass_transmission_indirect, L->indirect_transmission);
-	if(light_flag & PASSMASK(SUBSURFACE_INDIRECT))
-		kernel_write_pass_float3(buffer + kernel_data.film.pass_subsurface_indirect, L->indirect_subsurface);
-	if(light_flag & PASSMASK(VOLUME_INDIRECT))
-		kernel_write_pass_float3(buffer + kernel_data.film.pass_volume_indirect, L->indirect_scatter);
-	if(light_flag & PASSMASK(DIFFUSE_DIRECT))
-		kernel_write_pass_float3(buffer + kernel_data.film.pass_diffuse_direct, L->direct_diffuse);
-	if(light_flag & PASSMASK(GLOSSY_DIRECT))
-		kernel_write_pass_float3(buffer + kernel_data.film.pass_glossy_direct, L->direct_glossy);
-	if(light_flag & PASSMASK(TRANSMISSION_DIRECT))
-		kernel_write_pass_float3(buffer + kernel_data.film.pass_transmission_direct, L->direct_transmission);
-	if(light_flag & PASSMASK(SUBSURFACE_DIRECT))
-		kernel_write_pass_float3(buffer + kernel_data.film.pass_subsurface_direct, L->direct_subsurface);
-	if(light_flag & PASSMASK(VOLUME_DIRECT))
-		kernel_write_pass_float3(buffer + kernel_data.film.pass_volume_direct, L->direct_scatter);
-
-	if(light_flag & PASSMASK(EMISSION))
-		kernel_write_pass_float3(buffer + kernel_data.film.pass_emission, L->emission);
-	if(light_flag & PASSMASK(BACKGROUND))
-		kernel_write_pass_float3(buffer + kernel_data.film.pass_background, L->background);
-	if(light_flag & PASSMASK(AO))
-		kernel_write_pass_float3(buffer + kernel_data.film.pass_ao, L->ao);
-
-	if(light_flag & PASSMASK(DIFFUSE_COLOR))
-		kernel_write_pass_float3(buffer + kernel_data.film.pass_diffuse_color, L->color_diffuse);
-	if(light_flag & PASSMASK(GLOSSY_COLOR))
-		kernel_write_pass_float3(buffer + kernel_data.film.pass_glossy_color, L->color_glossy);
-	if(light_flag & PASSMASK(TRANSMISSION_COLOR))
-		kernel_write_pass_float3(buffer + kernel_data.film.pass_transmission_color, L->color_transmission);
-	if(light_flag & PASSMASK(SUBSURFACE_COLOR))
-		kernel_write_pass_float3(buffer + kernel_data.film.pass_subsurface_color, L->color_subsurface);
-	if(light_flag & PASSMASK(SHADOW)) {
-		float4 shadow = L->shadow;
-		shadow.w = kernel_data.film.pass_shadow_scale;
-		kernel_write_pass_float4(buffer + kernel_data.film.pass_shadow, shadow);
-	}
-	if(light_flag & PASSMASK(MIST))
-		kernel_write_pass_float(buffer + kernel_data.film.pass_mist, 1.0f - L->mist);
+  int light_flag = kernel_data.film.light_pass_flag;
+
+  if (!kernel_data.film.use_light_pass)
+    return;
+
+  if (light_flag & PASSMASK(DIFFUSE_INDIRECT))
+    kernel_write_pass_float3(buffer + kernel_data.film.pass_diffuse_indirect, L->indirect_diffuse);
+  if (light_flag & PASSMASK(GLOSSY_INDIRECT))
+    kernel_write_pass_float3(buffer + kernel_data.film.pass_glossy_indirect, L->indirect_glossy);
+  if (light_flag & PASSMASK(TRANSMISSION_INDIRECT))
+    kernel_write_pass_float3(buffer + kernel_data.film.pass_transmission_indirect,
+                             L->indirect_transmission);
+  if (light_flag & PASSMASK(SUBSURFACE_INDIRECT))
+    kernel_write_pass_float3(buffer + kernel_data.film.pass_subsurface_indirect,
+                             L->indirect_subsurface);
+  if (light_flag & PASSMASK(VOLUME_INDIRECT))
+    kernel_write_pass_float3(buffer + kernel_data.film.pass_volume_indirect, L->indirect_scatter);
+  if (light_flag & PASSMASK(DIFFUSE_DIRECT))
+    kernel_write_pass_float3(buffer + kernel_data.film.pass_diffuse_direct, L->direct_diffuse);
+  if (light_flag & PASSMASK(GLOSSY_DIRECT))
+    kernel_write_pass_float3(buffer + kernel_data.film.pass_glossy_direct, L->direct_glossy);
+  if (light_flag & PASSMASK(TRANSMISSION_DIRECT))
+    kernel_write_pass_float3(buffer + kernel_data.film.pass_transmission_direct,
+                             L->direct_transmission);
+  if (light_flag & PASSMASK(SUBSURFACE_DIRECT))
+    kernel_write_pass_float3(buffer + kernel_data.film.pass_subsurface_direct,
+                             L->direct_subsurface);
+  if (light_flag & PASSMASK(VOLUME_DIRECT))
+    kernel_write_pass_float3(buffer + kernel_data.film.pass_volume_direct, L->direct_scatter);
+
+  if (light_flag & PASSMASK(EMISSION))
+    kernel_write_pass_float3(buffer + kernel_data.film.pass_emission, L->emission);
+  if (light_flag & PASSMASK(BACKGROUND))
+    kernel_write_pass_float3(buffer + kernel_data.film.pass_background, L->background);
+  if (light_flag & PASSMASK(AO))
+    kernel_write_pass_float3(buffer + kernel_data.film.pass_ao, L->ao);
+
+  if (light_flag & PASSMASK(DIFFUSE_COLOR))
+    kernel_write_pass_float3(buffer + kernel_data.film.pass_diffuse_color, L->color_diffuse);
+  if (light_flag & PASSMASK(GLOSSY_COLOR))
+    kernel_write_pass_float3(buffer + kernel_data.film.pass_glossy_color, L->color_glossy);
+  if (light_flag & PASSMASK(TRANSMISSION_COLOR))
+    kernel_write_pass_float3(buffer + kernel_data.film.pass_transmission_color,
+                             L->color_transmission);
+  if (light_flag & PASSMASK(SUBSURFACE_COLOR))
+    kernel_write_pass_float3(buffer + kernel_data.film.pass_subsurface_color, L->color_subsurface);
+  if (light_flag & PASSMASK(SHADOW)) {
+    float4 shadow = L->shadow;
+    shadow.w = kernel_data.film.pass_shadow_scale;
+    kernel_write_pass_float4(buffer + kernel_data.film.pass_shadow, shadow);
+  }
+  if (light_flag & PASSMASK(MIST))
+    kernel_write_pass_float(buffer + kernel_data.film.pass_mist, 1.0f - L->mist);
 #endif
 }
 
@@ -376,60 +397,54 @@ ccl_device_inline void kernel_write_result(KernelGlobals *kg,
                                            int sample,
                                            PathRadiance *L)
 {
-	PROFILING_INIT(kg, PROFILING_WRITE_RESULT);
-	PROFILING_OBJECT(PRIM_NONE);
+  PROFILING_INIT(kg, PROFILING_WRITE_RESULT);
+  PROFILING_OBJECT(PRIM_NONE);
 
-	float alpha;
-	float3 L_sum = path_radiance_clamp_and_sum(kg, L, &alpha);
+  float alpha;
+  float3 L_sum = path_radiance_clamp_and_sum(kg, L, &alpha);
 
-	kernel_write_pass_float4(buffer, make_float4(L_sum.x, L_sum.y, L_sum.z, alpha));
+  kernel_write_pass_float4(buffer, make_float4(L_sum.x, L_sum.y, L_sum.z, alpha));
 
-	kernel_write_light_passes(kg, buffer, L);
+  kernel_write_light_passes(kg, buffer, L);
 
 #ifdef __DENOISING_FEATURES__
-	if(kernel_data.film.pass_denoising_data) {
+  if (kernel_data.film.pass_denoising_data) {
 #  ifdef __SHADOW_TRICKS__
-		kernel_write_denoising_shadow(kg,
-		                              buffer + kernel_data.film.pass_denoising_data,
-		                              sample,
-		                              average(L->path_total),
-		                              average(L->path_total_shaded));
+    kernel_write_denoising_shadow(kg,
+                                  buffer + kernel_data.film.pass_denoising_data,
+                                  sample,
+                                  average(L->path_total),
+                                  average(L->path_total_shaded));
 #  else
-		kernel_write_denoising_shadow(kg,
-		                              buffer + kernel_data.film.pass_denoising_data,
-		                              sample,
-		                              0.0f, 0.0f);
+    kernel_write_denoising_shadow(
+        kg, buffer + kernel_data.film.pass_denoising_data, sample, 0.0f, 0.0f);
 #  endif
-		if(kernel_data.film.pass_denoising_clean) {
-			float3 noisy, clean;
-			path_radiance_split_denoising(kg, L, &noisy, &clean);
-			kernel_write_pass_float3_variance(
-			        buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_COLOR,
-			        noisy);
-			kernel_write_pass_float3_unaligned(
-			        buffer + kernel_data.film.pass_denoising_clean,
-			        clean);
-		}
-		else {
-			kernel_write_pass_float3_variance(buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_COLOR,
-			                                    ensure_finite3(L_sum));
-		}
-
-		kernel_write_pass_float3_variance(
-		        buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_NORMAL,
-		        L->denoising_normal);
-		kernel_write_pass_float3_variance(
-		        buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_ALBEDO,
-		        L->denoising_albedo);
-		kernel_write_pass_float_variance(
-		        buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_DEPTH,
-		        L->denoising_depth);
-	}
-#endif  /* __DENOISING_FEATURES__ */
-
+    if (kernel_data.film.pass_denoising_clean) {
+      float3 noisy, clean;
+      path_radiance_split_denoising(kg, L, &noisy, &clean);
+      kernel_write_pass_float3_variance(
+          buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_COLOR, noisy);
+      kernel_write_pass_float3_unaligned(buffer + kernel_data.film.pass_denoising_clean, clean);
+    }
+    else {
+      kernel_write_pass_float3_variance(buffer + kernel_data.film.pass_denoising_data +
+                                            DENOISING_PASS_COLOR,
+                                        ensure_finite3(L_sum));
+    }
+
+    kernel_write_pass_float3_variance(buffer + kernel_data.film.pass_denoising_data +
+                                          DENOISING_PASS_NORMAL,
+                                      L->denoising_normal);
+    kernel_write_pass_float3_variance(buffer + kernel_data.film.pass_denoising_data +
+                                          DENOISING_PASS_ALBEDO,
+                                      L->denoising_albedo);
+    kernel_write_pass_float_variance(
+        buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_DEPTH, L->denoising_depth);
+  }
+#endif /* __DENOISING_FEATURES__ */
 
 #ifdef __KERNEL_DEBUG__
-	kernel_write_debug_passes(kg, buffer, L);
+  kernel_write_debug_passes(kg, buffer, L);
 #endif
 }
 
diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h
index a1fc6028293..2be1b745632 100644
--- a/intern/cycles/kernel/kernel_path.h
+++ b/intern/cycles/kernel/kernel_path.h
@@ -50,309 +50,298 @@
 
 CCL_NAMESPACE_BEGIN
 
-ccl_device_forceinline bool kernel_path_scene_intersect(
-	KernelGlobals *kg,
-	ccl_addr_space PathState *state,
-	Ray *ray,
-	Intersection *isect,
-	PathRadiance *L)
+ccl_device_forceinline bool kernel_path_scene_intersect(KernelGlobals *kg,
+                                                        ccl_addr_space PathState *state,
+                                                        Ray *ray,
+                                                        Intersection *isect,
+                                                        PathRadiance *L)
 {
-	PROFILING_INIT(kg, PROFILING_SCENE_INTERSECT);
+  PROFILING_INIT(kg, PROFILING_SCENE_INTERSECT);
 
-	uint visibility = path_state_ray_visibility(kg, state);
+  uint visibility = path_state_ray_visibility(kg, state);
 
-	if(path_state_ao_bounce(kg, state)) {
-		visibility = PATH_RAY_SHADOW;
-		ray->t = kernel_data.background.ao_distance;
-	}
+  if (path_state_ao_bounce(kg, state)) {
+    visibility = PATH_RAY_SHADOW;
+    ray->t = kernel_data.background.ao_distance;
+  }
 
 #ifdef __HAIR__
-	float difl = 0.0f, extmax = 0.0f;
-	uint lcg_state = 0;
+  float difl = 0.0f, extmax = 0.0f;
+  uint lcg_state = 0;
 
-	if(kernel_data.bvh.have_curves) {
-		if((kernel_data.cam.resolution == 1) && (state->flag & PATH_RAY_CAMERA)) {
-			float3 pixdiff = ray->dD.dx + ray->dD.dy;
-			/*pixdiff = pixdiff - dot(pixdiff, ray.D)*ray.D;*/
-			difl = kernel_data.curve.minimum_width * len(pixdiff) * 0.5f;
-		}
+  if (kernel_data.bvh.have_curves) {
+    if ((kernel_data.cam.resolution == 1) && (state->flag & PATH_RAY_CAMERA)) {
+      float3 pixdiff = ray->dD.dx + ray->dD.dy;
+      /*pixdiff = pixdiff - dot(pixdiff, ray.D)*ray.D;*/
+      difl = kernel_data.curve.minimum_width * len(pixdiff) * 0.5f;
+    }
 
-		extmax = kernel_data.curve.maximum_width;
-		lcg_state = lcg_state_init_addrspace(state, 0x51633e2d);
-	}
+    extmax = kernel_data.curve.maximum_width;
+    lcg_state = lcg_state_init_addrspace(state, 0x51633e2d);
+  }
 
-	bool hit = scene_intersect(kg, *ray, visibility, isect, &lcg_state, difl, extmax);
+  bool hit = scene_intersect(kg, *ray, visibility, isect, &lcg_state, difl, extmax);
 #else
-	bool hit = scene_intersect(kg, *ray, visibility, isect, NULL, 0.0f, 0.0f);
-#endif  /* __HAIR__ */
+  bool hit = scene_intersect(kg, *ray, visibility, isect, NULL, 0.0f, 0.0f);
+#endif /* __HAIR__ */
 
 #ifdef __KERNEL_DEBUG__
-	if(state->flag & PATH_RAY_CAMERA) {
-		L->debug_data.num_bvh_traversed_nodes += isect->num_traversed_nodes;
-		L->debug_data.num_bvh_traversed_instances += isect->num_traversed_instances;
-		L->debug_data.num_bvh_intersections += isect->num_intersections;
-	}
-	L->debug_data.num_ray_bounces++;
-#endif  /* __KERNEL_DEBUG__ */
-
-	return hit;
+  if (state->flag & PATH_RAY_CAMERA) {
+    L->debug_data.num_bvh_traversed_nodes += isect->num_traversed_nodes;
+    L->debug_data.num_bvh_traversed_instances += isect->num_traversed_instances;
+    L->debug_data.num_bvh_intersections += isect->num_intersections;
+  }
+  L->debug_data.num_ray_bounces++;
+#endif /* __KERNEL_DEBUG__ */
+
+  return hit;
 }
 
-ccl_device_forceinline void kernel_path_lamp_emission(
-	KernelGlobals *kg,
-	ccl_addr_space PathState *state,
-	Ray *ray,
-	float3 throughput,
-	ccl_addr_space Intersection *isect,
-	ShaderData *emission_sd,
-	PathRadiance *L)
+ccl_device_forceinline void kernel_path_lamp_emission(KernelGlobals *kg,
+                                                      ccl_addr_space PathState *state,
+                                                      Ray *ray,
+                                                      float3 throughput,
+                                                      ccl_addr_space Intersection *isect,
+                                                      ShaderData *emission_sd,
+                                                      PathRadiance *L)
 {
-	PROFILING_INIT(kg, PROFILING_INDIRECT_EMISSION);
+  PROFILING_INIT(kg, PROFILING_INDIRECT_EMISSION);
 
 #ifdef __LAMP_MIS__
-	if(kernel_data.integrator.use_lamp_mis && !(state->flag & PATH_RAY_CAMERA)) {
-		/* ray starting from previous non-transparent bounce */
-		Ray light_ray;
-
-		light_ray.P = ray->P - state->ray_t*ray->D;
-		state->ray_t += isect->t;
-		light_ray.D = ray->D;
-		light_ray.t = state->ray_t;
-		light_ray.time = ray->time;
-		light_ray.dD = ray->dD;
-		light_ray.dP = ray->dP;
-
-		/* intersect with lamp */
-		float3 emission;
-
-		if(indirect_lamp_emission(kg, emission_sd, state, &light_ray, &emission))
-			path_radiance_accum_emission(L, state, throughput, emission);
-	}
-#endif  /* __LAMP_MIS__ */
+  if (kernel_data.integrator.use_lamp_mis && !(state->flag & PATH_RAY_CAMERA)) {
+    /* ray starting from previous non-transparent bounce */
+    Ray light_ray;
+
+    light_ray.P = ray->P - state->ray_t * ray->D;
+    state->ray_t += isect->t;
+    light_ray.D = ray->D;
+    light_ray.t = state->ray_t;
+    light_ray.time = ray->time;
+    light_ray.dD = ray->dD;
+    light_ray.dP = ray->dP;
+
+    /* intersect with lamp */
+    float3 emission;
+
+    if (indirect_lamp_emission(kg, emission_sd, state, &light_ray, &emission))
+      path_radiance_accum_emission(L, state, throughput, emission);
+  }
+#endif /* __LAMP_MIS__ */
 }
 
-ccl_device_forceinline void kernel_path_background(
-	KernelGlobals *kg,
-	ccl_addr_space PathState *state,
-	ccl_addr_space Ray *ray,
-	float3 throughput,
-	ShaderData *sd,
-	PathRadiance *L)
+ccl_device_forceinline void kernel_path_background(KernelGlobals *kg,
+                                                   ccl_addr_space PathState *state,
+                                                   ccl_addr_space Ray *ray,
+                                                   float3 throughput,
+                                                   ShaderData *sd,
+                                                   PathRadiance *L)
 {
-	/* eval background shader if nothing hit */
-	if(kernel_data.background.transparent && (state->flag & PATH_RAY_TRANSPARENT_BACKGROUND)) {
-		L->transparent += average(throughput);
+  /* eval background shader if nothing hit */
+  if (kernel_data.background.transparent && (state->flag & PATH_RAY_TRANSPARENT_BACKGROUND)) {
+    L->transparent += average(throughput);
 
 #ifdef __PASSES__
-		if(!(kernel_data.film.light_pass_flag & PASSMASK(BACKGROUND)))
-#endif  /* __PASSES__ */
-			return;
-	}
+    if (!(kernel_data.film.light_pass_flag & PASSMASK(BACKGROUND)))
+#endif /* __PASSES__ */
+      return;
+  }
 
-	/* When using the ao bounces approximation, adjust background
-	 * shader intensity with ao factor. */
-	if(path_state_ao_bounce(kg, state)) {
-		throughput *= kernel_data.background.ao_bounces_factor;
-	}
+  /* When using the ao bounces approximation, adjust background
+   * shader intensity with ao factor. */
+  if (path_state_ao_bounce(kg, state)) {
+    throughput *= kernel_data.background.ao_bounces_factor;
+  }
 
 #ifdef __BACKGROUND__
-	/* sample background shader */
-	float3 L_background = indirect_background(kg, sd, state, ray);
-	path_radiance_accum_background(L, state, throughput, L_background);
-#endif  /* __BACKGROUND__ */
+  /* sample background shader */
+  float3 L_background = indirect_background(kg, sd, state, ray);
+  path_radiance_accum_background(L, state, throughput, L_background);
+#endif /* __BACKGROUND__ */
 }
 
 #ifndef __SPLIT_KERNEL__
 
-#ifdef __VOLUME__
-ccl_device_forceinline VolumeIntegrateResult kernel_path_volume(
-	KernelGlobals *kg,
-	ShaderData *sd,
-	PathState *state,
-	Ray *ray,
-	float3 *throughput,
-	ccl_addr_space Intersection *isect,
-	bool hit,
-	ShaderData *emission_sd,
-	PathRadiance *L)
+#  ifdef __VOLUME__
+ccl_device_forceinline VolumeIntegrateResult kernel_path_volume(KernelGlobals *kg,
+                                                                ShaderData *sd,
+                                                                PathState *state,
+                                                                Ray *ray,
+                                                                float3 *throughput,
+                                                                ccl_addr_space Intersection *isect,
+                                                                bool hit,
+                                                                ShaderData *emission_sd,
+                                                                PathRadiance *L)
 {
-	PROFILING_INIT(kg, PROFILING_VOLUME);
-
-	/* Sanitize volume stack. */
-	if(!hit) {
-		kernel_volume_clean_stack(kg, state->volume_stack);
-	}
-
-	if(state->volume_stack[0].shader == SHADER_NONE) {
-		return VOLUME_PATH_ATTENUATED;
-	}
-
-	/* volume attenuation, emission, scatter */
-	Ray volume_ray = *ray;
-	volume_ray.t = (hit)? isect->t: FLT_MAX;
-
-	bool heterogeneous = volume_stack_is_heterogeneous(kg, state->volume_stack);
-
-#  ifdef __VOLUME_DECOUPLED__
-	int sampling_method = volume_stack_sampling_method(kg, state->volume_stack);
-	bool direct = (state->flag & PATH_RAY_CAMERA) != 0;
-	bool decoupled = kernel_volume_use_decoupled(kg, heterogeneous, direct, sampling_method);
-
-	if(decoupled) {
-		/* cache steps along volume for repeated sampling */
-		VolumeSegment volume_segment;
-
-		shader_setup_from_volume(kg, sd, &volume_ray);
-		kernel_volume_decoupled_record(kg, state,
-			&volume_ray, sd, &volume_segment, heterogeneous);
-
-		volume_segment.sampling_method = sampling_method;
-
-		/* emission */
-		if(volume_segment.closure_flag & SD_EMISSION)
-			path_radiance_accum_emission(L, state, *throughput, volume_segment.accum_emission);
-
-		/* scattering */
-		VolumeIntegrateResult result = VOLUME_PATH_ATTENUATED;
-
-		if(volume_segment.closure_flag & SD_SCATTER) {
-			int all = kernel_data.integrator.sample_all_lights_indirect;
-
-			/* direct light sampling */
-			kernel_branched_path_volume_connect_light(kg, sd,
-				emission_sd, *throughput, state, L, all,
-				&volume_ray, &volume_segment);
-
-			/* indirect sample. if we use distance sampling and take just
-			 * one sample for direct and indirect light, we could share
-			 * this computation, but makes code a bit complex */
-			float rphase = path_state_rng_1D(kg, state, PRNG_PHASE_CHANNEL);
-			float rscatter = path_state_rng_1D(kg, state, PRNG_SCATTER_DISTANCE);
-
-			result = kernel_volume_decoupled_scatter(kg,
-				state, &volume_ray, sd, throughput,
-				rphase, rscatter, &volume_segment, NULL, true);
-		}
-
-		/* free cached steps */
-		kernel_volume_decoupled_free(kg, &volume_segment);
-
-		if(result == VOLUME_PATH_SCATTERED) {
-			if(kernel_path_volume_bounce(kg, sd, throughput, state, &L->state, ray))
-				return VOLUME_PATH_SCATTERED;
-			else
-				return VOLUME_PATH_MISSED;
-		}
-		else {
-			*throughput *= volume_segment.accum_transmittance;
-		}
-	}
-	else
-#  endif  /* __VOLUME_DECOUPLED__ */
-	{
-		/* integrate along volume segment with distance sampling */
-		VolumeIntegrateResult result = kernel_volume_integrate(
-			kg, state, sd, &volume_ray, L, throughput, heterogeneous);
-
-#  ifdef __VOLUME_SCATTER__
-		if(result == VOLUME_PATH_SCATTERED) {
-			/* direct lighting */
-			kernel_path_volume_connect_light(kg, sd, emission_sd, *throughput, state, L);
-
-			/* indirect light bounce */
-			if(kernel_path_volume_bounce(kg, sd, throughput, state, &L->state, ray))
-				return VOLUME_PATH_SCATTERED;
-			else
-				return VOLUME_PATH_MISSED;
-		}
-#  endif  /* __VOLUME_SCATTER__ */
-	}
-
-	return VOLUME_PATH_ATTENUATED;
+  PROFILING_INIT(kg, PROFILING_VOLUME);
+
+  /* Sanitize volume stack. */
+  if (!hit) {
+    kernel_volume_clean_stack(kg, state->volume_stack);
+  }
+
+  if (state->volume_stack[0].shader == SHADER_NONE) {
+    return VOLUME_PATH_ATTENUATED;
+  }
+
+  /* volume attenuation, emission, scatter */
+  Ray volume_ray = *ray;
+  volume_ray.t = (hit) ? isect->t : FLT_MAX;
+
+  bool heterogeneous = volume_stack_is_heterogeneous(kg, state->volume_stack);
+
+#    ifdef __VOLUME_DECOUPLED__
+  int sampling_method = volume_stack_sampling_method(kg, state->volume_stack);
+  bool direct = (state->flag & PATH_RAY_CAMERA) != 0;
+  bool decoupled = kernel_volume_use_decoupled(kg, heterogeneous, direct, sampling_method);
+
+  if (decoupled) {
+    /* cache steps along volume for repeated sampling */
+    VolumeSegment volume_segment;
+
+    shader_setup_from_volume(kg, sd, &volume_ray);
+    kernel_volume_decoupled_record(kg, state, &volume_ray, sd, &volume_segment, heterogeneous);
+
+    volume_segment.sampling_method = sampling_method;
+
+    /* emission */
+    if (volume_segment.closure_flag & SD_EMISSION)
+      path_radiance_accum_emission(L, state, *throughput, volume_segment.accum_emission);
+
+    /* scattering */
+    VolumeIntegrateResult result = VOLUME_PATH_ATTENUATED;
+
+    if (volume_segment.closure_flag & SD_SCATTER) {
+      int all = kernel_data.integrator.sample_all_lights_indirect;
+
+      /* direct light sampling */
+      kernel_branched_path_volume_connect_light(
+          kg, sd, emission_sd, *throughput, state, L, all, &volume_ray, &volume_segment);
+
+      /* indirect sample. if we use distance sampling and take just
+       * one sample for direct and indirect light, we could share
+       * this computation, but makes code a bit complex */
+      float rphase = path_state_rng_1D(kg, state, PRNG_PHASE_CHANNEL);
+      float rscatter = path_state_rng_1D(kg, state, PRNG_SCATTER_DISTANCE);
+
+      result = kernel_volume_decoupled_scatter(
+          kg, state, &volume_ray, sd, throughput, rphase, rscatter, &volume_segment, NULL, true);
+    }
+
+    /* free cached steps */
+    kernel_volume_decoupled_free(kg, &volume_segment);
+
+    if (result == VOLUME_PATH_SCATTERED) {
+      if (kernel_path_volume_bounce(kg, sd, throughput, state, &L->state, ray))
+        return VOLUME_PATH_SCATTERED;
+      else
+        return VOLUME_PATH_MISSED;
+    }
+    else {
+      *throughput *= volume_segment.accum_transmittance;
+    }
+  }
+  else
+#    endif /* __VOLUME_DECOUPLED__ */
+  {
+    /* integrate along volume segment with distance sampling */
+    VolumeIntegrateResult result = kernel_volume_integrate(
+        kg, state, sd, &volume_ray, L, throughput, heterogeneous);
+
+#    ifdef __VOLUME_SCATTER__
+    if (result == VOLUME_PATH_SCATTERED) {
+      /* direct lighting */
+      kernel_path_volume_connect_light(kg, sd, emission_sd, *throughput, state, L);
+
+      /* indirect light bounce */
+      if (kernel_path_volume_bounce(kg, sd, throughput, state, &L->state, ray))
+        return VOLUME_PATH_SCATTERED;
+      else
+        return VOLUME_PATH_MISSED;
+    }
+#    endif /* __VOLUME_SCATTER__ */
+  }
+
+  return VOLUME_PATH_ATTENUATED;
 }
-#endif  /* __VOLUME__ */
-
-#endif  /* __SPLIT_KERNEL__ */
-
-ccl_device_forceinline bool kernel_path_shader_apply(
-	KernelGlobals *kg,
-	ShaderData *sd,
-	ccl_addr_space PathState *state,
-	ccl_addr_space Ray *ray,
-	float3 throughput,
-	ShaderData *emission_sd,
-	PathRadiance *L,
-	ccl_global float *buffer)
+#  endif /* __VOLUME__ */
+
+#endif /* __SPLIT_KERNEL__ */
+
+ccl_device_forceinline bool kernel_path_shader_apply(KernelGlobals *kg,
+                                                     ShaderData *sd,
+                                                     ccl_addr_space PathState *state,
+                                                     ccl_addr_space Ray *ray,
+                                                     float3 throughput,
+                                                     ShaderData *emission_sd,
+                                                     PathRadiance *L,
+                                                     ccl_global float *buffer)
 {
-	PROFILING_INIT(kg, PROFILING_SHADER_APPLY);
+  PROFILING_INIT(kg, PROFILING_SHADER_APPLY);
 
 #ifdef __SHADOW_TRICKS__
-	if((sd->object_flag & SD_OBJECT_SHADOW_CATCHER)) {
-		if(state->flag & PATH_RAY_TRANSPARENT_BACKGROUND) {
-			state->flag |= (PATH_RAY_SHADOW_CATCHER |
-						   PATH_RAY_STORE_SHADOW_INFO);
-
-			float3 bg = make_float3(0.0f, 0.0f, 0.0f);
-			if(!kernel_data.background.transparent) {
-				bg = indirect_background(kg, emission_sd, state, ray);
-			}
-			path_radiance_accum_shadowcatcher(L, throughput, bg);
-		}
-	}
-	else if(state->flag & PATH_RAY_SHADOW_CATCHER) {
-		/* Only update transparency after shadow catcher bounce. */
-		L->shadow_transparency *=
-				average(shader_bsdf_transparency(kg, sd));
-	}
-#endif  /* __SHADOW_TRICKS__ */
-
-	/* holdout */
+  if ((sd->object_flag & SD_OBJECT_SHADOW_CATCHER)) {
+    if (state->flag & PATH_RAY_TRANSPARENT_BACKGROUND) {
+      state->flag |= (PATH_RAY_SHADOW_CATCHER | PATH_RAY_STORE_SHADOW_INFO);
+
+      float3 bg = make_float3(0.0f, 0.0f, 0.0f);
+      if (!kernel_data.background.transparent) {
+        bg = indirect_background(kg, emission_sd, state, ray);
+      }
+      path_radiance_accum_shadowcatcher(L, throughput, bg);
+    }
+  }
+  else if (state->flag & PATH_RAY_SHADOW_CATCHER) {
+    /* Only update transparency after shadow catcher bounce. */
+    L->shadow_transparency *= average(shader_bsdf_transparency(kg, sd));
+  }
+#endif /* __SHADOW_TRICKS__ */
+
+  /* holdout */
 #ifdef __HOLDOUT__
-	if(((sd->flag & SD_HOLDOUT) ||
-		(sd->object_flag & SD_OBJECT_HOLDOUT_MASK)) &&
-	   (state->flag & PATH_RAY_TRANSPARENT_BACKGROUND))
-	{
-		if(kernel_data.background.transparent) {
-			float3 holdout_weight;
-			if(sd->object_flag & SD_OBJECT_HOLDOUT_MASK) {
-				holdout_weight = make_float3(1.0f, 1.0f, 1.0f);
-			}
-			else {
-				holdout_weight = shader_holdout_eval(kg, sd);
-			}
-			/* any throughput is ok, should all be identical here */
-			L->transparent += average(holdout_weight*throughput);
-		}
-
-		if(sd->object_flag & SD_OBJECT_HOLDOUT_MASK) {
-			return false;
-		}
-	}
-#endif  /* __HOLDOUT__ */
-
-	/* holdout mask objects do not write data passes */
-	kernel_write_data_passes(kg, buffer, L, sd, state, throughput);
-
-	/* blurring of bsdf after bounces, for rays that have a small likelihood
-	 * of following this particular path (diffuse, rough glossy) */
-	if(kernel_data.integrator.filter_glossy != FLT_MAX) {
-		float blur_pdf = kernel_data.integrator.filter_glossy*state->min_ray_pdf;
-
-		if(blur_pdf < 1.0f) {
-			float blur_roughness = sqrtf(1.0f - blur_pdf)*0.5f;
-			shader_bsdf_blur(kg, sd, blur_roughness);
-		}
-	}
+  if (((sd->flag & SD_HOLDOUT) || (sd->object_flag & SD_OBJECT_HOLDOUT_MASK)) &&
+      (state->flag & PATH_RAY_TRANSPARENT_BACKGROUND)) {
+    if (kernel_data.background.transparent) {
+      float3 holdout_weight;
+      if (sd->object_flag & SD_OBJECT_HOLDOUT_MASK) {
+        holdout_weight = make_float3(1.0f, 1.0f, 1.0f);
+      }
+      else {
+        holdout_weight = shader_holdout_eval(kg, sd);
+      }
+      /* any throughput is ok, should all be identical here */
+      L->transparent += average(holdout_weight * throughput);
+    }
+
+    if (sd->object_flag & SD_OBJECT_HOLDOUT_MASK) {
+      return false;
+    }
+  }
+#endif /* __HOLDOUT__ */
+
+  /* holdout mask objects do not write data passes */
+  kernel_write_data_passes(kg, buffer, L, sd, state, throughput);
+
+  /* blurring of bsdf after bounces, for rays that have a small likelihood
+   * of following this particular path (diffuse, rough glossy) */
+  if (kernel_data.integrator.filter_glossy != FLT_MAX) {
+    float blur_pdf = kernel_data.integrator.filter_glossy * state->min_ray_pdf;
+
+    if (blur_pdf < 1.0f) {
+      float blur_roughness = sqrtf(1.0f - blur_pdf) * 0.5f;
+      shader_bsdf_blur(kg, sd, blur_roughness);
+    }
+  }
 
 #ifdef __EMISSION__
-	/* emission */
-	if(sd->flag & SD_EMISSION) {
-		float3 emission = indirect_primitive_emission(kg, sd, sd->ray_length, state->flag, state->ray_pdf);
-		path_radiance_accum_emission(L, state, throughput, emission);
-	}
-#endif  /* __EMISSION__ */
-
-	return true;
+  /* emission */
+  if (sd->flag & SD_EMISSION) {
+    float3 emission = indirect_primitive_emission(
+        kg, sd, sd->ray_length, state->flag, state->ray_pdf);
+    path_radiance_accum_emission(L, state, throughput, emission);
+  }
+#endif /* __EMISSION__ */
+
+  return true;
 }
 
 ccl_device_noinline void kernel_path_ao(KernelGlobals *kg,
@@ -363,44 +352,44 @@ ccl_device_noinline void kernel_path_ao(KernelGlobals *kg,
                                         float3 throughput,
                                         float3 ao_alpha)
 {
-	PROFILING_INIT(kg, PROFILING_AO);
-
-	/* todo: solve correlation */
-	float bsdf_u, bsdf_v;
-
-	path_state_rng_2D(kg, state, PRNG_BSDF_U, &bsdf_u, &bsdf_v);
-
-	float ao_factor = kernel_data.background.ao_factor;
-	float3 ao_N;
-	float3 ao_bsdf = shader_bsdf_ao(kg, sd, ao_factor, &ao_N);
-	float3 ao_D;
-	float ao_pdf;
-
-	sample_cos_hemisphere(ao_N, bsdf_u, bsdf_v, &ao_D, &ao_pdf);
-
-	if(dot(sd->Ng, ao_D) > 0.0f && ao_pdf != 0.0f) {
-		Ray light_ray;
-		float3 ao_shadow;
-
-		light_ray.P = ray_offset(sd->P, sd->Ng);
-		light_ray.D = ao_D;
-		light_ray.t = kernel_data.background.ao_distance;
-		light_ray.time = sd->time;
-		light_ray.dP = sd->dP;
-		light_ray.dD = differential3_zero();
-
-		if(!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &ao_shadow)) {
-			path_radiance_accum_ao(L, state, throughput, ao_alpha, ao_bsdf, ao_shadow);
-		}
-		else {
-			path_radiance_accum_total_ao(L, state, throughput, ao_bsdf);
-		}
-	}
+  PROFILING_INIT(kg, PROFILING_AO);
+
+  /* todo: solve correlation */
+  float bsdf_u, bsdf_v;
+
+  path_state_rng_2D(kg, state, PRNG_BSDF_U, &bsdf_u, &bsdf_v);
+
+  float ao_factor = kernel_data.background.ao_factor;
+  float3 ao_N;
+  float3 ao_bsdf = shader_bsdf_ao(kg, sd, ao_factor, &ao_N);
+  float3 ao_D;
+  float ao_pdf;
+
+  sample_cos_hemisphere(ao_N, bsdf_u, bsdf_v, &ao_D, &ao_pdf);
+
+  if (dot(sd->Ng, ao_D) > 0.0f && ao_pdf != 0.0f) {
+    Ray light_ray;
+    float3 ao_shadow;
+
+    light_ray.P = ray_offset(sd->P, sd->Ng);
+    light_ray.D = ao_D;
+    light_ray.t = kernel_data.background.ao_distance;
+    light_ray.time = sd->time;
+    light_ray.dP = sd->dP;
+    light_ray.dD = differential3_zero();
+
+    if (!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &ao_shadow)) {
+      path_radiance_accum_ao(L, state, throughput, ao_alpha, ao_bsdf, ao_shadow);
+    }
+    else {
+      path_radiance_accum_total_ao(L, state, throughput, ao_bsdf);
+    }
+  }
 }
 
 #ifndef __SPLIT_KERNEL__
 
-#if defined(__BRANCHED_PATH__) || defined(__BAKING__)
+#  if defined(__BRANCHED_PATH__) || defined(__BAKING__)
 
 ccl_device void kernel_path_indirect(KernelGlobals *kg,
                                      ShaderData *sd,
@@ -410,369 +399,300 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
                                      PathState *state,
                                      PathRadiance *L)
 {
-#ifdef __SUBSURFACE__
-	SubsurfaceIndirectRays ss_indirect;
-	kernel_path_subsurface_init_indirect(&ss_indirect);
-
-	for(;;) {
-#endif  /* __SUBSURFACE__ */
-
-	/* path iteration */
-	for(;;) {
-		/* Find intersection with objects in scene. */
-		Intersection isect;
-		bool hit = kernel_path_scene_intersect(kg, state, ray, &isect, L);
-
-		/* Find intersection with lamps and compute emission for MIS. */
-		kernel_path_lamp_emission(kg, state, ray, throughput, &isect, sd, L);
-
-#ifdef __VOLUME__
-		/* Volume integration. */
-		VolumeIntegrateResult result = kernel_path_volume(kg,
-		                                                   sd,
-		                                                   state,
-		                                                   ray,
-		                                                   &throughput,
-		                                                   &isect,
-		                                                   hit,
-		                                                   emission_sd,
-		                                                   L);
-
-		if(result == VOLUME_PATH_SCATTERED) {
-			continue;
-		}
-		else if(result == VOLUME_PATH_MISSED) {
-			break;
-		}
-#endif  /* __VOLUME__*/
-
-		/* Shade background. */
-		if(!hit) {
-			kernel_path_background(kg, state, ray, throughput, sd, L);
-			break;
-		}
-		else if(path_state_ao_bounce(kg, state)) {
-			break;
-		}
-
-		/* Setup shader data. */
-		shader_setup_from_ray(kg, sd, &isect, ray);
-
-		/* Skip most work for volume bounding surface. */
-#ifdef __VOLUME__
-		if(!(sd->flag & SD_HAS_ONLY_VOLUME)) {
-#endif
-
-		/* Evaluate shader. */
-		shader_eval_surface(kg, sd, state, state->flag);
-		shader_prepare_closures(sd, state);
-
-		/* Apply shadow catcher, holdout, emission. */
-		if(!kernel_path_shader_apply(kg,
-		                             sd,
-		                             state,
-		                             ray,
-		                             throughput,
-		                             emission_sd,
-		                             L,
-		                             NULL))
-		{
-			break;
-		}
-
-		/* path termination. this is a strange place to put the termination, it's
-		 * mainly due to the mixed in MIS that we use. gives too many unneeded
-		 * shader evaluations, only need emission if we are going to terminate */
-		float probability = path_state_continuation_probability(kg, state, throughput);
-
-		if(probability == 0.0f) {
-			break;
-		}
-		else if(probability != 1.0f) {
-			float terminate = path_state_rng_1D(kg, state, PRNG_TERMINATE);
-
-			if(terminate >= probability)
-				break;
-
-			throughput /= probability;
-		}
-
-		kernel_update_denoising_features(kg, sd, state, L);
-
-#ifdef __AO__
-		/* ambient occlusion */
-		if(kernel_data.integrator.use_ambient_occlusion) {
-			kernel_path_ao(kg, sd, emission_sd, L, state, throughput, make_float3(0.0f, 0.0f, 0.0f));
-		}
-#endif  /* __AO__ */
-
-
-#ifdef __SUBSURFACE__
-		/* bssrdf scatter to a different location on the same object, replacing
-		 * the closures with a diffuse BSDF */
-		if(sd->flag & SD_BSSRDF) {
-			if(kernel_path_subsurface_scatter(kg,
-			                                  sd,
-			                                  emission_sd,
-			                                  L,
-			                                  state,
-			                                  ray,
-			                                  &throughput,
-			                                  &ss_indirect))
-			{
-				break;
-			}
-		}
-#endif  /* __SUBSURFACE__ */
-
-#if defined(__EMISSION__)
-		if(kernel_data.integrator.use_direct_light) {
-			int all = (kernel_data.integrator.sample_all_lights_indirect) ||
-			          (state->flag & PATH_RAY_SHADOW_CATCHER);
-			kernel_branched_path_surface_connect_light(kg,
-			                                           sd,
-			                                           emission_sd,
-			                                           state,
-			                                           throughput,
-			                                           1.0f,
-			                                           L,
-			                                           all);
-		}
-#endif  /* defined(__EMISSION__) */
-
-#ifdef __VOLUME__
-		}
-#endif
-
-		if(!kernel_path_surface_bounce(kg, sd, &throughput, state, &L->state, ray))
-			break;
-	}
-
-#ifdef __SUBSURFACE__
-		/* Trace indirect subsurface rays by restarting the loop. this uses less
-		 * stack memory than invoking kernel_path_indirect.
-		 */
-		if(ss_indirect.num_rays) {
-			kernel_path_subsurface_setup_indirect(kg,
-			                                      &ss_indirect,
-			                                      state,
-			                                      ray,
-			                                      L,
-			                                      &throughput);
-		}
-		else {
-			break;
-		}
-	}
-#endif  /* __SUBSURFACE__ */
+#    ifdef __SUBSURFACE__
+  SubsurfaceIndirectRays ss_indirect;
+  kernel_path_subsurface_init_indirect(&ss_indirect);
+
+  for (;;) {
+#    endif /* __SUBSURFACE__ */
+
+    /* path iteration */
+    for (;;) {
+      /* Find intersection with objects in scene. */
+      Intersection isect;
+      bool hit = kernel_path_scene_intersect(kg, state, ray, &isect, L);
+
+      /* Find intersection with lamps and compute emission for MIS. */
+      kernel_path_lamp_emission(kg, state, ray, throughput, &isect, sd, L);
+
+#    ifdef __VOLUME__
+      /* Volume integration. */
+      VolumeIntegrateResult result = kernel_path_volume(
+          kg, sd, state, ray, &throughput, &isect, hit, emission_sd, L);
+
+      if (result == VOLUME_PATH_SCATTERED) {
+        continue;
+      }
+      else if (result == VOLUME_PATH_MISSED) {
+        break;
+      }
+#    endif /* __VOLUME__*/
+
+      /* Shade background. */
+      if (!hit) {
+        kernel_path_background(kg, state, ray, throughput, sd, L);
+        break;
+      }
+      else if (path_state_ao_bounce(kg, state)) {
+        break;
+      }
+
+      /* Setup shader data. */
+      shader_setup_from_ray(kg, sd, &isect, ray);
+
+      /* Skip most work for volume bounding surface. */
+#    ifdef __VOLUME__
+      if (!(sd->flag & SD_HAS_ONLY_VOLUME)) {
+#    endif
+
+        /* Evaluate shader. */
+        shader_eval_surface(kg, sd, state, state->flag);
+        shader_prepare_closures(sd, state);
+
+        /* Apply shadow catcher, holdout, emission. */
+        if (!kernel_path_shader_apply(kg, sd, state, ray, throughput, emission_sd, L, NULL)) {
+          break;
+        }
+
+        /* path termination. this is a strange place to put the termination, it's
+     * mainly due to the mixed in MIS that we use. gives too many unneeded
+     * shader evaluations, only need emission if we are going to terminate */
+        float probability = path_state_continuation_probability(kg, state, throughput);
+
+        if (probability == 0.0f) {
+          break;
+        }
+        else if (probability != 1.0f) {
+          float terminate = path_state_rng_1D(kg, state, PRNG_TERMINATE);
+
+          if (terminate >= probability)
+            break;
+
+          throughput /= probability;
+        }
+
+        kernel_update_denoising_features(kg, sd, state, L);
+
+#    ifdef __AO__
+        /* ambient occlusion */
+        if (kernel_data.integrator.use_ambient_occlusion) {
+          kernel_path_ao(kg, sd, emission_sd, L, state, throughput, make_float3(0.0f, 0.0f, 0.0f));
+        }
+#    endif /* __AO__ */
+
+#    ifdef __SUBSURFACE__
+        /* bssrdf scatter to a different location on the same object, replacing
+     * the closures with a diffuse BSDF */
+        if (sd->flag & SD_BSSRDF) {
+          if (kernel_path_subsurface_scatter(
+                  kg, sd, emission_sd, L, state, ray, &throughput, &ss_indirect)) {
+            break;
+          }
+        }
+#    endif /* __SUBSURFACE__ */
+
+#    if defined(__EMISSION__)
+        if (kernel_data.integrator.use_direct_light) {
+          int all = (kernel_data.integrator.sample_all_lights_indirect) ||
+                    (state->flag & PATH_RAY_SHADOW_CATCHER);
+          kernel_branched_path_surface_connect_light(
+              kg, sd, emission_sd, state, throughput, 1.0f, L, all);
+        }
+#    endif /* defined(__EMISSION__) */
+
+#    ifdef __VOLUME__
+      }
+#    endif
+
+      if (!kernel_path_surface_bounce(kg, sd, &throughput, state, &L->state, ray))
+        break;
+    }
+
+#    ifdef __SUBSURFACE__
+    /* Trace indirect subsurface rays by restarting the loop. this uses less
+     * stack memory than invoking kernel_path_indirect.
+     */
+    if (ss_indirect.num_rays) {
+      kernel_path_subsurface_setup_indirect(kg, &ss_indirect, state, ray, L, &throughput);
+    }
+    else {
+      break;
+    }
+  }
+#    endif /* __SUBSURFACE__ */
 }
 
-#endif  /* defined(__BRANCHED_PATH__) || defined(__BAKING__) */
+#  endif /* defined(__BRANCHED_PATH__) || defined(__BAKING__) */
 
-ccl_device_forceinline void kernel_path_integrate(
-	KernelGlobals *kg,
-	PathState *state,
-	float3 throughput,
-	Ray *ray,
-	PathRadiance *L,
-	ccl_global float *buffer,
-	ShaderData *emission_sd)
+ccl_device_forceinline void kernel_path_integrate(KernelGlobals *kg,
+                                                  PathState *state,
+                                                  float3 throughput,
+                                                  Ray *ray,
+                                                  PathRadiance *L,
+                                                  ccl_global float *buffer,
+                                                  ShaderData *emission_sd)
 {
-	PROFILING_INIT(kg, PROFILING_PATH_INTEGRATE);
-
-	/* Shader data memory used for both volumes and surfaces, saves stack space. */
-	ShaderData sd;
-
-#ifdef __SUBSURFACE__
-	SubsurfaceIndirectRays ss_indirect;
-	kernel_path_subsurface_init_indirect(&ss_indirect);
-
-	for(;;) {
-#endif  /* __SUBSURFACE__ */
-
-	/* path iteration */
-	for(;;) {
-		/* Find intersection with objects in scene. */
-		Intersection isect;
-		bool hit = kernel_path_scene_intersect(kg, state, ray, &isect, L);
-
-		/* Find intersection with lamps and compute emission for MIS. */
-		kernel_path_lamp_emission(kg, state, ray, throughput, &isect, &sd, L);
-
-#ifdef __VOLUME__
-		/* Volume integration. */
-		VolumeIntegrateResult result = kernel_path_volume(kg,
-		                                                   &sd,
-		                                                   state,
-		                                                   ray,
-		                                                   &throughput,
-		                                                   &isect,
-		                                                   hit,
-		                                                   emission_sd,
-		                                                   L);
-
-		if(result == VOLUME_PATH_SCATTERED) {
-			continue;
-		}
-		else if(result == VOLUME_PATH_MISSED) {
-			break;
-		}
-#endif  /* __VOLUME__*/
-
-		/* Shade background. */
-		if(!hit) {
-			kernel_path_background(kg, state, ray, throughput, &sd, L);
-			break;
-		}
-		else if(path_state_ao_bounce(kg, state)) {
-			break;
-		}
-
-		/* Setup shader data. */
-		shader_setup_from_ray(kg, &sd, &isect, ray);
-
-		/* Skip most work for volume bounding surface. */
-#ifdef __VOLUME__
-		if(!(sd.flag & SD_HAS_ONLY_VOLUME)) {
-#endif
-
-		/* Evaluate shader. */
-		shader_eval_surface(kg, &sd, state, state->flag);
-		shader_prepare_closures(&sd, state);
-
-		/* Apply shadow catcher, holdout, emission. */
-		if(!kernel_path_shader_apply(kg,
-		                             &sd,
-		                             state,
-		                             ray,
-		                             throughput,
-		                             emission_sd,
-		                             L,
-		                             buffer))
-		{
-			break;
-		}
-
-		/* path termination. this is a strange place to put the termination, it's
-		 * mainly due to the mixed in MIS that we use. gives too many unneeded
-		 * shader evaluations, only need emission if we are going to terminate */
-		float probability = path_state_continuation_probability(kg, state, throughput);
-
-		if(probability == 0.0f) {
-			break;
-		}
-		else if(probability != 1.0f) {
-			float terminate = path_state_rng_1D(kg, state, PRNG_TERMINATE);
-			if(terminate >= probability)
-				break;
-
-			throughput /= probability;
-		}
-
-		kernel_update_denoising_features(kg, &sd, state, L);
-
-#ifdef __AO__
-		/* ambient occlusion */
-		if(kernel_data.integrator.use_ambient_occlusion) {
-			kernel_path_ao(kg, &sd, emission_sd, L, state, throughput, shader_bsdf_alpha(kg, &sd));
-		}
-#endif  /* __AO__ */
-
-#ifdef __SUBSURFACE__
-		/* bssrdf scatter to a different location on the same object, replacing
-		 * the closures with a diffuse BSDF */
-		if(sd.flag & SD_BSSRDF) {
-			if(kernel_path_subsurface_scatter(kg,
-			                                  &sd,
-			                                  emission_sd,
-			                                  L,
-			                                  state,
-			                                  ray,
-			                                  &throughput,
-			                                  &ss_indirect))
-			{
-				break;
-			}
-		}
-#endif  /* __SUBSURFACE__ */
-
-		/* direct lighting */
-		kernel_path_surface_connect_light(kg, &sd, emission_sd, throughput, state, L);
-
-#ifdef __VOLUME__
-		}
-#endif
-
-		/* compute direct lighting and next bounce */
-		if(!kernel_path_surface_bounce(kg, &sd, &throughput, state, &L->state, ray))
-			break;
-	}
-
-#ifdef __SUBSURFACE__
-		/* Trace indirect subsurface rays by restarting the loop. this uses less
-		 * stack memory than invoking kernel_path_indirect.
-		 */
-		if(ss_indirect.num_rays) {
-			kernel_path_subsurface_setup_indirect(kg,
-			                                      &ss_indirect,
-			                                      state,
-			                                      ray,
-			                                      L,
-			                                      &throughput);
-		}
-		else {
-			break;
-		}
-	}
-#endif  /* __SUBSURFACE__ */
+  PROFILING_INIT(kg, PROFILING_PATH_INTEGRATE);
+
+  /* Shader data memory used for both volumes and surfaces, saves stack space. */
+  ShaderData sd;
+
+#  ifdef __SUBSURFACE__
+  SubsurfaceIndirectRays ss_indirect;
+  kernel_path_subsurface_init_indirect(&ss_indirect);
+
+  for (;;) {
+#  endif /* __SUBSURFACE__ */
+
+    /* path iteration */
+    for (;;) {
+      /* Find intersection with objects in scene. */
+      Intersection isect;
+      bool hit = kernel_path_scene_intersect(kg, state, ray, &isect, L);
+
+      /* Find intersection with lamps and compute emission for MIS. */
+      kernel_path_lamp_emission(kg, state, ray, throughput, &isect, &sd, L);
+
+#  ifdef __VOLUME__
+      /* Volume integration. */
+      VolumeIntegrateResult result = kernel_path_volume(
+          kg, &sd, state, ray, &throughput, &isect, hit, emission_sd, L);
+
+      if (result == VOLUME_PATH_SCATTERED) {
+        continue;
+      }
+      else if (result == VOLUME_PATH_MISSED) {
+        break;
+      }
+#  endif /* __VOLUME__*/
+
+      /* Shade background. */
+      if (!hit) {
+        kernel_path_background(kg, state, ray, throughput, &sd, L);
+        break;
+      }
+      else if (path_state_ao_bounce(kg, state)) {
+        break;
+      }
+
+      /* Setup shader data. */
+      shader_setup_from_ray(kg, &sd, &isect, ray);
+
+      /* Skip most work for volume bounding surface. */
+#  ifdef __VOLUME__
+      if (!(sd.flag & SD_HAS_ONLY_VOLUME)) {
+#  endif
+
+        /* Evaluate shader. */
+        shader_eval_surface(kg, &sd, state, state->flag);
+        shader_prepare_closures(&sd, state);
+
+        /* Apply shadow catcher, holdout, emission. */
+        if (!kernel_path_shader_apply(kg, &sd, state, ray, throughput, emission_sd, L, buffer)) {
+          break;
+        }
+
+        /* path termination. this is a strange place to put the termination, it's
+     * mainly due to the mixed in MIS that we use. gives too many unneeded
+     * shader evaluations, only need emission if we are going to terminate */
+        float probability = path_state_continuation_probability(kg, state, throughput);
+
+        if (probability == 0.0f) {
+          break;
+        }
+        else if (probability != 1.0f) {
+          float terminate = path_state_rng_1D(kg, state, PRNG_TERMINATE);
+          if (terminate >= probability)
+            break;
+
+          throughput /= probability;
+        }
+
+        kernel_update_denoising_features(kg, &sd, state, L);
+
+#  ifdef __AO__
+        /* ambient occlusion */
+        if (kernel_data.integrator.use_ambient_occlusion) {
+          kernel_path_ao(kg, &sd, emission_sd, L, state, throughput, shader_bsdf_alpha(kg, &sd));
+        }
+#  endif /* __AO__ */
+
+#  ifdef __SUBSURFACE__
+        /* bssrdf scatter to a different location on the same object, replacing
+     * the closures with a diffuse BSDF */
+        if (sd.flag & SD_BSSRDF) {
+          if (kernel_path_subsurface_scatter(
+                  kg, &sd, emission_sd, L, state, ray, &throughput, &ss_indirect)) {
+            break;
+          }
+        }
+#  endif /* __SUBSURFACE__ */
+
+        /* direct lighting */
+        kernel_path_surface_connect_light(kg, &sd, emission_sd, throughput, state, L);
+
+#  ifdef __VOLUME__
+      }
+#  endif
+
+      /* compute direct lighting and next bounce */
+      if (!kernel_path_surface_bounce(kg, &sd, &throughput, state, &L->state, ray))
+        break;
+    }
+
+#  ifdef __SUBSURFACE__
+    /* Trace indirect subsurface rays by restarting the loop. this uses less
+     * stack memory than invoking kernel_path_indirect.
+     */
+    if (ss_indirect.num_rays) {
+      kernel_path_subsurface_setup_indirect(kg, &ss_indirect, state, ray, L, &throughput);
+    }
+    else {
+      break;
+    }
+  }
+#  endif /* __SUBSURFACE__ */
 }
 
-ccl_device void kernel_path_trace(KernelGlobals *kg,
-	ccl_global float *buffer,
-	int sample, int x, int y, int offset, int stride)
+ccl_device void kernel_path_trace(
+    KernelGlobals *kg, ccl_global float *buffer, int sample, int x, int y, int offset, int stride)
 {
-	PROFILING_INIT(kg, PROFILING_RAY_SETUP);
+  PROFILING_INIT(kg, PROFILING_RAY_SETUP);
 
-	/* buffer offset */
-	int index = offset + x + y*stride;
-	int pass_stride = kernel_data.film.pass_stride;
+  /* buffer offset */
+  int index = offset + x + y * stride;
+  int pass_stride = kernel_data.film.pass_stride;
 
-	buffer += index*pass_stride;
+  buffer += index * pass_stride;
 
-	/* Initialize random numbers and sample ray. */
-	uint rng_hash;
-	Ray ray;
+  /* Initialize random numbers and sample ray. */
+  uint rng_hash;
+  Ray ray;
 
-	kernel_path_trace_setup(kg, sample, x, y, &rng_hash, &ray);
+  kernel_path_trace_setup(kg, sample, x, y, &rng_hash, &ray);
 
-	if(ray.t == 0.0f) {
-		return;
-	}
+  if (ray.t == 0.0f) {
+    return;
+  }
 
-	/* Initialize state. */
-	float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
+  /* Initialize state. */
+  float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
 
-	PathRadiance L;
-	path_radiance_init(&L, kernel_data.film.use_light_pass);
+  PathRadiance L;
+  path_radiance_init(&L, kernel_data.film.use_light_pass);
 
-	ShaderDataTinyStorage emission_sd_storage;
-	ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage);
+  ShaderDataTinyStorage emission_sd_storage;
+  ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage);
 
-	PathState state;
-	path_state_init(kg, emission_sd, &state, rng_hash, sample, &ray);
+  PathState state;
+  path_state_init(kg, emission_sd, &state, rng_hash, sample, &ray);
 
-	/* Integrate. */
-	kernel_path_integrate(kg,
-	                      &state,
-	                      throughput,
-	                      &ray,
-	                      &L,
-	                      buffer,
-	                      emission_sd);
+  /* Integrate. */
+  kernel_path_integrate(kg, &state, throughput, &ray, &L, buffer, emission_sd);
 
-	kernel_write_result(kg, buffer, sample, &L);
+  kernel_write_result(kg, buffer, sample, &L);
 }
 
-#endif  /* __SPLIT_KERNEL__ */
+#endif /* __SPLIT_KERNEL__ */
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_path_branched.h b/intern/cycles/kernel/kernel_path_branched.h
index 21da4d9308b..e8ce61024b3 100644
--- a/intern/cycles/kernel/kernel_path_branched.h
+++ b/intern/cycles/kernel/kernel_path_branched.h
@@ -25,297 +25,262 @@ ccl_device_inline void kernel_branched_path_ao(KernelGlobals *kg,
                                                ccl_addr_space PathState *state,
                                                float3 throughput)
 {
-	int num_samples = kernel_data.integrator.ao_samples;
-	float num_samples_inv = 1.0f/num_samples;
-	float ao_factor = kernel_data.background.ao_factor;
-	float3 ao_N;
-	float3 ao_bsdf = shader_bsdf_ao(kg, sd, ao_factor, &ao_N);
-	float3 ao_alpha = shader_bsdf_alpha(kg, sd);
-
-	for(int j = 0; j < num_samples; j++) {
-		float bsdf_u, bsdf_v;
-		path_branched_rng_2D(kg, state->rng_hash, state, j, num_samples, PRNG_BSDF_U, &bsdf_u, &bsdf_v);
-
-		float3 ao_D;
-		float ao_pdf;
-
-		sample_cos_hemisphere(ao_N, bsdf_u, bsdf_v, &ao_D, &ao_pdf);
-
-		if(dot(sd->Ng, ao_D) > 0.0f && ao_pdf != 0.0f) {
-			Ray light_ray;
-			float3 ao_shadow;
-
-			light_ray.P = ray_offset(sd->P, sd->Ng);
-			light_ray.D = ao_D;
-			light_ray.t = kernel_data.background.ao_distance;
-			light_ray.time = sd->time;
-			light_ray.dP = sd->dP;
-			light_ray.dD = differential3_zero();
-
-			if(!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &ao_shadow)) {
-				path_radiance_accum_ao(L, state, throughput*num_samples_inv, ao_alpha, ao_bsdf, ao_shadow);
-			}
-			else {
-				path_radiance_accum_total_ao(L, state, throughput*num_samples_inv, ao_bsdf);
-			}
-		}
-	}
+  int num_samples = kernel_data.integrator.ao_samples;
+  float num_samples_inv = 1.0f / num_samples;
+  float ao_factor = kernel_data.background.ao_factor;
+  float3 ao_N;
+  float3 ao_bsdf = shader_bsdf_ao(kg, sd, ao_factor, &ao_N);
+  float3 ao_alpha = shader_bsdf_alpha(kg, sd);
+
+  for (int j = 0; j < num_samples; j++) {
+    float bsdf_u, bsdf_v;
+    path_branched_rng_2D(
+        kg, state->rng_hash, state, j, num_samples, PRNG_BSDF_U, &bsdf_u, &bsdf_v);
+
+    float3 ao_D;
+    float ao_pdf;
+
+    sample_cos_hemisphere(ao_N, bsdf_u, bsdf_v, &ao_D, &ao_pdf);
+
+    if (dot(sd->Ng, ao_D) > 0.0f && ao_pdf != 0.0f) {
+      Ray light_ray;
+      float3 ao_shadow;
+
+      light_ray.P = ray_offset(sd->P, sd->Ng);
+      light_ray.D = ao_D;
+      light_ray.t = kernel_data.background.ao_distance;
+      light_ray.time = sd->time;
+      light_ray.dP = sd->dP;
+      light_ray.dD = differential3_zero();
+
+      if (!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &ao_shadow)) {
+        path_radiance_accum_ao(
+            L, state, throughput * num_samples_inv, ao_alpha, ao_bsdf, ao_shadow);
+      }
+      else {
+        path_radiance_accum_total_ao(L, state, throughput * num_samples_inv, ao_bsdf);
+      }
+    }
+  }
 }
 
-#ifndef __SPLIT_KERNEL__
-
-#ifdef __VOLUME__
-ccl_device_forceinline void kernel_branched_path_volume(
-	KernelGlobals *kg,
-	ShaderData *sd,
-	PathState *state,
-	Ray *ray,
-	float3 *throughput,
-	ccl_addr_space Intersection *isect,
-	bool hit,
-	ShaderData *indirect_sd,
-	ShaderData *emission_sd,
-	PathRadiance *L)
+#  ifndef __SPLIT_KERNEL__
+
+#    ifdef __VOLUME__
+ccl_device_forceinline void kernel_branched_path_volume(KernelGlobals *kg,
+                                                        ShaderData *sd,
+                                                        PathState *state,
+                                                        Ray *ray,
+                                                        float3 *throughput,
+                                                        ccl_addr_space Intersection *isect,
+                                                        bool hit,
+                                                        ShaderData *indirect_sd,
+                                                        ShaderData *emission_sd,
+                                                        PathRadiance *L)
 {
-	/* Sanitize volume stack. */
-	if(!hit) {
-		kernel_volume_clean_stack(kg, state->volume_stack);
-	}
-
-	if(state->volume_stack[0].shader == SHADER_NONE) {
-		return;
-	}
-
-	/* volume attenuation, emission, scatter */
-	Ray volume_ray = *ray;
-	volume_ray.t = (hit)? isect->t: FLT_MAX;
-
-	bool heterogeneous = volume_stack_is_heterogeneous(kg, state->volume_stack);
-
-#  ifdef __VOLUME_DECOUPLED__
-	/* decoupled ray marching only supported on CPU */
-	if(kernel_data.integrator.volume_decoupled) {
-		/* cache steps along volume for repeated sampling */
-		VolumeSegment volume_segment;
-
-		shader_setup_from_volume(kg, sd, &volume_ray);
-		kernel_volume_decoupled_record(kg, state,
-			&volume_ray, sd, &volume_segment, heterogeneous);
-
-		/* direct light sampling */
-		if(volume_segment.closure_flag & SD_SCATTER) {
-			volume_segment.sampling_method = volume_stack_sampling_method(kg, state->volume_stack);
-
-			int all = kernel_data.integrator.sample_all_lights_direct;
-
-			kernel_branched_path_volume_connect_light(kg, sd,
-				emission_sd, *throughput, state, L, all,
-				&volume_ray, &volume_segment);
-
-			/* indirect light sampling */
-			int num_samples = kernel_data.integrator.volume_samples;
-			float num_samples_inv = 1.0f/num_samples;
-
-			for(int j = 0; j < num_samples; j++) {
-				PathState ps = *state;
-				Ray pray = *ray;
-				float3 tp = *throughput;
-
-				/* branch RNG state */
-				path_state_branch(&ps, j, num_samples);
-
-				/* scatter sample. if we use distance sampling and take just one
-				 * sample for direct and indirect light, we could share this
-				 * computation, but makes code a bit complex */
-				float rphase = path_state_rng_1D(kg, &ps, PRNG_PHASE_CHANNEL);
-				float rscatter = path_state_rng_1D(kg, &ps, PRNG_SCATTER_DISTANCE);
-
-				VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg,
-					&ps, &pray, sd, &tp, rphase, rscatter, &volume_segment, NULL, false);
-
-				if(result == VOLUME_PATH_SCATTERED &&
-				   kernel_path_volume_bounce(kg,
-				                             sd,
-				                             &tp,
-				                             &ps,
-				                             &L->state,
-				                             &pray))
-				{
-					kernel_path_indirect(kg,
-					                     indirect_sd,
-					                     emission_sd,
-					                     &pray,
-					                     tp*num_samples_inv,
-					                     &ps,
-					                     L);
-
-					/* for render passes, sum and reset indirect light pass variables
-					 * for the next samples */
-					path_radiance_sum_indirect(L);
-					path_radiance_reset_indirect(L);
-				}
-			}
-		}
-
-		/* emission and transmittance */
-		if(volume_segment.closure_flag & SD_EMISSION)
-			path_radiance_accum_emission(L, state, *throughput, volume_segment.accum_emission);
-		*throughput *= volume_segment.accum_transmittance;
-
-		/* free cached steps */
-		kernel_volume_decoupled_free(kg, &volume_segment);
-	}
-	else
-#  endif  /* __VOLUME_DECOUPLED__ */
-	{
-		/* GPU: no decoupled ray marching, scatter probalistically */
-		int num_samples = kernel_data.integrator.volume_samples;
-		float num_samples_inv = 1.0f/num_samples;
-
-		/* todo: we should cache the shader evaluations from stepping
-		 * through the volume, for now we redo them multiple times */
-
-		for(int j = 0; j < num_samples; j++) {
-			PathState ps = *state;
-			Ray pray = *ray;
-			float3 tp = (*throughput) * num_samples_inv;
-
-			/* branch RNG state */
-			path_state_branch(&ps, j, num_samples);
-
-			VolumeIntegrateResult result = kernel_volume_integrate(
-				kg, &ps, sd, &volume_ray, L, &tp, heterogeneous);
-
-#  ifdef __VOLUME_SCATTER__
-			if(result == VOLUME_PATH_SCATTERED) {
-				/* todo: support equiangular, MIS and all light sampling.
-				 * alternatively get decoupled ray marching working on the GPU */
-				kernel_path_volume_connect_light(kg, sd, emission_sd, tp, state, L);
-
-				if(kernel_path_volume_bounce(kg,
-				                             sd,
-				                             &tp,
-				                             &ps,
-				                             &L->state,
-				                             &pray))
-				{
-					kernel_path_indirect(kg,
-					                     indirect_sd,
-					                     emission_sd,
-					                     &pray,
-					                     tp,
-					                     &ps,
-					                     L);
-
-					/* for render passes, sum and reset indirect light pass variables
-					 * for the next samples */
-					path_radiance_sum_indirect(L);
-					path_radiance_reset_indirect(L);
-				}
-			}
-# endif  /* __VOLUME_SCATTER__ */
-		}
-
-		/* todo: avoid this calculation using decoupled ray marching */
-		kernel_volume_shadow(kg, emission_sd, state, &volume_ray, throughput);
-	}
+  /* Sanitize volume stack. */
+  if (!hit) {
+    kernel_volume_clean_stack(kg, state->volume_stack);
+  }
+
+  if (state->volume_stack[0].shader == SHADER_NONE) {
+    return;
+  }
+
+  /* volume attenuation, emission, scatter */
+  Ray volume_ray = *ray;
+  volume_ray.t = (hit) ? isect->t : FLT_MAX;
+
+  bool heterogeneous = volume_stack_is_heterogeneous(kg, state->volume_stack);
+
+#      ifdef __VOLUME_DECOUPLED__
+  /* decoupled ray marching only supported on CPU */
+  if (kernel_data.integrator.volume_decoupled) {
+    /* cache steps along volume for repeated sampling */
+    VolumeSegment volume_segment;
+
+    shader_setup_from_volume(kg, sd, &volume_ray);
+    kernel_volume_decoupled_record(kg, state, &volume_ray, sd, &volume_segment, heterogeneous);
+
+    /* direct light sampling */
+    if (volume_segment.closure_flag & SD_SCATTER) {
+      volume_segment.sampling_method = volume_stack_sampling_method(kg, state->volume_stack);
+
+      int all = kernel_data.integrator.sample_all_lights_direct;
+
+      kernel_branched_path_volume_connect_light(
+          kg, sd, emission_sd, *throughput, state, L, all, &volume_ray, &volume_segment);
+
+      /* indirect light sampling */
+      int num_samples = kernel_data.integrator.volume_samples;
+      float num_samples_inv = 1.0f / num_samples;
+
+      for (int j = 0; j < num_samples; j++) {
+        PathState ps = *state;
+        Ray pray = *ray;
+        float3 tp = *throughput;
+
+        /* branch RNG state */
+        path_state_branch(&ps, j, num_samples);
+
+        /* scatter sample. if we use distance sampling and take just one
+         * sample for direct and indirect light, we could share this
+         * computation, but makes code a bit complex */
+        float rphase = path_state_rng_1D(kg, &ps, PRNG_PHASE_CHANNEL);
+        float rscatter = path_state_rng_1D(kg, &ps, PRNG_SCATTER_DISTANCE);
+
+        VolumeIntegrateResult result = kernel_volume_decoupled_scatter(
+            kg, &ps, &pray, sd, &tp, rphase, rscatter, &volume_segment, NULL, false);
+
+        if (result == VOLUME_PATH_SCATTERED &&
+            kernel_path_volume_bounce(kg, sd, &tp, &ps, &L->state, &pray)) {
+          kernel_path_indirect(kg, indirect_sd, emission_sd, &pray, tp * num_samples_inv, &ps, L);
+
+          /* for render passes, sum and reset indirect light pass variables
+           * for the next samples */
+          path_radiance_sum_indirect(L);
+          path_radiance_reset_indirect(L);
+        }
+      }
+    }
+
+    /* emission and transmittance */
+    if (volume_segment.closure_flag & SD_EMISSION)
+      path_radiance_accum_emission(L, state, *throughput, volume_segment.accum_emission);
+    *throughput *= volume_segment.accum_transmittance;
+
+    /* free cached steps */
+    kernel_volume_decoupled_free(kg, &volume_segment);
+  }
+  else
+#      endif /* __VOLUME_DECOUPLED__ */
+  {
+    /* GPU: no decoupled ray marching, scatter probalistically */
+    int num_samples = kernel_data.integrator.volume_samples;
+    float num_samples_inv = 1.0f / num_samples;
+
+    /* todo: we should cache the shader evaluations from stepping
+     * through the volume, for now we redo them multiple times */
+
+    for (int j = 0; j < num_samples; j++) {
+      PathState ps = *state;
+      Ray pray = *ray;
+      float3 tp = (*throughput) * num_samples_inv;
+
+      /* branch RNG state */
+      path_state_branch(&ps, j, num_samples);
+
+      VolumeIntegrateResult result = kernel_volume_integrate(
+          kg, &ps, sd, &volume_ray, L, &tp, heterogeneous);
+
+#      ifdef __VOLUME_SCATTER__
+      if (result == VOLUME_PATH_SCATTERED) {
+        /* todo: support equiangular, MIS and all light sampling.
+         * alternatively get decoupled ray marching working on the GPU */
+        kernel_path_volume_connect_light(kg, sd, emission_sd, tp, state, L);
+
+        if (kernel_path_volume_bounce(kg, sd, &tp, &ps, &L->state, &pray)) {
+          kernel_path_indirect(kg, indirect_sd, emission_sd, &pray, tp, &ps, L);
+
+          /* for render passes, sum and reset indirect light pass variables
+           * for the next samples */
+          path_radiance_sum_indirect(L);
+          path_radiance_reset_indirect(L);
+        }
+      }
+#      endif /* __VOLUME_SCATTER__ */
+    }
+
+    /* todo: avoid this calculation using decoupled ray marching */
+    kernel_volume_shadow(kg, emission_sd, state, &volume_ray, throughput);
+  }
 }
-#endif  /* __VOLUME__ */
+#    endif /* __VOLUME__ */
 
 /* bounce off surface and integrate indirect light */
 ccl_device_noinline void kernel_branched_path_surface_indirect_light(KernelGlobals *kg,
-	ShaderData *sd, ShaderData *indirect_sd, ShaderData *emission_sd,
-	float3 throughput, float num_samples_adjust, PathState *state, PathRadiance *L)
+                                                                     ShaderData *sd,
+                                                                     ShaderData *indirect_sd,
+                                                                     ShaderData *emission_sd,
+                                                                     float3 throughput,
+                                                                     float num_samples_adjust,
+                                                                     PathState *state,
+                                                                     PathRadiance *L)
 {
-	float sum_sample_weight = 0.0f;
-#ifdef __DENOISING_FEATURES__
-	if(state->denoising_feature_weight > 0.0f) {
-		for(int i = 0; i < sd->num_closure; i++) {
-			const ShaderClosure *sc = &sd->closure[i];
-
-			/* transparency is not handled here, but in outer loop */
-			if(!CLOSURE_IS_BSDF(sc->type) || CLOSURE_IS_BSDF_TRANSPARENT(sc->type)) {
-				continue;
-			}
-
-			sum_sample_weight += sc->sample_weight;
-		}
-	}
-	else {
-		sum_sample_weight = 1.0f;
-	}
-#endif  /* __DENOISING_FEATURES__ */
-
-	for(int i = 0; i < sd->num_closure; i++) {
-		const ShaderClosure *sc = &sd->closure[i];
-
-		/* transparency is not handled here, but in outer loop */
-		if(!CLOSURE_IS_BSDF(sc->type) || CLOSURE_IS_BSDF_TRANSPARENT(sc->type)) {
-			continue;
-		}
-
-		int num_samples;
-
-		if(CLOSURE_IS_BSDF_DIFFUSE(sc->type))
-			num_samples = kernel_data.integrator.diffuse_samples;
-		else if(CLOSURE_IS_BSDF_BSSRDF(sc->type))
-			num_samples = 1;
-		else if(CLOSURE_IS_BSDF_GLOSSY(sc->type))
-			num_samples = kernel_data.integrator.glossy_samples;
-		else
-			num_samples = kernel_data.integrator.transmission_samples;
-
-		num_samples = ceil_to_int(num_samples_adjust*num_samples);
-
-		float num_samples_inv = num_samples_adjust/num_samples;
-
-		for(int j = 0; j < num_samples; j++) {
-			PathState ps = *state;
-			float3 tp = throughput;
-			Ray bsdf_ray;
-#ifdef __SHADOW_TRICKS__
-			float shadow_transparency = L->shadow_transparency;
-#endif
-
-			ps.rng_hash = cmj_hash(state->rng_hash, i);
-
-			if(!kernel_branched_path_surface_bounce(kg,
-			                                        sd,
-			                                        sc,
-			                                        j,
-			                                        num_samples,
-			                                        &tp,
-			                                        &ps,
-			                                        &L->state,
-			                                        &bsdf_ray,
-			                                        sum_sample_weight))
-			{
-				continue;
-			}
-
-			ps.rng_hash = state->rng_hash;
-
-			kernel_path_indirect(kg,
-			                     indirect_sd,
-			                     emission_sd,
-			                     &bsdf_ray,
-			                     tp*num_samples_inv,
-			                     &ps,
-			                     L);
-
-			/* for render passes, sum and reset indirect light pass variables
-			 * for the next samples */
-			path_radiance_sum_indirect(L);
-			path_radiance_reset_indirect(L);
-
-#ifdef __SHADOW_TRICKS__
-			L->shadow_transparency = shadow_transparency;
-#endif
-		}
-	}
+  float sum_sample_weight = 0.0f;
+#    ifdef __DENOISING_FEATURES__
+  if (state->denoising_feature_weight > 0.0f) {
+    for (int i = 0; i < sd->num_closure; i++) {
+      const ShaderClosure *sc = &sd->closure[i];
+
+      /* transparency is not handled here, but in outer loop */
+      if (!CLOSURE_IS_BSDF(sc->type) || CLOSURE_IS_BSDF_TRANSPARENT(sc->type)) {
+        continue;
+      }
+
+      sum_sample_weight += sc->sample_weight;
+    }
+  }
+  else {
+    sum_sample_weight = 1.0f;
+  }
+#    endif /* __DENOISING_FEATURES__ */
+
+  for (int i = 0; i < sd->num_closure; i++) {
+    const ShaderClosure *sc = &sd->closure[i];
+
+    /* transparency is not handled here, but in outer loop */
+    if (!CLOSURE_IS_BSDF(sc->type) || CLOSURE_IS_BSDF_TRANSPARENT(sc->type)) {
+      continue;
+    }
+
+    int num_samples;
+
+    if (CLOSURE_IS_BSDF_DIFFUSE(sc->type))
+      num_samples = kernel_data.integrator.diffuse_samples;
+    else if (CLOSURE_IS_BSDF_BSSRDF(sc->type))
+      num_samples = 1;
+    else if (CLOSURE_IS_BSDF_GLOSSY(sc->type))
+      num_samples = kernel_data.integrator.glossy_samples;
+    else
+      num_samples = kernel_data.integrator.transmission_samples;
+
+    num_samples = ceil_to_int(num_samples_adjust * num_samples);
+
+    float num_samples_inv = num_samples_adjust / num_samples;
+
+    for (int j = 0; j < num_samples; j++) {
+      PathState ps = *state;
+      float3 tp = throughput;
+      Ray bsdf_ray;
+#    ifdef __SHADOW_TRICKS__
+      float shadow_transparency = L->shadow_transparency;
+#    endif
+
+      ps.rng_hash = cmj_hash(state->rng_hash, i);
+
+      if (!kernel_branched_path_surface_bounce(
+              kg, sd, sc, j, num_samples, &tp, &ps, &L->state, &bsdf_ray, sum_sample_weight)) {
+        continue;
+      }
+
+      ps.rng_hash = state->rng_hash;
+
+      kernel_path_indirect(kg, indirect_sd, emission_sd, &bsdf_ray, tp * num_samples_inv, &ps, L);
+
+      /* for render passes, sum and reset indirect light pass variables
+       * for the next samples */
+      path_radiance_sum_indirect(L);
+      path_radiance_reset_indirect(L);
+
+#    ifdef __SHADOW_TRICKS__
+      L->shadow_transparency = shadow_transparency;
+#    endif
+    }
+  }
 }
 
-#ifdef __SUBSURFACE__
+#    ifdef __SUBSURFACE__
 ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg,
                                                         ShaderData *sd,
                                                         ShaderData *indirect_sd,
@@ -325,111 +290,81 @@ ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg,
                                                         Ray *ray,
                                                         float3 throughput)
 {
-	for(int i = 0; i < sd->num_closure; i++) {
-		ShaderClosure *sc = &sd->closure[i];
-
-		if(!CLOSURE_IS_BSSRDF(sc->type))
-			continue;
-
-		/* set up random number generator */
-		uint lcg_state = lcg_state_init(state, 0x68bc21eb);
-		int num_samples = kernel_data.integrator.subsurface_samples * 3;
-		float num_samples_inv = 1.0f/num_samples;
-		uint bssrdf_rng_hash = cmj_hash(state->rng_hash, i);
-
-		/* do subsurface scatter step with copy of shader data, this will
-		 * replace the BSSRDF with a diffuse BSDF closure */
-		for(int j = 0; j < num_samples; j++) {
-			PathState hit_state = *state;
-			path_state_branch(&hit_state, j, num_samples);
-			hit_state.rng_hash = bssrdf_rng_hash;
-
-			LocalIntersection ss_isect;
-			float bssrdf_u, bssrdf_v;
-			path_state_rng_2D(kg, &hit_state, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v);
-			int num_hits = subsurface_scatter_multi_intersect(kg,
-			                                                  &ss_isect,
-			                                                  sd,
-			                                                  &hit_state,
-			                                                  sc,
-			                                                  &lcg_state,
-			                                                  bssrdf_u, bssrdf_v,
-			                                                  true);
-
-			hit_state.rng_offset += PRNG_BOUNCE_NUM;
-
-#ifdef __VOLUME__
-			Ray volume_ray = *ray;
-			bool need_update_volume_stack =
-			        kernel_data.integrator.use_volumes &&
-			        sd->object_flag & SD_OBJECT_INTERSECTS_VOLUME;
-#endif  /* __VOLUME__ */
-
-			/* compute lighting with the BSDF closure */
-			for(int hit = 0; hit < num_hits; hit++) {
-				ShaderData bssrdf_sd = *sd;
-				Bssrdf *bssrdf = (Bssrdf *)sc;
-				ClosureType bssrdf_type = sc->type;
-				float bssrdf_roughness = bssrdf->roughness;
-				subsurface_scatter_multi_setup(kg,
-				                               &ss_isect,
-				                               hit,
-				                               &bssrdf_sd,
-				                               &hit_state,
-				                               bssrdf_type,
-				                               bssrdf_roughness);
-
-#ifdef __VOLUME__
-				if(need_update_volume_stack) {
-					/* Setup ray from previous surface point to the new one. */
-					float3 P = ray_offset(bssrdf_sd.P, -bssrdf_sd.Ng);
-					volume_ray.D = normalize_len(P - volume_ray.P,
-					                             &volume_ray.t);
-
-					for(int k = 0; k < VOLUME_STACK_SIZE; k++) {
-						hit_state.volume_stack[k] = state->volume_stack[k];
-					}
-
-					kernel_volume_stack_update_for_subsurface(
-					    kg,
-					    emission_sd,
-					    &volume_ray,
-					    hit_state.volume_stack);
-				}
-#endif  /* __VOLUME__ */
-
-#ifdef __EMISSION__
-				/* direct light */
-				if(kernel_data.integrator.use_direct_light) {
-					int all = (kernel_data.integrator.sample_all_lights_direct) ||
-					          (hit_state.flag & PATH_RAY_SHADOW_CATCHER);
-					kernel_branched_path_surface_connect_light(
-					        kg,
-					        &bssrdf_sd,
-					        emission_sd,
-					        &hit_state,
-					        throughput,
-					        num_samples_inv,
-					        L,
-					        all);
-				}
-#endif  /* __EMISSION__ */
-
-				/* indirect light */
-				kernel_branched_path_surface_indirect_light(
-				        kg,
-				        &bssrdf_sd,
-				        indirect_sd,
-				        emission_sd,
-				        throughput,
-				        num_samples_inv,
-				        &hit_state,
-				        L);
-			}
-		}
-	}
+  for (int i = 0; i < sd->num_closure; i++) {
+    ShaderClosure *sc = &sd->closure[i];
+
+    if (!CLOSURE_IS_BSSRDF(sc->type))
+      continue;
+
+    /* set up random number generator */
+    uint lcg_state = lcg_state_init(state, 0x68bc21eb);
+    int num_samples = kernel_data.integrator.subsurface_samples * 3;
+    float num_samples_inv = 1.0f / num_samples;
+    uint bssrdf_rng_hash = cmj_hash(state->rng_hash, i);
+
+    /* do subsurface scatter step with copy of shader data, this will
+     * replace the BSSRDF with a diffuse BSDF closure */
+    for (int j = 0; j < num_samples; j++) {
+      PathState hit_state = *state;
+      path_state_branch(&hit_state, j, num_samples);
+      hit_state.rng_hash = bssrdf_rng_hash;
+
+      LocalIntersection ss_isect;
+      float bssrdf_u, bssrdf_v;
+      path_state_rng_2D(kg, &hit_state, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v);
+      int num_hits = subsurface_scatter_multi_intersect(
+          kg, &ss_isect, sd, &hit_state, sc, &lcg_state, bssrdf_u, bssrdf_v, true);
+
+      hit_state.rng_offset += PRNG_BOUNCE_NUM;
+
+#      ifdef __VOLUME__
+      Ray volume_ray = *ray;
+      bool need_update_volume_stack = kernel_data.integrator.use_volumes &&
+                                      sd->object_flag & SD_OBJECT_INTERSECTS_VOLUME;
+#      endif /* __VOLUME__ */
+
+      /* compute lighting with the BSDF closure */
+      for (int hit = 0; hit < num_hits; hit++) {
+        ShaderData bssrdf_sd = *sd;
+        Bssrdf *bssrdf = (Bssrdf *)sc;
+        ClosureType bssrdf_type = sc->type;
+        float bssrdf_roughness = bssrdf->roughness;
+        subsurface_scatter_multi_setup(
+            kg, &ss_isect, hit, &bssrdf_sd, &hit_state, bssrdf_type, bssrdf_roughness);
+
+#      ifdef __VOLUME__
+        if (need_update_volume_stack) {
+          /* Setup ray from previous surface point to the new one. */
+          float3 P = ray_offset(bssrdf_sd.P, -bssrdf_sd.Ng);
+          volume_ray.D = normalize_len(P - volume_ray.P, &volume_ray.t);
+
+          for (int k = 0; k < VOLUME_STACK_SIZE; k++) {
+            hit_state.volume_stack[k] = state->volume_stack[k];
+          }
+
+          kernel_volume_stack_update_for_subsurface(
+              kg, emission_sd, &volume_ray, hit_state.volume_stack);
+        }
+#      endif /* __VOLUME__ */
+
+#      ifdef __EMISSION__
+        /* direct light */
+        if (kernel_data.integrator.use_direct_light) {
+          int all = (kernel_data.integrator.sample_all_lights_direct) ||
+                    (hit_state.flag & PATH_RAY_SHADOW_CATCHER);
+          kernel_branched_path_surface_connect_light(
+              kg, &bssrdf_sd, emission_sd, &hit_state, throughput, num_samples_inv, L, all);
+        }
+#      endif /* __EMISSION__ */
+
+        /* indirect light */
+        kernel_branched_path_surface_indirect_light(
+            kg, &bssrdf_sd, indirect_sd, emission_sd, throughput, num_samples_inv, &hit_state, L);
+      }
+    }
+  }
 }
-#endif  /* __SUBSURFACE__ */
+#    endif /* __SUBSURFACE__ */
 
 ccl_device void kernel_branched_path_integrate(KernelGlobals *kg,
                                                uint rng_hash,
@@ -438,188 +373,171 @@ ccl_device void kernel_branched_path_integrate(KernelGlobals *kg,
                                                ccl_global float *buffer,
                                                PathRadiance *L)
 {
-	/* initialize */
-	float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
-
-	path_radiance_init(L, kernel_data.film.use_light_pass);
-
-	/* shader data memory used for both volumes and surfaces, saves stack space */
-	ShaderData sd;
-	/* shader data used by emission, shadows, volume stacks, indirect path */
-	ShaderDataTinyStorage emission_sd_storage;
-	ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage);
-	ShaderData indirect_sd;
-
-	PathState state;
-	path_state_init(kg, emission_sd, &state, rng_hash, sample, &ray);
-
-	/* Main Loop
-	 * Here we only handle transparency intersections from the camera ray.
-	 * Indirect bounces are handled in kernel_branched_path_surface_indirect_light().
-	 */
-	for(;;) {
-		/* Find intersection with objects in scene. */
-		Intersection isect;
-		bool hit = kernel_path_scene_intersect(kg, &state, &ray, &isect, L);
-
-#ifdef __VOLUME__
-		/* Volume integration. */
-		kernel_branched_path_volume(kg,
-		                            &sd,
-		                            &state,
-		                            &ray,
-		                            &throughput,
-		                            &isect,
-		                            hit,
-		                            &indirect_sd,
-		                            emission_sd,
-		                            L);
-#endif  /* __VOLUME__ */
-
-		/* Shade background. */
-		if(!hit) {
-			kernel_path_background(kg, &state, &ray, throughput, &sd, L);
-			break;
-		}
-
-		/* Setup and evaluate shader. */
-		shader_setup_from_ray(kg, &sd, &isect, &ray);
-
-		/* Skip most work for volume bounding surface. */
-#ifdef __VOLUME__
-		if(!(sd.flag & SD_HAS_ONLY_VOLUME)) {
-#endif
-
-		shader_eval_surface(kg, &sd, &state, state.flag);
-		shader_merge_closures(&sd);
-
-		/* Apply shadow catcher, holdout, emission. */
-		if(!kernel_path_shader_apply(kg,
-		                             &sd,
-		                             &state,
-		                             &ray,
-		                             throughput,
-		                             emission_sd,
-		                             L,
-		                             buffer))
-		{
-			break;
-		}
-
-		/* transparency termination */
-		if(state.flag & PATH_RAY_TRANSPARENT) {
-			/* path termination. this is a strange place to put the termination, it's
-			 * mainly due to the mixed in MIS that we use. gives too many unneeded
-			 * shader evaluations, only need emission if we are going to terminate */
-			float probability = path_state_continuation_probability(kg, &state, throughput);
-
-			if(probability == 0.0f) {
-				break;
-			}
-			else if(probability != 1.0f) {
-				float terminate = path_state_rng_1D(kg, &state, PRNG_TERMINATE);
-
-				if(terminate >= probability)
-					break;
-
-				throughput /= probability;
-			}
-		}
-
-		kernel_update_denoising_features(kg, &sd, &state, L);
-
-#ifdef __AO__
-		/* ambient occlusion */
-		if(kernel_data.integrator.use_ambient_occlusion) {
-			kernel_branched_path_ao(kg, &sd, emission_sd, L, &state, throughput);
-		}
-#endif  /* __AO__ */
-
-#ifdef __SUBSURFACE__
-		/* bssrdf scatter to a different location on the same object */
-		if(sd.flag & SD_BSSRDF) {
-			kernel_branched_path_subsurface_scatter(kg, &sd, &indirect_sd, emission_sd,
-			                                        L, &state, &ray, throughput);
-		}
-#endif  /* __SUBSURFACE__ */
-
-		PathState hit_state = state;
-
-#ifdef __EMISSION__
-		/* direct light */
-		if(kernel_data.integrator.use_direct_light) {
-			int all = (kernel_data.integrator.sample_all_lights_direct) ||
-					  (state.flag & PATH_RAY_SHADOW_CATCHER);
-			kernel_branched_path_surface_connect_light(kg,
-				&sd, emission_sd, &hit_state, throughput, 1.0f, L, all);
-		}
-#endif  /* __EMISSION__ */
-
-		/* indirect light */
-		kernel_branched_path_surface_indirect_light(kg,
-			&sd, &indirect_sd, emission_sd, throughput, 1.0f, &hit_state, L);
-
-		/* continue in case of transparency */
-		throughput *= shader_bsdf_transparency(kg, &sd);
-
-		if(is_zero(throughput))
-			break;
-
-		/* Update Path State */
-		path_state_next(kg, &state, LABEL_TRANSPARENT);
-
-#ifdef __VOLUME__
-		}
-		else {
-			if(!path_state_volume_next(kg, &state)) {
-				break;
-			}
-		}
-#endif
-
-		ray.P = ray_offset(sd.P, -sd.Ng);
-		ray.t -= sd.ray_length; /* clipping works through transparent */
-
-#ifdef __RAY_DIFFERENTIALS__
-		ray.dP = sd.dP;
-		ray.dD.dx = -sd.dI.dx;
-		ray.dD.dy = -sd.dI.dy;
-#endif  /* __RAY_DIFFERENTIALS__ */
-
-#ifdef __VOLUME__
-		/* enter/exit volume */
-		kernel_volume_stack_enter_exit(kg, &sd, state.volume_stack);
-#endif  /* __VOLUME__ */
-	}
+  /* initialize */
+  float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
+
+  path_radiance_init(L, kernel_data.film.use_light_pass);
+
+  /* shader data memory used for both volumes and surfaces, saves stack space */
+  ShaderData sd;
+  /* shader data used by emission, shadows, volume stacks, indirect path */
+  ShaderDataTinyStorage emission_sd_storage;
+  ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage);
+  ShaderData indirect_sd;
+
+  PathState state;
+  path_state_init(kg, emission_sd, &state, rng_hash, sample, &ray);
+
+  /* Main Loop
+   * Here we only handle transparency intersections from the camera ray.
+   * Indirect bounces are handled in kernel_branched_path_surface_indirect_light().
+   */
+  for (;;) {
+    /* Find intersection with objects in scene. */
+    Intersection isect;
+    bool hit = kernel_path_scene_intersect(kg, &state, &ray, &isect, L);
+
+#    ifdef __VOLUME__
+    /* Volume integration. */
+    kernel_branched_path_volume(
+        kg, &sd, &state, &ray, &throughput, &isect, hit, &indirect_sd, emission_sd, L);
+#    endif /* __VOLUME__ */
+
+    /* Shade background. */
+    if (!hit) {
+      kernel_path_background(kg, &state, &ray, throughput, &sd, L);
+      break;
+    }
+
+    /* Setup and evaluate shader. */
+    shader_setup_from_ray(kg, &sd, &isect, &ray);
+
+    /* Skip most work for volume bounding surface. */
+#    ifdef __VOLUME__
+    if (!(sd.flag & SD_HAS_ONLY_VOLUME)) {
+#    endif
+
+      shader_eval_surface(kg, &sd, &state, state.flag);
+      shader_merge_closures(&sd);
+
+      /* Apply shadow catcher, holdout, emission. */
+      if (!kernel_path_shader_apply(kg, &sd, &state, &ray, throughput, emission_sd, L, buffer)) {
+        break;
+      }
+
+      /* transparency termination */
+      if (state.flag & PATH_RAY_TRANSPARENT) {
+        /* path termination. this is a strange place to put the termination, it's
+       * mainly due to the mixed in MIS that we use. gives too many unneeded
+       * shader evaluations, only need emission if we are going to terminate */
+        float probability = path_state_continuation_probability(kg, &state, throughput);
+
+        if (probability == 0.0f) {
+          break;
+        }
+        else if (probability != 1.0f) {
+          float terminate = path_state_rng_1D(kg, &state, PRNG_TERMINATE);
+
+          if (terminate >= probability)
+            break;
+
+          throughput /= probability;
+        }
+      }
+
+      kernel_update_denoising_features(kg, &sd, &state, L);
+
+#    ifdef __AO__
+      /* ambient occlusion */
+      if (kernel_data.integrator.use_ambient_occlusion) {
+        kernel_branched_path_ao(kg, &sd, emission_sd, L, &state, throughput);
+      }
+#    endif /* __AO__ */
+
+#    ifdef __SUBSURFACE__
+      /* bssrdf scatter to a different location on the same object */
+      if (sd.flag & SD_BSSRDF) {
+        kernel_branched_path_subsurface_scatter(
+            kg, &sd, &indirect_sd, emission_sd, L, &state, &ray, throughput);
+      }
+#    endif /* __SUBSURFACE__ */
+
+      PathState hit_state = state;
+
+#    ifdef __EMISSION__
+      /* direct light */
+      if (kernel_data.integrator.use_direct_light) {
+        int all = (kernel_data.integrator.sample_all_lights_direct) ||
+                  (state.flag & PATH_RAY_SHADOW_CATCHER);
+        kernel_branched_path_surface_connect_light(
+            kg, &sd, emission_sd, &hit_state, throughput, 1.0f, L, all);
+      }
+#    endif /* __EMISSION__ */
+
+      /* indirect light */
+      kernel_branched_path_surface_indirect_light(
+          kg, &sd, &indirect_sd, emission_sd, throughput, 1.0f, &hit_state, L);
+
+      /* continue in case of transparency */
+      throughput *= shader_bsdf_transparency(kg, &sd);
+
+      if (is_zero(throughput))
+        break;
+
+      /* Update Path State */
+      path_state_next(kg, &state, LABEL_TRANSPARENT);
+
+#    ifdef __VOLUME__
+    }
+    else {
+      if (!path_state_volume_next(kg, &state)) {
+        break;
+      }
+    }
+#    endif
+
+    ray.P = ray_offset(sd.P, -sd.Ng);
+    ray.t -= sd.ray_length; /* clipping works through transparent */
+
+#    ifdef __RAY_DIFFERENTIALS__
+    ray.dP = sd.dP;
+    ray.dD.dx = -sd.dI.dx;
+    ray.dD.dy = -sd.dI.dy;
+#    endif /* __RAY_DIFFERENTIALS__ */
+
+#    ifdef __VOLUME__
+    /* enter/exit volume */
+    kernel_volume_stack_enter_exit(kg, &sd, state.volume_stack);
+#    endif /* __VOLUME__ */
+  }
 }
 
-ccl_device void kernel_branched_path_trace(KernelGlobals *kg,
-	ccl_global float *buffer,
-	int sample, int x, int y, int offset, int stride)
+ccl_device void kernel_branched_path_trace(
+    KernelGlobals *kg, ccl_global float *buffer, int sample, int x, int y, int offset, int stride)
 {
-	/* buffer offset */
-	int index = offset + x + y*stride;
-	int pass_stride = kernel_data.film.pass_stride;
+  /* buffer offset */
+  int index = offset + x + y * stride;
+  int pass_stride = kernel_data.film.pass_stride;
 
-	buffer += index*pass_stride;
+  buffer += index * pass_stride;
 
-	/* initialize random numbers and ray */
-	uint rng_hash;
-	Ray ray;
+  /* initialize random numbers and ray */
+  uint rng_hash;
+  Ray ray;
 
-	kernel_path_trace_setup(kg, sample, x, y, &rng_hash, &ray);
+  kernel_path_trace_setup(kg, sample, x, y, &rng_hash, &ray);
 
-	/* integrate */
-	PathRadiance L;
+  /* integrate */
+  PathRadiance L;
 
-	if(ray.t != 0.0f) {
-		kernel_branched_path_integrate(kg, rng_hash, sample, ray, buffer, &L);
-		kernel_write_result(kg, buffer, sample, &L);
-	}
+  if (ray.t != 0.0f) {
+    kernel_branched_path_integrate(kg, rng_hash, sample, ray, buffer, &L);
+    kernel_write_result(kg, buffer, sample, &L);
+  }
 }
 
-#endif  /* __SPLIT_KERNEL__ */
+#  endif /* __SPLIT_KERNEL__ */
 
-#endif  /* __BRANCHED_PATH__ */
+#endif /* __BRANCHED_PATH__ */
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_path_common.h b/intern/cycles/kernel/kernel_path_common.h
index d83fd474cde..815767595a9 100644
--- a/intern/cycles/kernel/kernel_path_common.h
+++ b/intern/cycles/kernel/kernel_path_common.h
@@ -18,34 +18,31 @@
 
 CCL_NAMESPACE_BEGIN
 
-ccl_device_inline void kernel_path_trace_setup(KernelGlobals *kg,
-                                               int sample,
-                                               int x, int y,
-                                               uint *rng_hash,
-                                               ccl_addr_space Ray *ray)
+ccl_device_inline void kernel_path_trace_setup(
+    KernelGlobals *kg, int sample, int x, int y, uint *rng_hash, ccl_addr_space Ray *ray)
 {
-	float filter_u;
-	float filter_v;
+  float filter_u;
+  float filter_v;
 
-	int num_samples = kernel_data.integrator.aa_samples;
+  int num_samples = kernel_data.integrator.aa_samples;
 
-	path_rng_init(kg, sample, num_samples, rng_hash, x, y, &filter_u, &filter_v);
+  path_rng_init(kg, sample, num_samples, rng_hash, x, y, &filter_u, &filter_v);
 
-	/* sample camera ray */
+  /* sample camera ray */
 
-	float lens_u = 0.0f, lens_v = 0.0f;
+  float lens_u = 0.0f, lens_v = 0.0f;
 
-	if(kernel_data.cam.aperturesize > 0.0f)
-		path_rng_2D(kg, *rng_hash, sample, num_samples, PRNG_LENS_U, &lens_u, &lens_v);
+  if (kernel_data.cam.aperturesize > 0.0f)
+    path_rng_2D(kg, *rng_hash, sample, num_samples, PRNG_LENS_U, &lens_u, &lens_v);
 
-	float time = 0.0f;
+  float time = 0.0f;
 
 #ifdef __CAMERA_MOTION__
-	if(kernel_data.cam.shuttertime != -1.0f)
-		time = path_rng_1D(kg, *rng_hash, sample, num_samples, PRNG_TIME);
+  if (kernel_data.cam.shuttertime != -1.0f)
+    time = path_rng_1D(kg, *rng_hash, sample, num_samples, PRNG_TIME);
 #endif
 
-	camera_sample(kg, x, y, filter_u, filter_v, lens_u, lens_v, time, ray);
+  camera_sample(kg, x, y, filter_u, filter_v, lens_u, lens_v, time, ray);
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_path_state.h b/intern/cycles/kernel/kernel_path_state.h
index e85050df4bb..cdca0b1f9bf 100644
--- a/intern/cycles/kernel/kernel_path_state.h
+++ b/intern/cycles/kernel/kernel_path_state.h
@@ -23,249 +23,252 @@ ccl_device_inline void path_state_init(KernelGlobals *kg,
                                        int sample,
                                        ccl_addr_space Ray *ray)
 {
-	state->flag = PATH_RAY_CAMERA|PATH_RAY_MIS_SKIP|PATH_RAY_TRANSPARENT_BACKGROUND;
+  state->flag = PATH_RAY_CAMERA | PATH_RAY_MIS_SKIP | PATH_RAY_TRANSPARENT_BACKGROUND;
 
-	state->rng_hash = rng_hash;
-	state->rng_offset = PRNG_BASE_NUM;
-	state->sample = sample;
-	state->num_samples = kernel_data.integrator.aa_samples;
-	state->branch_factor = 1.0f;
+  state->rng_hash = rng_hash;
+  state->rng_offset = PRNG_BASE_NUM;
+  state->sample = sample;
+  state->num_samples = kernel_data.integrator.aa_samples;
+  state->branch_factor = 1.0f;
 
-	state->bounce = 0;
-	state->diffuse_bounce = 0;
-	state->glossy_bounce = 0;
-	state->transmission_bounce = 0;
-	state->transparent_bounce = 0;
+  state->bounce = 0;
+  state->diffuse_bounce = 0;
+  state->glossy_bounce = 0;
+  state->transmission_bounce = 0;
+  state->transparent_bounce = 0;
 
 #ifdef __DENOISING_FEATURES__
-	if(kernel_data.film.pass_denoising_data) {
-		state->flag |= PATH_RAY_STORE_SHADOW_INFO;
-		state->denoising_feature_weight = 1.0f;
-	}
-	else {
-		state->denoising_feature_weight = 0.0f;
-	}
-#endif  /* __DENOISING_FEATURES__ */
-
-	state->min_ray_pdf = FLT_MAX;
-	state->ray_pdf = 0.0f;
+  if (kernel_data.film.pass_denoising_data) {
+    state->flag |= PATH_RAY_STORE_SHADOW_INFO;
+    state->denoising_feature_weight = 1.0f;
+  }
+  else {
+    state->denoising_feature_weight = 0.0f;
+  }
+#endif /* __DENOISING_FEATURES__ */
+
+  state->min_ray_pdf = FLT_MAX;
+  state->ray_pdf = 0.0f;
 #ifdef __LAMP_MIS__
-	state->ray_t = 0.0f;
+  state->ray_t = 0.0f;
 #endif
 
 #ifdef __VOLUME__
-	state->volume_bounce = 0;
-	state->volume_bounds_bounce = 0;
-
-	if(kernel_data.integrator.use_volumes) {
-		/* Initialize volume stack with volume we are inside of. */
-		kernel_volume_stack_init(kg, stack_sd, state, ray, state->volume_stack);
-	}
-	else {
-		state->volume_stack[0].shader = SHADER_NONE;
-	}
+  state->volume_bounce = 0;
+  state->volume_bounds_bounce = 0;
+
+  if (kernel_data.integrator.use_volumes) {
+    /* Initialize volume stack with volume we are inside of. */
+    kernel_volume_stack_init(kg, stack_sd, state, ray, state->volume_stack);
+  }
+  else {
+    state->volume_stack[0].shader = SHADER_NONE;
+  }
 #endif
 }
 
-ccl_device_inline void path_state_next(KernelGlobals *kg, ccl_addr_space PathState *state, int label)
+ccl_device_inline void path_state_next(KernelGlobals *kg,
+                                       ccl_addr_space PathState *state,
+                                       int label)
 {
-	/* ray through transparent keeps same flags from previous ray and is
-	 * not counted as a regular bounce, transparent has separate max */
-	if(label & LABEL_TRANSPARENT) {
-		state->flag |= PATH_RAY_TRANSPARENT;
-		state->transparent_bounce++;
-		if(state->transparent_bounce >= kernel_data.integrator.transparent_max_bounce) {
-			state->flag |= PATH_RAY_TERMINATE_IMMEDIATE;
-		}
+  /* ray through transparent keeps same flags from previous ray and is
+   * not counted as a regular bounce, transparent has separate max */
+  if (label & LABEL_TRANSPARENT) {
+    state->flag |= PATH_RAY_TRANSPARENT;
+    state->transparent_bounce++;
+    if (state->transparent_bounce >= kernel_data.integrator.transparent_max_bounce) {
+      state->flag |= PATH_RAY_TERMINATE_IMMEDIATE;
+    }
 
-		if(!kernel_data.integrator.transparent_shadows)
-			state->flag |= PATH_RAY_MIS_SKIP;
+    if (!kernel_data.integrator.transparent_shadows)
+      state->flag |= PATH_RAY_MIS_SKIP;
 
-		/* random number generator next bounce */
-		state->rng_offset += PRNG_BOUNCE_NUM;
+    /* random number generator next bounce */
+    state->rng_offset += PRNG_BOUNCE_NUM;
 
-		return;
-	}
+    return;
+  }
 
-	state->bounce++;
-	if(state->bounce >= kernel_data.integrator.max_bounce) {
-		state->flag |= PATH_RAY_TERMINATE_AFTER_TRANSPARENT;
-	}
+  state->bounce++;
+  if (state->bounce >= kernel_data.integrator.max_bounce) {
+    state->flag |= PATH_RAY_TERMINATE_AFTER_TRANSPARENT;
+  }
 
-	state->flag &= ~(PATH_RAY_ALL_VISIBILITY|PATH_RAY_MIS_SKIP);
+  state->flag &= ~(PATH_RAY_ALL_VISIBILITY | PATH_RAY_MIS_SKIP);
 
 #ifdef __VOLUME__
-	if(label & LABEL_VOLUME_SCATTER) {
-		/* volume scatter */
-		state->flag |= PATH_RAY_VOLUME_SCATTER;
-		state->flag &= ~PATH_RAY_TRANSPARENT_BACKGROUND;
-
-		state->volume_bounce++;
-		if(state->volume_bounce >= kernel_data.integrator.max_volume_bounce) {
-			state->flag |= PATH_RAY_TERMINATE_AFTER_TRANSPARENT;
-		}
-	}
-	else
+  if (label & LABEL_VOLUME_SCATTER) {
+    /* volume scatter */
+    state->flag |= PATH_RAY_VOLUME_SCATTER;
+    state->flag &= ~PATH_RAY_TRANSPARENT_BACKGROUND;
+
+    state->volume_bounce++;
+    if (state->volume_bounce >= kernel_data.integrator.max_volume_bounce) {
+      state->flag |= PATH_RAY_TERMINATE_AFTER_TRANSPARENT;
+    }
+  }
+  else
 #endif
-	{
-		/* surface reflection/transmission */
-		if(label & LABEL_REFLECT) {
-			state->flag |= PATH_RAY_REFLECT;
-			state->flag &= ~PATH_RAY_TRANSPARENT_BACKGROUND;
-
-			if(label & LABEL_DIFFUSE) {
-				state->diffuse_bounce++;
-				if(state->diffuse_bounce >= kernel_data.integrator.max_diffuse_bounce) {
-					state->flag |= PATH_RAY_TERMINATE_AFTER_TRANSPARENT;
-				}
-			}
-			else {
-				state->glossy_bounce++;
-				if(state->glossy_bounce >= kernel_data.integrator.max_glossy_bounce) {
-					state->flag |= PATH_RAY_TERMINATE_AFTER_TRANSPARENT;
-				}
-			}
-		}
-		else {
-			kernel_assert(label & LABEL_TRANSMIT);
-
-			state->flag |= PATH_RAY_TRANSMIT;
-
-			if(!(label & LABEL_TRANSMIT_TRANSPARENT)) {
-				state->flag &= ~PATH_RAY_TRANSPARENT_BACKGROUND;
-			}
-
-			state->transmission_bounce++;
-			if(state->transmission_bounce >= kernel_data.integrator.max_transmission_bounce) {
-				state->flag |= PATH_RAY_TERMINATE_AFTER_TRANSPARENT;
-			}
-		}
-
-		/* diffuse/glossy/singular */
-		if(label & LABEL_DIFFUSE) {
-			state->flag |= PATH_RAY_DIFFUSE|PATH_RAY_DIFFUSE_ANCESTOR;
-		}
-		else if(label & LABEL_GLOSSY) {
-			state->flag |= PATH_RAY_GLOSSY;
-		}
-		else {
-			kernel_assert(label & LABEL_SINGULAR);
-			state->flag |= PATH_RAY_GLOSSY|PATH_RAY_SINGULAR|PATH_RAY_MIS_SKIP;
-		}
-	}
-
-	/* random number generator next bounce */
-	state->rng_offset += PRNG_BOUNCE_NUM;
+  {
+    /* surface reflection/transmission */
+    if (label & LABEL_REFLECT) {
+      state->flag |= PATH_RAY_REFLECT;
+      state->flag &= ~PATH_RAY_TRANSPARENT_BACKGROUND;
+
+      if (label & LABEL_DIFFUSE) {
+        state->diffuse_bounce++;
+        if (state->diffuse_bounce >= kernel_data.integrator.max_diffuse_bounce) {
+          state->flag |= PATH_RAY_TERMINATE_AFTER_TRANSPARENT;
+        }
+      }
+      else {
+        state->glossy_bounce++;
+        if (state->glossy_bounce >= kernel_data.integrator.max_glossy_bounce) {
+          state->flag |= PATH_RAY_TERMINATE_AFTER_TRANSPARENT;
+        }
+      }
+    }
+    else {
+      kernel_assert(label & LABEL_TRANSMIT);
+
+      state->flag |= PATH_RAY_TRANSMIT;
+
+      if (!(label & LABEL_TRANSMIT_TRANSPARENT)) {
+        state->flag &= ~PATH_RAY_TRANSPARENT_BACKGROUND;
+      }
+
+      state->transmission_bounce++;
+      if (state->transmission_bounce >= kernel_data.integrator.max_transmission_bounce) {
+        state->flag |= PATH_RAY_TERMINATE_AFTER_TRANSPARENT;
+      }
+    }
+
+    /* diffuse/glossy/singular */
+    if (label & LABEL_DIFFUSE) {
+      state->flag |= PATH_RAY_DIFFUSE | PATH_RAY_DIFFUSE_ANCESTOR;
+    }
+    else if (label & LABEL_GLOSSY) {
+      state->flag |= PATH_RAY_GLOSSY;
+    }
+    else {
+      kernel_assert(label & LABEL_SINGULAR);
+      state->flag |= PATH_RAY_GLOSSY | PATH_RAY_SINGULAR | PATH_RAY_MIS_SKIP;
+    }
+  }
+
+  /* random number generator next bounce */
+  state->rng_offset += PRNG_BOUNCE_NUM;
 
 #ifdef __DENOISING_FEATURES__
-	if((state->denoising_feature_weight == 0.0f) && !(state->flag & PATH_RAY_SHADOW_CATCHER)) {
-		state->flag &= ~PATH_RAY_STORE_SHADOW_INFO;
-	}
+  if ((state->denoising_feature_weight == 0.0f) && !(state->flag & PATH_RAY_SHADOW_CATCHER)) {
+    state->flag &= ~PATH_RAY_STORE_SHADOW_INFO;
+  }
 #endif
 }
 
 #ifdef __VOLUME__
 ccl_device_inline bool path_state_volume_next(KernelGlobals *kg, ccl_addr_space PathState *state)
 {
-	/* For volume bounding meshes we pass through without counting transparent
-	 * bounces, only sanity check in case self intersection gets us stuck. */
-	state->volume_bounds_bounce++;
-	if(state->volume_bounds_bounce > VOLUME_BOUNDS_MAX) {
-		return false;
-	}
-
-	/* Random number generator next bounce. */
-	if(state->volume_bounds_bounce > 1) {
-		state->rng_offset += PRNG_BOUNCE_NUM;
-	}
-
-	return true;
+  /* For volume bounding meshes we pass through without counting transparent
+   * bounces, only sanity check in case self intersection gets us stuck. */
+  state->volume_bounds_bounce++;
+  if (state->volume_bounds_bounce > VOLUME_BOUNDS_MAX) {
+    return false;
+  }
+
+  /* Random number generator next bounce. */
+  if (state->volume_bounds_bounce > 1) {
+    state->rng_offset += PRNG_BOUNCE_NUM;
+  }
+
+  return true;
 }
 #endif
 
-ccl_device_inline uint path_state_ray_visibility(KernelGlobals *kg, ccl_addr_space PathState *state)
+ccl_device_inline uint path_state_ray_visibility(KernelGlobals *kg,
+                                                 ccl_addr_space PathState *state)
 {
-	uint flag = state->flag & PATH_RAY_ALL_VISIBILITY;
+  uint flag = state->flag & PATH_RAY_ALL_VISIBILITY;
 
-	/* for visibility, diffuse/glossy are for reflection only */
-	if(flag & PATH_RAY_TRANSMIT)
-		flag &= ~(PATH_RAY_DIFFUSE|PATH_RAY_GLOSSY);
-	/* todo: this is not supported as its own ray visibility yet */
-	if(state->flag & PATH_RAY_VOLUME_SCATTER)
-		flag |= PATH_RAY_DIFFUSE;
+  /* for visibility, diffuse/glossy are for reflection only */
+  if (flag & PATH_RAY_TRANSMIT)
+    flag &= ~(PATH_RAY_DIFFUSE | PATH_RAY_GLOSSY);
+  /* todo: this is not supported as its own ray visibility yet */
+  if (state->flag & PATH_RAY_VOLUME_SCATTER)
+    flag |= PATH_RAY_DIFFUSE;
 
-	return flag;
+  return flag;
 }
 
 ccl_device_inline float path_state_continuation_probability(KernelGlobals *kg,
                                                             ccl_addr_space PathState *state,
                                                             const float3 throughput)
 {
-	if(state->flag & PATH_RAY_TERMINATE_IMMEDIATE) {
-		/* Ray is to be terminated immediately. */
-		return 0.0f;
-	}
-	else if(state->flag & PATH_RAY_TRANSPARENT) {
-		/* Do at least one bounce without RR. */
-		if(state->transparent_bounce <= 1) {
-			return 1.0f;
-		}
+  if (state->flag & PATH_RAY_TERMINATE_IMMEDIATE) {
+    /* Ray is to be terminated immediately. */
+    return 0.0f;
+  }
+  else if (state->flag & PATH_RAY_TRANSPARENT) {
+    /* Do at least one bounce without RR. */
+    if (state->transparent_bounce <= 1) {
+      return 1.0f;
+    }
 #ifdef __SHADOW_TRICKS__
-		/* Exception for shadow catcher not working correctly with RR. */
-		else if((state->flag & PATH_RAY_SHADOW_CATCHER) && (state->transparent_bounce <= 8)) {
-			return 1.0f;
-		}
+    /* Exception for shadow catcher not working correctly with RR. */
+    else if ((state->flag & PATH_RAY_SHADOW_CATCHER) && (state->transparent_bounce <= 8)) {
+      return 1.0f;
+    }
 #endif
-	}
-	else {
-		/* Do at least one bounce without RR. */
-		if(state->bounce <= 1) {
-			return 1.0f;
-		}
+  }
+  else {
+    /* Do at least one bounce without RR. */
+    if (state->bounce <= 1) {
+      return 1.0f;
+    }
 #ifdef __SHADOW_TRICKS__
-		/* Exception for shadow catcher not working correctly with RR. */
-		else if((state->flag & PATH_RAY_SHADOW_CATCHER) && (state->bounce <= 3)) {
-			return 1.0f;
-		}
+    /* Exception for shadow catcher not working correctly with RR. */
+    else if ((state->flag & PATH_RAY_SHADOW_CATCHER) && (state->bounce <= 3)) {
+      return 1.0f;
+    }
 #endif
-	}
+  }
 
-	/* Probabilistic termination: use sqrt() to roughly match typical view
-	 * transform and do path termination a bit later on average. */
-	return min(sqrtf(max3(fabs(throughput)) * state->branch_factor), 1.0f);
+  /* Probabilistic termination: use sqrt() to roughly match typical view
+   * transform and do path termination a bit later on average. */
+  return min(sqrtf(max3(fabs(throughput)) * state->branch_factor), 1.0f);
 }
 
 /* TODO(DingTo): Find more meaningful name for this */
 ccl_device_inline void path_state_modify_bounce(ccl_addr_space PathState *state, bool increase)
 {
-	/* Modify bounce temporarily for shader eval */
-	if(increase)
-		state->bounce += 1;
-	else
-		state->bounce -= 1;
+  /* Modify bounce temporarily for shader eval */
+  if (increase)
+    state->bounce += 1;
+  else
+    state->bounce -= 1;
 }
 
 ccl_device_inline bool path_state_ao_bounce(KernelGlobals *kg, ccl_addr_space PathState *state)
 {
-    if(state->bounce <= kernel_data.integrator.ao_bounces) {
-        return false;
-    }
+  if (state->bounce <= kernel_data.integrator.ao_bounces) {
+    return false;
+  }
 
-    int bounce = state->bounce - state->transmission_bounce - (state->glossy_bounce > 0);
-    return (bounce > kernel_data.integrator.ao_bounces);
+  int bounce = state->bounce - state->transmission_bounce - (state->glossy_bounce > 0);
+  return (bounce > kernel_data.integrator.ao_bounces);
 }
 
 ccl_device_inline void path_state_branch(ccl_addr_space PathState *state,
                                          int branch,
                                          int num_branches)
 {
-	if(num_branches > 1) {
-		/* Path is splitting into a branch, adjust so that each branch
-		 * still gets a unique sample from the same sequence. */
-		state->sample = state->sample*num_branches + branch;
-		state->num_samples = state->num_samples*num_branches;
-		state->branch_factor *= num_branches;
-	}
+  if (num_branches > 1) {
+    /* Path is splitting into a branch, adjust so that each branch
+     * still gets a unique sample from the same sequence. */
+    state->sample = state->sample * num_branches + branch;
+    state->num_samples = state->num_samples * num_branches;
+    state->branch_factor *= num_branches;
+  }
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_path_subsurface.h b/intern/cycles/kernel/kernel_path_subsurface.h
index b5a92c74ed5..97d3f292ca3 100644
--- a/intern/cycles/kernel/kernel_path_subsurface.h
+++ b/intern/cycles/kernel/kernel_path_subsurface.h
@@ -22,141 +22,118 @@ ccl_device
 #  else
 ccl_device_inline
 #  endif
-bool kernel_path_subsurface_scatter(
-        KernelGlobals *kg,
-        ShaderData *sd,
-        ShaderData *emission_sd,
-        PathRadiance *L,
-        ccl_addr_space PathState *state,
-        ccl_addr_space Ray *ray,
-        ccl_addr_space float3 *throughput,
-        ccl_addr_space SubsurfaceIndirectRays *ss_indirect)
+    bool
+    kernel_path_subsurface_scatter(KernelGlobals *kg,
+                                   ShaderData *sd,
+                                   ShaderData *emission_sd,
+                                   PathRadiance *L,
+                                   ccl_addr_space PathState *state,
+                                   ccl_addr_space Ray *ray,
+                                   ccl_addr_space float3 *throughput,
+                                   ccl_addr_space SubsurfaceIndirectRays *ss_indirect)
 {
-	PROFILING_INIT(kg, PROFILING_SUBSURFACE);
-
-	float bssrdf_u, bssrdf_v;
-	path_state_rng_2D(kg, state, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v);
-
-	const ShaderClosure *sc = shader_bssrdf_pick(sd, throughput, &bssrdf_u);
-
-	/* do bssrdf scatter step if we picked a bssrdf closure */
-	if(sc) {
-		/* We should never have two consecutive BSSRDF bounces,
-		 * the second one should be converted to a diffuse BSDF to
-		 * avoid this.
-		 */
-		kernel_assert(!(state->flag & PATH_RAY_DIFFUSE_ANCESTOR));
-
-		uint lcg_state = lcg_state_init_addrspace(state, 0x68bc21eb);
-
-		LocalIntersection ss_isect;
-		int num_hits = subsurface_scatter_multi_intersect(kg,
-		                                                  &ss_isect,
-		                                                  sd,
-		                                                  state,
-		                                                  sc,
-		                                                  &lcg_state,
-		                                                  bssrdf_u, bssrdf_v,
-		                                                  false);
+  PROFILING_INIT(kg, PROFILING_SUBSURFACE);
+
+  float bssrdf_u, bssrdf_v;
+  path_state_rng_2D(kg, state, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v);
+
+  const ShaderClosure *sc = shader_bssrdf_pick(sd, throughput, &bssrdf_u);
+
+  /* do bssrdf scatter step if we picked a bssrdf closure */
+  if (sc) {
+    /* We should never have two consecutive BSSRDF bounces,
+     * the second one should be converted to a diffuse BSDF to
+     * avoid this.
+     */
+    kernel_assert(!(state->flag & PATH_RAY_DIFFUSE_ANCESTOR));
+
+    uint lcg_state = lcg_state_init_addrspace(state, 0x68bc21eb);
+
+    LocalIntersection ss_isect;
+    int num_hits = subsurface_scatter_multi_intersect(
+        kg, &ss_isect, sd, state, sc, &lcg_state, bssrdf_u, bssrdf_v, false);
 #  ifdef __VOLUME__
-		bool need_update_volume_stack =
-		        kernel_data.integrator.use_volumes &&
-		        sd->object_flag & SD_OBJECT_INTERSECTS_VOLUME;
-#  endif  /* __VOLUME__ */
-
-		/* Closure memory will be overwritten, so read required variables now. */
-		Bssrdf *bssrdf = (Bssrdf *)sc;
-		ClosureType bssrdf_type = sc->type;
-		float bssrdf_roughness = bssrdf->roughness;
-
-		/* compute lighting with the BSDF closure */
-		for(int hit = 0; hit < num_hits; hit++) {
-			/* NOTE: We reuse the existing ShaderData, we assume the path
-			 * integration loop stops when this function returns true.
-			 */
-			subsurface_scatter_multi_setup(kg,
-			                               &ss_isect,
-			                               hit,
-			                               sd,
-			                               state,
-			                               bssrdf_type,
-			                               bssrdf_roughness);
-
-			kernel_path_surface_connect_light(kg, sd, emission_sd, *throughput, state, L);
-
-			ccl_addr_space PathState *hit_state = &ss_indirect->state[ss_indirect->num_rays];
-			ccl_addr_space Ray *hit_ray = &ss_indirect->rays[ss_indirect->num_rays];
-			ccl_addr_space float3 *hit_tp = &ss_indirect->throughputs[ss_indirect->num_rays];
-			PathRadianceState *hit_L_state = &ss_indirect->L_state[ss_indirect->num_rays];
-
-			*hit_state = *state;
-			*hit_ray = *ray;
-			*hit_tp = *throughput;
-			*hit_L_state = L->state;
-
-			hit_state->rng_offset += PRNG_BOUNCE_NUM;
-
-			if(kernel_path_surface_bounce(kg,
-			                              sd,
-			                              hit_tp,
-			                              hit_state,
-			                              hit_L_state,
-			                              hit_ray))
-			{
+    bool need_update_volume_stack = kernel_data.integrator.use_volumes &&
+                                    sd->object_flag & SD_OBJECT_INTERSECTS_VOLUME;
+#  endif /* __VOLUME__ */
+
+    /* Closure memory will be overwritten, so read required variables now. */
+    Bssrdf *bssrdf = (Bssrdf *)sc;
+    ClosureType bssrdf_type = sc->type;
+    float bssrdf_roughness = bssrdf->roughness;
+
+    /* compute lighting with the BSDF closure */
+    for (int hit = 0; hit < num_hits; hit++) {
+      /* NOTE: We reuse the existing ShaderData, we assume the path
+       * integration loop stops when this function returns true.
+       */
+      subsurface_scatter_multi_setup(kg, &ss_isect, hit, sd, state, bssrdf_type, bssrdf_roughness);
+
+      kernel_path_surface_connect_light(kg, sd, emission_sd, *throughput, state, L);
+
+      ccl_addr_space PathState *hit_state = &ss_indirect->state[ss_indirect->num_rays];
+      ccl_addr_space Ray *hit_ray = &ss_indirect->rays[ss_indirect->num_rays];
+      ccl_addr_space float3 *hit_tp = &ss_indirect->throughputs[ss_indirect->num_rays];
+      PathRadianceState *hit_L_state = &ss_indirect->L_state[ss_indirect->num_rays];
+
+      *hit_state = *state;
+      *hit_ray = *ray;
+      *hit_tp = *throughput;
+      *hit_L_state = L->state;
+
+      hit_state->rng_offset += PRNG_BOUNCE_NUM;
+
+      if (kernel_path_surface_bounce(kg, sd, hit_tp, hit_state, hit_L_state, hit_ray)) {
 #  ifdef __LAMP_MIS__
-				hit_state->ray_t = 0.0f;
-#  endif  /* __LAMP_MIS__ */
+        hit_state->ray_t = 0.0f;
+#  endif /* __LAMP_MIS__ */
 
 #  ifdef __VOLUME__
-				if(need_update_volume_stack) {
-					Ray volume_ray = *ray;
-					/* Setup ray from previous surface point to the new one. */
-					volume_ray.D = normalize_len(hit_ray->P - volume_ray.P,
-					                             &volume_ray.t);
-
-					kernel_volume_stack_update_for_subsurface(
-					    kg,
-					    emission_sd,
-					    &volume_ray,
-					    hit_state->volume_stack);
-				}
-#  endif  /* __VOLUME__ */
-				ss_indirect->num_rays++;
-			}
-		}
-		return true;
-	}
-	return false;
+        if (need_update_volume_stack) {
+          Ray volume_ray = *ray;
+          /* Setup ray from previous surface point to the new one. */
+          volume_ray.D = normalize_len(hit_ray->P - volume_ray.P, &volume_ray.t);
+
+          kernel_volume_stack_update_for_subsurface(
+              kg, emission_sd, &volume_ray, hit_state->volume_stack);
+        }
+#  endif /* __VOLUME__ */
+        ss_indirect->num_rays++;
+      }
+    }
+    return true;
+  }
+  return false;
 }
 
 ccl_device_inline void kernel_path_subsurface_init_indirect(
-        ccl_addr_space SubsurfaceIndirectRays *ss_indirect)
+    ccl_addr_space SubsurfaceIndirectRays *ss_indirect)
 {
-	ss_indirect->num_rays = 0;
+  ss_indirect->num_rays = 0;
 }
 
 ccl_device void kernel_path_subsurface_setup_indirect(
-        KernelGlobals *kg,
-        ccl_addr_space SubsurfaceIndirectRays *ss_indirect,
-        ccl_addr_space PathState *state,
-        ccl_addr_space Ray *ray,
-        PathRadiance *L,
-        ccl_addr_space float3 *throughput)
+    KernelGlobals *kg,
+    ccl_addr_space SubsurfaceIndirectRays *ss_indirect,
+    ccl_addr_space PathState *state,
+    ccl_addr_space Ray *ray,
+    PathRadiance *L,
+    ccl_addr_space float3 *throughput)
 {
-	/* Setup state, ray and throughput for indirect SSS rays. */
-	ss_indirect->num_rays--;
+  /* Setup state, ray and throughput for indirect SSS rays. */
+  ss_indirect->num_rays--;
 
-	path_radiance_sum_indirect(L);
-	path_radiance_reset_indirect(L);
+  path_radiance_sum_indirect(L);
+  path_radiance_reset_indirect(L);
 
-	*state = ss_indirect->state[ss_indirect->num_rays];
-	*ray = ss_indirect->rays[ss_indirect->num_rays];
-	L->state = ss_indirect->L_state[ss_indirect->num_rays];
-	*throughput = ss_indirect->throughputs[ss_indirect->num_rays];
+  *state = ss_indirect->state[ss_indirect->num_rays];
+  *ray = ss_indirect->rays[ss_indirect->num_rays];
+  L->state = ss_indirect->L_state[ss_indirect->num_rays];
+  *throughput = ss_indirect->throughputs[ss_indirect->num_rays];
 
-	state->rng_offset += ss_indirect->num_rays * PRNG_BOUNCE_NUM;
+  state->rng_offset += ss_indirect->num_rays * PRNG_BOUNCE_NUM;
 }
 
-#endif  /* __SUBSURFACE__ */
+#endif /* __SUBSURFACE__ */
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_path_surface.h b/intern/cycles/kernel/kernel_path_surface.h
index 0d18a1e8c77..6251313c5f8 100644
--- a/intern/cycles/kernel/kernel_path_surface.h
+++ b/intern/cycles/kernel/kernel_path_surface.h
@@ -16,255 +16,280 @@
 
 CCL_NAMESPACE_BEGIN
 
-#if defined(__BRANCHED_PATH__) || defined(__SUBSURFACE__) || defined(__SHADOW_TRICKS__) || defined(__BAKING__)
+#if defined(__BRANCHED_PATH__) || defined(__SUBSURFACE__) || defined(__SHADOW_TRICKS__) || \
+    defined(__BAKING__)
 /* branched path tracing: connect path directly to position on one or more lights and add it to L */
 ccl_device_noinline void kernel_branched_path_surface_connect_light(
-        KernelGlobals *kg,
-        ShaderData *sd,
-        ShaderData *emission_sd,
-        ccl_addr_space PathState *state,
-        float3 throughput,
-        float num_samples_adjust,
-        PathRadiance *L,
-        int sample_all_lights)
+    KernelGlobals *kg,
+    ShaderData *sd,
+    ShaderData *emission_sd,
+    ccl_addr_space PathState *state,
+    float3 throughput,
+    float num_samples_adjust,
+    PathRadiance *L,
+    int sample_all_lights)
 {
-#ifdef __EMISSION__
-	/* sample illumination from lights to find path contribution */
-	if(!(sd->flag & SD_BSDF_HAS_EVAL))
-		return;
-
-	Ray light_ray;
-	BsdfEval L_light;
-	bool is_lamp;
-
-#  ifdef __OBJECT_MOTION__
-	light_ray.time = sd->time;
+#  ifdef __EMISSION__
+  /* sample illumination from lights to find path contribution */
+  if (!(sd->flag & SD_BSDF_HAS_EVAL))
+    return;
+
+  Ray light_ray;
+  BsdfEval L_light;
+  bool is_lamp;
+
+#    ifdef __OBJECT_MOTION__
+  light_ray.time = sd->time;
+#    endif
+
+  if (sample_all_lights) {
+    /* lamp sampling */
+    for (int i = 0; i < kernel_data.integrator.num_all_lights; i++) {
+      if (UNLIKELY(light_select_reached_max_bounces(kg, i, state->bounce)))
+        continue;
+
+      int num_samples = ceil_to_int(num_samples_adjust * light_select_num_samples(kg, i));
+      float num_samples_inv = num_samples_adjust /
+                              (num_samples * kernel_data.integrator.num_all_lights);
+      uint lamp_rng_hash = cmj_hash(state->rng_hash, i);
+
+      for (int j = 0; j < num_samples; j++) {
+        float light_u, light_v;
+        path_branched_rng_2D(
+            kg, lamp_rng_hash, state, j, num_samples, PRNG_LIGHT_U, &light_u, &light_v);
+        float terminate = path_branched_rng_light_termination(
+            kg, lamp_rng_hash, state, j, num_samples);
+
+        LightSample ls;
+        if (lamp_light_sample(kg, i, light_u, light_v, sd->P, &ls)) {
+          /* The sampling probability returned by lamp_light_sample assumes that all lights were sampled.
+           * However, this code only samples lamps, so if the scene also had mesh lights, the real probability is twice as high. */
+          if (kernel_data.integrator.pdf_triangles != 0.0f)
+            ls.pdf *= 2.0f;
+
+          if (direct_emission(
+                  kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate)) {
+            /* trace shadow ray */
+            float3 shadow;
+
+            if (!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow)) {
+              /* accumulate */
+              path_radiance_accum_light(L,
+                                        state,
+                                        throughput * num_samples_inv,
+                                        &L_light,
+                                        shadow,
+                                        num_samples_inv,
+                                        is_lamp);
+            }
+            else {
+              path_radiance_accum_total_light(L, state, throughput * num_samples_inv, &L_light);
+            }
+          }
+        }
+      }
+    }
+
+    /* mesh light sampling */
+    if (kernel_data.integrator.pdf_triangles != 0.0f) {
+      int num_samples = ceil_to_int(num_samples_adjust *
+                                    kernel_data.integrator.mesh_light_samples);
+      float num_samples_inv = num_samples_adjust / num_samples;
+
+      for (int j = 0; j < num_samples; j++) {
+        float light_u, light_v;
+        path_branched_rng_2D(
+            kg, state->rng_hash, state, j, num_samples, PRNG_LIGHT_U, &light_u, &light_v);
+        float terminate = path_branched_rng_light_termination(
+            kg, state->rng_hash, state, j, num_samples);
+
+        /* only sample triangle lights */
+        if (kernel_data.integrator.num_all_lights)
+          light_u = 0.5f * light_u;
+
+        LightSample ls;
+        if (light_sample(kg, light_u, light_v, sd->time, sd->P, state->bounce, &ls)) {
+          /* Same as above, probability needs to be corrected since the sampling was forced to select a mesh light. */
+          if (kernel_data.integrator.num_all_lights)
+            ls.pdf *= 2.0f;
+
+          if (direct_emission(
+                  kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate)) {
+            /* trace shadow ray */
+            float3 shadow;
+
+            if (!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow)) {
+              /* accumulate */
+              path_radiance_accum_light(L,
+                                        state,
+                                        throughput * num_samples_inv,
+                                        &L_light,
+                                        shadow,
+                                        num_samples_inv,
+                                        is_lamp);
+            }
+            else {
+              path_radiance_accum_total_light(L, state, throughput * num_samples_inv, &L_light);
+            }
+          }
+        }
+      }
+    }
+  }
+  else {
+    /* sample one light at random */
+    float light_u, light_v;
+    path_state_rng_2D(kg, state, PRNG_LIGHT_U, &light_u, &light_v);
+    float terminate = path_state_rng_light_termination(kg, state);
+
+    LightSample ls;
+    if (light_sample(kg, light_u, light_v, sd->time, sd->P, state->bounce, &ls)) {
+      /* sample random light */
+      if (direct_emission(
+              kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate)) {
+        /* trace shadow ray */
+        float3 shadow;
+
+        if (!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow)) {
+          /* accumulate */
+          path_radiance_accum_light(L,
+                                    state,
+                                    throughput * num_samples_adjust,
+                                    &L_light,
+                                    shadow,
+                                    num_samples_adjust,
+                                    is_lamp);
+        }
+        else {
+          path_radiance_accum_total_light(L, state, throughput * num_samples_adjust, &L_light);
+        }
+      }
+    }
+  }
 #  endif
-
-	if(sample_all_lights) {
-		/* lamp sampling */
-		for(int i = 0; i < kernel_data.integrator.num_all_lights; i++) {
-			if(UNLIKELY(light_select_reached_max_bounces(kg, i, state->bounce)))
-				continue;
-
-			int num_samples = ceil_to_int(num_samples_adjust*light_select_num_samples(kg, i));
-			float num_samples_inv = num_samples_adjust/(num_samples*kernel_data.integrator.num_all_lights);
-			uint lamp_rng_hash = cmj_hash(state->rng_hash, i);
-
-			for(int j = 0; j < num_samples; j++) {
-				float light_u, light_v;
-				path_branched_rng_2D(kg, lamp_rng_hash, state, j, num_samples, PRNG_LIGHT_U, &light_u, &light_v);
-				float terminate = path_branched_rng_light_termination(kg, lamp_rng_hash, state, j, num_samples);
-
-				LightSample ls;
-				if(lamp_light_sample(kg, i, light_u, light_v, sd->P, &ls)) {
-					/* The sampling probability returned by lamp_light_sample assumes that all lights were sampled.
-					 * However, this code only samples lamps, so if the scene also had mesh lights, the real probability is twice as high. */
-					if(kernel_data.integrator.pdf_triangles != 0.0f)
-						ls.pdf *= 2.0f;
-
-					if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate)) {
-						/* trace shadow ray */
-						float3 shadow;
-
-						if(!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow)) {
-							/* accumulate */
-							path_radiance_accum_light(L, state, throughput*num_samples_inv, &L_light, shadow, num_samples_inv, is_lamp);
-						}
-						else {
-							path_radiance_accum_total_light(L, state, throughput*num_samples_inv, &L_light);
-						}
-					}
-				}
-			}
-		}
-
-		/* mesh light sampling */
-		if(kernel_data.integrator.pdf_triangles != 0.0f) {
-			int num_samples = ceil_to_int(num_samples_adjust*kernel_data.integrator.mesh_light_samples);
-			float num_samples_inv = num_samples_adjust/num_samples;
-
-			for(int j = 0; j < num_samples; j++) {
-				float light_u, light_v;
-				path_branched_rng_2D(kg, state->rng_hash, state, j, num_samples, PRNG_LIGHT_U, &light_u, &light_v);
-				float terminate = path_branched_rng_light_termination(kg, state->rng_hash, state, j, num_samples);
-
-				/* only sample triangle lights */
-				if(kernel_data.integrator.num_all_lights)
-					light_u = 0.5f*light_u;
-
-				LightSample ls;
-				if(light_sample(kg, light_u, light_v, sd->time, sd->P, state->bounce, &ls)) {
-					/* Same as above, probability needs to be corrected since the sampling was forced to select a mesh light. */
-					if(kernel_data.integrator.num_all_lights)
-						ls.pdf *= 2.0f;
-
-					if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate)) {
-						/* trace shadow ray */
-						float3 shadow;
-
-						if(!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow)) {
-							/* accumulate */
-							path_radiance_accum_light(L, state, throughput*num_samples_inv, &L_light, shadow, num_samples_inv, is_lamp);
-						}
-						else {
-							path_radiance_accum_total_light(L, state, throughput*num_samples_inv, &L_light);
-						}
-					}
-				}
-			}
-		}
-	}
-	else {
-		/* sample one light at random */
-		float light_u, light_v;
-		path_state_rng_2D(kg, state, PRNG_LIGHT_U, &light_u, &light_v);
-		float terminate = path_state_rng_light_termination(kg, state);
-
-		LightSample ls;
-		if(light_sample(kg, light_u, light_v, sd->time, sd->P, state->bounce, &ls)) {
-			/* sample random light */
-			if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate)) {
-				/* trace shadow ray */
-				float3 shadow;
-
-				if(!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow)) {
-					/* accumulate */
-					path_radiance_accum_light(L, state, throughput*num_samples_adjust, &L_light, shadow, num_samples_adjust, is_lamp);
-				}
-				else {
-					path_radiance_accum_total_light(L, state, throughput*num_samples_adjust, &L_light);
-				}
-			}
-		}
-	}
-#endif
 }
 
 /* branched path tracing: bounce off or through surface to with new direction stored in ray */
-ccl_device bool kernel_branched_path_surface_bounce(
-        KernelGlobals *kg,
-        ShaderData *sd,
-        const ShaderClosure *sc,
-        int sample,
-        int num_samples,
-        ccl_addr_space float3 *throughput,
-        ccl_addr_space PathState *state,
-        PathRadianceState *L_state,
-        ccl_addr_space Ray *ray,
-        float sum_sample_weight)
+ccl_device bool kernel_branched_path_surface_bounce(KernelGlobals *kg,
+                                                    ShaderData *sd,
+                                                    const ShaderClosure *sc,
+                                                    int sample,
+                                                    int num_samples,
+                                                    ccl_addr_space float3 *throughput,
+                                                    ccl_addr_space PathState *state,
+                                                    PathRadianceState *L_state,
+                                                    ccl_addr_space Ray *ray,
+                                                    float sum_sample_weight)
 {
-	/* sample BSDF */
-	float bsdf_pdf;
-	BsdfEval bsdf_eval;
-	float3 bsdf_omega_in;
-	differential3 bsdf_domega_in;
-	float bsdf_u, bsdf_v;
-	path_branched_rng_2D(kg, state->rng_hash, state, sample, num_samples, PRNG_BSDF_U, &bsdf_u, &bsdf_v);
-	int label;
-
-	label = shader_bsdf_sample_closure(kg, sd, sc, bsdf_u, bsdf_v, &bsdf_eval,
-		&bsdf_omega_in, &bsdf_domega_in, &bsdf_pdf);
-
-	if(bsdf_pdf == 0.0f || bsdf_eval_is_zero(&bsdf_eval))
-		return false;
-
-	/* modify throughput */
-	path_radiance_bsdf_bounce(kg, L_state, throughput, &bsdf_eval, bsdf_pdf, state->bounce, label);
-
-#ifdef __DENOISING_FEATURES__
-	state->denoising_feature_weight *= sc->sample_weight / (sum_sample_weight * num_samples);
-#endif
+  /* sample BSDF */
+  float bsdf_pdf;
+  BsdfEval bsdf_eval;
+  float3 bsdf_omega_in;
+  differential3 bsdf_domega_in;
+  float bsdf_u, bsdf_v;
+  path_branched_rng_2D(
+      kg, state->rng_hash, state, sample, num_samples, PRNG_BSDF_U, &bsdf_u, &bsdf_v);
+  int label;
+
+  label = shader_bsdf_sample_closure(
+      kg, sd, sc, bsdf_u, bsdf_v, &bsdf_eval, &bsdf_omega_in, &bsdf_domega_in, &bsdf_pdf);
+
+  if (bsdf_pdf == 0.0f || bsdf_eval_is_zero(&bsdf_eval))
+    return false;
+
+  /* modify throughput */
+  path_radiance_bsdf_bounce(kg, L_state, throughput, &bsdf_eval, bsdf_pdf, state->bounce, label);
+
+#  ifdef __DENOISING_FEATURES__
+  state->denoising_feature_weight *= sc->sample_weight / (sum_sample_weight * num_samples);
+#  endif
 
-	/* modify path state */
-	path_state_next(kg, state, label);
+  /* modify path state */
+  path_state_next(kg, state, label);
 
-	/* setup ray */
-	ray->P = ray_offset(sd->P, (label & LABEL_TRANSMIT)? -sd->Ng: sd->Ng);
-	ray->D = normalize(bsdf_omega_in);
-	ray->t = FLT_MAX;
-#ifdef __RAY_DIFFERENTIALS__
-	ray->dP = sd->dP;
-	ray->dD = bsdf_domega_in;
-#endif
-#ifdef __OBJECT_MOTION__
-	ray->time = sd->time;
-#endif
+  /* setup ray */
+  ray->P = ray_offset(sd->P, (label & LABEL_TRANSMIT) ? -sd->Ng : sd->Ng);
+  ray->D = normalize(bsdf_omega_in);
+  ray->t = FLT_MAX;
+#  ifdef __RAY_DIFFERENTIALS__
+  ray->dP = sd->dP;
+  ray->dD = bsdf_domega_in;
+#  endif
+#  ifdef __OBJECT_MOTION__
+  ray->time = sd->time;
+#  endif
 
-#ifdef __VOLUME__
-	/* enter/exit volume */
-	if(label & LABEL_TRANSMIT)
-		kernel_volume_stack_enter_exit(kg, sd, state->volume_stack);
-#endif
+#  ifdef __VOLUME__
+  /* enter/exit volume */
+  if (label & LABEL_TRANSMIT)
+    kernel_volume_stack_enter_exit(kg, sd, state->volume_stack);
+#  endif
 
-	/* branch RNG state */
-	path_state_branch(state, sample, num_samples);
+  /* branch RNG state */
+  path_state_branch(state, sample, num_samples);
 
-	/* set MIS state */
-	state->min_ray_pdf = fminf(bsdf_pdf, FLT_MAX);
-	state->ray_pdf = bsdf_pdf;
-#ifdef __LAMP_MIS__
-	state->ray_t = 0.0f;
-#endif
+  /* set MIS state */
+  state->min_ray_pdf = fminf(bsdf_pdf, FLT_MAX);
+  state->ray_pdf = bsdf_pdf;
+#  ifdef __LAMP_MIS__
+  state->ray_t = 0.0f;
+#  endif
 
-	return true;
+  return true;
 }
 
 #endif
 
 /* path tracing: connect path directly to position on a light and add it to L */
 ccl_device_inline void kernel_path_surface_connect_light(KernelGlobals *kg,
-	ShaderData *sd, ShaderData *emission_sd, float3 throughput, ccl_addr_space PathState *state,
-	PathRadiance *L)
+                                                         ShaderData *sd,
+                                                         ShaderData *emission_sd,
+                                                         float3 throughput,
+                                                         ccl_addr_space PathState *state,
+                                                         PathRadiance *L)
 {
-	PROFILING_INIT(kg, PROFILING_CONNECT_LIGHT);
+  PROFILING_INIT(kg, PROFILING_CONNECT_LIGHT);
 
 #ifdef __EMISSION__
-	if(!(kernel_data.integrator.use_direct_light && (sd->flag & SD_BSDF_HAS_EVAL)))
-		return;
-
-#ifdef __SHADOW_TRICKS__
-	if(state->flag & PATH_RAY_SHADOW_CATCHER) {
-		kernel_branched_path_surface_connect_light(kg,
-		                                           sd,
-		                                           emission_sd,
-		                                           state,
-		                                           throughput,
-		                                           1.0f,
-		                                           L,
-		                                           1);
-		return;
-	}
-#endif
+  if (!(kernel_data.integrator.use_direct_light && (sd->flag & SD_BSDF_HAS_EVAL)))
+    return;
+
+#  ifdef __SHADOW_TRICKS__
+  if (state->flag & PATH_RAY_SHADOW_CATCHER) {
+    kernel_branched_path_surface_connect_light(kg, sd, emission_sd, state, throughput, 1.0f, L, 1);
+    return;
+  }
+#  endif
 
-	/* sample illumination from lights to find path contribution */
-	float light_u, light_v;
-	path_state_rng_2D(kg, state, PRNG_LIGHT_U, &light_u, &light_v);
+  /* sample illumination from lights to find path contribution */
+  float light_u, light_v;
+  path_state_rng_2D(kg, state, PRNG_LIGHT_U, &light_u, &light_v);
 
-	Ray light_ray;
-	BsdfEval L_light;
-	bool is_lamp;
+  Ray light_ray;
+  BsdfEval L_light;
+  bool is_lamp;
 
-#ifdef __OBJECT_MOTION__
-	light_ray.time = sd->time;
-#endif
+#  ifdef __OBJECT_MOTION__
+  light_ray.time = sd->time;
+#  endif
 
-	LightSample ls;
-	if(light_sample(kg, light_u, light_v, sd->time, sd->P, state->bounce, &ls)) {
-		float terminate = path_state_rng_light_termination(kg, state);
-		if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate)) {
-			/* trace shadow ray */
-			float3 shadow;
-
-			if(!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow)) {
-				/* accumulate */
-				path_radiance_accum_light(L, state, throughput, &L_light, shadow, 1.0f, is_lamp);
-			}
-			else {
-				path_radiance_accum_total_light(L, state, throughput, &L_light);
-			}
-		}
-	}
+  LightSample ls;
+  if (light_sample(kg, light_u, light_v, sd->time, sd->P, state->bounce, &ls)) {
+    float terminate = path_state_rng_light_termination(kg, state);
+    if (direct_emission(
+            kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate)) {
+      /* trace shadow ray */
+      float3 shadow;
+
+      if (!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow)) {
+        /* accumulate */
+        path_radiance_accum_light(L, state, throughput, &L_light, shadow, 1.0f, is_lamp);
+      }
+      else {
+        path_radiance_accum_total_light(L, state, throughput, &L_light);
+      }
+    }
+  }
 #endif
 }
 
@@ -276,87 +301,87 @@ ccl_device bool kernel_path_surface_bounce(KernelGlobals *kg,
                                            PathRadianceState *L_state,
                                            ccl_addr_space Ray *ray)
 {
-	PROFILING_INIT(kg, PROFILING_SURFACE_BOUNCE);
-
-	/* no BSDF? we can stop here */
-	if(sd->flag & SD_BSDF) {
-		/* sample BSDF */
-		float bsdf_pdf;
-		BsdfEval bsdf_eval;
-		float3 bsdf_omega_in;
-		differential3 bsdf_domega_in;
-		float bsdf_u, bsdf_v;
-		path_state_rng_2D(kg, state, PRNG_BSDF_U, &bsdf_u, &bsdf_v);
-		int label;
-
-		label = shader_bsdf_sample(kg, sd, bsdf_u, bsdf_v, &bsdf_eval,
-			&bsdf_omega_in, &bsdf_domega_in, &bsdf_pdf);
-
-		if(bsdf_pdf == 0.0f || bsdf_eval_is_zero(&bsdf_eval))
-			return false;
-
-		/* modify throughput */
-		path_radiance_bsdf_bounce(kg, L_state, throughput, &bsdf_eval, bsdf_pdf, state->bounce, label);
-
-		/* set labels */
-		if(!(label & LABEL_TRANSPARENT)) {
-			state->ray_pdf = bsdf_pdf;
+  PROFILING_INIT(kg, PROFILING_SURFACE_BOUNCE);
+
+  /* no BSDF? we can stop here */
+  if (sd->flag & SD_BSDF) {
+    /* sample BSDF */
+    float bsdf_pdf;
+    BsdfEval bsdf_eval;
+    float3 bsdf_omega_in;
+    differential3 bsdf_domega_in;
+    float bsdf_u, bsdf_v;
+    path_state_rng_2D(kg, state, PRNG_BSDF_U, &bsdf_u, &bsdf_v);
+    int label;
+
+    label = shader_bsdf_sample(
+        kg, sd, bsdf_u, bsdf_v, &bsdf_eval, &bsdf_omega_in, &bsdf_domega_in, &bsdf_pdf);
+
+    if (bsdf_pdf == 0.0f || bsdf_eval_is_zero(&bsdf_eval))
+      return false;
+
+    /* modify throughput */
+    path_radiance_bsdf_bounce(kg, L_state, throughput, &bsdf_eval, bsdf_pdf, state->bounce, label);
+
+    /* set labels */
+    if (!(label & LABEL_TRANSPARENT)) {
+      state->ray_pdf = bsdf_pdf;
 #ifdef __LAMP_MIS__
-			state->ray_t = 0.0f;
+      state->ray_t = 0.0f;
 #endif
-			state->min_ray_pdf = fminf(bsdf_pdf, state->min_ray_pdf);
-		}
+      state->min_ray_pdf = fminf(bsdf_pdf, state->min_ray_pdf);
+    }
 
-		/* update path state */
-		path_state_next(kg, state, label);
+    /* update path state */
+    path_state_next(kg, state, label);
 
-		/* setup ray */
-		ray->P = ray_offset(sd->P, (label & LABEL_TRANSMIT)? -sd->Ng: sd->Ng);
-		ray->D = normalize(bsdf_omega_in);
+    /* setup ray */
+    ray->P = ray_offset(sd->P, (label & LABEL_TRANSMIT) ? -sd->Ng : sd->Ng);
+    ray->D = normalize(bsdf_omega_in);
 
-		if(state->bounce == 0)
-			ray->t -= sd->ray_length; /* clipping works through transparent */
-		else
-			ray->t = FLT_MAX;
+    if (state->bounce == 0)
+      ray->t -= sd->ray_length; /* clipping works through transparent */
+    else
+      ray->t = FLT_MAX;
 
 #ifdef __RAY_DIFFERENTIALS__
-		ray->dP = sd->dP;
-		ray->dD = bsdf_domega_in;
+    ray->dP = sd->dP;
+    ray->dD = bsdf_domega_in;
 #endif
 
 #ifdef __VOLUME__
-		/* enter/exit volume */
-		if(label & LABEL_TRANSMIT)
-			kernel_volume_stack_enter_exit(kg, sd, state->volume_stack);
+    /* enter/exit volume */
+    if (label & LABEL_TRANSMIT)
+      kernel_volume_stack_enter_exit(kg, sd, state->volume_stack);
 #endif
-		return true;
-	}
+    return true;
+  }
 #ifdef __VOLUME__
-	else if(sd->flag & SD_HAS_ONLY_VOLUME) {
-		if(!path_state_volume_next(kg, state)) {
-			return false;
-		}
-
-		if(state->bounce == 0)
-			ray->t -= sd->ray_length; /* clipping works through transparent */
-		else
-			ray->t = FLT_MAX;
-
-		/* setup ray position, direction stays unchanged */
-		ray->P = ray_offset(sd->P, -sd->Ng);
-#ifdef __RAY_DIFFERENTIALS__
-		ray->dP = sd->dP;
-#endif
+  else if (sd->flag & SD_HAS_ONLY_VOLUME) {
+    if (!path_state_volume_next(kg, state)) {
+      return false;
+    }
+
+    if (state->bounce == 0)
+      ray->t -= sd->ray_length; /* clipping works through transparent */
+    else
+      ray->t = FLT_MAX;
+
+    /* setup ray position, direction stays unchanged */
+    ray->P = ray_offset(sd->P, -sd->Ng);
+#  ifdef __RAY_DIFFERENTIALS__
+    ray->dP = sd->dP;
+#  endif
 
-		/* enter/exit volume */
-		kernel_volume_stack_enter_exit(kg, sd, state->volume_stack);
-		return true;
-	}
+    /* enter/exit volume */
+    kernel_volume_stack_enter_exit(kg, sd, state->volume_stack);
+    return true;
+  }
 #endif
-	else {
-		/* no bsdf or volume? */
-		return false;
-	}
+  else {
+    /* no bsdf or volume? */
+    return false;
+  }
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_path_volume.h b/intern/cycles/kernel/kernel_path_volume.h
index d2506fc1e7e..fea4dfc159d 100644
--- a/intern/cycles/kernel/kernel_path_volume.h
+++ b/intern/cycles/kernel/kernel_path_volume.h
@@ -18,269 +18,307 @@ CCL_NAMESPACE_BEGIN
 
 #ifdef __VOLUME_SCATTER__
 
-ccl_device_inline void kernel_path_volume_connect_light(
-        KernelGlobals *kg,
-        ShaderData *sd,
-        ShaderData *emission_sd,
-        float3 throughput,
-        ccl_addr_space PathState *state,
-        PathRadiance *L)
+ccl_device_inline void kernel_path_volume_connect_light(KernelGlobals *kg,
+                                                        ShaderData *sd,
+                                                        ShaderData *emission_sd,
+                                                        float3 throughput,
+                                                        ccl_addr_space PathState *state,
+                                                        PathRadiance *L)
 {
-#ifdef __EMISSION__
-	if(!kernel_data.integrator.use_direct_light)
-		return;
-
-	/* sample illumination from lights to find path contribution */
-	float light_u, light_v;
-	path_state_rng_2D(kg, state, PRNG_LIGHT_U, &light_u, &light_v);
-
-	Ray light_ray;
-	BsdfEval L_light;
-	LightSample ls;
-	bool is_lamp;
-
-	/* connect to light from given point where shader has been evaluated */
-	light_ray.time = sd->time;
-
-	if(light_sample(kg, light_u, light_v, sd->time, sd->P, state->bounce, &ls))
-	{
-		float terminate = path_state_rng_light_termination(kg, state);
-		if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate)) {
-			/* trace shadow ray */
-			float3 shadow;
-
-			if(!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow)) {
-				/* accumulate */
-				path_radiance_accum_light(L, state, throughput, &L_light, shadow, 1.0f, is_lamp);
-			}
-		}
-	}
-#endif  /* __EMISSION__ */
+#  ifdef __EMISSION__
+  if (!kernel_data.integrator.use_direct_light)
+    return;
+
+  /* sample illumination from lights to find path contribution */
+  float light_u, light_v;
+  path_state_rng_2D(kg, state, PRNG_LIGHT_U, &light_u, &light_v);
+
+  Ray light_ray;
+  BsdfEval L_light;
+  LightSample ls;
+  bool is_lamp;
+
+  /* connect to light from given point where shader has been evaluated */
+  light_ray.time = sd->time;
+
+  if (light_sample(kg, light_u, light_v, sd->time, sd->P, state->bounce, &ls)) {
+    float terminate = path_state_rng_light_termination(kg, state);
+    if (direct_emission(
+            kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate)) {
+      /* trace shadow ray */
+      float3 shadow;
+
+      if (!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow)) {
+        /* accumulate */
+        path_radiance_accum_light(L, state, throughput, &L_light, shadow, 1.0f, is_lamp);
+      }
+    }
+  }
+#  endif /* __EMISSION__ */
 }
 
-#ifdef __KERNEL_GPU__
+#  ifdef __KERNEL_GPU__
 ccl_device_noinline
-#else
+#  else
 ccl_device
-#endif
-bool kernel_path_volume_bounce(
-    KernelGlobals *kg,
-    ShaderData *sd,
-    ccl_addr_space float3 *throughput,
-    ccl_addr_space PathState *state,
-    PathRadianceState *L_state,
-    ccl_addr_space Ray *ray)
+#  endif
+    bool
+    kernel_path_volume_bounce(KernelGlobals *kg,
+                              ShaderData *sd,
+                              ccl_addr_space float3 *throughput,
+                              ccl_addr_space PathState *state,
+                              PathRadianceState *L_state,
+                              ccl_addr_space Ray *ray)
 {
-	/* sample phase function */
-	float phase_pdf;
-	BsdfEval phase_eval;
-	float3 phase_omega_in;
-	differential3 phase_domega_in;
-	float phase_u, phase_v;
-	path_state_rng_2D(kg, state, PRNG_BSDF_U, &phase_u, &phase_v);
-	int label;
-
-	label = shader_volume_phase_sample(kg, sd, phase_u, phase_v, &phase_eval,
-		&phase_omega_in, &phase_domega_in, &phase_pdf);
-
-	if(phase_pdf == 0.0f || bsdf_eval_is_zero(&phase_eval))
-		return false;
-
-	/* modify throughput */
-	path_radiance_bsdf_bounce(kg, L_state, throughput, &phase_eval, phase_pdf, state->bounce, label);
-
-	/* set labels */
-	state->ray_pdf = phase_pdf;
-#ifdef __LAMP_MIS__
-	state->ray_t = 0.0f;
-#endif
-	state->min_ray_pdf = fminf(phase_pdf, state->min_ray_pdf);
-
-	/* update path state */
-	path_state_next(kg, state, label);
-
-	/* Russian roulette termination of volume ray scattering. */
-	float probability = path_state_continuation_probability(kg, state, *throughput);
-
-	if(probability == 0.0f) {
-		return false;
-	}
-	else if(probability != 1.0f) {
-		/* Use dimension from the previous bounce, has not been used yet. */
-		float terminate = path_state_rng_1D(kg, state, PRNG_TERMINATE - PRNG_BOUNCE_NUM);
-
-		if(terminate >= probability) {
-			return false;
-		}
-
-		*throughput /= probability;
-	}
-
-	/* setup ray */
-	ray->P = sd->P;
-	ray->D = phase_omega_in;
-	ray->t = FLT_MAX;
-
-#ifdef __RAY_DIFFERENTIALS__
-	ray->dP = sd->dP;
-	ray->dD = phase_domega_in;
-#endif
-
-	return true;
+  /* sample phase function */
+  float phase_pdf;
+  BsdfEval phase_eval;
+  float3 phase_omega_in;
+  differential3 phase_domega_in;
+  float phase_u, phase_v;
+  path_state_rng_2D(kg, state, PRNG_BSDF_U, &phase_u, &phase_v);
+  int label;
+
+  label = shader_volume_phase_sample(
+      kg, sd, phase_u, phase_v, &phase_eval, &phase_omega_in, &phase_domega_in, &phase_pdf);
+
+  if (phase_pdf == 0.0f || bsdf_eval_is_zero(&phase_eval))
+    return false;
+
+  /* modify throughput */
+  path_radiance_bsdf_bounce(kg, L_state, throughput, &phase_eval, phase_pdf, state->bounce, label);
+
+  /* set labels */
+  state->ray_pdf = phase_pdf;
+#  ifdef __LAMP_MIS__
+  state->ray_t = 0.0f;
+#  endif
+  state->min_ray_pdf = fminf(phase_pdf, state->min_ray_pdf);
+
+  /* update path state */
+  path_state_next(kg, state, label);
+
+  /* Russian roulette termination of volume ray scattering. */
+  float probability = path_state_continuation_probability(kg, state, *throughput);
+
+  if (probability == 0.0f) {
+    return false;
+  }
+  else if (probability != 1.0f) {
+    /* Use dimension from the previous bounce, has not been used yet. */
+    float terminate = path_state_rng_1D(kg, state, PRNG_TERMINATE - PRNG_BOUNCE_NUM);
+
+    if (terminate >= probability) {
+      return false;
+    }
+
+    *throughput /= probability;
+  }
+
+  /* setup ray */
+  ray->P = sd->P;
+  ray->D = phase_omega_in;
+  ray->t = FLT_MAX;
+
+#  ifdef __RAY_DIFFERENTIALS__
+  ray->dP = sd->dP;
+  ray->dD = phase_domega_in;
+#  endif
+
+  return true;
 }
 
-#ifndef __SPLIT_KERNEL__
-ccl_device void kernel_branched_path_volume_connect_light(
-        KernelGlobals *kg,
-        ShaderData *sd,
-        ShaderData *emission_sd,
-        float3 throughput,
-        ccl_addr_space PathState *state,
-        PathRadiance *L,
-        bool sample_all_lights,
-        Ray *ray,
-        const VolumeSegment *segment)
+#  ifndef __SPLIT_KERNEL__
+ccl_device void kernel_branched_path_volume_connect_light(KernelGlobals *kg,
+                                                          ShaderData *sd,
+                                                          ShaderData *emission_sd,
+                                                          float3 throughput,
+                                                          ccl_addr_space PathState *state,
+                                                          PathRadiance *L,
+                                                          bool sample_all_lights,
+                                                          Ray *ray,
+                                                          const VolumeSegment *segment)
 {
-#ifdef __EMISSION__
-	if(!kernel_data.integrator.use_direct_light)
-		return;
-
-	Ray light_ray;
-	BsdfEval L_light;
-	bool is_lamp;
-
-	light_ray.time = sd->time;
-
-	if(sample_all_lights) {
-		/* lamp sampling */
-		for(int i = 0; i < kernel_data.integrator.num_all_lights; i++) {
-			if(UNLIKELY(light_select_reached_max_bounces(kg, i, state->bounce)))
-				continue;
-
-			int num_samples = light_select_num_samples(kg, i);
-			float num_samples_inv = 1.0f/(num_samples*kernel_data.integrator.num_all_lights);
-			uint lamp_rng_hash = cmj_hash(state->rng_hash, i);
-
-			for(int j = 0; j < num_samples; j++) {
-				/* sample random position on given light */
-				float light_u, light_v;
-				path_branched_rng_2D(kg, lamp_rng_hash, state, j, num_samples, PRNG_LIGHT_U, &light_u, &light_v);
-
-				LightSample ls;
-				lamp_light_sample(kg, i, light_u, light_v, ray->P, &ls);
-
-				float3 tp = throughput;
-
-				/* sample position on volume segment */
-				float rphase = path_branched_rng_1D(kg, state->rng_hash, state, j, num_samples, PRNG_PHASE_CHANNEL);
-				float rscatter = path_branched_rng_1D(kg, state->rng_hash, state, j, num_samples, PRNG_SCATTER_DISTANCE);
-
-				VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg,
-					state, ray, sd, &tp, rphase, rscatter, segment, (ls.t != FLT_MAX)? &ls.P: NULL, false);
-
-				/* todo: split up light_sample so we don't have to call it again with new position */
-				if(result == VOLUME_PATH_SCATTERED &&
-				   lamp_light_sample(kg, i, light_u, light_v, sd->P, &ls)) {
-					if(kernel_data.integrator.pdf_triangles != 0.0f)
-						ls.pdf *= 2.0f;
-
-					float terminate = path_branched_rng_light_termination(kg, state->rng_hash, state, j, num_samples);
-					if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate)) {
-						/* trace shadow ray */
-						float3 shadow;
-
-						if(!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow)) {
-							/* accumulate */
-							path_radiance_accum_light(L, state, tp*num_samples_inv, &L_light, shadow, num_samples_inv, is_lamp);
-						}
-					}
-				}
-			}
-		}
-
-		/* mesh light sampling */
-		if(kernel_data.integrator.pdf_triangles != 0.0f) {
-			int num_samples = kernel_data.integrator.mesh_light_samples;
-			float num_samples_inv = 1.0f/num_samples;
-
-			for(int j = 0; j < num_samples; j++) {
-				/* sample random position on random triangle */
-				float light_u, light_v;
-				path_branched_rng_2D(kg, state->rng_hash, state, j, num_samples, PRNG_LIGHT_U, &light_u, &light_v);
-
-				/* only sample triangle lights */
-				if(kernel_data.integrator.num_all_lights)
-					light_u = 0.5f*light_u;
-
-				LightSample ls;
-				light_sample(kg, light_u, light_v, sd->time, ray->P, state->bounce, &ls);
-
-				float3 tp = throughput;
-
-				/* sample position on volume segment */
-				float rphase = path_branched_rng_1D(kg, state->rng_hash, state, j, num_samples, PRNG_PHASE_CHANNEL);
-				float rscatter = path_branched_rng_1D(kg, state->rng_hash, state, j, num_samples, PRNG_SCATTER_DISTANCE);
-
-				VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg,
-					state, ray, sd, &tp, rphase, rscatter, segment, (ls.t != FLT_MAX)? &ls.P: NULL, false);
-
-				/* todo: split up light_sample so we don't have to call it again with new position */
-				if(result == VOLUME_PATH_SCATTERED &&
-				   light_sample(kg, light_u, light_v, sd->time, sd->P, state->bounce, &ls)) {
-					if(kernel_data.integrator.num_all_lights)
-						ls.pdf *= 2.0f;
-
-					float terminate = path_branched_rng_light_termination(kg, state->rng_hash, state, j, num_samples);
-					if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate)) {
-						/* trace shadow ray */
-						float3 shadow;
-
-						if(!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow)) {
-							/* accumulate */
-							path_radiance_accum_light(L, state, tp*num_samples_inv, &L_light, shadow, num_samples_inv, is_lamp);
-						}
-					}
-				}
-			}
-		}
-	}
-	else {
-		/* sample random position on random light */
-		float light_u, light_v;
-		path_state_rng_2D(kg, state, PRNG_LIGHT_U, &light_u, &light_v);
-
-		LightSample ls;
-		light_sample(kg, light_u, light_v, sd->time, ray->P, state->bounce, &ls);
-
-		float3 tp = throughput;
-
-		/* sample position on volume segment */
-		float rphase = path_state_rng_1D(kg, state, PRNG_PHASE_CHANNEL);
-		float rscatter = path_state_rng_1D(kg, state, PRNG_SCATTER_DISTANCE);
-
-		VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg,
-			state, ray, sd, &tp, rphase, rscatter, segment, (ls.t != FLT_MAX)? &ls.P: NULL, false);
-
-		/* todo: split up light_sample so we don't have to call it again with new position */
-		if(result == VOLUME_PATH_SCATTERED &&
-		   light_sample(kg, light_u, light_v, sd->time, sd->P, state->bounce, &ls)) {
-			/* sample random light */
-			float terminate = path_state_rng_light_termination(kg, state);
-			if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate)) {
-				/* trace shadow ray */
-				float3 shadow;
-
-				if(!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow)) {
-					/* accumulate */
-					path_radiance_accum_light(L, state, tp, &L_light, shadow, 1.0f, is_lamp);
-				}
-			}
-		}
-	}
-#endif  /* __EMISSION__ */
+#    ifdef __EMISSION__
+  if (!kernel_data.integrator.use_direct_light)
+    return;
+
+  Ray light_ray;
+  BsdfEval L_light;
+  bool is_lamp;
+
+  light_ray.time = sd->time;
+
+  if (sample_all_lights) {
+    /* lamp sampling */
+    for (int i = 0; i < kernel_data.integrator.num_all_lights; i++) {
+      if (UNLIKELY(light_select_reached_max_bounces(kg, i, state->bounce)))
+        continue;
+
+      int num_samples = light_select_num_samples(kg, i);
+      float num_samples_inv = 1.0f / (num_samples * kernel_data.integrator.num_all_lights);
+      uint lamp_rng_hash = cmj_hash(state->rng_hash, i);
+
+      for (int j = 0; j < num_samples; j++) {
+        /* sample random position on given light */
+        float light_u, light_v;
+        path_branched_rng_2D(
+            kg, lamp_rng_hash, state, j, num_samples, PRNG_LIGHT_U, &light_u, &light_v);
+
+        LightSample ls;
+        lamp_light_sample(kg, i, light_u, light_v, ray->P, &ls);
+
+        float3 tp = throughput;
+
+        /* sample position on volume segment */
+        float rphase = path_branched_rng_1D(
+            kg, state->rng_hash, state, j, num_samples, PRNG_PHASE_CHANNEL);
+        float rscatter = path_branched_rng_1D(
+            kg, state->rng_hash, state, j, num_samples, PRNG_SCATTER_DISTANCE);
+
+        VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg,
+                                                                       state,
+                                                                       ray,
+                                                                       sd,
+                                                                       &tp,
+                                                                       rphase,
+                                                                       rscatter,
+                                                                       segment,
+                                                                       (ls.t != FLT_MAX) ? &ls.P :
+                                                                                           NULL,
+                                                                       false);
+
+        /* todo: split up light_sample so we don't have to call it again with new position */
+        if (result == VOLUME_PATH_SCATTERED &&
+            lamp_light_sample(kg, i, light_u, light_v, sd->P, &ls)) {
+          if (kernel_data.integrator.pdf_triangles != 0.0f)
+            ls.pdf *= 2.0f;
+
+          float terminate = path_branched_rng_light_termination(
+              kg, state->rng_hash, state, j, num_samples);
+          if (direct_emission(
+                  kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate)) {
+            /* trace shadow ray */
+            float3 shadow;
+
+            if (!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow)) {
+              /* accumulate */
+              path_radiance_accum_light(
+                  L, state, tp * num_samples_inv, &L_light, shadow, num_samples_inv, is_lamp);
+            }
+          }
+        }
+      }
+    }
+
+    /* mesh light sampling */
+    if (kernel_data.integrator.pdf_triangles != 0.0f) {
+      int num_samples = kernel_data.integrator.mesh_light_samples;
+      float num_samples_inv = 1.0f / num_samples;
+
+      for (int j = 0; j < num_samples; j++) {
+        /* sample random position on random triangle */
+        float light_u, light_v;
+        path_branched_rng_2D(
+            kg, state->rng_hash, state, j, num_samples, PRNG_LIGHT_U, &light_u, &light_v);
+
+        /* only sample triangle lights */
+        if (kernel_data.integrator.num_all_lights)
+          light_u = 0.5f * light_u;
+
+        LightSample ls;
+        light_sample(kg, light_u, light_v, sd->time, ray->P, state->bounce, &ls);
+
+        float3 tp = throughput;
+
+        /* sample position on volume segment */
+        float rphase = path_branched_rng_1D(
+            kg, state->rng_hash, state, j, num_samples, PRNG_PHASE_CHANNEL);
+        float rscatter = path_branched_rng_1D(
+            kg, state->rng_hash, state, j, num_samples, PRNG_SCATTER_DISTANCE);
+
+        VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg,
+                                                                       state,
+                                                                       ray,
+                                                                       sd,
+                                                                       &tp,
+                                                                       rphase,
+                                                                       rscatter,
+                                                                       segment,
+                                                                       (ls.t != FLT_MAX) ? &ls.P :
+                                                                                           NULL,
+                                                                       false);
+
+        /* todo: split up light_sample so we don't have to call it again with new position */
+        if (result == VOLUME_PATH_SCATTERED &&
+            light_sample(kg, light_u, light_v, sd->time, sd->P, state->bounce, &ls)) {
+          if (kernel_data.integrator.num_all_lights)
+            ls.pdf *= 2.0f;
+
+          float terminate = path_branched_rng_light_termination(
+              kg, state->rng_hash, state, j, num_samples);
+          if (direct_emission(
+                  kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate)) {
+            /* trace shadow ray */
+            float3 shadow;
+
+            if (!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow)) {
+              /* accumulate */
+              path_radiance_accum_light(
+                  L, state, tp * num_samples_inv, &L_light, shadow, num_samples_inv, is_lamp);
+            }
+          }
+        }
+      }
+    }
+  }
+  else {
+    /* sample random position on random light */
+    float light_u, light_v;
+    path_state_rng_2D(kg, state, PRNG_LIGHT_U, &light_u, &light_v);
+
+    LightSample ls;
+    light_sample(kg, light_u, light_v, sd->time, ray->P, state->bounce, &ls);
+
+    float3 tp = throughput;
+
+    /* sample position on volume segment */
+    float rphase = path_state_rng_1D(kg, state, PRNG_PHASE_CHANNEL);
+    float rscatter = path_state_rng_1D(kg, state, PRNG_SCATTER_DISTANCE);
+
+    VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg,
+                                                                   state,
+                                                                   ray,
+                                                                   sd,
+                                                                   &tp,
+                                                                   rphase,
+                                                                   rscatter,
+                                                                   segment,
+                                                                   (ls.t != FLT_MAX) ? &ls.P :
+                                                                                       NULL,
+                                                                   false);
+
+    /* todo: split up light_sample so we don't have to call it again with new position */
+    if (result == VOLUME_PATH_SCATTERED &&
+        light_sample(kg, light_u, light_v, sd->time, sd->P, state->bounce, &ls)) {
+      /* sample random light */
+      float terminate = path_state_rng_light_termination(kg, state);
+      if (direct_emission(
+              kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate)) {
+        /* trace shadow ray */
+        float3 shadow;
+
+        if (!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow)) {
+          /* accumulate */
+          path_radiance_accum_light(L, state, tp, &L_light, shadow, 1.0f, is_lamp);
+        }
+      }
+    }
+  }
+#    endif /* __EMISSION__ */
 }
-#endif  /* __SPLIT_KERNEL__ */
+#  endif /* __SPLIT_KERNEL__ */
 
-#endif  /* __VOLUME_SCATTER__ */
+#endif /* __VOLUME_SCATTER__ */
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_profiling.h b/intern/cycles/kernel/kernel_profiling.h
index a46d6376473..780830879d8 100644
--- a/intern/cycles/kernel/kernel_profiling.h
+++ b/intern/cycles/kernel/kernel_profiling.h
@@ -26,15 +26,21 @@ CCL_NAMESPACE_BEGIN
 #ifdef __KERNEL_CPU__
 #  define PROFILING_INIT(kg, event) ProfilingHelper profiling_helper(&kg->profiler, event)
 #  define PROFILING_EVENT(event) profiling_helper.set_event(event)
-#  define PROFILING_SHADER(shader) if((shader) != SHADER_NONE) { profiling_helper.set_shader((shader) & SHADER_MASK); }
-#  define PROFILING_OBJECT(object) if((object) != PRIM_NONE) { profiling_helper.set_object(object); }
+#  define PROFILING_SHADER(shader) \
+    if ((shader) != SHADER_NONE) { \
+      profiling_helper.set_shader((shader)&SHADER_MASK); \
+    }
+#  define PROFILING_OBJECT(object) \
+    if ((object) != PRIM_NONE) { \
+      profiling_helper.set_object(object); \
+    }
 #else
 #  define PROFILING_INIT(kg, event)
 #  define PROFILING_EVENT(event)
 #  define PROFILING_SHADER(shader)
 #  define PROFILING_OBJECT(object)
-#endif  /* __KERNEL_CPU__ */
+#endif /* __KERNEL_CPU__ */
 
 CCL_NAMESPACE_END
 
-#endif  /* __KERNEL_PROFILING_H__ */
+#endif /* __KERNEL_PROFILING_H__ */
diff --git a/intern/cycles/kernel/kernel_projection.h b/intern/cycles/kernel/kernel_projection.h
index 7bad89c831c..f74ced45fd5 100644
--- a/intern/cycles/kernel/kernel_projection.h
+++ b/intern/cycles/kernel/kernel_projection.h
@@ -39,233 +39,223 @@ CCL_NAMESPACE_BEGIN
 
 ccl_device float2 direction_to_spherical(float3 dir)
 {
-	float theta = safe_acosf(dir.z);
-	float phi = atan2f(dir.x, dir.y);
+  float theta = safe_acosf(dir.z);
+  float phi = atan2f(dir.x, dir.y);
 
-	return make_float2(theta, phi);
+  return make_float2(theta, phi);
 }
 
 ccl_device float3 spherical_to_direction(float theta, float phi)
 {
-	float sin_theta = sinf(theta);
-	return make_float3(sin_theta*cosf(phi),
-	                   sin_theta*sinf(phi),
-	                   cosf(theta));
+  float sin_theta = sinf(theta);
+  return make_float3(sin_theta * cosf(phi), sin_theta * sinf(phi), cosf(theta));
 }
 
 /* Equirectangular coordinates <-> Cartesian direction */
 
 ccl_device float2 direction_to_equirectangular_range(float3 dir, float4 range)
 {
-	if(is_zero(dir))
-		return make_float2(0.0f, 0.0f);
+  if (is_zero(dir))
+    return make_float2(0.0f, 0.0f);
 
-	float u = (atan2f(dir.y, dir.x) - range.y) / range.x;
-	float v = (acosf(dir.z / len(dir)) - range.w) / range.z;
+  float u = (atan2f(dir.y, dir.x) - range.y) / range.x;
+  float v = (acosf(dir.z / len(dir)) - range.w) / range.z;
 
-	return make_float2(u, v);
+  return make_float2(u, v);
 }
 
 ccl_device float3 equirectangular_range_to_direction(float u, float v, float4 range)
 {
-	float phi = range.x*u + range.y;
-	float theta = range.z*v + range.w;
-	float sin_theta = sinf(theta);
-	return make_float3(sin_theta*cosf(phi),
-	                   sin_theta*sinf(phi),
-	                   cosf(theta));
+  float phi = range.x * u + range.y;
+  float theta = range.z * v + range.w;
+  float sin_theta = sinf(theta);
+  return make_float3(sin_theta * cosf(phi), sin_theta * sinf(phi), cosf(theta));
 }
 
 ccl_device float2 direction_to_equirectangular(float3 dir)
 {
-	return direction_to_equirectangular_range(dir, make_float4(-M_2PI_F, M_PI_F, -M_PI_F, M_PI_F));
+  return direction_to_equirectangular_range(dir, make_float4(-M_2PI_F, M_PI_F, -M_PI_F, M_PI_F));
 }
 
 ccl_device float3 equirectangular_to_direction(float u, float v)
 {
-	return equirectangular_range_to_direction(u, v, make_float4(-M_2PI_F, M_PI_F, -M_PI_F, M_PI_F));
+  return equirectangular_range_to_direction(u, v, make_float4(-M_2PI_F, M_PI_F, -M_PI_F, M_PI_F));
 }
 
 /* Fisheye <-> Cartesian direction */
 
 ccl_device float2 direction_to_fisheye(float3 dir, float fov)
 {
-	float r = atan2f(sqrtf(dir.y*dir.y +  dir.z*dir.z), dir.x) / fov;
-	float phi = atan2f(dir.z, dir.y);
+  float r = atan2f(sqrtf(dir.y * dir.y + dir.z * dir.z), dir.x) / fov;
+  float phi = atan2f(dir.z, dir.y);
 
-	float u = r * cosf(phi) + 0.5f;
-	float v = r * sinf(phi) + 0.5f;
+  float u = r * cosf(phi) + 0.5f;
+  float v = r * sinf(phi) + 0.5f;
 
-	return make_float2(u, v);
+  return make_float2(u, v);
 }
 
 ccl_device float3 fisheye_to_direction(float u, float v, float fov)
 {
-	u = (u - 0.5f) * 2.0f;
-	v = (v - 0.5f) * 2.0f;
+  u = (u - 0.5f) * 2.0f;
+  v = (v - 0.5f) * 2.0f;
 
-	float r = sqrtf(u*u + v*v);
+  float r = sqrtf(u * u + v * v);
 
-	if(r > 1.0f)
-		return make_float3(0.0f, 0.0f, 0.0f);
+  if (r > 1.0f)
+    return make_float3(0.0f, 0.0f, 0.0f);
 
-	float phi = safe_acosf((r != 0.0f)? u/r: 0.0f);
-	float theta = r * fov * 0.5f;
+  float phi = safe_acosf((r != 0.0f) ? u / r : 0.0f);
+  float theta = r * fov * 0.5f;
 
-	if(v < 0.0f) phi = -phi;
+  if (v < 0.0f)
+    phi = -phi;
 
-	return make_float3(
-		 cosf(theta),
-		 -cosf(phi)*sinf(theta),
-		 sinf(phi)*sinf(theta)
-	);
+  return make_float3(cosf(theta), -cosf(phi) * sinf(theta), sinf(phi) * sinf(theta));
 }
 
 ccl_device float2 direction_to_fisheye_equisolid(float3 dir, float lens, float width, float height)
 {
-	float theta = safe_acosf(dir.x);
-	float r = 2.0f * lens * sinf(theta * 0.5f);
-	float phi = atan2f(dir.z, dir.y);
+  float theta = safe_acosf(dir.x);
+  float r = 2.0f * lens * sinf(theta * 0.5f);
+  float phi = atan2f(dir.z, dir.y);
 
-	float u = r * cosf(phi) / width + 0.5f;
-	float v = r * sinf(phi) / height + 0.5f;
+  float u = r * cosf(phi) / width + 0.5f;
+  float v = r * sinf(phi) / height + 0.5f;
 
-	return make_float2(u, v);
+  return make_float2(u, v);
 }
 
-ccl_device_inline float3 fisheye_equisolid_to_direction(float u, float v,
-                                                        float lens,
-                                                        float fov,
-                                                        float width, float height)
+ccl_device_inline float3
+fisheye_equisolid_to_direction(float u, float v, float lens, float fov, float width, float height)
 {
-	u = (u - 0.5f) * width;
-	v = (v - 0.5f) * height;
+  u = (u - 0.5f) * width;
+  v = (v - 0.5f) * height;
 
-	float rmax = 2.0f * lens * sinf(fov * 0.25f);
-	float r = sqrtf(u*u + v*v);
+  float rmax = 2.0f * lens * sinf(fov * 0.25f);
+  float r = sqrtf(u * u + v * v);
 
-	if(r > rmax)
-		return make_float3(0.0f, 0.0f, 0.0f);
+  if (r > rmax)
+    return make_float3(0.0f, 0.0f, 0.0f);
 
-	float phi = safe_acosf((r != 0.0f)? u/r: 0.0f);
-	float theta = 2.0f * asinf(r/(2.0f * lens));
+  float phi = safe_acosf((r != 0.0f) ? u / r : 0.0f);
+  float theta = 2.0f * asinf(r / (2.0f * lens));
 
-	if(v < 0.0f) phi = -phi;
+  if (v < 0.0f)
+    phi = -phi;
 
-	return make_float3(
-		 cosf(theta),
-		 -cosf(phi)*sinf(theta),
-		 sinf(phi)*sinf(theta)
-	);
+  return make_float3(cosf(theta), -cosf(phi) * sinf(theta), sinf(phi) * sinf(theta));
 }
 
 /* Mirror Ball <-> Cartesion direction */
 
 ccl_device float3 mirrorball_to_direction(float u, float v)
 {
-	/* point on sphere */
-	float3 dir;
+  /* point on sphere */
+  float3 dir;
 
-	dir.x = 2.0f*u - 1.0f;
-	dir.z = 2.0f*v - 1.0f;
+  dir.x = 2.0f * u - 1.0f;
+  dir.z = 2.0f * v - 1.0f;
 
-	if(dir.x*dir.x + dir.z*dir.z > 1.0f)
-		return make_float3(0.0f, 0.0f, 0.0f);
+  if (dir.x * dir.x + dir.z * dir.z > 1.0f)
+    return make_float3(0.0f, 0.0f, 0.0f);
 
-	dir.y = -sqrtf(max(1.0f - dir.x*dir.x - dir.z*dir.z, 0.0f));
+  dir.y = -sqrtf(max(1.0f - dir.x * dir.x - dir.z * dir.z, 0.0f));
 
-	/* reflection */
-	float3 I = make_float3(0.0f, -1.0f, 0.0f);
+  /* reflection */
+  float3 I = make_float3(0.0f, -1.0f, 0.0f);
 
-	return 2.0f*dot(dir, I)*dir - I;
+  return 2.0f * dot(dir, I) * dir - I;
 }
 
 ccl_device float2 direction_to_mirrorball(float3 dir)
 {
-	/* inverse of mirrorball_to_direction */
-	dir.y -= 1.0f;
+  /* inverse of mirrorball_to_direction */
+  dir.y -= 1.0f;
 
-	float div = 2.0f*sqrtf(max(-0.5f*dir.y, 0.0f));
-	if(div > 0.0f)
-		dir /= div;
+  float div = 2.0f * sqrtf(max(-0.5f * dir.y, 0.0f));
+  if (div > 0.0f)
+    dir /= div;
 
-	float u = 0.5f*(dir.x + 1.0f);
-	float v = 0.5f*(dir.z + 1.0f);
+  float u = 0.5f * (dir.x + 1.0f);
+  float v = 0.5f * (dir.z + 1.0f);
 
-	return make_float2(u, v);
+  return make_float2(u, v);
 }
 
 ccl_device_inline float3 panorama_to_direction(ccl_constant KernelCamera *cam, float u, float v)
 {
-	switch(cam->panorama_type) {
-		case PANORAMA_EQUIRECTANGULAR:
-			return equirectangular_range_to_direction(u, v, cam->equirectangular_range);
-		case PANORAMA_MIRRORBALL:
-			return mirrorball_to_direction(u, v);
-		case PANORAMA_FISHEYE_EQUIDISTANT:
-			return fisheye_to_direction(u, v, cam->fisheye_fov);
-		case PANORAMA_FISHEYE_EQUISOLID:
-		default:
-			return fisheye_equisolid_to_direction(u, v, cam->fisheye_lens,
-				cam->fisheye_fov, cam->sensorwidth, cam->sensorheight);
-	}
+  switch (cam->panorama_type) {
+    case PANORAMA_EQUIRECTANGULAR:
+      return equirectangular_range_to_direction(u, v, cam->equirectangular_range);
+    case PANORAMA_MIRRORBALL:
+      return mirrorball_to_direction(u, v);
+    case PANORAMA_FISHEYE_EQUIDISTANT:
+      return fisheye_to_direction(u, v, cam->fisheye_fov);
+    case PANORAMA_FISHEYE_EQUISOLID:
+    default:
+      return fisheye_equisolid_to_direction(
+          u, v, cam->fisheye_lens, cam->fisheye_fov, cam->sensorwidth, cam->sensorheight);
+  }
 }
 
 ccl_device_inline float2 direction_to_panorama(ccl_constant KernelCamera *cam, float3 dir)
 {
-	switch(cam->panorama_type) {
-		case PANORAMA_EQUIRECTANGULAR:
-			return direction_to_equirectangular_range(dir, cam->equirectangular_range);
-		case PANORAMA_MIRRORBALL:
-			return direction_to_mirrorball(dir);
-		case PANORAMA_FISHEYE_EQUIDISTANT:
-			return direction_to_fisheye(dir, cam->fisheye_fov);
-		case PANORAMA_FISHEYE_EQUISOLID:
-		default:
-			return direction_to_fisheye_equisolid(dir, cam->fisheye_lens,
-				cam->sensorwidth, cam->sensorheight);
-	}
+  switch (cam->panorama_type) {
+    case PANORAMA_EQUIRECTANGULAR:
+      return direction_to_equirectangular_range(dir, cam->equirectangular_range);
+    case PANORAMA_MIRRORBALL:
+      return direction_to_mirrorball(dir);
+    case PANORAMA_FISHEYE_EQUIDISTANT:
+      return direction_to_fisheye(dir, cam->fisheye_fov);
+    case PANORAMA_FISHEYE_EQUISOLID:
+    default:
+      return direction_to_fisheye_equisolid(
+          dir, cam->fisheye_lens, cam->sensorwidth, cam->sensorheight);
+  }
 }
 
-ccl_device_inline void spherical_stereo_transform(ccl_constant KernelCamera *cam, float3 *P, float3 *D)
+ccl_device_inline void spherical_stereo_transform(ccl_constant KernelCamera *cam,
+                                                  float3 *P,
+                                                  float3 *D)
 {
-	float interocular_offset = cam->interocular_offset;
-
-	/* Interocular offset of zero means either non stereo, or stereo without
-	 * spherical stereo. */
-	kernel_assert(interocular_offset != 0.0f);
-
-	if(cam->pole_merge_angle_to > 0.0f) {
-		const float pole_merge_angle_from = cam->pole_merge_angle_from,
-		            pole_merge_angle_to = cam->pole_merge_angle_to;
-		float altitude = fabsf(safe_asinf((*D).z));
-		if(altitude > pole_merge_angle_to) {
-			interocular_offset = 0.0f;
-		}
-		else if(altitude > pole_merge_angle_from) {
-			float fac = (altitude - pole_merge_angle_from) / (pole_merge_angle_to - pole_merge_angle_from);
-			float fade = cosf(fac * M_PI_2_F);
-			interocular_offset *= fade;
-		}
-	}
-
-	float3 up = make_float3(0.0f, 0.0f, 1.0f);
-	float3 side = normalize(cross(*D, up));
-	float3 stereo_offset = side * interocular_offset;
-
-	*P += stereo_offset;
-
-	/* Convergence distance is FLT_MAX in the case of parallel convergence mode,
-	 * no need to modify direction in this case either. */
-	const float convergence_distance = cam->convergence_distance;
-
-	if(convergence_distance != FLT_MAX)
-	{
-		float3 screen_offset = convergence_distance * (*D);
-		*D = normalize(screen_offset - stereo_offset);
-	}
+  float interocular_offset = cam->interocular_offset;
+
+  /* Interocular offset of zero means either non stereo, or stereo without
+   * spherical stereo. */
+  kernel_assert(interocular_offset != 0.0f);
+
+  if (cam->pole_merge_angle_to > 0.0f) {
+    const float pole_merge_angle_from = cam->pole_merge_angle_from,
+                pole_merge_angle_to = cam->pole_merge_angle_to;
+    float altitude = fabsf(safe_asinf((*D).z));
+    if (altitude > pole_merge_angle_to) {
+      interocular_offset = 0.0f;
+    }
+    else if (altitude > pole_merge_angle_from) {
+      float fac = (altitude - pole_merge_angle_from) /
+                  (pole_merge_angle_to - pole_merge_angle_from);
+      float fade = cosf(fac * M_PI_2_F);
+      interocular_offset *= fade;
+    }
+  }
+
+  float3 up = make_float3(0.0f, 0.0f, 1.0f);
+  float3 side = normalize(cross(*D, up));
+  float3 stereo_offset = side * interocular_offset;
+
+  *P += stereo_offset;
+
+  /* Convergence distance is FLT_MAX in the case of parallel convergence mode,
+   * no need to modify direction in this case either. */
+  const float convergence_distance = cam->convergence_distance;
+
+  if (convergence_distance != FLT_MAX) {
+    float3 screen_offset = convergence_distance * (*D);
+    *D = normalize(screen_offset - stereo_offset);
+  }
 }
 
 CCL_NAMESPACE_END
 
-#endif  /* __KERNEL_PROJECTION_CL__ */
+#endif /* __KERNEL_PROJECTION_CL__ */
diff --git a/intern/cycles/kernel/kernel_queues.h b/intern/cycles/kernel/kernel_queues.h
index de8cc4a0cef..91a39fc1465 100644
--- a/intern/cycles/kernel/kernel_queues.h
+++ b/intern/cycles/kernel/kernel_queues.h
@@ -23,24 +23,24 @@ CCL_NAMESPACE_BEGIN
  * Queue utility functions for split kernel
  */
 #ifdef __KERNEL_OPENCL__
-#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
-#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable
+#  pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
+#  pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable
 #endif
 
 /*
  * Enqueue ray index into the queue
  */
 ccl_device void enqueue_ray_index(
-        int ray_index,                /* Ray index to be enqueued. */
-        int queue_number,             /* Queue in which the ray index should be enqueued. */
-        ccl_global int *queues,       /* Buffer of all queues. */
-        int queue_size,               /* Size of each queue. */
-        ccl_global int *queue_index) /* Array of size num_queues; Used for atomic increment. */
+    int ray_index,               /* Ray index to be enqueued. */
+    int queue_number,            /* Queue in which the ray index should be enqueued. */
+    ccl_global int *queues,      /* Buffer of all queues. */
+    int queue_size,              /* Size of each queue. */
+    ccl_global int *queue_index) /* Array of size num_queues; Used for atomic increment. */
 {
-	/* This thread's queue index. */
-	int my_queue_index = atomic_fetch_and_inc_uint32((ccl_global uint*)&queue_index[queue_number])
-	                   + (queue_number * queue_size);
-	queues[my_queue_index] = ray_index;
+  /* This thread's queue index. */
+  int my_queue_index = atomic_fetch_and_inc_uint32((ccl_global uint *)&queue_index[queue_number]) +
+                       (queue_number * queue_size);
+  queues[my_queue_index] = ray_index;
 }
 
 /*
@@ -51,96 +51,95 @@ ccl_device void enqueue_ray_index(
  * is no more ray to allocate to other threads.
  */
 ccl_device int get_ray_index(
-        KernelGlobals *kg,
-        int thread_index,       /* Global thread index. */
-        int queue_number,       /* Queue to operate on. */
-        ccl_global int *queues, /* Buffer of all queues. */
-        int queuesize,          /* Size of a queue. */
-        int empty_queue)        /* Empty the queue slot as soon as we fetch the ray index. */
+    KernelGlobals *kg,
+    int thread_index,       /* Global thread index. */
+    int queue_number,       /* Queue to operate on. */
+    ccl_global int *queues, /* Buffer of all queues. */
+    int queuesize,          /* Size of a queue. */
+    int empty_queue)        /* Empty the queue slot as soon as we fetch the ray index. */
 {
-	int ray_index = queues[queue_number * queuesize + thread_index];
-	if(empty_queue && ray_index != QUEUE_EMPTY_SLOT) {
-		queues[queue_number * queuesize + thread_index] = QUEUE_EMPTY_SLOT;
-	}
-	return ray_index;
+  int ray_index = queues[queue_number * queuesize + thread_index];
+  if (empty_queue && ray_index != QUEUE_EMPTY_SLOT) {
+    queues[queue_number * queuesize + thread_index] = QUEUE_EMPTY_SLOT;
+  }
+  return ray_index;
 }
 
 /* The following functions are to realize Local memory variant of enqueue ray index function. */
 
 /* All threads should call this function. */
 ccl_device void enqueue_ray_index_local(
-        int ray_index,                               /* Ray index to enqueue. */
-        int queue_number,                            /* Queue in which to enqueue ray index. */
-        char enqueue_flag,                           /* True for threads whose ray index has to be enqueued. */
-        int queuesize,                               /* queue size. */
-        ccl_local_param unsigned int *local_queue_atomics,   /* To to local queue atomics. */
-        ccl_global int *Queue_data,                  /* Queues. */
-        ccl_global int *Queue_index)                 /* To do global queue atomics. */
+    int ray_index,     /* Ray index to enqueue. */
+    int queue_number,  /* Queue in which to enqueue ray index. */
+    char enqueue_flag, /* True for threads whose ray index has to be enqueued. */
+    int queuesize,     /* queue size. */
+    ccl_local_param unsigned int *local_queue_atomics, /* To do local queue atomics. */
+    ccl_global int *Queue_data,                        /* Queues. */
+    ccl_global int *Queue_index)                       /* To do global queue atomics. */
 {
-	int lidx = ccl_local_id(1) * ccl_local_size(0) + ccl_local_id(0);
-
-	/* Get local queue id .*/
-	unsigned int lqidx;
-	if(enqueue_flag) {
-		lqidx = atomic_fetch_and_inc_uint32(local_queue_atomics);
-	}
-	ccl_barrier(CCL_LOCAL_MEM_FENCE);
-
-	/* Get global queue offset. */
-	if(lidx == 0) {
-		*local_queue_atomics = atomic_fetch_and_add_uint32((ccl_global uint*)&Queue_index[queue_number],
-		                                                   *local_queue_atomics);
-	}
-	ccl_barrier(CCL_LOCAL_MEM_FENCE);
-
-	/* Get global queue index and enqueue ray. */
-	if(enqueue_flag) {
-		unsigned int my_gqidx = queue_number * queuesize + (*local_queue_atomics) + lqidx;
-		Queue_data[my_gqidx] = ray_index;
-	}
+  int lidx = ccl_local_id(1) * ccl_local_size(0) + ccl_local_id(0);
+
+  /* Get local queue id. */
+  unsigned int lqidx;
+  if (enqueue_flag) {
+    lqidx = atomic_fetch_and_inc_uint32(local_queue_atomics);
+  }
+  ccl_barrier(CCL_LOCAL_MEM_FENCE);
+
+  /* Get global queue offset. */
+  if (lidx == 0) {
+    *local_queue_atomics = atomic_fetch_and_add_uint32(
+        (ccl_global uint *)&Queue_index[queue_number], *local_queue_atomics);
+  }
+  ccl_barrier(CCL_LOCAL_MEM_FENCE);
+
+  /* Get global queue index and enqueue ray. */
+  if (enqueue_flag) {
+    unsigned int my_gqidx = queue_number * queuesize + (*local_queue_atomics) + lqidx;
+    Queue_data[my_gqidx] = ray_index;
+  }
 }
 
 ccl_device unsigned int get_local_queue_index(
-        int queue_number, /* Queue in which to enqueue the ray; -1 if no queue */
-        ccl_local_param unsigned int *local_queue_atomics)
+    int queue_number, /* Queue in which to enqueue the ray; -1 if no queue */
+    ccl_local_param unsigned int *local_queue_atomics)
 {
-	int my_lqidx = atomic_fetch_and_inc_uint32(&local_queue_atomics[queue_number]);
-	return my_lqidx;
+  int my_lqidx = atomic_fetch_and_inc_uint32(&local_queue_atomics[queue_number]);
+  return my_lqidx;
 }
 
 ccl_device unsigned int get_global_per_queue_offset(
-        int queue_number,
-        ccl_local_param unsigned int *local_queue_atomics,
-        ccl_global int* global_queue_atomics)
+    int queue_number,
+    ccl_local_param unsigned int *local_queue_atomics,
+    ccl_global int *global_queue_atomics)
 {
-	unsigned int queue_offset = atomic_fetch_and_add_uint32((ccl_global uint*)&global_queue_atomics[queue_number],
-	                                                        local_queue_atomics[queue_number]);
-	return queue_offset;
+  unsigned int queue_offset = atomic_fetch_and_add_uint32(
+      (ccl_global uint *)&global_queue_atomics[queue_number], local_queue_atomics[queue_number]);
+  return queue_offset;
 }
 
 ccl_device unsigned int get_global_queue_index(
     int queue_number,
     int queuesize,
     unsigned int lqidx,
-    ccl_local_param unsigned int * global_per_queue_offset)
+    ccl_local_param unsigned int *global_per_queue_offset)
 {
-	int my_gqidx = queuesize * queue_number + lqidx + global_per_queue_offset[queue_number];
-	return my_gqidx;
+  int my_gqidx = queuesize * queue_number + lqidx + global_per_queue_offset[queue_number];
+  return my_gqidx;
 }
 
-ccl_device int dequeue_ray_index(
-        int queue_number,
-        ccl_global int *queues,
-        int queue_size,
-        ccl_global int *queue_index)
+ccl_device int dequeue_ray_index(int queue_number,
+                                 ccl_global int *queues,
+                                 int queue_size,
+                                 ccl_global int *queue_index)
 {
-	int index = atomic_fetch_and_dec_uint32((ccl_global uint*)&queue_index[queue_number])-1;
+  int index = atomic_fetch_and_dec_uint32((ccl_global uint *)&queue_index[queue_number]) - 1;
 
-	if(index < 0) {
-		return QUEUE_EMPTY_SLOT;
-	}
+  if (index < 0) {
+    return QUEUE_EMPTY_SLOT;
+  }
 
-	return queues[index + queue_number * queue_size];
+  return queues[index + queue_number * queue_size];
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_random.h b/intern/cycles/kernel/kernel_random.h
index 61ddf4a4f81..6779c1f7160 100644
--- a/intern/cycles/kernel/kernel_random.h
+++ b/intern/cycles/kernel/kernel_random.h
@@ -23,7 +23,6 @@ CCL_NAMESPACE_BEGIN
  * this single threaded on a CPU for repeatable results. */
 //#define __DEBUG_CORRELATION__
 
-
 /* High Dimensional Sobol.
  *
  * Multidimensional sobol with generator matrices. Dimension 0 and 1 are equal
@@ -36,136 +35,138 @@ CCL_NAMESPACE_BEGIN
  * progressive pattern that doesn't suffer from this problem, because even
  * with this offset some dimensions are quite poor.
  */
-#define SOBOL_SKIP 64
+#  define SOBOL_SKIP 64
 
 ccl_device uint sobol_dimension(KernelGlobals *kg, int index, int dimension)
 {
-	uint result = 0;
-	uint i = index + SOBOL_SKIP;
-	for(uint j = 0; i; i >>= 1, j++) {
-		if(i & 1) {
-			result ^= kernel_tex_fetch(__sobol_directions, 32*dimension + j);
-		}
-	}
-	return result;
+  uint result = 0;
+  uint i = index + SOBOL_SKIP;
+  for (uint j = 0; i; i >>= 1, j++) {
+    if (i & 1) {
+      result ^= kernel_tex_fetch(__sobol_directions, 32 * dimension + j);
+    }
+  }
+  return result;
 }
 
-#endif  /* __SOBOL__ */
-
+#endif /* __SOBOL__ */
 
-ccl_device_forceinline float path_rng_1D(KernelGlobals *kg,
-                                         uint rng_hash,
-                                         int sample, int num_samples,
-                                         int dimension)
+ccl_device_forceinline float path_rng_1D(
+    KernelGlobals *kg, uint rng_hash, int sample, int num_samples, int dimension)
 {
 #ifdef __DEBUG_CORRELATION__
-	return (float)drand48();
+  return (float)drand48();
 #endif
 
 #ifdef __CMJ__
 #  ifdef __SOBOL__
-	if(kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_CMJ)
+  if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_CMJ)
 #  endif
-	{
-		/* Correlated multi-jitter. */
-		int p = rng_hash + dimension;
-		return cmj_sample_1D(sample, num_samples, p);
-	}
+  {
+    /* Correlated multi-jitter. */
+    int p = rng_hash + dimension;
+    return cmj_sample_1D(sample, num_samples, p);
+  }
 #endif
 
 #ifdef __SOBOL__
-	/* Sobol sequence value using direction vectors. */
-	uint result = sobol_dimension(kg, sample, dimension);
-	float r = (float)result * (1.0f/(float)0xFFFFFFFF);
+  /* Sobol sequence value using direction vectors. */
+  uint result = sobol_dimension(kg, sample, dimension);
+  float r = (float)result * (1.0f / (float)0xFFFFFFFF);
 
-	/* Cranly-Patterson rotation using rng seed */
-	float shift;
+  /* Cranley-Patterson rotation using rng seed */
+  float shift;
 
-	/* Hash rng with dimension to solve correlation issues.
-	 * See T38710, T50116.
-	 */
-	uint tmp_rng = cmj_hash_simple(dimension, rng_hash);
-	shift = tmp_rng * (1.0f/(float)0xFFFFFFFF);
+  /* Hash rng with dimension to solve correlation issues.
+   * See T38710, T50116.
+   */
+  uint tmp_rng = cmj_hash_simple(dimension, rng_hash);
+  shift = tmp_rng * (1.0f / (float)0xFFFFFFFF);
 
-	return r + shift - floorf(r + shift);
+  return r + shift - floorf(r + shift);
 #endif
 }
 
 ccl_device_forceinline void path_rng_2D(KernelGlobals *kg,
                                         uint rng_hash,
-                                        int sample, int num_samples,
+                                        int sample,
+                                        int num_samples,
                                         int dimension,
-                                        float *fx, float *fy)
+                                        float *fx,
+                                        float *fy)
 {
 #ifdef __DEBUG_CORRELATION__
-	*fx = (float)drand48();
-	*fy = (float)drand48();
-	return;
+  *fx = (float)drand48();
+  *fy = (float)drand48();
+  return;
 #endif
 
 #ifdef __CMJ__
 #  ifdef __SOBOL__
-	if(kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_CMJ)
+  if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_CMJ)
 #  endif
-	{
-		/* Correlated multi-jitter. */
-		int p = rng_hash + dimension;
-		cmj_sample_2D(sample, num_samples, p, fx, fy);
-		return;
-	}
+  {
+    /* Correlated multi-jitter. */
+    int p = rng_hash + dimension;
+    cmj_sample_2D(sample, num_samples, p, fx, fy);
+    return;
+  }
 #endif
 
 #ifdef __SOBOL__
-	/* Sobol. */
-	*fx = path_rng_1D(kg, rng_hash, sample, num_samples, dimension);
-	*fy = path_rng_1D(kg, rng_hash, sample, num_samples, dimension + 1);
+  /* Sobol. */
+  *fx = path_rng_1D(kg, rng_hash, sample, num_samples, dimension);
+  *fy = path_rng_1D(kg, rng_hash, sample, num_samples, dimension + 1);
 #endif
 }
 
 ccl_device_inline void path_rng_init(KernelGlobals *kg,
-                                     int sample, int num_samples,
+                                     int sample,
+                                     int num_samples,
                                      uint *rng_hash,
-                                     int x, int y,
-                                     float *fx, float *fy)
+                                     int x,
+                                     int y,
+                                     float *fx,
+                                     float *fy)
 {
-	/* load state */
-	*rng_hash = hash_int_2d(x, y);
-	*rng_hash ^= kernel_data.integrator.seed;
+  /* load state */
+  *rng_hash = hash_int_2d(x, y);
+  *rng_hash ^= kernel_data.integrator.seed;
 
 #ifdef __DEBUG_CORRELATION__
-	srand48(*rng_hash + sample);
+  srand48(*rng_hash + sample);
 #endif
 
-	if(sample == 0) {
-		*fx = 0.5f;
-		*fy = 0.5f;
-	}
-	else {
-		path_rng_2D(kg, *rng_hash, sample, num_samples, PRNG_FILTER_U, fx, fy);
-	}
+  if (sample == 0) {
+    *fx = 0.5f;
+    *fy = 0.5f;
+  }
+  else {
+    path_rng_2D(kg, *rng_hash, sample, num_samples, PRNG_FILTER_U, fx, fy);
+  }
 }
 
 /* Linear Congruential Generator */
 
 ccl_device uint lcg_step_uint(uint *rng)
 {
-	/* implicit mod 2^32 */
-	*rng = (1103515245*(*rng) + 12345);
-	return *rng;
+  /* implicit mod 2^32 */
+  *rng = (1103515245 * (*rng) + 12345);
+  return *rng;
 }
 
 ccl_device float lcg_step_float(uint *rng)
 {
-	/* implicit mod 2^32 */
-	*rng = (1103515245*(*rng) + 12345);
-	return (float)*rng * (1.0f/(float)0xFFFFFFFF);
+  /* implicit mod 2^32 */
+  *rng = (1103515245 * (*rng) + 12345);
+  return (float)*rng * (1.0f / (float)0xFFFFFFFF);
 }
 
 ccl_device uint lcg_init(uint seed)
 {
-	uint rng = seed;
-	lcg_step_uint(&rng);
-	return rng;
+  uint rng = seed;
+  lcg_step_uint(&rng);
+  return rng;
 }
 
 /* Path Tracing Utility Functions
@@ -181,118 +182,107 @@ ccl_device_inline float path_state_rng_1D(KernelGlobals *kg,
                                           const ccl_addr_space PathState *state,
                                           int dimension)
 {
-	return path_rng_1D(kg,
-	                   state->rng_hash,
-	                   state->sample, state->num_samples,
-	                   state->rng_offset + dimension);
+  return path_rng_1D(
+      kg, state->rng_hash, state->sample, state->num_samples, state->rng_offset + dimension);
 }
 
-ccl_device_inline void path_state_rng_2D(KernelGlobals *kg,
-                                         const ccl_addr_space PathState *state,
-                                         int dimension,
-                                         float *fx, float *fy)
+ccl_device_inline void path_state_rng_2D(
+    KernelGlobals *kg, const ccl_addr_space PathState *state, int dimension, float *fx, float *fy)
 {
-	path_rng_2D(kg,
-	            state->rng_hash,
-	            state->sample, state->num_samples,
-	            state->rng_offset + dimension,
-	            fx, fy);
+  path_rng_2D(kg,
+              state->rng_hash,
+              state->sample,
+              state->num_samples,
+              state->rng_offset + dimension,
+              fx,
+              fy);
 }
 
 ccl_device_inline float path_state_rng_1D_hash(KernelGlobals *kg,
-                                          const ccl_addr_space PathState *state,
-                                          uint hash)
+                                               const ccl_addr_space PathState *state,
+                                               uint hash)
 {
-	/* Use a hash instead of dimension, this is not great but avoids adding
-	 * more dimensions to each bounce which reduces quality of dimensions we
-	 * are already using. */
-	return path_rng_1D(kg,
-	                   cmj_hash_simple(state->rng_hash, hash),
-	                   state->sample, state->num_samples,
-	                   state->rng_offset);
+  /* Use a hash instead of dimension, this is not great but avoids adding
+   * more dimensions to each bounce which reduces quality of dimensions we
+   * are already using. */
+  return path_rng_1D(kg,
+                     cmj_hash_simple(state->rng_hash, hash),
+                     state->sample,
+                     state->num_samples,
+                     state->rng_offset);
 }
 
-ccl_device_inline float path_branched_rng_1D(
-        KernelGlobals *kg,
-        uint rng_hash,
-        const ccl_addr_space PathState *state,
-        int branch,
-        int num_branches,
-        int dimension)
+ccl_device_inline float path_branched_rng_1D(KernelGlobals *kg,
+                                             uint rng_hash,
+                                             const ccl_addr_space PathState *state,
+                                             int branch,
+                                             int num_branches,
+                                             int dimension)
 {
-	return path_rng_1D(kg,
-	                   rng_hash,
-	                   state->sample * num_branches + branch,
-	                   state->num_samples * num_branches,
-	                   state->rng_offset + dimension);
+  return path_rng_1D(kg,
+                     rng_hash,
+                     state->sample * num_branches + branch,
+                     state->num_samples * num_branches,
+                     state->rng_offset + dimension);
 }
 
-ccl_device_inline void path_branched_rng_2D(
-        KernelGlobals *kg,
-        uint rng_hash,
-        const ccl_addr_space PathState *state,
-        int branch,
-        int num_branches,
-        int dimension,
-        float *fx, float *fy)
+ccl_device_inline void path_branched_rng_2D(KernelGlobals *kg,
+                                            uint rng_hash,
+                                            const ccl_addr_space PathState *state,
+                                            int branch,
+                                            int num_branches,
+                                            int dimension,
+                                            float *fx,
+                                            float *fy)
 {
-	path_rng_2D(kg,
-	            rng_hash,
-	            state->sample * num_branches + branch,
-	            state->num_samples * num_branches,
-	            state->rng_offset + dimension,
-	            fx, fy);
+  path_rng_2D(kg,
+              rng_hash,
+              state->sample * num_branches + branch,
+              state->num_samples * num_branches,
+              state->rng_offset + dimension,
+              fx,
+              fy);
 }
 
 /* Utitility functions to get light termination value,
  * since it might not be needed in many cases.
  */
-ccl_device_inline float path_state_rng_light_termination(
-        KernelGlobals *kg,
-        const ccl_addr_space PathState *state)
+ccl_device_inline float path_state_rng_light_termination(KernelGlobals *kg,
+                                                         const ccl_addr_space PathState *state)
 {
-	if(kernel_data.integrator.light_inv_rr_threshold > 0.0f) {
-		return path_state_rng_1D(kg, state, PRNG_LIGHT_TERMINATE);
-	}
-	return 0.0f;
+  if (kernel_data.integrator.light_inv_rr_threshold > 0.0f) {
+    return path_state_rng_1D(kg, state, PRNG_LIGHT_TERMINATE);
+  }
+  return 0.0f;
 }
 
-ccl_device_inline float path_branched_rng_light_termination(
-        KernelGlobals *kg,
-        uint rng_hash,
-        const ccl_addr_space PathState *state,
-        int branch,
-        int num_branches)
+ccl_device_inline float path_branched_rng_light_termination(KernelGlobals *kg,
+                                                            uint rng_hash,
+                                                            const ccl_addr_space PathState *state,
+                                                            int branch,
+                                                            int num_branches)
 {
-	if(kernel_data.integrator.light_inv_rr_threshold > 0.0f) {
-		return path_branched_rng_1D(kg,
-		                            rng_hash,
-		                            state,
-		                            branch,
-		                            num_branches,
-		                            PRNG_LIGHT_TERMINATE);
-	}
-	return 0.0f;
+  if (kernel_data.integrator.light_inv_rr_threshold > 0.0f) {
+    return path_branched_rng_1D(kg, rng_hash, state, branch, num_branches, PRNG_LIGHT_TERMINATE);
+  }
+  return 0.0f;
 }
 
-ccl_device_inline uint lcg_state_init(PathState *state,
-                                      uint scramble)
+ccl_device_inline uint lcg_state_init(PathState *state, uint scramble)
 {
-	return lcg_init(state->rng_hash + state->rng_offset + state->sample*scramble);
+  return lcg_init(state->rng_hash + state->rng_offset + state->sample * scramble);
 }
 
-ccl_device_inline uint lcg_state_init_addrspace(ccl_addr_space PathState *state,
-                                                uint scramble)
+ccl_device_inline uint lcg_state_init_addrspace(ccl_addr_space PathState *state, uint scramble)
 {
-	return lcg_init(state->rng_hash + state->rng_offset + state->sample*scramble);
+  return lcg_init(state->rng_hash + state->rng_offset + state->sample * scramble);
 }
 
-
 ccl_device float lcg_step_float_addrspace(ccl_addr_space uint *rng)
 {
-	/* Implicit mod 2^32 */
-	*rng = (1103515245*(*rng) + 12345);
-	return (float)*rng * (1.0f/(float)0xFFFFFFFF);
+  /* Implicit mod 2^32 */
+  *rng = (1103515245 * (*rng) + 12345);
+  return (float)*rng * (1.0f / (float)0xFFFFFFFF);
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_shader.h b/intern/cycles/kernel/kernel_shader.h
index b1da523501d..351b623addb 100644
--- a/intern/cycles/kernel/kernel_shader.h
+++ b/intern/cycles/kernel/kernel_shader.h
@@ -37,14 +37,14 @@ CCL_NAMESPACE_BEGIN
 #ifdef __OBJECT_MOTION__
 ccl_device void shader_setup_object_transforms(KernelGlobals *kg, ShaderData *sd, float time)
 {
-	if(sd->object_flag & SD_OBJECT_MOTION) {
-		sd->ob_tfm = object_fetch_transform_motion(kg, sd->object, time);
-		sd->ob_itfm = transform_quick_inverse(sd->ob_tfm);
-	}
-	else {
-		sd->ob_tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM);
-		sd->ob_itfm = object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM);
-	}
+  if (sd->object_flag & SD_OBJECT_MOTION) {
+    sd->ob_tfm = object_fetch_transform_motion(kg, sd->object, time);
+    sd->ob_itfm = transform_quick_inverse(sd->ob_tfm);
+  }
+  else {
+    sd->ob_tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM);
+    sd->ob_itfm = object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM);
+  }
 }
 #endif
 
@@ -53,104 +53,104 @@ ccl_device_noinline void shader_setup_from_ray(KernelGlobals *kg,
                                                const Intersection *isect,
                                                const Ray *ray)
 {
-	PROFILING_INIT(kg, PROFILING_SHADER_SETUP);
+  PROFILING_INIT(kg, PROFILING_SHADER_SETUP);
 
 #ifdef __INSTANCING__
-	sd->object = (isect->object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, isect->prim): isect->object;
+  sd->object = (isect->object == OBJECT_NONE) ? kernel_tex_fetch(__prim_object, isect->prim) :
+                                                isect->object;
 #endif
-	sd->lamp = LAMP_NONE;
+  sd->lamp = LAMP_NONE;
 
-	sd->type = isect->type;
-	sd->flag = 0;
-	sd->object_flag = kernel_tex_fetch(__object_flag,
-	                                              sd->object);
+  sd->type = isect->type;
+  sd->flag = 0;
+  sd->object_flag = kernel_tex_fetch(__object_flag, sd->object);
 
-	/* matrices and time */
+  /* matrices and time */
 #ifdef __OBJECT_MOTION__
-	shader_setup_object_transforms(kg, sd, ray->time);
+  shader_setup_object_transforms(kg, sd, ray->time);
 #endif
-	sd->time = ray->time;
+  sd->time = ray->time;
 
-	sd->prim = kernel_tex_fetch(__prim_index, isect->prim);
-	sd->ray_length = isect->t;
+  sd->prim = kernel_tex_fetch(__prim_index, isect->prim);
+  sd->ray_length = isect->t;
 
 #ifdef __UV__
-	sd->u = isect->u;
-	sd->v = isect->v;
+  sd->u = isect->u;
+  sd->v = isect->v;
 #endif
 
 #ifdef __HAIR__
-	if(sd->type & PRIMITIVE_ALL_CURVE) {
-		/* curve */
-		float4 curvedata = kernel_tex_fetch(__curves, sd->prim);
-
-		sd->shader = __float_as_int(curvedata.z);
-		sd->P = curve_refine(kg, sd, isect, ray);
-	}
-	else
+  if (sd->type & PRIMITIVE_ALL_CURVE) {
+    /* curve */
+    float4 curvedata = kernel_tex_fetch(__curves, sd->prim);
+
+    sd->shader = __float_as_int(curvedata.z);
+    sd->P = curve_refine(kg, sd, isect, ray);
+  }
+  else
 #endif
-	if(sd->type & PRIMITIVE_TRIANGLE) {
-		/* static triangle */
-		float3 Ng = triangle_normal(kg, sd);
-		sd->shader = kernel_tex_fetch(__tri_shader, sd->prim);
+      if (sd->type & PRIMITIVE_TRIANGLE) {
+    /* static triangle */
+    float3 Ng = triangle_normal(kg, sd);
+    sd->shader = kernel_tex_fetch(__tri_shader, sd->prim);
 
-		/* vectors */
-		sd->P = triangle_refine(kg, sd, isect, ray);
-		sd->Ng = Ng;
-		sd->N = Ng;
+    /* vectors */
+    sd->P = triangle_refine(kg, sd, isect, ray);
+    sd->Ng = Ng;
+    sd->N = Ng;
 
-		/* smooth normal */
-		if(sd->shader & SHADER_SMOOTH_NORMAL)
-			sd->N = triangle_smooth_normal(kg, Ng, sd->prim, sd->u, sd->v);
+    /* smooth normal */
+    if (sd->shader & SHADER_SMOOTH_NORMAL)
+      sd->N = triangle_smooth_normal(kg, Ng, sd->prim, sd->u, sd->v);
 
 #ifdef __DPDU__
-		/* dPdu/dPdv */
-		triangle_dPdudv(kg, sd->prim, &sd->dPdu, &sd->dPdv);
+    /* dPdu/dPdv */
+    triangle_dPdudv(kg, sd->prim, &sd->dPdu, &sd->dPdv);
 #endif
-	}
-	else {
-		/* motion triangle */
-		motion_triangle_shader_setup(kg, sd, isect, ray, false);
-	}
+  }
+  else {
+    /* motion triangle */
+    motion_triangle_shader_setup(kg, sd, isect, ray, false);
+  }
 
-	sd->I = -ray->D;
+  sd->I = -ray->D;
 
-	sd->flag |= kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).flags;
+  sd->flag |= kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).flags;
 
 #ifdef __INSTANCING__
-	if(isect->object != OBJECT_NONE) {
-		/* instance transform */
-		object_normal_transform_auto(kg, sd, &sd->N);
-		object_normal_transform_auto(kg, sd, &sd->Ng);
+  if (isect->object != OBJECT_NONE) {
+    /* instance transform */
+    object_normal_transform_auto(kg, sd, &sd->N);
+    object_normal_transform_auto(kg, sd, &sd->Ng);
 #  ifdef __DPDU__
-		object_dir_transform_auto(kg, sd, &sd->dPdu);
-		object_dir_transform_auto(kg, sd, &sd->dPdv);
+    object_dir_transform_auto(kg, sd, &sd->dPdu);
+    object_dir_transform_auto(kg, sd, &sd->dPdv);
 #  endif
-	}
+  }
 #endif
 
-	/* backfacing test */
-	bool backfacing = (dot(sd->Ng, sd->I) < 0.0f);
+  /* backfacing test */
+  bool backfacing = (dot(sd->Ng, sd->I) < 0.0f);
 
-	if(backfacing) {
-		sd->flag |= SD_BACKFACING;
-		sd->Ng = -sd->Ng;
-		sd->N = -sd->N;
+  if (backfacing) {
+    sd->flag |= SD_BACKFACING;
+    sd->Ng = -sd->Ng;
+    sd->N = -sd->N;
 #ifdef __DPDU__
-		sd->dPdu = -sd->dPdu;
-		sd->dPdv = -sd->dPdv;
+    sd->dPdu = -sd->dPdu;
+    sd->dPdv = -sd->dPdv;
 #endif
-	}
+  }
 
 #ifdef __RAY_DIFFERENTIALS__
-	/* differentials */
-	differential_transfer(&sd->dP, ray->dP, ray->D, ray->dD, sd->Ng, isect->t);
-	differential_incoming(&sd->dI, ray->dD);
-	differential_dudv(&sd->du, &sd->dv, sd->dPdu, sd->dPdv, sd->dP, sd->Ng);
+  /* differentials */
+  differential_transfer(&sd->dP, ray->dP, ray->D, ray->dD, sd->Ng, isect->t);
+  differential_incoming(&sd->dI, ray->dD);
+  differential_dudv(&sd->du, &sd->dv, sd->dPdu, sd->dPdv, sd->dP, sd->Ng);
 #endif
 
-	PROFILING_SHADER(sd->shader);
-	PROFILING_OBJECT(sd->object);
+  PROFILING_SHADER(sd->shader);
+  PROFILING_OBJECT(sd->object);
 }
 
 /* ShaderData setup from BSSRDF scatter */
@@ -161,86 +161,86 @@ ccl_device
 #  else
 ccl_device_inline
 #  endif
-void shader_setup_from_subsurface(
-        KernelGlobals *kg,
-        ShaderData *sd,
-        const Intersection *isect,
-        const Ray *ray)
+    void
+    shader_setup_from_subsurface(KernelGlobals *kg,
+                                 ShaderData *sd,
+                                 const Intersection *isect,
+                                 const Ray *ray)
 {
-	PROFILING_INIT(kg, PROFILING_SHADER_SETUP);
+  PROFILING_INIT(kg, PROFILING_SHADER_SETUP);
 
-	const bool backfacing = sd->flag & SD_BACKFACING;
+  const bool backfacing = sd->flag & SD_BACKFACING;
 
-	/* object, matrices, time, ray_length stay the same */
-	sd->flag = 0;
-	sd->object_flag = kernel_tex_fetch(__object_flag, sd->object);
-	sd->prim = kernel_tex_fetch(__prim_index, isect->prim);
-	sd->type = isect->type;
+  /* object, matrices, time, ray_length stay the same */
+  sd->flag = 0;
+  sd->object_flag = kernel_tex_fetch(__object_flag, sd->object);
+  sd->prim = kernel_tex_fetch(__prim_index, isect->prim);
+  sd->type = isect->type;
 
 #  ifdef __UV__
-	sd->u = isect->u;
-	sd->v = isect->v;
+  sd->u = isect->u;
+  sd->v = isect->v;
 #  endif
 
-	/* fetch triangle data */
-	if(sd->type == PRIMITIVE_TRIANGLE) {
-		float3 Ng = triangle_normal(kg, sd);
-		sd->shader = kernel_tex_fetch(__tri_shader, sd->prim);
+  /* fetch triangle data */
+  if (sd->type == PRIMITIVE_TRIANGLE) {
+    float3 Ng = triangle_normal(kg, sd);
+    sd->shader = kernel_tex_fetch(__tri_shader, sd->prim);
 
-		/* static triangle */
-		sd->P = triangle_refine_local(kg, sd, isect, ray);
-		sd->Ng = Ng;
-		sd->N = Ng;
+    /* static triangle */
+    sd->P = triangle_refine_local(kg, sd, isect, ray);
+    sd->Ng = Ng;
+    sd->N = Ng;
 
-		if(sd->shader & SHADER_SMOOTH_NORMAL)
-			sd->N = triangle_smooth_normal(kg, Ng, sd->prim, sd->u, sd->v);
+    if (sd->shader & SHADER_SMOOTH_NORMAL)
+      sd->N = triangle_smooth_normal(kg, Ng, sd->prim, sd->u, sd->v);
 
 #  ifdef __DPDU__
-		/* dPdu/dPdv */
-		triangle_dPdudv(kg, sd->prim, &sd->dPdu, &sd->dPdv);
+    /* dPdu/dPdv */
+    triangle_dPdudv(kg, sd->prim, &sd->dPdu, &sd->dPdv);
 #  endif
-	}
-	else {
-		/* motion triangle */
-		motion_triangle_shader_setup(kg, sd, isect, ray, true);
-	}
+  }
+  else {
+    /* motion triangle */
+    motion_triangle_shader_setup(kg, sd, isect, ray, true);
+  }
 
-	sd->flag |= kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).flags;
+  sd->flag |= kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).flags;
 
 #  ifdef __INSTANCING__
-	if(isect->object != OBJECT_NONE) {
-		/* instance transform */
-		object_normal_transform_auto(kg, sd, &sd->N);
-		object_normal_transform_auto(kg, sd, &sd->Ng);
+  if (isect->object != OBJECT_NONE) {
+    /* instance transform */
+    object_normal_transform_auto(kg, sd, &sd->N);
+    object_normal_transform_auto(kg, sd, &sd->Ng);
 #    ifdef __DPDU__
-		object_dir_transform_auto(kg, sd, &sd->dPdu);
-		object_dir_transform_auto(kg, sd, &sd->dPdv);
+    object_dir_transform_auto(kg, sd, &sd->dPdu);
+    object_dir_transform_auto(kg, sd, &sd->dPdv);
 #    endif
-	}
+  }
 #  endif
 
-	/* backfacing test */
-	if(backfacing) {
-		sd->flag |= SD_BACKFACING;
-		sd->Ng = -sd->Ng;
-		sd->N = -sd->N;
+  /* backfacing test */
+  if (backfacing) {
+    sd->flag |= SD_BACKFACING;
+    sd->Ng = -sd->Ng;
+    sd->N = -sd->N;
 #  ifdef __DPDU__
-		sd->dPdu = -sd->dPdu;
-		sd->dPdv = -sd->dPdv;
+    sd->dPdu = -sd->dPdu;
+    sd->dPdv = -sd->dPdv;
 #  endif
-	}
+  }
 
-	/* should not get used in principle as the shading will only use a diffuse
-	 * BSDF, but the shader might still access it */
-	sd->I = sd->N;
+  /* should not get used in principle as the shading will only use a diffuse
+   * BSDF, but the shader might still access it */
+  sd->I = sd->N;
 
 #  ifdef __RAY_DIFFERENTIALS__
-	/* differentials */
-	differential_dudv(&sd->du, &sd->dv, sd->dPdu, sd->dPdv, sd->dP, sd->Ng);
-	/* don't modify dP and dI */
+  /* differentials */
+  differential_dudv(&sd->du, &sd->dv, sd->dPdu, sd->dPdv, sd->dP, sd->Ng);
+  /* don't modify dP and dI */
 #  endif
 
-	PROFILING_SHADER(sd->shader);
+  PROFILING_SHADER(sd->shader);
 }
 #endif
 
@@ -251,194 +251,208 @@ ccl_device_inline void shader_setup_from_sample(KernelGlobals *kg,
                                                 const float3 P,
                                                 const float3 Ng,
                                                 const float3 I,
-                                                int shader, int object, int prim,
-                                                float u, float v, float t,
+                                                int shader,
+                                                int object,
+                                                int prim,
+                                                float u,
+                                                float v,
+                                                float t,
                                                 float time,
                                                 bool object_space,
                                                 int lamp)
 {
-	PROFILING_INIT(kg, PROFILING_SHADER_SETUP);
-
-	/* vectors */
-	sd->P = P;
-	sd->N = Ng;
-	sd->Ng = Ng;
-	sd->I = I;
-	sd->shader = shader;
-	if(prim != PRIM_NONE)
-		sd->type = PRIMITIVE_TRIANGLE;
-	else if(lamp != LAMP_NONE)
-		sd->type = PRIMITIVE_LAMP;
-	else
-		sd->type = PRIMITIVE_NONE;
-
-	/* primitive */
+  PROFILING_INIT(kg, PROFILING_SHADER_SETUP);
+
+  /* vectors */
+  sd->P = P;
+  sd->N = Ng;
+  sd->Ng = Ng;
+  sd->I = I;
+  sd->shader = shader;
+  if (prim != PRIM_NONE)
+    sd->type = PRIMITIVE_TRIANGLE;
+  else if (lamp != LAMP_NONE)
+    sd->type = PRIMITIVE_LAMP;
+  else
+    sd->type = PRIMITIVE_NONE;
+
+    /* primitive */
 #ifdef __INSTANCING__
-	sd->object = object;
+  sd->object = object;
 #endif
-	sd->lamp = LAMP_NONE;
-	/* currently no access to bvh prim index for strand sd->prim*/
-	sd->prim = prim;
+  sd->lamp = LAMP_NONE;
+  /* currently no access to bvh prim index for strand sd->prim*/
+  sd->prim = prim;
 #ifdef __UV__
-	sd->u = u;
-	sd->v = v;
+  sd->u = u;
+  sd->v = v;
 #endif
-	sd->time = time;
-	sd->ray_length = t;
+  sd->time = time;
+  sd->ray_length = t;
 
-	sd->flag = kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).flags;
-	sd->object_flag = 0;
-	if(sd->object != OBJECT_NONE) {
-		sd->object_flag |= kernel_tex_fetch(__object_flag,
-		                                    sd->object);
+  sd->flag = kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).flags;
+  sd->object_flag = 0;
+  if (sd->object != OBJECT_NONE) {
+    sd->object_flag |= kernel_tex_fetch(__object_flag, sd->object);
 
 #ifdef __OBJECT_MOTION__
-		shader_setup_object_transforms(kg, sd, time);
-	}
-	else if(lamp != LAMP_NONE) {
-		sd->ob_tfm  = lamp_fetch_transform(kg, lamp, false);
-		sd->ob_itfm = lamp_fetch_transform(kg, lamp, true);
-		sd->lamp = lamp;
+    shader_setup_object_transforms(kg, sd, time);
+  }
+  else if (lamp != LAMP_NONE) {
+    sd->ob_tfm = lamp_fetch_transform(kg, lamp, false);
+    sd->ob_itfm = lamp_fetch_transform(kg, lamp, true);
+    sd->lamp = lamp;
 #else
-	}
-	else if(lamp != LAMP_NONE) {
-		sd->lamp = lamp;
+  }
+  else if (lamp != LAMP_NONE) {
+    sd->lamp = lamp;
 #endif
-	}
+  }
 
-	/* transform into world space */
-	if(object_space) {
-		object_position_transform_auto(kg, sd, &sd->P);
-		object_normal_transform_auto(kg, sd, &sd->Ng);
-		sd->N = sd->Ng;
-		object_dir_transform_auto(kg, sd, &sd->I);
-	}
+  /* transform into world space */
+  if (object_space) {
+    object_position_transform_auto(kg, sd, &sd->P);
+    object_normal_transform_auto(kg, sd, &sd->Ng);
+    sd->N = sd->Ng;
+    object_dir_transform_auto(kg, sd, &sd->I);
+  }
 
-	if(sd->type & PRIMITIVE_TRIANGLE) {
-		/* smooth normal */
-		if(sd->shader & SHADER_SMOOTH_NORMAL) {
-			sd->N = triangle_smooth_normal(kg, Ng, sd->prim, sd->u, sd->v);
+  if (sd->type & PRIMITIVE_TRIANGLE) {
+    /* smooth normal */
+    if (sd->shader & SHADER_SMOOTH_NORMAL) {
+      sd->N = triangle_smooth_normal(kg, Ng, sd->prim, sd->u, sd->v);
 
 #ifdef __INSTANCING__
-			if(!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
-				object_normal_transform_auto(kg, sd, &sd->N);
-			}
+      if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
+        object_normal_transform_auto(kg, sd, &sd->N);
+      }
 #endif
-		}
+    }
 
-		/* dPdu/dPdv */
+    /* dPdu/dPdv */
 #ifdef __DPDU__
-		triangle_dPdudv(kg, sd->prim, &sd->dPdu, &sd->dPdv);
+    triangle_dPdudv(kg, sd->prim, &sd->dPdu, &sd->dPdv);
 
 #  ifdef __INSTANCING__
-		if(!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
-			object_dir_transform_auto(kg, sd, &sd->dPdu);
-			object_dir_transform_auto(kg, sd, &sd->dPdv);
-		}
+    if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
+      object_dir_transform_auto(kg, sd, &sd->dPdu);
+      object_dir_transform_auto(kg, sd, &sd->dPdv);
+    }
 #  endif
 #endif
-	}
-	else {
+  }
+  else {
 #ifdef __DPDU__
-		sd->dPdu = make_float3(0.0f, 0.0f, 0.0f);
-		sd->dPdv = make_float3(0.0f, 0.0f, 0.0f);
+    sd->dPdu = make_float3(0.0f, 0.0f, 0.0f);
+    sd->dPdv = make_float3(0.0f, 0.0f, 0.0f);
 #endif
-	}
+  }
 
-	/* backfacing test */
-	if(sd->prim != PRIM_NONE) {
-		bool backfacing = (dot(sd->Ng, sd->I) < 0.0f);
+  /* backfacing test */
+  if (sd->prim != PRIM_NONE) {
+    bool backfacing = (dot(sd->Ng, sd->I) < 0.0f);
 
-		if(backfacing) {
-			sd->flag |= SD_BACKFACING;
-			sd->Ng = -sd->Ng;
-			sd->N = -sd->N;
+    if (backfacing) {
+      sd->flag |= SD_BACKFACING;
+      sd->Ng = -sd->Ng;
+      sd->N = -sd->N;
 #ifdef __DPDU__
-			sd->dPdu = -sd->dPdu;
-			sd->dPdv = -sd->dPdv;
+      sd->dPdu = -sd->dPdu;
+      sd->dPdv = -sd->dPdv;
 #endif
-		}
-	}
+    }
+  }
 
 #ifdef __RAY_DIFFERENTIALS__
-	/* no ray differentials here yet */
-	sd->dP = differential3_zero();
-	sd->dI = differential3_zero();
-	sd->du = differential_zero();
-	sd->dv = differential_zero();
+  /* no ray differentials here yet */
+  sd->dP = differential3_zero();
+  sd->dI = differential3_zero();
+  sd->du = differential_zero();
+  sd->dv = differential_zero();
 #endif
 
-	PROFILING_SHADER(sd->shader);
-	PROFILING_OBJECT(sd->object);
+  PROFILING_SHADER(sd->shader);
+  PROFILING_OBJECT(sd->object);
 }
 
 /* ShaderData setup for displacement */
 
-ccl_device void shader_setup_from_displace(KernelGlobals *kg, ShaderData *sd,
-	int object, int prim, float u, float v)
+ccl_device void shader_setup_from_displace(
+    KernelGlobals *kg, ShaderData *sd, int object, int prim, float u, float v)
 {
-	float3 P, Ng, I = make_float3(0.0f, 0.0f, 0.0f);
-	int shader;
-
-	triangle_point_normal(kg, object, prim, u, v, &P, &Ng, &shader);
-
-	/* force smooth shading for displacement */
-	shader |= SHADER_SMOOTH_NORMAL;
-
-	shader_setup_from_sample(kg, sd,
-	                         P, Ng, I,
-	                         shader, object, prim,
-	                         u, v, 0.0f, 0.5f,
-	                         !(kernel_tex_fetch(__object_flag, object) & SD_OBJECT_TRANSFORM_APPLIED),
-	                         LAMP_NONE);
+  float3 P, Ng, I = make_float3(0.0f, 0.0f, 0.0f);
+  int shader;
+
+  triangle_point_normal(kg, object, prim, u, v, &P, &Ng, &shader);
+
+  /* force smooth shading for displacement */
+  shader |= SHADER_SMOOTH_NORMAL;
+
+  shader_setup_from_sample(
+      kg,
+      sd,
+      P,
+      Ng,
+      I,
+      shader,
+      object,
+      prim,
+      u,
+      v,
+      0.0f,
+      0.5f,
+      !(kernel_tex_fetch(__object_flag, object) & SD_OBJECT_TRANSFORM_APPLIED),
+      LAMP_NONE);
 }
 
 /* ShaderData setup from ray into background */
 
-ccl_device_inline void shader_setup_from_background(KernelGlobals *kg, ShaderData *sd, const Ray *ray)
+ccl_device_inline void shader_setup_from_background(KernelGlobals *kg,
+                                                    ShaderData *sd,
+                                                    const Ray *ray)
 {
-	PROFILING_INIT(kg, PROFILING_SHADER_SETUP);
-
-	/* vectors */
-	sd->P = ray->D;
-	sd->N = -ray->D;
-	sd->Ng = -ray->D;
-	sd->I = -ray->D;
-	sd->shader = kernel_data.background.surface_shader;
-	sd->flag = kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).flags;
-	sd->object_flag = 0;
-	sd->time = ray->time;
-	sd->ray_length = 0.0f;
+  PROFILING_INIT(kg, PROFILING_SHADER_SETUP);
+
+  /* vectors */
+  sd->P = ray->D;
+  sd->N = -ray->D;
+  sd->Ng = -ray->D;
+  sd->I = -ray->D;
+  sd->shader = kernel_data.background.surface_shader;
+  sd->flag = kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).flags;
+  sd->object_flag = 0;
+  sd->time = ray->time;
+  sd->ray_length = 0.0f;
 
 #ifdef __INSTANCING__
-	sd->object = OBJECT_NONE;
+  sd->object = OBJECT_NONE;
 #endif
-	sd->lamp = LAMP_NONE;
-	sd->prim = PRIM_NONE;
+  sd->lamp = LAMP_NONE;
+  sd->prim = PRIM_NONE;
 #ifdef __UV__
-	sd->u = 0.0f;
-	sd->v = 0.0f;
+  sd->u = 0.0f;
+  sd->v = 0.0f;
 #endif
 
 #ifdef __DPDU__
-	/* dPdu/dPdv */
-	sd->dPdu = make_float3(0.0f, 0.0f, 0.0f);
-	sd->dPdv = make_float3(0.0f, 0.0f, 0.0f);
+  /* dPdu/dPdv */
+  sd->dPdu = make_float3(0.0f, 0.0f, 0.0f);
+  sd->dPdv = make_float3(0.0f, 0.0f, 0.0f);
 #endif
 
 #ifdef __RAY_DIFFERENTIALS__
-	/* differentials */
-	sd->dP = ray->dD;
-	differential_incoming(&sd->dI, sd->dP);
-	sd->du = differential_zero();
-	sd->dv = differential_zero();
+  /* differentials */
+  sd->dP = ray->dD;
+  differential_incoming(&sd->dI, sd->dP);
+  sd->du = differential_zero();
+  sd->dv = differential_zero();
 #endif
 
-	/* for NDC coordinates */
-	sd->ray_P = ray->P;
+  /* for NDC coordinates */
+  sd->ray_P = ray->P;
 
-	PROFILING_SHADER(sd->shader);
-	PROFILING_OBJECT(sd->object);
+  PROFILING_SHADER(sd->shader);
+  PROFILING_OBJECT(sd->object);
 }
 
 /* ShaderData setup from point inside volume */
@@ -446,141 +460,145 @@ ccl_device_inline void shader_setup_from_background(KernelGlobals *kg, ShaderDat
 #ifdef __VOLUME__
 ccl_device_inline void shader_setup_from_volume(KernelGlobals *kg, ShaderData *sd, const Ray *ray)
 {
-	PROFILING_INIT(kg, PROFILING_SHADER_SETUP);
-
-	/* vectors */
-	sd->P = ray->P;
-	sd->N = -ray->D;
-	sd->Ng = -ray->D;
-	sd->I = -ray->D;
-	sd->shader = SHADER_NONE;
-	sd->flag = 0;
-	sd->object_flag = 0;
-	sd->time = ray->time;
-	sd->ray_length = 0.0f; /* todo: can we set this to some useful value? */
+  PROFILING_INIT(kg, PROFILING_SHADER_SETUP);
+
+  /* vectors */
+  sd->P = ray->P;
+  sd->N = -ray->D;
+  sd->Ng = -ray->D;
+  sd->I = -ray->D;
+  sd->shader = SHADER_NONE;
+  sd->flag = 0;
+  sd->object_flag = 0;
+  sd->time = ray->time;
+  sd->ray_length = 0.0f; /* todo: can we set this to some useful value? */
 
 #  ifdef __INSTANCING__
-	sd->object = OBJECT_NONE; /* todo: fill this for texture coordinates */
+  sd->object = OBJECT_NONE; /* todo: fill this for texture coordinates */
 #  endif
-	sd->lamp = LAMP_NONE;
-	sd->prim = PRIM_NONE;
-	sd->type = PRIMITIVE_NONE;
+  sd->lamp = LAMP_NONE;
+  sd->prim = PRIM_NONE;
+  sd->type = PRIMITIVE_NONE;
 
 #  ifdef __UV__
-	sd->u = 0.0f;
-	sd->v = 0.0f;
+  sd->u = 0.0f;
+  sd->v = 0.0f;
 #  endif
 
 #  ifdef __DPDU__
-	/* dPdu/dPdv */
-	sd->dPdu = make_float3(0.0f, 0.0f, 0.0f);
-	sd->dPdv = make_float3(0.0f, 0.0f, 0.0f);
+  /* dPdu/dPdv */
+  sd->dPdu = make_float3(0.0f, 0.0f, 0.0f);
+  sd->dPdv = make_float3(0.0f, 0.0f, 0.0f);
 #  endif
 
 #  ifdef __RAY_DIFFERENTIALS__
-	/* differentials */
-	sd->dP = ray->dD;
-	differential_incoming(&sd->dI, sd->dP);
-	sd->du = differential_zero();
-	sd->dv = differential_zero();
+  /* differentials */
+  sd->dP = ray->dD;
+  differential_incoming(&sd->dI, sd->dP);
+  sd->du = differential_zero();
+  sd->dv = differential_zero();
 #  endif
 
-	/* for NDC coordinates */
-	sd->ray_P = ray->P;
-	sd->ray_dP = ray->dP;
+  /* for NDC coordinates */
+  sd->ray_P = ray->P;
+  sd->ray_dP = ray->dP;
 
-	PROFILING_SHADER(sd->shader);
-	PROFILING_OBJECT(sd->object);
+  PROFILING_SHADER(sd->shader);
+  PROFILING_OBJECT(sd->object);
 }
-#endif  /* __VOLUME__ */
+#endif /* __VOLUME__ */
 
 /* Merging */
 
 #if defined(__BRANCHED_PATH__) || defined(__VOLUME__)
 ccl_device_inline void shader_merge_closures(ShaderData *sd)
 {
-	/* merge identical closures, better when we sample a single closure at a time */
-	for(int i = 0; i < sd->num_closure; i++) {
-		ShaderClosure *sci = &sd->closure[i];
-
-		for(int j = i + 1; j < sd->num_closure; j++) {
-			ShaderClosure *scj = &sd->closure[j];
-
-			if(sci->type != scj->type)
-				continue;
-			if(!bsdf_merge(sci, scj))
-				continue;
-
-			sci->weight += scj->weight;
-			sci->sample_weight += scj->sample_weight;
-
-			int size = sd->num_closure - (j+1);
-			if(size > 0) {
-				for(int k = 0; k < size; k++) {
-					scj[k] = scj[k+1];
-				}
-			}
-
-			sd->num_closure--;
-			kernel_assert(sd->num_closure >= 0);
-			j--;
-		}
-	}
+  /* merge identical closures, better when we sample a single closure at a time */
+  for (int i = 0; i < sd->num_closure; i++) {
+    ShaderClosure *sci = &sd->closure[i];
+
+    for (int j = i + 1; j < sd->num_closure; j++) {
+      ShaderClosure *scj = &sd->closure[j];
+
+      if (sci->type != scj->type)
+        continue;
+      if (!bsdf_merge(sci, scj))
+        continue;
+
+      sci->weight += scj->weight;
+      sci->sample_weight += scj->sample_weight;
+
+      int size = sd->num_closure - (j + 1);
+      if (size > 0) {
+        for (int k = 0; k < size; k++) {
+          scj[k] = scj[k + 1];
+        }
+      }
+
+      sd->num_closure--;
+      kernel_assert(sd->num_closure >= 0);
+      j--;
+    }
+  }
 }
-#endif  /* __BRANCHED_PATH__ || __VOLUME__ */
+#endif /* __BRANCHED_PATH__ || __VOLUME__ */
 
 /* Defensive sampling. */
 
-ccl_device_inline void shader_prepare_closures(ShaderData *sd,
-                                               ccl_addr_space PathState *state)
+ccl_device_inline void shader_prepare_closures(ShaderData *sd, ccl_addr_space PathState *state)
 {
-	/* We can likely also do defensive sampling at deeper bounces, particularly
-	 * for cases like a perfect mirror but possibly also others. This will need
-	 * a good heuristic. */
-	if(state->bounce + state->transparent_bounce == 0 && sd->num_closure > 1) {
-		float sum = 0.0f;
-
-		for(int i = 0; i < sd->num_closure; i++) {
-			ShaderClosure *sc = &sd->closure[i];
-			if(CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) {
-				sum += sc->sample_weight;
-			}
-		}
-
-		for(int i = 0; i < sd->num_closure; i++) {
-			ShaderClosure *sc = &sd->closure[i];
-			if(CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) {
-				sc->sample_weight = max(sc->sample_weight, 0.125f * sum);
-			}
-		}
-	}
+  /* We can likely also do defensive sampling at deeper bounces, particularly
+   * for cases like a perfect mirror but possibly also others. This will need
+   * a good heuristic. */
+  if (state->bounce + state->transparent_bounce == 0 && sd->num_closure > 1) {
+    float sum = 0.0f;
+
+    for (int i = 0; i < sd->num_closure; i++) {
+      ShaderClosure *sc = &sd->closure[i];
+      if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) {
+        sum += sc->sample_weight;
+      }
+    }
+
+    for (int i = 0; i < sd->num_closure; i++) {
+      ShaderClosure *sc = &sd->closure[i];
+      if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) {
+        sc->sample_weight = max(sc->sample_weight, 0.125f * sum);
+      }
+    }
+  }
 }
 
-
 /* BSDF */
 
-ccl_device_inline void _shader_bsdf_multi_eval(KernelGlobals *kg, ShaderData *sd, const float3 omega_in, float *pdf,
-	const ShaderClosure *skip_sc, BsdfEval *result_eval, float sum_pdf, float sum_sample_weight)
+ccl_device_inline void _shader_bsdf_multi_eval(KernelGlobals *kg,
+                                               ShaderData *sd,
+                                               const float3 omega_in,
+                                               float *pdf,
+                                               const ShaderClosure *skip_sc,
+                                               BsdfEval *result_eval,
+                                               float sum_pdf,
+                                               float sum_sample_weight)
 {
-	/* this is the veach one-sample model with balance heuristic, some pdf
-	 * factors drop out when using balance heuristic weighting */
-	for(int i = 0; i < sd->num_closure; i++) {
-		const ShaderClosure *sc = &sd->closure[i];
+  /* this is the veach one-sample model with balance heuristic, some pdf
+   * factors drop out when using balance heuristic weighting */
+  for (int i = 0; i < sd->num_closure; i++) {
+    const ShaderClosure *sc = &sd->closure[i];
 
-		if(sc != skip_sc && CLOSURE_IS_BSDF(sc->type)) {
-			float bsdf_pdf = 0.0f;
-			float3 eval = bsdf_eval(kg, sd, sc, omega_in, &bsdf_pdf);
+    if (sc != skip_sc && CLOSURE_IS_BSDF(sc->type)) {
+      float bsdf_pdf = 0.0f;
+      float3 eval = bsdf_eval(kg, sd, sc, omega_in, &bsdf_pdf);
 
-			if(bsdf_pdf != 0.0f) {
-				bsdf_eval_accum(result_eval, sc->type, eval*sc->weight, 1.0f);
-				sum_pdf += bsdf_pdf*sc->sample_weight;
-			}
+      if (bsdf_pdf != 0.0f) {
+        bsdf_eval_accum(result_eval, sc->type, eval * sc->weight, 1.0f);
+        sum_pdf += bsdf_pdf * sc->sample_weight;
+      }
 
-			sum_sample_weight += sc->sample_weight;
-		}
-	}
+      sum_sample_weight += sc->sample_weight;
+    }
+  }
 
-	*pdf = (sum_sample_weight > 0.0f)? sum_pdf/sum_sample_weight: 0.0f;
+  *pdf = (sum_sample_weight > 0.0f) ? sum_pdf / sum_sample_weight : 0.0f;
 }
 
 #ifdef __BRANCHED_PATH__
@@ -591,633 +609,654 @@ ccl_device_inline void _shader_bsdf_multi_eval_branched(KernelGlobals *kg,
                                                         float light_pdf,
                                                         bool use_mis)
 {
-	for(int i = 0; i < sd->num_closure; i++) {
-		const ShaderClosure *sc = &sd->closure[i];
-		if(CLOSURE_IS_BSDF(sc->type)) {
-			float bsdf_pdf = 0.0f;
-			float3 eval = bsdf_eval(kg, sd, sc, omega_in, &bsdf_pdf);
-			if(bsdf_pdf != 0.0f) {
-				float mis_weight = use_mis? power_heuristic(light_pdf, bsdf_pdf): 1.0f;
-				bsdf_eval_accum(result_eval,
-				                sc->type,
-				                eval * sc->weight,
-				                mis_weight);
-			}
-		}
-	}
+  for (int i = 0; i < sd->num_closure; i++) {
+    const ShaderClosure *sc = &sd->closure[i];
+    if (CLOSURE_IS_BSDF(sc->type)) {
+      float bsdf_pdf = 0.0f;
+      float3 eval = bsdf_eval(kg, sd, sc, omega_in, &bsdf_pdf);
+      if (bsdf_pdf != 0.0f) {
+        float mis_weight = use_mis ? power_heuristic(light_pdf, bsdf_pdf) : 1.0f;
+        bsdf_eval_accum(result_eval, sc->type, eval * sc->weight, mis_weight);
+      }
+    }
+  }
 }
-#endif  /* __BRANCHED_PATH__ */
-
+#endif /* __BRANCHED_PATH__ */
 
 #ifndef __KERNEL_CUDA__
 ccl_device
 #else
 ccl_device_inline
 #endif
-void shader_bsdf_eval(KernelGlobals *kg,
-                      ShaderData *sd,
-                      const float3 omega_in,
-                      BsdfEval *eval,
-                      float light_pdf,
-                      bool use_mis)
+    void
+    shader_bsdf_eval(KernelGlobals *kg,
+                     ShaderData *sd,
+                     const float3 omega_in,
+                     BsdfEval *eval,
+                     float light_pdf,
+                     bool use_mis)
 {
-	PROFILING_INIT(kg, PROFILING_CLOSURE_EVAL);
+  PROFILING_INIT(kg, PROFILING_CLOSURE_EVAL);
 
-	bsdf_eval_init(eval, NBUILTIN_CLOSURES, make_float3(0.0f, 0.0f, 0.0f), kernel_data.film.use_light_pass);
+  bsdf_eval_init(
+      eval, NBUILTIN_CLOSURES, make_float3(0.0f, 0.0f, 0.0f), kernel_data.film.use_light_pass);
 
 #ifdef __BRANCHED_PATH__
-	if(kernel_data.integrator.branched)
-		_shader_bsdf_multi_eval_branched(kg, sd, omega_in, eval, light_pdf, use_mis);
-	else
+  if (kernel_data.integrator.branched)
+    _shader_bsdf_multi_eval_branched(kg, sd, omega_in, eval, light_pdf, use_mis);
+  else
 #endif
-	{
-		float pdf;
-		_shader_bsdf_multi_eval(kg, sd, omega_in, &pdf, NULL, eval, 0.0f, 0.0f);
-		if(use_mis) {
-			float weight = power_heuristic(light_pdf, pdf);
-			bsdf_eval_mis(eval, weight);
-		}
-	}
+  {
+    float pdf;
+    _shader_bsdf_multi_eval(kg, sd, omega_in, &pdf, NULL, eval, 0.0f, 0.0f);
+    if (use_mis) {
+      float weight = power_heuristic(light_pdf, pdf);
+      bsdf_eval_mis(eval, weight);
+    }
+  }
 }
 
-ccl_device_inline const ShaderClosure *shader_bsdf_pick(ShaderData *sd,
-                                                        float *randu)
+ccl_device_inline const ShaderClosure *shader_bsdf_pick(ShaderData *sd, float *randu)
 {
-	/* Note the sampling here must match shader_bssrdf_pick,
-	 * since we reuse the same random number. */
-	int sampled = 0;
+  /* Note the sampling here must match shader_bssrdf_pick,
+   * since we reuse the same random number. */
+  int sampled = 0;
 
-	if(sd->num_closure > 1) {
-		/* Pick a BSDF or based on sample weights. */
-		float sum = 0.0f;
+  if (sd->num_closure > 1) {
+    /* Pick a BSDF or BSSRDF based on sample weights. */
+    float sum = 0.0f;
 
-		for(int i = 0; i < sd->num_closure; i++) {
-			const ShaderClosure *sc = &sd->closure[i];
+    for (int i = 0; i < sd->num_closure; i++) {
+      const ShaderClosure *sc = &sd->closure[i];
 
-			if(CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) {
-				sum += sc->sample_weight;
-			}
-		}
+      if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) {
+        sum += sc->sample_weight;
+      }
+    }
 
-		float r = (*randu)*sum;
-		float partial_sum = 0.0f;
+    float r = (*randu) * sum;
+    float partial_sum = 0.0f;
 
-		for(int i = 0; i < sd->num_closure; i++) {
-			const ShaderClosure *sc = &sd->closure[i];
+    for (int i = 0; i < sd->num_closure; i++) {
+      const ShaderClosure *sc = &sd->closure[i];
 
-			if(CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) {
-				float next_sum = partial_sum + sc->sample_weight;
+      if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) {
+        float next_sum = partial_sum + sc->sample_weight;
 
-				if(r < next_sum) {
-					sampled = i;
+        if (r < next_sum) {
+          sampled = i;
 
-					/* Rescale to reuse for direction sample, to better
-					 * preserve stratifaction. */
-					*randu = (r - partial_sum) / sc->sample_weight;
-					break;
-				}
+          /* Rescale to reuse for direction sample, to better
+           * preserve stratification. */
+          *randu = (r - partial_sum) / sc->sample_weight;
+          break;
+        }
 
-				partial_sum = next_sum;
-			}
-		}
-	}
+        partial_sum = next_sum;
+      }
+    }
+  }
 
-	const ShaderClosure *sc = &sd->closure[sampled];
-	return CLOSURE_IS_BSDF(sc->type)? sc: NULL;
+  const ShaderClosure *sc = &sd->closure[sampled];
+  return CLOSURE_IS_BSDF(sc->type) ? sc : NULL;
 }
 
 ccl_device_inline const ShaderClosure *shader_bssrdf_pick(ShaderData *sd,
                                                           ccl_addr_space float3 *throughput,
                                                           float *randu)
 {
-	/* Note the sampling here must match shader_bsdf_pick,
-	 * since we reuse the same random number. */
-	int sampled = 0;
-
-	if(sd->num_closure > 1) {
-		/* Pick a BSDF or BSSRDF or based on sample weights. */
-		float sum_bsdf = 0.0f;
-		float sum_bssrdf = 0.0f;
-
-		for(int i = 0; i < sd->num_closure; i++) {
-			const ShaderClosure *sc = &sd->closure[i];
-
-			if(CLOSURE_IS_BSDF(sc->type)) {
-				sum_bsdf += sc->sample_weight;
-			}
-			else if(CLOSURE_IS_BSSRDF(sc->type)) {
-				sum_bssrdf += sc->sample_weight;
-			}
-		}
-
-		float r = (*randu)*(sum_bsdf + sum_bssrdf);
-		float partial_sum = 0.0f;
-
-		for(int i = 0; i < sd->num_closure; i++) {
-			const ShaderClosure *sc = &sd->closure[i];
-
-			if(CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) {
-				float next_sum = partial_sum + sc->sample_weight;
-
-				if(r < next_sum) {
-					if(CLOSURE_IS_BSDF(sc->type)) {
-						*throughput *= (sum_bsdf + sum_bssrdf) / sum_bsdf;
-						return NULL;
-					}
-					else {
-						*throughput *= (sum_bsdf + sum_bssrdf) / sum_bssrdf;
-						sampled = i;
-
-						/* Rescale to reuse for direction sample, to better
-						 * preserve stratifaction. */
-						*randu = (r - partial_sum) / sc->sample_weight;
-						break;
-					}
-				}
-
-				partial_sum = next_sum;
-			}
-		}
-	}
-
-	const ShaderClosure *sc = &sd->closure[sampled];
-	return CLOSURE_IS_BSSRDF(sc->type)? sc: NULL;
+  /* Note the sampling here must match shader_bsdf_pick,
+   * since we reuse the same random number. */
+  int sampled = 0;
+
+  if (sd->num_closure > 1) {
+    /* Pick a BSDF or BSSRDF based on sample weights. */
+    float sum_bsdf = 0.0f;
+    float sum_bssrdf = 0.0f;
+
+    for (int i = 0; i < sd->num_closure; i++) {
+      const ShaderClosure *sc = &sd->closure[i];
+
+      if (CLOSURE_IS_BSDF(sc->type)) {
+        sum_bsdf += sc->sample_weight;
+      }
+      else if (CLOSURE_IS_BSSRDF(sc->type)) {
+        sum_bssrdf += sc->sample_weight;
+      }
+    }
+
+    float r = (*randu) * (sum_bsdf + sum_bssrdf);
+    float partial_sum = 0.0f;
+
+    for (int i = 0; i < sd->num_closure; i++) {
+      const ShaderClosure *sc = &sd->closure[i];
+
+      if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) {
+        float next_sum = partial_sum + sc->sample_weight;
+
+        if (r < next_sum) {
+          if (CLOSURE_IS_BSDF(sc->type)) {
+            *throughput *= (sum_bsdf + sum_bssrdf) / sum_bsdf;
+            return NULL;
+          }
+          else {
+            *throughput *= (sum_bsdf + sum_bssrdf) / sum_bssrdf;
+            sampled = i;
+
+            /* Rescale to reuse for direction sample, to better
+             * preserve stratification. */
+            *randu = (r - partial_sum) / sc->sample_weight;
+            break;
+          }
+        }
+
+        partial_sum = next_sum;
+      }
+    }
+  }
+
+  const ShaderClosure *sc = &sd->closure[sampled];
+  return CLOSURE_IS_BSSRDF(sc->type) ? sc : NULL;
 }
 
 ccl_device_inline int shader_bsdf_sample(KernelGlobals *kg,
                                          ShaderData *sd,
-                                         float randu, float randv,
+                                         float randu,
+                                         float randv,
                                          BsdfEval *bsdf_eval,
                                          float3 *omega_in,
                                          differential3 *domega_in,
                                          float *pdf)
 {
-	PROFILING_INIT(kg, PROFILING_CLOSURE_SAMPLE);
+  PROFILING_INIT(kg, PROFILING_CLOSURE_SAMPLE);
 
-	const ShaderClosure *sc = shader_bsdf_pick(sd, &randu);
-	if(sc == NULL) {
-		*pdf = 0.0f;
-		return LABEL_NONE;
-	}
+  const ShaderClosure *sc = shader_bsdf_pick(sd, &randu);
+  if (sc == NULL) {
+    *pdf = 0.0f;
+    return LABEL_NONE;
+  }
 
-	/* BSSRDF should already have been handled elsewhere. */
-	kernel_assert(CLOSURE_IS_BSDF(sc->type));
+  /* BSSRDF should already have been handled elsewhere. */
+  kernel_assert(CLOSURE_IS_BSDF(sc->type));
 
-	int label;
-	float3 eval;
+  int label;
+  float3 eval;
 
-	*pdf = 0.0f;
-	label = bsdf_sample(kg, sd, sc, randu, randv, &eval, omega_in, domega_in, pdf);
+  *pdf = 0.0f;
+  label = bsdf_sample(kg, sd, sc, randu, randv, &eval, omega_in, domega_in, pdf);
 
-	if(*pdf != 0.0f) {
-		bsdf_eval_init(bsdf_eval, sc->type, eval*sc->weight, kernel_data.film.use_light_pass);
+  if (*pdf != 0.0f) {
+    bsdf_eval_init(bsdf_eval, sc->type, eval * sc->weight, kernel_data.film.use_light_pass);
 
-		if(sd->num_closure > 1) {
-			float sweight = sc->sample_weight;
-			_shader_bsdf_multi_eval(kg, sd, *omega_in, pdf, sc, bsdf_eval, *pdf*sweight, sweight);
-		}
-	}
+    if (sd->num_closure > 1) {
+      float sweight = sc->sample_weight;
+      _shader_bsdf_multi_eval(kg, sd, *omega_in, pdf, sc, bsdf_eval, *pdf * sweight, sweight);
+    }
+  }
 
-	return label;
+  return label;
 }
 
-ccl_device int shader_bsdf_sample_closure(KernelGlobals *kg, ShaderData *sd,
-	const ShaderClosure *sc, float randu, float randv, BsdfEval *bsdf_eval,
-	float3 *omega_in, differential3 *domega_in, float *pdf)
+ccl_device int shader_bsdf_sample_closure(KernelGlobals *kg,
+                                          ShaderData *sd,
+                                          const ShaderClosure *sc,
+                                          float randu,
+                                          float randv,
+                                          BsdfEval *bsdf_eval,
+                                          float3 *omega_in,
+                                          differential3 *domega_in,
+                                          float *pdf)
 {
-	PROFILING_INIT(kg, PROFILING_CLOSURE_SAMPLE);
+  PROFILING_INIT(kg, PROFILING_CLOSURE_SAMPLE);
 
-	int label;
-	float3 eval;
+  int label;
+  float3 eval;
 
-	*pdf = 0.0f;
-	label = bsdf_sample(kg, sd, sc, randu, randv, &eval, omega_in, domega_in, pdf);
+  *pdf = 0.0f;
+  label = bsdf_sample(kg, sd, sc, randu, randv, &eval, omega_in, domega_in, pdf);
 
-	if(*pdf != 0.0f)
-		bsdf_eval_init(bsdf_eval, sc->type, eval*sc->weight, kernel_data.film.use_light_pass);
+  if (*pdf != 0.0f)
+    bsdf_eval_init(bsdf_eval, sc->type, eval * sc->weight, kernel_data.film.use_light_pass);
 
-	return label;
+  return label;
 }
 
 ccl_device float shader_bsdf_average_roughness(ShaderData *sd)
 {
-	float roughness = 0.0f;
-	float sum_weight = 0.0f;
-
-	for(int i = 0; i < sd->num_closure; i++) {
-		ShaderClosure *sc = &sd->closure[i];
-
-		if(CLOSURE_IS_BSDF(sc->type)) {
-			/* sqrt once to undo the squaring from multiplying roughness on the
-			 * two axes, and once for the squared roughness convention. */
-			float weight = fabsf(average(sc->weight));
-			roughness += weight * sqrtf(safe_sqrtf(bsdf_get_roughness_squared(sc)));
-			sum_weight += weight;
-		}
-	}
-
-	return (sum_weight > 0.0f) ? roughness / sum_weight : 0.0f;
+  float roughness = 0.0f;
+  float sum_weight = 0.0f;
+
+  for (int i = 0; i < sd->num_closure; i++) {
+    ShaderClosure *sc = &sd->closure[i];
+
+    if (CLOSURE_IS_BSDF(sc->type)) {
+      /* sqrt once to undo the squaring from multiplying roughness on the
+       * two axes, and once for the squared roughness convention. */
+      float weight = fabsf(average(sc->weight));
+      roughness += weight * sqrtf(safe_sqrtf(bsdf_get_roughness_squared(sc)));
+      sum_weight += weight;
+    }
+  }
+
+  return (sum_weight > 0.0f) ? roughness / sum_weight : 0.0f;
 }
 
 ccl_device void shader_bsdf_blur(KernelGlobals *kg, ShaderData *sd, float roughness)
 {
-	for(int i = 0; i < sd->num_closure; i++) {
-		ShaderClosure *sc = &sd->closure[i];
+  for (int i = 0; i < sd->num_closure; i++) {
+    ShaderClosure *sc = &sd->closure[i];
 
-		if(CLOSURE_IS_BSDF(sc->type))
-			bsdf_blur(kg, sc, roughness);
-	}
+    if (CLOSURE_IS_BSDF(sc->type))
+      bsdf_blur(kg, sc, roughness);
+  }
 }
 
 ccl_device float3 shader_bsdf_transparency(KernelGlobals *kg, const ShaderData *sd)
 {
-	if(sd->flag & SD_HAS_ONLY_VOLUME) {
-		return make_float3(1.0f, 1.0f, 1.0f);
-	}
-	else if(sd->flag & SD_TRANSPARENT) {
-		return sd->closure_transparent_extinction;
-	}
-	else {
-		return make_float3(0.0f, 0.0f, 0.0f);
-	}
+  if (sd->flag & SD_HAS_ONLY_VOLUME) {
+    return make_float3(1.0f, 1.0f, 1.0f);
+  }
+  else if (sd->flag & SD_TRANSPARENT) {
+    return sd->closure_transparent_extinction;
+  }
+  else {
+    return make_float3(0.0f, 0.0f, 0.0f);
+  }
 }
 
 ccl_device void shader_bsdf_disable_transparency(KernelGlobals *kg, ShaderData *sd)
 {
-	if(sd->flag & SD_TRANSPARENT) {
-		for(int i = 0; i < sd->num_closure; i++) {
-			ShaderClosure *sc = &sd->closure[i];
-
-			if(sc->type == CLOSURE_BSDF_TRANSPARENT_ID) {
-				sc->sample_weight = 0.0f;
-				sc->weight = make_float3(0.0f, 0.0f, 0.0f);
-			}
-		}
-
-		sd->flag &= ~SD_TRANSPARENT;
-	}
+  if (sd->flag & SD_TRANSPARENT) {
+    for (int i = 0; i < sd->num_closure; i++) {
+      ShaderClosure *sc = &sd->closure[i];
+
+      if (sc->type == CLOSURE_BSDF_TRANSPARENT_ID) {
+        sc->sample_weight = 0.0f;
+        sc->weight = make_float3(0.0f, 0.0f, 0.0f);
+      }
+    }
+
+    sd->flag &= ~SD_TRANSPARENT;
+  }
 }
 
 ccl_device float3 shader_bsdf_alpha(KernelGlobals *kg, ShaderData *sd)
 {
-	float3 alpha = make_float3(1.0f, 1.0f, 1.0f) - shader_bsdf_transparency(kg, sd);
+  float3 alpha = make_float3(1.0f, 1.0f, 1.0f) - shader_bsdf_transparency(kg, sd);
 
-	alpha = max(alpha, make_float3(0.0f, 0.0f, 0.0f));
-	alpha = min(alpha, make_float3(1.0f, 1.0f, 1.0f));
+  alpha = max(alpha, make_float3(0.0f, 0.0f, 0.0f));
+  alpha = min(alpha, make_float3(1.0f, 1.0f, 1.0f));
 
-	return alpha;
+  return alpha;
 }
 
 ccl_device float3 shader_bsdf_diffuse(KernelGlobals *kg, ShaderData *sd)
 {
-	float3 eval = make_float3(0.0f, 0.0f, 0.0f);
+  float3 eval = make_float3(0.0f, 0.0f, 0.0f);
 
-	for(int i = 0; i < sd->num_closure; i++) {
-		ShaderClosure *sc = &sd->closure[i];
+  for (int i = 0; i < sd->num_closure; i++) {
+    ShaderClosure *sc = &sd->closure[i];
 
-		if(CLOSURE_IS_BSDF_DIFFUSE(sc->type))
-			eval += sc->weight;
-	}
+    if (CLOSURE_IS_BSDF_DIFFUSE(sc->type))
+      eval += sc->weight;
+  }
 
-	return eval;
+  return eval;
 }
 
 ccl_device float3 shader_bsdf_glossy(KernelGlobals *kg, ShaderData *sd)
 {
-	float3 eval = make_float3(0.0f, 0.0f, 0.0f);
+  float3 eval = make_float3(0.0f, 0.0f, 0.0f);
 
-	for(int i = 0; i < sd->num_closure; i++) {
-		ShaderClosure *sc = &sd->closure[i];
+  for (int i = 0; i < sd->num_closure; i++) {
+    ShaderClosure *sc = &sd->closure[i];
 
-		if(CLOSURE_IS_BSDF_GLOSSY(sc->type))
-			eval += sc->weight;
-	}
+    if (CLOSURE_IS_BSDF_GLOSSY(sc->type))
+      eval += sc->weight;
+  }
 
-	return eval;
+  return eval;
 }
 
 ccl_device float3 shader_bsdf_transmission(KernelGlobals *kg, ShaderData *sd)
 {
-	float3 eval = make_float3(0.0f, 0.0f, 0.0f);
+  float3 eval = make_float3(0.0f, 0.0f, 0.0f);
 
-	for(int i = 0; i < sd->num_closure; i++) {
-		ShaderClosure *sc = &sd->closure[i];
+  for (int i = 0; i < sd->num_closure; i++) {
+    ShaderClosure *sc = &sd->closure[i];
 
-		if(CLOSURE_IS_BSDF_TRANSMISSION(sc->type))
-			eval += sc->weight;
-	}
+    if (CLOSURE_IS_BSDF_TRANSMISSION(sc->type))
+      eval += sc->weight;
+  }
 
-	return eval;
+  return eval;
 }
 
 ccl_device float3 shader_bsdf_subsurface(KernelGlobals *kg, ShaderData *sd)
 {
-	float3 eval = make_float3(0.0f, 0.0f, 0.0f);
+  float3 eval = make_float3(0.0f, 0.0f, 0.0f);
 
-	for(int i = 0; i < sd->num_closure; i++) {
-		ShaderClosure *sc = &sd->closure[i];
+  for (int i = 0; i < sd->num_closure; i++) {
+    ShaderClosure *sc = &sd->closure[i];
 
-		if(CLOSURE_IS_BSSRDF(sc->type) || CLOSURE_IS_BSDF_BSSRDF(sc->type))
-			eval += sc->weight;
-	}
+    if (CLOSURE_IS_BSSRDF(sc->type) || CLOSURE_IS_BSDF_BSSRDF(sc->type))
+      eval += sc->weight;
+  }
 
-	return eval;
+  return eval;
 }
 
 ccl_device float3 shader_bsdf_average_normal(KernelGlobals *kg, ShaderData *sd)
 {
-	float3 N = make_float3(0.0f, 0.0f, 0.0f);
+  float3 N = make_float3(0.0f, 0.0f, 0.0f);
 
-	for(int i = 0; i < sd->num_closure; i++) {
-		ShaderClosure *sc = &sd->closure[i];
-		if(CLOSURE_IS_BSDF_OR_BSSRDF(sc->type))
-			N += sc->N*fabsf(average(sc->weight));
-	}
+  for (int i = 0; i < sd->num_closure; i++) {
+    ShaderClosure *sc = &sd->closure[i];
+    if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type))
+      N += sc->N * fabsf(average(sc->weight));
+  }
 
-	return (is_zero(N))? sd->N : normalize(N);
+  return (is_zero(N)) ? sd->N : normalize(N);
 }
 
 ccl_device float3 shader_bsdf_ao(KernelGlobals *kg, ShaderData *sd, float ao_factor, float3 *N_)
 {
-	float3 eval = make_float3(0.0f, 0.0f, 0.0f);
-	float3 N = make_float3(0.0f, 0.0f, 0.0f);
+  float3 eval = make_float3(0.0f, 0.0f, 0.0f);
+  float3 N = make_float3(0.0f, 0.0f, 0.0f);
 
-	for(int i = 0; i < sd->num_closure; i++) {
-		ShaderClosure *sc = &sd->closure[i];
+  for (int i = 0; i < sd->num_closure; i++) {
+    ShaderClosure *sc = &sd->closure[i];
 
-		if(CLOSURE_IS_BSDF_DIFFUSE(sc->type)) {
-			const DiffuseBsdf *bsdf = (const DiffuseBsdf*)sc;
-			eval += sc->weight*ao_factor;
-			N += bsdf->N*fabsf(average(sc->weight));
-		}
-	}
+    if (CLOSURE_IS_BSDF_DIFFUSE(sc->type)) {
+      const DiffuseBsdf *bsdf = (const DiffuseBsdf *)sc;
+      eval += sc->weight * ao_factor;
+      N += bsdf->N * fabsf(average(sc->weight));
+    }
+  }
 
-	*N_ = (is_zero(N))? sd->N : normalize(N);
-	return eval;
+  *N_ = (is_zero(N)) ? sd->N : normalize(N);
+  return eval;
 }
 
 #ifdef __SUBSURFACE__
 ccl_device float3 shader_bssrdf_sum(ShaderData *sd, float3 *N_, float *texture_blur_)
 {
-	float3 eval = make_float3(0.0f, 0.0f, 0.0f);
-	float3 N = make_float3(0.0f, 0.0f, 0.0f);
-	float texture_blur = 0.0f, weight_sum = 0.0f;
+  float3 eval = make_float3(0.0f, 0.0f, 0.0f);
+  float3 N = make_float3(0.0f, 0.0f, 0.0f);
+  float texture_blur = 0.0f, weight_sum = 0.0f;
 
-	for(int i = 0; i < sd->num_closure; i++) {
-		ShaderClosure *sc = &sd->closure[i];
+  for (int i = 0; i < sd->num_closure; i++) {
+    ShaderClosure *sc = &sd->closure[i];
 
-		if(CLOSURE_IS_BSSRDF(sc->type)) {
-			const Bssrdf *bssrdf = (const Bssrdf*)sc;
-			float avg_weight = fabsf(average(sc->weight));
+    if (CLOSURE_IS_BSSRDF(sc->type)) {
+      const Bssrdf *bssrdf = (const Bssrdf *)sc;
+      float avg_weight = fabsf(average(sc->weight));
 
-			N += bssrdf->N*avg_weight;
-			eval += sc->weight;
-			texture_blur += bssrdf->texture_blur*avg_weight;
-			weight_sum += avg_weight;
-		}
-	}
+      N += bssrdf->N * avg_weight;
+      eval += sc->weight;
+      texture_blur += bssrdf->texture_blur * avg_weight;
+      weight_sum += avg_weight;
+    }
+  }
 
-	if(N_)
-		*N_ = (is_zero(N))? sd->N: normalize(N);
+  if (N_)
+    *N_ = (is_zero(N)) ? sd->N : normalize(N);
 
-	if(texture_blur_)
-		*texture_blur_ = safe_divide(texture_blur, weight_sum);
+  if (texture_blur_)
+    *texture_blur_ = safe_divide(texture_blur, weight_sum);
 
-	return eval;
+  return eval;
 }
-#endif  /* __SUBSURFACE__ */
+#endif /* __SUBSURFACE__ */
 
 /* Constant emission optimization */
 
 ccl_device bool shader_constant_emission_eval(KernelGlobals *kg, int shader, float3 *eval)
 {
-	int shader_index = shader & SHADER_MASK;
-	int shader_flag = kernel_tex_fetch(__shaders, shader_index).flags;
+  int shader_index = shader & SHADER_MASK;
+  int shader_flag = kernel_tex_fetch(__shaders, shader_index).flags;
 
-	if (shader_flag & SD_HAS_CONSTANT_EMISSION) {
-		*eval = make_float3(
-			kernel_tex_fetch(__shaders, shader_index).constant_emission[0],
-			kernel_tex_fetch(__shaders, shader_index).constant_emission[1],
-			kernel_tex_fetch(__shaders, shader_index).constant_emission[2]);
+  if (shader_flag & SD_HAS_CONSTANT_EMISSION) {
+    *eval = make_float3(kernel_tex_fetch(__shaders, shader_index).constant_emission[0],
+                        kernel_tex_fetch(__shaders, shader_index).constant_emission[1],
+                        kernel_tex_fetch(__shaders, shader_index).constant_emission[2]);
 
-		return true;
-	}
+    return true;
+  }
 
-	return false;
+  return false;
 }
 
 /* Background */
 
 ccl_device float3 shader_background_eval(ShaderData *sd)
 {
-	if(sd->flag & SD_EMISSION) {
-		return sd->closure_emission_background;
-	}
-	else {
-		return make_float3(0.0f, 0.0f, 0.0f);
-	}
+  if (sd->flag & SD_EMISSION) {
+    return sd->closure_emission_background;
+  }
+  else {
+    return make_float3(0.0f, 0.0f, 0.0f);
+  }
 }
 
 /* Emission */
 
 ccl_device float3 shader_emissive_eval(ShaderData *sd)
 {
-	if(sd->flag & SD_EMISSION) {
-		return emissive_simple_eval(sd->Ng, sd->I) * sd->closure_emission_background;
-	}
-	else {
-		return make_float3(0.0f, 0.0f, 0.0f);
-	}
+  if (sd->flag & SD_EMISSION) {
+    return emissive_simple_eval(sd->Ng, sd->I) * sd->closure_emission_background;
+  }
+  else {
+    return make_float3(0.0f, 0.0f, 0.0f);
+  }
 }
 
 /* Holdout */
 
 ccl_device float3 shader_holdout_eval(KernelGlobals *kg, ShaderData *sd)
 {
-	float3 weight = make_float3(0.0f, 0.0f, 0.0f);
+  float3 weight = make_float3(0.0f, 0.0f, 0.0f);
 
-	for(int i = 0; i < sd->num_closure; i++) {
-		ShaderClosure *sc = &sd->closure[i];
+  for (int i = 0; i < sd->num_closure; i++) {
+    ShaderClosure *sc = &sd->closure[i];
 
-		if(CLOSURE_IS_HOLDOUT(sc->type))
-			weight += sc->weight;
-	}
+    if (CLOSURE_IS_HOLDOUT(sc->type))
+      weight += sc->weight;
+  }
 
-	return weight;
+  return weight;
 }
 
 /* Surface Evaluation */
 
-ccl_device void shader_eval_surface(KernelGlobals *kg, ShaderData *sd,
-	ccl_addr_space PathState *state, int path_flag)
+ccl_device void shader_eval_surface(KernelGlobals *kg,
+                                    ShaderData *sd,
+                                    ccl_addr_space PathState *state,
+                                    int path_flag)
 {
-	PROFILING_INIT(kg, PROFILING_SHADER_EVAL);
-
-	/* If path is being terminated, we are tracing a shadow ray or evaluating
-	 * emission, then we don't need to store closures. The emission and shadow
-	 * shader data also do not have a closure array to save GPU memory. */
-	int max_closures;
-	if(path_flag & (PATH_RAY_TERMINATE|PATH_RAY_SHADOW|PATH_RAY_EMISSION)) {
-		max_closures = 0;
-	}
-	else {
-		max_closures = kernel_data.integrator.max_closures;
-	}
-
-	sd->num_closure = 0;
-	sd->num_closure_left = max_closures;
+  PROFILING_INIT(kg, PROFILING_SHADER_EVAL);
+
+  /* If path is being terminated, we are tracing a shadow ray or evaluating
+   * emission, then we don't need to store closures. The emission and shadow
+   * shader data also do not have a closure array to save GPU memory. */
+  int max_closures;
+  if (path_flag & (PATH_RAY_TERMINATE | PATH_RAY_SHADOW | PATH_RAY_EMISSION)) {
+    max_closures = 0;
+  }
+  else {
+    max_closures = kernel_data.integrator.max_closures;
+  }
+
+  sd->num_closure = 0;
+  sd->num_closure_left = max_closures;
 
 #ifdef __OSL__
-	if(kg->osl) {
-		if (sd->object == OBJECT_NONE) {
-			OSLShader::eval_background(kg, sd, state, path_flag);
-		}
-		else {
-			OSLShader::eval_surface(kg, sd, state, path_flag);
-		}
-	}
-	else
+  if (kg->osl) {
+    if (sd->object == OBJECT_NONE) {
+      OSLShader::eval_background(kg, sd, state, path_flag);
+    }
+    else {
+      OSLShader::eval_surface(kg, sd, state, path_flag);
+    }
+  }
+  else
 #endif
-	{
+  {
 #ifdef __SVM__
-		svm_eval_nodes(kg, sd, state, SHADER_TYPE_SURFACE, path_flag);
+    svm_eval_nodes(kg, sd, state, SHADER_TYPE_SURFACE, path_flag);
 #else
-		if(sd->object == OBJECT_NONE) {
-			sd->closure_emission_background = make_float3(0.8f, 0.8f, 0.8f);
-			sd->flag |= SD_EMISSION;
-		}
-		else {
-			DiffuseBsdf *bsdf = (DiffuseBsdf*)bsdf_alloc(sd,
-			                                             sizeof(DiffuseBsdf),
-			                                             make_float3(0.8f, 0.8f, 0.8f));
-			if(bsdf != NULL) {
-				bsdf->N = sd->N;
-				sd->flag |= bsdf_diffuse_setup(bsdf);
-			}
-		}
+    if (sd->object == OBJECT_NONE) {
+      sd->closure_emission_background = make_float3(0.8f, 0.8f, 0.8f);
+      sd->flag |= SD_EMISSION;
+    }
+    else {
+      DiffuseBsdf *bsdf = (DiffuseBsdf *)bsdf_alloc(
+          sd, sizeof(DiffuseBsdf), make_float3(0.8f, 0.8f, 0.8f));
+      if (bsdf != NULL) {
+        bsdf->N = sd->N;
+        sd->flag |= bsdf_diffuse_setup(bsdf);
+      }
+    }
 #endif
-	}
+  }
 
-	if(sd->flag & SD_BSDF_NEEDS_LCG) {
-		sd->lcg_state = lcg_state_init_addrspace(state, 0xb4bc3953);
-	}
+  if (sd->flag & SD_BSDF_NEEDS_LCG) {
+    sd->lcg_state = lcg_state_init_addrspace(state, 0xb4bc3953);
+  }
 }
 
 /* Volume */
 
 #ifdef __VOLUME__
 
-ccl_device_inline void _shader_volume_phase_multi_eval(const ShaderData *sd, const float3 omega_in, float *pdf,
-	int skip_phase, BsdfEval *result_eval, float sum_pdf, float sum_sample_weight)
+ccl_device_inline void _shader_volume_phase_multi_eval(const ShaderData *sd,
+                                                       const float3 omega_in,
+                                                       float *pdf,
+                                                       int skip_phase,
+                                                       BsdfEval *result_eval,
+                                                       float sum_pdf,
+                                                       float sum_sample_weight)
 {
-	for(int i = 0; i < sd->num_closure; i++) {
-		if(i == skip_phase)
-			continue;
+  for (int i = 0; i < sd->num_closure; i++) {
+    if (i == skip_phase)
+      continue;
 
-		const ShaderClosure *sc = &sd->closure[i];
+    const ShaderClosure *sc = &sd->closure[i];
 
-		if(CLOSURE_IS_PHASE(sc->type)) {
-			float phase_pdf = 0.0f;
-			float3 eval = volume_phase_eval(sd, sc, omega_in, &phase_pdf);
+    if (CLOSURE_IS_PHASE(sc->type)) {
+      float phase_pdf = 0.0f;
+      float3 eval = volume_phase_eval(sd, sc, omega_in, &phase_pdf);
 
-			if(phase_pdf != 0.0f) {
-				bsdf_eval_accum(result_eval, sc->type, eval, 1.0f);
-				sum_pdf += phase_pdf*sc->sample_weight;
-			}
+      if (phase_pdf != 0.0f) {
+        bsdf_eval_accum(result_eval, sc->type, eval, 1.0f);
+        sum_pdf += phase_pdf * sc->sample_weight;
+      }
 
-			sum_sample_weight += sc->sample_weight;
-		}
-	}
+      sum_sample_weight += sc->sample_weight;
+    }
+  }
 
-	*pdf = (sum_sample_weight > 0.0f)? sum_pdf/sum_sample_weight: 0.0f;
+  *pdf = (sum_sample_weight > 0.0f) ? sum_pdf / sum_sample_weight : 0.0f;
 }
 
-ccl_device void shader_volume_phase_eval(KernelGlobals *kg, const ShaderData *sd,
-	const float3 omega_in, BsdfEval *eval, float *pdf)
+ccl_device void shader_volume_phase_eval(
+    KernelGlobals *kg, const ShaderData *sd, const float3 omega_in, BsdfEval *eval, float *pdf)
 {
-	PROFILING_INIT(kg, PROFILING_CLOSURE_VOLUME_EVAL);
+  PROFILING_INIT(kg, PROFILING_CLOSURE_VOLUME_EVAL);
 
-	bsdf_eval_init(eval, NBUILTIN_CLOSURES, make_float3(0.0f, 0.0f, 0.0f), kernel_data.film.use_light_pass);
+  bsdf_eval_init(
+      eval, NBUILTIN_CLOSURES, make_float3(0.0f, 0.0f, 0.0f), kernel_data.film.use_light_pass);
 
-	_shader_volume_phase_multi_eval(sd, omega_in, pdf, -1, eval, 0.0f, 0.0f);
+  _shader_volume_phase_multi_eval(sd, omega_in, pdf, -1, eval, 0.0f, 0.0f);
 }
 
-ccl_device int shader_volume_phase_sample(KernelGlobals *kg, const ShaderData *sd,
-	float randu, float randv, BsdfEval *phase_eval,
-	float3 *omega_in, differential3 *domega_in, float *pdf)
+ccl_device int shader_volume_phase_sample(KernelGlobals *kg,
+                                          const ShaderData *sd,
+                                          float randu,
+                                          float randv,
+                                          BsdfEval *phase_eval,
+                                          float3 *omega_in,
+                                          differential3 *domega_in,
+                                          float *pdf)
 {
-	PROFILING_INIT(kg, PROFILING_CLOSURE_VOLUME_SAMPLE);
+  PROFILING_INIT(kg, PROFILING_CLOSURE_VOLUME_SAMPLE);
 
-	int sampled = 0;
+  int sampled = 0;
 
-	if(sd->num_closure > 1) {
-		/* pick a phase closure based on sample weights */
-		float sum = 0.0f;
+  if (sd->num_closure > 1) {
+    /* pick a phase closure based on sample weights */
+    float sum = 0.0f;
 
-		for(sampled = 0; sampled < sd->num_closure; sampled++) {
-			const ShaderClosure *sc = &sd->closure[sampled];
+    for (sampled = 0; sampled < sd->num_closure; sampled++) {
+      const ShaderClosure *sc = &sd->closure[sampled];
 
-			if(CLOSURE_IS_PHASE(sc->type))
-				sum += sc->sample_weight;
-		}
+      if (CLOSURE_IS_PHASE(sc->type))
+        sum += sc->sample_weight;
+    }
 
-		float r = randu*sum;
-		float partial_sum = 0.0f;
+    float r = randu * sum;
+    float partial_sum = 0.0f;
 
-		for(sampled = 0; sampled < sd->num_closure; sampled++) {
-			const ShaderClosure *sc = &sd->closure[sampled];
+    for (sampled = 0; sampled < sd->num_closure; sampled++) {
+      const ShaderClosure *sc = &sd->closure[sampled];
 
-			if(CLOSURE_IS_PHASE(sc->type)) {
-				float next_sum = partial_sum + sc->sample_weight;
+      if (CLOSURE_IS_PHASE(sc->type)) {
+        float next_sum = partial_sum + sc->sample_weight;
 
-				if(r <= next_sum) {
-					/* Rescale to reuse for BSDF direction sample. */
-					randu = (r - partial_sum) / sc->sample_weight;
-					break;
-				}
+        if (r <= next_sum) {
+          /* Rescale to reuse for phase direction sample. */
+          randu = (r - partial_sum) / sc->sample_weight;
+          break;
+        }
 
-				partial_sum = next_sum;
-			}
-		}
+        partial_sum = next_sum;
+      }
+    }
 
-		if(sampled == sd->num_closure) {
-			*pdf = 0.0f;
-			return LABEL_NONE;
-		}
-	}
+    if (sampled == sd->num_closure) {
+      *pdf = 0.0f;
+      return LABEL_NONE;
+    }
+  }
 
-	/* todo: this isn't quite correct, we don't weight anisotropy properly
-	 * depending on color channels, even if this is perhaps not a common case */
-	const ShaderClosure *sc = &sd->closure[sampled];
-	int label;
-	float3 eval;
+  /* todo: this isn't quite correct, we don't weight anisotropy properly
+   * depending on color channels, even if this is perhaps not a common case */
+  const ShaderClosure *sc = &sd->closure[sampled];
+  int label;
+  float3 eval;
 
-	*pdf = 0.0f;
-	label = volume_phase_sample(sd, sc, randu, randv, &eval, omega_in, domega_in, pdf);
+  *pdf = 0.0f;
+  label = volume_phase_sample(sd, sc, randu, randv, &eval, omega_in, domega_in, pdf);
 
-	if(*pdf != 0.0f) {
-		bsdf_eval_init(phase_eval, sc->type, eval, kernel_data.film.use_light_pass);
-	}
+  if (*pdf != 0.0f) {
+    bsdf_eval_init(phase_eval, sc->type, eval, kernel_data.film.use_light_pass);
+  }
 
-	return label;
+  return label;
 }
 
-ccl_device int shader_phase_sample_closure(KernelGlobals *kg, const ShaderData *sd,
-	const ShaderClosure *sc, float randu, float randv, BsdfEval *phase_eval,
-	float3 *omega_in, differential3 *domega_in, float *pdf)
+ccl_device int shader_phase_sample_closure(KernelGlobals *kg,
+                                           const ShaderData *sd,
+                                           const ShaderClosure *sc,
+                                           float randu,
+                                           float randv,
+                                           BsdfEval *phase_eval,
+                                           float3 *omega_in,
+                                           differential3 *domega_in,
+                                           float *pdf)
 {
-	PROFILING_INIT(kg, PROFILING_CLOSURE_VOLUME_SAMPLE);
+  PROFILING_INIT(kg, PROFILING_CLOSURE_VOLUME_SAMPLE);
 
-	int label;
-	float3 eval;
+  int label;
+  float3 eval;
 
-	*pdf = 0.0f;
-	label = volume_phase_sample(sd, sc, randu, randv, &eval, omega_in, domega_in, pdf);
+  *pdf = 0.0f;
+  label = volume_phase_sample(sd, sc, randu, randv, &eval, omega_in, domega_in, pdf);
 
-	if(*pdf != 0.0f)
-		bsdf_eval_init(phase_eval, sc->type, eval, kernel_data.film.use_light_pass);
+  if (*pdf != 0.0f)
+    bsdf_eval_init(phase_eval, sc->type, eval, kernel_data.film.use_light_pass);
 
-	return label;
+  return label;
 }
 
 /* Volume Evaluation */
@@ -1228,83 +1267,85 @@ ccl_device_inline void shader_eval_volume(KernelGlobals *kg,
                                           ccl_addr_space VolumeStack *stack,
                                           int path_flag)
 {
-	/* If path is being terminated, we are tracing a shadow ray or evaluating
-	 * emission, then we don't need to store closures. The emission and shadow
-	 * shader data also do not have a closure array to save GPU memory. */
-	int max_closures;
-	if(path_flag & (PATH_RAY_TERMINATE|PATH_RAY_SHADOW|PATH_RAY_EMISSION)) {
-		max_closures = 0;
-	}
-	else {
-		max_closures = kernel_data.integrator.max_closures;
-	}
-
-	/* reset closures once at the start, we will be accumulating the closures
-	 * for all volumes in the stack into a single array of closures */
-	sd->num_closure = 0;
-	sd->num_closure_left = max_closures;
-	sd->flag = 0;
-	sd->object_flag = 0;
-
-	for(int i = 0; stack[i].shader != SHADER_NONE; i++) {
-		/* setup shaderdata from stack. it's mostly setup already in
-		 * shader_setup_from_volume, this switching should be quick */
-		sd->object = stack[i].object;
-		sd->lamp = LAMP_NONE;
-		sd->shader = stack[i].shader;
-
-		sd->flag &= ~SD_SHADER_FLAGS;
-		sd->flag |= kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).flags;
-		sd->object_flag &= ~SD_OBJECT_FLAGS;
-
-		if(sd->object != OBJECT_NONE) {
-			sd->object_flag |= kernel_tex_fetch(__object_flag, sd->object);
-
-#ifdef __OBJECT_MOTION__
-			/* todo: this is inefficient for motion blur, we should be
-			 * caching matrices instead of recomputing them each step */
-			shader_setup_object_transforms(kg, sd, sd->time);
-#endif
-		}
-
-		/* evaluate shader */
-#ifdef __SVM__
-#  ifdef __OSL__
-		if(kg->osl) {
-			OSLShader::eval_volume(kg, sd, state, path_flag);
-		}
-		else
+  /* If path is being terminated, we are tracing a shadow ray or evaluating
+   * emission, then we don't need to store closures. The emission and shadow
+   * shader data also do not have a closure array to save GPU memory. */
+  int max_closures;
+  if (path_flag & (PATH_RAY_TERMINATE | PATH_RAY_SHADOW | PATH_RAY_EMISSION)) {
+    max_closures = 0;
+  }
+  else {
+    max_closures = kernel_data.integrator.max_closures;
+  }
+
+  /* reset closures once at the start, we will be accumulating the closures
+   * for all volumes in the stack into a single array of closures */
+  sd->num_closure = 0;
+  sd->num_closure_left = max_closures;
+  sd->flag = 0;
+  sd->object_flag = 0;
+
+  for (int i = 0; stack[i].shader != SHADER_NONE; i++) {
+    /* setup shaderdata from stack. it's mostly setup already in
+     * shader_setup_from_volume, this switching should be quick */
+    sd->object = stack[i].object;
+    sd->lamp = LAMP_NONE;
+    sd->shader = stack[i].shader;
+
+    sd->flag &= ~SD_SHADER_FLAGS;
+    sd->flag |= kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).flags;
+    sd->object_flag &= ~SD_OBJECT_FLAGS;
+
+    if (sd->object != OBJECT_NONE) {
+      sd->object_flag |= kernel_tex_fetch(__object_flag, sd->object);
+
+#  ifdef __OBJECT_MOTION__
+      /* todo: this is inefficient for motion blur, we should be
+       * caching matrices instead of recomputing them each step */
+      shader_setup_object_transforms(kg, sd, sd->time);
+#  endif
+    }
+
+    /* evaluate shader */
+#  ifdef __SVM__
+#    ifdef __OSL__
+    if (kg->osl) {
+      OSLShader::eval_volume(kg, sd, state, path_flag);
+    }
+    else
+#    endif
+    {
+      svm_eval_nodes(kg, sd, state, SHADER_TYPE_VOLUME, path_flag);
+    }
 #  endif
-		{
-			svm_eval_nodes(kg, sd, state, SHADER_TYPE_VOLUME, path_flag);
-		}
-#endif
 
-		/* merge closures to avoid exceeding number of closures limit */
-		if(i > 0)
-			shader_merge_closures(sd);
-	}
+    /* merge closures to avoid exceeding number of closures limit */
+    if (i > 0)
+      shader_merge_closures(sd);
+  }
 }
 
-#endif  /* __VOLUME__ */
+#endif /* __VOLUME__ */
 
 /* Displacement Evaluation */
 
-ccl_device void shader_eval_displacement(KernelGlobals *kg, ShaderData *sd, ccl_addr_space PathState *state)
+ccl_device void shader_eval_displacement(KernelGlobals *kg,
+                                         ShaderData *sd,
+                                         ccl_addr_space PathState *state)
 {
-	sd->num_closure = 0;
-	sd->num_closure_left = 0;
+  sd->num_closure = 0;
+  sd->num_closure_left = 0;
 
-	/* this will modify sd->P */
+  /* this will modify sd->P */
 #ifdef __SVM__
 #  ifdef __OSL__
-	if(kg->osl)
-		OSLShader::eval_displacement(kg, sd, state);
-	else
+  if (kg->osl)
+    OSLShader::eval_displacement(kg, sd, state);
+  else
 #  endif
-	{
-		svm_eval_nodes(kg, sd, state, SHADER_TYPE_DISPLACEMENT, 0);
-	}
+  {
+    svm_eval_nodes(kg, sd, state, SHADER_TYPE_DISPLACEMENT, 0);
+  }
 #endif
 }
 
@@ -1313,29 +1354,29 @@ ccl_device void shader_eval_displacement(KernelGlobals *kg, ShaderData *sd, ccl_
 #ifdef __TRANSPARENT_SHADOWS__
 ccl_device bool shader_transparent_shadow(KernelGlobals *kg, Intersection *isect)
 {
-	int prim = kernel_tex_fetch(__prim_index, isect->prim);
-	int shader = 0;
+  int prim = kernel_tex_fetch(__prim_index, isect->prim);
+  int shader = 0;
 
-#ifdef __HAIR__
-	if(kernel_tex_fetch(__prim_type, isect->prim) & PRIMITIVE_ALL_TRIANGLE) {
-#endif
-		shader = kernel_tex_fetch(__tri_shader, prim);
-#ifdef __HAIR__
-	}
-	else {
-		float4 str = kernel_tex_fetch(__curves, prim);
-		shader = __float_as_int(str.z);
-	}
-#endif
-	int flag = kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags;
+#  ifdef __HAIR__
+  if (kernel_tex_fetch(__prim_type, isect->prim) & PRIMITIVE_ALL_TRIANGLE) {
+#  endif
+    shader = kernel_tex_fetch(__tri_shader, prim);
+#  ifdef __HAIR__
+  }
+  else {
+    float4 str = kernel_tex_fetch(__curves, prim);
+    shader = __float_as_int(str.z);
+  }
+#  endif
+  int flag = kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags;
 
-	return (flag & SD_HAS_TRANSPARENT_SHADOW) != 0;
+  return (flag & SD_HAS_TRANSPARENT_SHADOW) != 0;
 }
-#endif  /* __TRANSPARENT_SHADOWS__ */
+#endif /* __TRANSPARENT_SHADOWS__ */
 
 ccl_device float shader_cryptomatte_id(KernelGlobals *kg, int shader)
 {
-	return kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).cryptomatte_id;
+  return kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).cryptomatte_id;
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_shadow.h b/intern/cycles/kernel/kernel_shadow.h
index fafa3ad4bfa..6af1369feab 100644
--- a/intern/cycles/kernel/kernel_shadow.h
+++ b/intern/cycles/kernel/kernel_shadow.h
@@ -20,7 +20,7 @@ CCL_NAMESPACE_BEGIN
 typedef struct VolumeState {
 #  ifdef __SPLIT_KERNEL__
 #  else
-	PathState ps;
+  PathState ps;
 #  endif
 } VolumeState;
 
@@ -28,77 +28,70 @@ typedef struct VolumeState {
 #  ifdef __SPLIT_KERNEL__
 ccl_addr_space
 #  endif
-ccl_device_inline PathState *shadow_blocked_volume_path_state(
-        KernelGlobals *kg,
-        VolumeState *volume_state,
-        ccl_addr_space PathState *state,
-        ShaderData *sd,
-        Ray *ray)
+    ccl_device_inline PathState *
+    shadow_blocked_volume_path_state(KernelGlobals *kg,
+                                     VolumeState *volume_state,
+                                     ccl_addr_space PathState *state,
+                                     ShaderData *sd,
+                                     Ray *ray)
 {
 #  ifdef __SPLIT_KERNEL__
-	ccl_addr_space PathState *ps =
-	        &kernel_split_state.state_shadow[ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0)];
+  ccl_addr_space PathState *ps =
+      &kernel_split_state.state_shadow[ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0)];
 #  else
-	PathState *ps = &volume_state->ps;
+  PathState *ps = &volume_state->ps;
 #  endif
-	*ps = *state;
-	/* We are checking for shadow on the "other" side of the surface, so need
-	 * to discard volume we are currently at.
-	 */
-	if(dot(sd->Ng, ray->D) < 0.0f) {
-		kernel_volume_stack_enter_exit(kg, sd, ps->volume_stack);
-	}
-	return ps;
+  *ps = *state;
+  /* We are checking for shadow on the "other" side of the surface, so need
+   * to discard volume we are currently at.
+   */
+  if (dot(sd->Ng, ray->D) < 0.0f) {
+    kernel_volume_stack_enter_exit(kg, sd, ps->volume_stack);
+  }
+  return ps;
 }
-#endif  /* __VOLUME__ */
+#endif /* __VOLUME__ */
 
 /* Attenuate throughput accordingly to the given intersection event.
  * Returns true if the throughput is zero and traversal can be aborted.
  */
 ccl_device_forceinline bool shadow_handle_transparent_isect(
-        KernelGlobals *kg,
-        ShaderData *shadow_sd,
-        ccl_addr_space PathState *state,
-#    ifdef __VOLUME__
-        ccl_addr_space struct PathState *volume_state,
-#    endif
-        Intersection *isect,
-        Ray *ray,
-        float3 *throughput)
+    KernelGlobals *kg,
+    ShaderData *shadow_sd,
+    ccl_addr_space PathState *state,
+#ifdef __VOLUME__
+    ccl_addr_space struct PathState *volume_state,
+#endif
+    Intersection *isect,
+    Ray *ray,
+    float3 *throughput)
 {
 #ifdef __VOLUME__
-	/* Attenuation between last surface and next surface. */
-	if(volume_state->volume_stack[0].shader != SHADER_NONE) {
-		Ray segment_ray = *ray;
-		segment_ray.t = isect->t;
-		kernel_volume_shadow(kg,
-		                     shadow_sd,
-		                     volume_state,
-		                     &segment_ray,
-		                     throughput);
-	}
+  /* Attenuation between last surface and next surface. */
+  if (volume_state->volume_stack[0].shader != SHADER_NONE) {
+    Ray segment_ray = *ray;
+    segment_ray.t = isect->t;
+    kernel_volume_shadow(kg, shadow_sd, volume_state, &segment_ray, throughput);
+  }
 #endif
-	/* Setup shader data at surface. */
-	shader_setup_from_ray(kg, shadow_sd, isect, ray);
-	/* Attenuation from transparent surface. */
-	if(!(shadow_sd->flag & SD_HAS_ONLY_VOLUME)) {
-		path_state_modify_bounce(state, true);
-		shader_eval_surface(kg,
-		                    shadow_sd,
-		                    state,
-		                    PATH_RAY_SHADOW);
-		path_state_modify_bounce(state, false);
-		*throughput *= shader_bsdf_transparency(kg, shadow_sd);
-	}
-	/* Stop if all light is blocked. */
-	if(is_zero(*throughput)) {
-		return true;
-	}
+  /* Setup shader data at surface. */
+  shader_setup_from_ray(kg, shadow_sd, isect, ray);
+  /* Attenuation from transparent surface. */
+  if (!(shadow_sd->flag & SD_HAS_ONLY_VOLUME)) {
+    path_state_modify_bounce(state, true);
+    shader_eval_surface(kg, shadow_sd, state, PATH_RAY_SHADOW);
+    path_state_modify_bounce(state, false);
+    *throughput *= shader_bsdf_transparency(kg, shadow_sd);
+  }
+  /* Stop if all light is blocked. */
+  if (is_zero(*throughput)) {
+    return true;
+  }
 #ifdef __VOLUME__
-	/* Exit/enter volume. */
-	kernel_volume_stack_enter_exit(kg, shadow_sd, volume_state->volume_stack);
+  /* Exit/enter volume. */
+  kernel_volume_stack_enter_exit(kg, shadow_sd, volume_state->volume_stack);
 #endif
-	return false;
+  return false;
 }
 
 /* Special version which only handles opaque shadows. */
@@ -110,19 +103,15 @@ ccl_device bool shadow_blocked_opaque(KernelGlobals *kg,
                                       Intersection *isect,
                                       float3 *shadow)
 {
-	const bool blocked = scene_intersect(kg,
-	                                     *ray,
-	                                     visibility & PATH_RAY_SHADOW_OPAQUE,
-	                                     isect,
-	                                     NULL,
-	                                     0.0f, 0.0f);
+  const bool blocked = scene_intersect(
+      kg, *ray, visibility & PATH_RAY_SHADOW_OPAQUE, isect, NULL, 0.0f, 0.0f);
 #ifdef __VOLUME__
-	if(!blocked && state->volume_stack[0].shader != SHADER_NONE) {
-		/* Apply attenuation from current volume shader. */
-		kernel_volume_shadow(kg, shadow_sd, state, ray, shadow);
-	}
+  if (!blocked && state->volume_stack[0].shader != SHADER_NONE) {
+    /* Apply attenuation from current volume shader. */
+    kernel_volume_shadow(kg, shadow_sd, state, ray, shadow);
+  }
 #endif
-	return blocked;
+  return blocked;
 }
 
 #ifdef __TRANSPARENT_SHADOWS__
@@ -169,94 +158,80 @@ ccl_device bool shadow_blocked_transparent_all_loop(KernelGlobals *kg,
                                                     uint max_hits,
                                                     float3 *shadow)
 {
-	/* Intersect to find an opaque surface, or record all transparent
-	 * surface hits.
-	 */
-	uint num_hits;
-	const bool blocked = scene_intersect_shadow_all(kg,
-	                                                ray,
-	                                                hits,
-	                                                visibility,
-	                                                max_hits,
-	                                                &num_hits);
+  /* Intersect to find an opaque surface, or record all transparent
+   * surface hits.
+   */
+  uint num_hits;
+  const bool blocked = scene_intersect_shadow_all(kg, ray, hits, visibility, max_hits, &num_hits);
 #    ifdef __VOLUME__
-	VolumeState volume_state;
+  VolumeState volume_state;
 #    endif
-	/* If no opaque surface found but we did find transparent hits,
-	 * shade them.
-	 */
-	if(!blocked && num_hits > 0) {
-		float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
-		float3 Pend = ray->P + ray->D*ray->t;
-		float last_t = 0.0f;
-		int bounce = state->transparent_bounce;
-		Intersection *isect = hits;
+  /* If no opaque surface found but we did find transparent hits,
+   * shade them.
+   */
+  if (!blocked && num_hits > 0) {
+    float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
+    float3 Pend = ray->P + ray->D * ray->t;
+    float last_t = 0.0f;
+    int bounce = state->transparent_bounce;
+    Intersection *isect = hits;
 #    ifdef __VOLUME__
 #      ifdef __SPLIT_KERNEL__
-		ccl_addr_space
+    ccl_addr_space
 #      endif
-		PathState *ps = shadow_blocked_volume_path_state(kg,
-		                                                 &volume_state,
-		                                                 state,
-		                                                 sd,
-		                                                 ray);
+        PathState *ps = shadow_blocked_volume_path_state(kg, &volume_state, state, sd, ray);
 #    endif
-		sort_intersections(hits, num_hits);
-		for(int hit = 0; hit < num_hits; hit++, isect++) {
-			/* Adjust intersection distance for moving ray forward. */
-			float new_t = isect->t;
-			isect->t -= last_t;
-			/* Skip hit if we did not move forward, step by step raytracing
-			 * would have skipped it as well then.
-			 */
-			if(last_t == new_t) {
-				continue;
-			}
-			last_t = new_t;
-			/* Attenuate the throughput. */
-			if(shadow_handle_transparent_isect(kg,
-			                                   shadow_sd,
-			                                   state,
-#ifdef __VOLUME__
-			                                   ps,
-#endif
-			                                   isect,
-			                                   ray,
-			                                   &throughput))
-			{
-				return true;
-			}
-			/* Move ray forward. */
-			ray->P = shadow_sd->P;
-			if(ray->t != FLT_MAX) {
-				ray->D = normalize_len(Pend - ray->P, &ray->t);
-			}
-			bounce++;
-		}
+    sort_intersections(hits, num_hits);
+    for (int hit = 0; hit < num_hits; hit++, isect++) {
+      /* Adjust intersection distance for moving ray forward. */
+      float new_t = isect->t;
+      isect->t -= last_t;
+      /* Skip hit if we did not move forward, step by step raytracing
+       * would have skipped it as well then.
+       */
+      if (last_t == new_t) {
+        continue;
+      }
+      last_t = new_t;
+      /* Attenuate the throughput. */
+      if (shadow_handle_transparent_isect(kg,
+                                          shadow_sd,
+                                          state,
 #    ifdef __VOLUME__
-		/* Attenuation for last line segment towards light. */
-		if(ps->volume_stack[0].shader != SHADER_NONE) {
-			kernel_volume_shadow(kg, shadow_sd, ps, ray, &throughput);
-		}
+                                          ps,
 #    endif
-		*shadow = throughput;
-		return is_zero(throughput);
-	}
+                                          isect,
+                                          ray,
+                                          &throughput)) {
+        return true;
+      }
+      /* Move ray forward. */
+      ray->P = shadow_sd->P;
+      if (ray->t != FLT_MAX) {
+        ray->D = normalize_len(Pend - ray->P, &ray->t);
+      }
+      bounce++;
+    }
 #    ifdef __VOLUME__
-	if(!blocked && state->volume_stack[0].shader != SHADER_NONE) {
-		/* Apply attenuation from current volume shader. */
+    /* Attenuation for last line segment towards light. */
+    if (ps->volume_stack[0].shader != SHADER_NONE) {
+      kernel_volume_shadow(kg, shadow_sd, ps, ray, &throughput);
+    }
+#    endif
+    *shadow = throughput;
+    return is_zero(throughput);
+  }
+#    ifdef __VOLUME__
+  if (!blocked && state->volume_stack[0].shader != SHADER_NONE) {
+    /* Apply attenuation from current volume shader. */
 #      ifdef __SPLIT_KERNEL__
-		ccl_addr_space
+    ccl_addr_space
 #      endif
-		PathState *ps = shadow_blocked_volume_path_state(kg,
-		                                                 &volume_state,
-		                                                 state,
-		                                                 sd,
-		                                                 ray);
-		kernel_volume_shadow(kg, shadow_sd, ps, ray, shadow);
-	}
+        PathState *ps = shadow_blocked_volume_path_state(kg, &volume_state, state, sd, ray);
+    kernel_volume_shadow(kg, shadow_sd, ps, ray, shadow);
+  }
 #    endif
-	return blocked;
+  return blocked;
 }
 
 /* Here we do all device specific trickery before invoking actual traversal
@@ -272,43 +247,36 @@ ccl_device bool shadow_blocked_transparent_all(KernelGlobals *kg,
                                                float3 *shadow)
 {
 #    ifdef __SPLIT_KERNEL__
-	Intersection hits_[SHADOW_STACK_MAX_HITS];
-	Intersection *hits = &hits_[0];
+  Intersection hits_[SHADOW_STACK_MAX_HITS];
+  Intersection *hits = &hits_[0];
 #    elif defined(__KERNEL_CUDA__)
-	Intersection *hits = kg->hits_stack;
+  Intersection *hits = kg->hits_stack;
 #    else
-	Intersection hits_stack[SHADOW_STACK_MAX_HITS];
-	Intersection *hits = hits_stack;
+  Intersection hits_stack[SHADOW_STACK_MAX_HITS];
+  Intersection *hits = hits_stack;
 #    endif
 #    ifndef __KERNEL_GPU__
-	/* Prefer to use stack but use dynamic allocation if too deep max hits
-	 * we need max_hits + 1 storage space due to the logic in
-	 * scene_intersect_shadow_all which will first store and then check if
-	 * the limit is exceeded.
-	 *
-	 * Ignore this on GPU because of slow/unavailable malloc().
-	 */
-	if(max_hits + 1 > SHADOW_STACK_MAX_HITS) {
-		if(kg->transparent_shadow_intersections == NULL) {
-			const int transparent_max_bounce = kernel_data.integrator.transparent_max_bounce;
-			kg->transparent_shadow_intersections =
-				(Intersection*)malloc(sizeof(Intersection)*(transparent_max_bounce + 1));
-		}
-		hits = kg->transparent_shadow_intersections;
-	}
-#    endif  /* __KERNEL_GPU__ */
-	/* Invoke actual traversal. */
-	return shadow_blocked_transparent_all_loop(kg,
-	                                           sd,
-	                                           shadow_sd,
-	                                           state,
-	                                           visibility,
-	                                           ray,
-	                                           hits,
-	                                           max_hits,
-	                                           shadow);
+  /* Prefer to use stack but use dynamic allocation if too deep max hits
+   * we need max_hits + 1 storage space due to the logic in
+   * scene_intersect_shadow_all which will first store and then check if
+   * the limit is exceeded.
+   *
+   * Ignore this on GPU because of slow/unavailable malloc().
+   */
+  if (max_hits + 1 > SHADOW_STACK_MAX_HITS) {
+    if (kg->transparent_shadow_intersections == NULL) {
+      const int transparent_max_bounce = kernel_data.integrator.transparent_max_bounce;
+      kg->transparent_shadow_intersections = (Intersection *)malloc(sizeof(Intersection) *
+                                                                    (transparent_max_bounce + 1));
+    }
+    hits = kg->transparent_shadow_intersections;
+  }
+#    endif /* __KERNEL_GPU__ */
+  /* Invoke actual traversal. */
+  return shadow_blocked_transparent_all_loop(
+      kg, sd, shadow_sd, state, visibility, ray, hits, max_hits, shadow);
 }
-#  endif  /* __SHADOW_RECORD_ALL__ */
+#  endif /* __SHADOW_RECORD_ALL__ */
 
 #  if defined(__KERNEL_GPU__) || !defined(__SHADOW_RECORD_ALL__)
 /* Shadow function to compute how much light is blocked,
@@ -323,130 +291,100 @@ ccl_device bool shadow_blocked_transparent_all(KernelGlobals *kg,
 /* This function is only implementing device-independent traversal logic
  * which requires some precalculation done.
  */
-ccl_device bool shadow_blocked_transparent_stepped_loop(
-        KernelGlobals *kg,
-        ShaderData *sd,
-        ShaderData *shadow_sd,
-        ccl_addr_space PathState *state,
-        const uint visibility,
-        Ray *ray,
-        Intersection *isect,
-        const bool blocked,
-        const bool is_transparent_isect,
-        float3 *shadow)
+ccl_device bool shadow_blocked_transparent_stepped_loop(KernelGlobals *kg,
+                                                        ShaderData *sd,
+                                                        ShaderData *shadow_sd,
+                                                        ccl_addr_space PathState *state,
+                                                        const uint visibility,
+                                                        Ray *ray,
+                                                        Intersection *isect,
+                                                        const bool blocked,
+                                                        const bool is_transparent_isect,
+                                                        float3 *shadow)
 {
 #    ifdef __VOLUME__
-	VolumeState volume_state;
+  VolumeState volume_state;
 #    endif
-	if(blocked && is_transparent_isect) {
-		float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
-		float3 Pend = ray->P + ray->D*ray->t;
-		int bounce = state->transparent_bounce;
+  if (blocked && is_transparent_isect) {
+    float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
+    float3 Pend = ray->P + ray->D * ray->t;
+    int bounce = state->transparent_bounce;
 #    ifdef __VOLUME__
 #      ifdef __SPLIT_KERNEL__
-		ccl_addr_space
+    ccl_addr_space
 #      endif
-		PathState *ps = shadow_blocked_volume_path_state(kg,
-		                                                 &volume_state,
-		                                                 state,
-		                                                 sd,
-		                                                 ray);
+        PathState *ps = shadow_blocked_volume_path_state(kg, &volume_state, state, sd, ray);
 #    endif
-		for(;;) {
-			if(bounce >= kernel_data.integrator.transparent_max_bounce) {
-				return true;
-			}
-			if(!scene_intersect(kg,
-			                    *ray,
-			                    visibility & PATH_RAY_SHADOW_TRANSPARENT,
-			                    isect,
-			                    NULL,
-			                    0.0f, 0.0f))
-			{
-				break;
-			}
-			if(!shader_transparent_shadow(kg, isect)) {
-				return true;
-			}
-			/* Attenuate the throughput. */
-			if(shadow_handle_transparent_isect(kg,
-			                                   shadow_sd,
-			                                   state,
-#ifdef __VOLUME__
-			                                   ps,
-#endif
-			                                   isect,
-			                                   ray,
-			                                   &throughput))
-			{
-				return true;
-			}
-			/* Move ray forward. */
-			ray->P = ray_offset(shadow_sd->P, -shadow_sd->Ng);
-			if(ray->t != FLT_MAX) {
-				ray->D = normalize_len(Pend - ray->P, &ray->t);
-			}
-			bounce++;
-		}
+    for (;;) {
+      if (bounce >= kernel_data.integrator.transparent_max_bounce) {
+        return true;
+      }
+      if (!scene_intersect(
+              kg, *ray, visibility & PATH_RAY_SHADOW_TRANSPARENT, isect, NULL, 0.0f, 0.0f)) {
+        break;
+      }
+      if (!shader_transparent_shadow(kg, isect)) {
+        return true;
+      }
+      /* Attenuate the throughput. */
+      if (shadow_handle_transparent_isect(kg,
+                                          shadow_sd,
+                                          state,
+#    ifdef __VOLUME__
+                                          ps,
+#    endif
+                                          isect,
+                                          ray,
+                                          &throughput)) {
+        return true;
+      }
+      /* Move ray forward. */
+      ray->P = ray_offset(shadow_sd->P, -shadow_sd->Ng);
+      if (ray->t != FLT_MAX) {
+        ray->D = normalize_len(Pend - ray->P, &ray->t);
+      }
+      bounce++;
+    }
 #    ifdef __VOLUME__
-		/* Attenuation for last line segment towards light. */
-		if(ps->volume_stack[0].shader != SHADER_NONE) {
-			kernel_volume_shadow(kg, shadow_sd, ps, ray, &throughput);
-		}
+    /* Attenuation for last line segment towards light. */
+    if (ps->volume_stack[0].shader != SHADER_NONE) {
+      kernel_volume_shadow(kg, shadow_sd, ps, ray, &throughput);
+    }
 #    endif
-		*shadow *= throughput;
-		return is_zero(throughput);
-	}
+    *shadow *= throughput;
+    return is_zero(throughput);
+  }
 #    ifdef __VOLUME__
-	if(!blocked && state->volume_stack[0].shader != SHADER_NONE) {
-		/* Apply attenuation from current volume shader. */
+  if (!blocked && state->volume_stack[0].shader != SHADER_NONE) {
+    /* Apply attenuation from current volume shader. */
 #      ifdef __SPLIT_KERNEL__
-		ccl_addr_space
+    ccl_addr_space
 #      endif
-		PathState *ps = shadow_blocked_volume_path_state(kg,
-		                                                 &volume_state,
-		                                                 state,
-		                                                 sd,
-		                                                 ray);
-		kernel_volume_shadow(kg, shadow_sd, ps, ray, shadow);
-	}
+        PathState *ps = shadow_blocked_volume_path_state(kg, &volume_state, state, sd, ray);
+    kernel_volume_shadow(kg, shadow_sd, ps, ray, shadow);
+  }
 #    endif
-	return blocked;
+  return blocked;
 }
 
-ccl_device bool shadow_blocked_transparent_stepped(
-        KernelGlobals *kg,
-        ShaderData *sd,
-        ShaderData *shadow_sd,
-        ccl_addr_space PathState *state,
-        const uint visibility,
-        Ray *ray,
-        Intersection *isect,
-        float3 *shadow)
+ccl_device bool shadow_blocked_transparent_stepped(KernelGlobals *kg,
+                                                   ShaderData *sd,
+                                                   ShaderData *shadow_sd,
+                                                   ccl_addr_space PathState *state,
+                                                   const uint visibility,
+                                                   Ray *ray,
+                                                   Intersection *isect,
+                                                   float3 *shadow)
 {
-	bool blocked = scene_intersect(kg,
-	                               *ray,
-	                               visibility & PATH_RAY_SHADOW_OPAQUE,
-	                               isect,
-	                               NULL,
-	                               0.0f, 0.0f);
-	bool is_transparent_isect = blocked
-		? shader_transparent_shadow(kg, isect)
-		: false;
-	return shadow_blocked_transparent_stepped_loop(kg,
-	                                               sd,
-	                                               shadow_sd,
-	                                               state,
-	                                               visibility,
-	                                               ray,
-	                                               isect,
-	                                               blocked,
-	                                               is_transparent_isect,
-	                                               shadow);
+  bool blocked = scene_intersect(
+      kg, *ray, visibility & PATH_RAY_SHADOW_OPAQUE, isect, NULL, 0.0f, 0.0f);
+  bool is_transparent_isect = blocked ? shader_transparent_shadow(kg, isect) : false;
+  return shadow_blocked_transparent_stepped_loop(
+      kg, sd, shadow_sd, state, visibility, ray, isect, blocked, is_transparent_isect, shadow);
 }
 
-#  endif  /* __KERNEL_GPU__ || !__SHADOW_RECORD_ALL__ */
-#endif  /* __TRANSPARENT_SHADOWS__ */
+#  endif /* __KERNEL_GPU__ || !__SHADOW_RECORD_ALL__ */
+#endif   /* __TRANSPARENT_SHADOWS__ */
 
 ccl_device_inline bool shadow_blocked(KernelGlobals *kg,
                                       ShaderData *sd,
@@ -455,100 +393,65 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg,
                                       Ray *ray_input,
                                       float3 *shadow)
 {
-	Ray *ray = ray_input;
-	Intersection isect;
-	/* Some common early checks. */
-	*shadow = make_float3(1.0f, 1.0f, 1.0f);
-	if(ray->t == 0.0f) {
-		return false;
-	}
+  Ray *ray = ray_input;
+  Intersection isect;
+  /* Some common early checks. */
+  *shadow = make_float3(1.0f, 1.0f, 1.0f);
+  if (ray->t == 0.0f) {
+    return false;
+  }
 #ifdef __SHADOW_TRICKS__
-	const uint visibility = (state->flag & PATH_RAY_SHADOW_CATCHER)
-		? PATH_RAY_SHADOW_NON_CATCHER
-		: PATH_RAY_SHADOW;
+  const uint visibility = (state->flag & PATH_RAY_SHADOW_CATCHER) ? PATH_RAY_SHADOW_NON_CATCHER :
+                                                                    PATH_RAY_SHADOW;
 #else
-	const uint visibility = PATH_RAY_SHADOW;
+  const uint visibility = PATH_RAY_SHADOW;
 #endif
-	/* Do actual shadow shading. */
-	/* First of all, we check if integrator requires transparent shadows.
-	 * if not, we use simplest and fastest ever way to calculate occlusion.
-	 */
+  /* Do actual shadow shading. */
+  /* First of all, we check if integrator requires transparent shadows.
+   * if not, we use simplest and fastest ever way to calculate occlusion.
+   */
 #ifdef __TRANSPARENT_SHADOWS__
-	if(!kernel_data.integrator.transparent_shadows)
+  if (!kernel_data.integrator.transparent_shadows)
 #endif
-	{
-		return shadow_blocked_opaque(kg,
-		                             shadow_sd,
-		                             state,
-		                             visibility,
-		                             ray,
-		                             &isect,
-		                             shadow);
-	}
+  {
+    return shadow_blocked_opaque(kg, shadow_sd, state, visibility, ray, &isect, shadow);
+  }
 #ifdef __TRANSPARENT_SHADOWS__
 #  ifdef __SHADOW_RECORD_ALL__
-	/* For the transparent shadows we try to use record-all logic on the
-	 * devices which supports this.
-	 */
-	const int transparent_max_bounce = kernel_data.integrator.transparent_max_bounce;
-	/* Check transparent bounces here, for volume scatter which can do
-	 * lighting before surface path termination is checked.
-	 */
-	if(state->transparent_bounce >= transparent_max_bounce) {
-		return true;
-	}
-	const uint max_hits = transparent_max_bounce - state->transparent_bounce - 1;
+  /* For the transparent shadows we try to use record-all logic on the
+   * devices which supports this.
+   */
+  const int transparent_max_bounce = kernel_data.integrator.transparent_max_bounce;
+  /* Check transparent bounces here, for volume scatter which can do
+   * lighting before surface path termination is checked.
+   */
+  if (state->transparent_bounce >= transparent_max_bounce) {
+    return true;
+  }
+  const uint max_hits = transparent_max_bounce - state->transparent_bounce - 1;
 #    ifdef __KERNEL_GPU__
-	/* On GPU we do trickey with tracing opaque ray first, this avoids speed
-	 * regressions in some files.
-	 *
-	 * TODO(sergey): Check why using record-all behavior causes slowdown in such
-	 * cases. Could that be caused by a higher spill pressure?
-	 */
-	const bool blocked = scene_intersect(kg,
-	                                     *ray,
-	                                     visibility & PATH_RAY_SHADOW_OPAQUE,
-	                                     &isect,
-	                                     NULL,
-	                                     0.0f, 0.0f);
-	const bool is_transparent_isect = blocked
-	        ? shader_transparent_shadow(kg, &isect)
-	        : false;
-	if(!blocked || !is_transparent_isect ||
-	   max_hits + 1 >= SHADOW_STACK_MAX_HITS)
-	{
-		return shadow_blocked_transparent_stepped_loop(kg,
-		                                               sd,
-		                                               shadow_sd,
-		                                               state,
-		                                               visibility,
-		                                               ray,
-		                                               &isect,
-		                                               blocked,
-		                                               is_transparent_isect,
-		                                               shadow);
-	}
-#    endif  /* __KERNEL_GPU__ */
-	return shadow_blocked_transparent_all(kg,
-	                                      sd,
-	                                      shadow_sd,
-	                                      state,
-	                                      visibility,
-	                                      ray,
-	                                      max_hits,
-	                                      shadow);
+  /* On GPU we do trickey with tracing opaque ray first, this avoids speed
+   * regressions in some files.
+   *
+   * TODO(sergey): Check why using record-all behavior causes slowdown in such
+   * cases. Could that be caused by a higher spill pressure?
+   */
+  const bool blocked = scene_intersect(
+      kg, *ray, visibility & PATH_RAY_SHADOW_OPAQUE, &isect, NULL, 0.0f, 0.0f);
+  const bool is_transparent_isect = blocked ? shader_transparent_shadow(kg, &isect) : false;
+  if (!blocked || !is_transparent_isect || max_hits + 1 >= SHADOW_STACK_MAX_HITS) {
+    return shadow_blocked_transparent_stepped_loop(
+        kg, sd, shadow_sd, state, visibility, ray, &isect, blocked, is_transparent_isect, shadow);
+  }
+#    endif /* __KERNEL_GPU__ */
+  return shadow_blocked_transparent_all(
+      kg, sd, shadow_sd, state, visibility, ray, max_hits, shadow);
 #  else  /* __SHADOW_RECORD_ALL__ */
-	/* Fallback to a slowest version which works on all devices. */
-	return shadow_blocked_transparent_stepped(kg,
-	                                          sd,
-	                                          shadow_sd,
-	                                          state,
-	                                          visibility,
-	                                          ray,
-	                                          &isect,
-	                                          shadow);
-#  endif  /* __SHADOW_RECORD_ALL__ */
-#endif  /* __TRANSPARENT_SHADOWS__ */
+  /* Fallback to a slowest version which works on all devices. */
+  return shadow_blocked_transparent_stepped(
+      kg, sd, shadow_sd, state, visibility, ray, &isect, shadow);
+#  endif /* __SHADOW_RECORD_ALL__ */
+#endif   /* __TRANSPARENT_SHADOWS__ */
 }
 
 #undef SHADOW_STACK_MAX_HITS
diff --git a/intern/cycles/kernel/kernel_subsurface.h b/intern/cycles/kernel/kernel_subsurface.h
index 96b717530ce..7510e50a962 100644
--- a/intern/cycles/kernel/kernel_subsurface.h
+++ b/intern/cycles/kernel/kernel_subsurface.h
@@ -22,317 +22,295 @@ CCL_NAMESPACE_BEGIN
  * http://library.imageworks.com/pdfs/imageworks-library-BSSRDF-sampling.pdf
  */
 
-ccl_device_inline float3 subsurface_scatter_eval(ShaderData *sd,
-                                                 const ShaderClosure *sc,
-                                                 float disk_r,
-                                                 float r,
-                                                 bool all)
+ccl_device_inline float3
+subsurface_scatter_eval(ShaderData *sd, const ShaderClosure *sc, float disk_r, float r, bool all)
 {
-	/* this is the veach one-sample model with balance heuristic, some pdf
-	 * factors drop out when using balance heuristic weighting */
-	float3 eval_sum = make_float3(0.0f, 0.0f, 0.0f);
-	float pdf_sum = 0.0f;
-	float sample_weight_inv = 0.0f;
+  /* this is the veach one-sample model with balance heuristic, some pdf
+   * factors drop out when using balance heuristic weighting */
+  float3 eval_sum = make_float3(0.0f, 0.0f, 0.0f);
+  float pdf_sum = 0.0f;
+  float sample_weight_inv = 0.0f;
 
-	if(!all) {
-		float sample_weight_sum = 0.0f;
+  if (!all) {
+    float sample_weight_sum = 0.0f;
 
-		for(int i = 0; i < sd->num_closure; i++) {
-			sc = &sd->closure[i];
+    for (int i = 0; i < sd->num_closure; i++) {
+      sc = &sd->closure[i];
 
-			if(CLOSURE_IS_DISK_BSSRDF(sc->type)) {
-				sample_weight_sum += sc->sample_weight;
-			}
-		}
+      if (CLOSURE_IS_DISK_BSSRDF(sc->type)) {
+        sample_weight_sum += sc->sample_weight;
+      }
+    }
 
-		sample_weight_inv = 1.0f/sample_weight_sum;
-	}
+    sample_weight_inv = 1.0f / sample_weight_sum;
+  }
 
-	for(int i = 0; i < sd->num_closure; i++) {
-		sc = &sd->closure[i];
+  for (int i = 0; i < sd->num_closure; i++) {
+    sc = &sd->closure[i];
 
-		if(CLOSURE_IS_DISK_BSSRDF(sc->type)) {
-			/* in case of branched path integrate we sample all bssrdf's once,
-			 * for path trace we pick one, so adjust pdf for that */
-			float sample_weight = (all)? 1.0f: sc->sample_weight * sample_weight_inv;
+    if (CLOSURE_IS_DISK_BSSRDF(sc->type)) {
+      /* in case of branched path integrate we sample all bssrdf's once,
+       * for path trace we pick one, so adjust pdf for that */
+      float sample_weight = (all) ? 1.0f : sc->sample_weight * sample_weight_inv;
 
-			/* compute pdf */
-			float3 eval = bssrdf_eval(sc, r);
-			float pdf = bssrdf_pdf(sc, disk_r);
+      /* compute pdf */
+      float3 eval = bssrdf_eval(sc, r);
+      float pdf = bssrdf_pdf(sc, disk_r);
 
-			eval_sum += sc->weight * eval;
-			pdf_sum += sample_weight * pdf;
-		}
-	}
+      eval_sum += sc->weight * eval;
+      pdf_sum += sample_weight * pdf;
+    }
+  }
 
-	return (pdf_sum > 0.0f)? eval_sum / pdf_sum : make_float3(0.0f, 0.0f, 0.0f);
+  return (pdf_sum > 0.0f) ? eval_sum / pdf_sum : make_float3(0.0f, 0.0f, 0.0f);
 }
 
 /* replace closures with a single diffuse bsdf closure after scatter step */
-ccl_device void subsurface_scatter_setup_diffuse_bsdf(KernelGlobals *kg, ShaderData *sd, ClosureType type, float roughness, float3 weight, float3 N)
+ccl_device void subsurface_scatter_setup_diffuse_bsdf(
+    KernelGlobals *kg, ShaderData *sd, ClosureType type, float roughness, float3 weight, float3 N)
 {
-	sd->flag &= ~SD_CLOSURE_FLAGS;
-	sd->num_closure = 0;
-	sd->num_closure_left = kernel_data.integrator.max_closures;
+  sd->flag &= ~SD_CLOSURE_FLAGS;
+  sd->num_closure = 0;
+  sd->num_closure_left = kernel_data.integrator.max_closures;
 
 #ifdef __PRINCIPLED__
-	if(type == CLOSURE_BSSRDF_PRINCIPLED_ID ||
-	   type == CLOSURE_BSSRDF_PRINCIPLED_RANDOM_WALK_ID)
-	{
-		PrincipledDiffuseBsdf *bsdf = (PrincipledDiffuseBsdf*)bsdf_alloc(sd, sizeof(PrincipledDiffuseBsdf), weight);
-
-		if(bsdf) {
-			bsdf->N = N;
-			bsdf->roughness = roughness;
-			sd->flag |= bsdf_principled_diffuse_setup(bsdf);
-
-			/* replace CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID with this special ID so render passes
-			 * can recognize it as not being a regular Disney principled diffuse closure */
-			bsdf->type = CLOSURE_BSDF_BSSRDF_PRINCIPLED_ID;
-		}
-	}
-	else if(CLOSURE_IS_BSDF_BSSRDF(type) ||
-			CLOSURE_IS_BSSRDF(type))
-#endif  /* __PRINCIPLED__ */
-	{
-		DiffuseBsdf *bsdf = (DiffuseBsdf*)bsdf_alloc(sd, sizeof(DiffuseBsdf), weight);
-
-		if(bsdf) {
-			bsdf->N = N;
-			sd->flag |= bsdf_diffuse_setup(bsdf);
-
-			/* replace CLOSURE_BSDF_DIFFUSE_ID with this special ID so render passes
-			 * can recognize it as not being a regular diffuse closure */
-			bsdf->type = CLOSURE_BSDF_BSSRDF_ID;
-		}
-	}
+  if (type == CLOSURE_BSSRDF_PRINCIPLED_ID || type == CLOSURE_BSSRDF_PRINCIPLED_RANDOM_WALK_ID) {
+    PrincipledDiffuseBsdf *bsdf = (PrincipledDiffuseBsdf *)bsdf_alloc(
+        sd, sizeof(PrincipledDiffuseBsdf), weight);
+
+    if (bsdf) {
+      bsdf->N = N;
+      bsdf->roughness = roughness;
+      sd->flag |= bsdf_principled_diffuse_setup(bsdf);
+
+      /* replace CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID with this special ID so render passes
+       * can recognize it as not being a regular Disney principled diffuse closure */
+      bsdf->type = CLOSURE_BSDF_BSSRDF_PRINCIPLED_ID;
+    }
+  }
+  else if (CLOSURE_IS_BSDF_BSSRDF(type) || CLOSURE_IS_BSSRDF(type))
+#endif /* __PRINCIPLED__ */
+  {
+    DiffuseBsdf *bsdf = (DiffuseBsdf *)bsdf_alloc(sd, sizeof(DiffuseBsdf), weight);
+
+    if (bsdf) {
+      bsdf->N = N;
+      sd->flag |= bsdf_diffuse_setup(bsdf);
+
+      /* replace CLOSURE_BSDF_DIFFUSE_ID with this special ID so render passes
+       * can recognize it as not being a regular diffuse closure */
+      bsdf->type = CLOSURE_BSDF_BSSRDF_ID;
+    }
+  }
 }
 
 /* optionally do blurring of color and/or bump mapping, at the cost of a shader evaluation */
 ccl_device float3 subsurface_color_pow(float3 color, float exponent)
 {
-	color = max(color, make_float3(0.0f, 0.0f, 0.0f));
-
-	if(exponent == 1.0f) {
-		/* nothing to do */
-	}
-	else if(exponent == 0.5f) {
-		color.x = sqrtf(color.x);
-		color.y = sqrtf(color.y);
-		color.z = sqrtf(color.z);
-	}
-	else {
-		color.x = powf(color.x, exponent);
-		color.y = powf(color.y, exponent);
-		color.z = powf(color.z, exponent);
-	}
-
-	return color;
+  color = max(color, make_float3(0.0f, 0.0f, 0.0f));
+
+  if (exponent == 1.0f) {
+    /* nothing to do */
+  }
+  else if (exponent == 0.5f) {
+    color.x = sqrtf(color.x);
+    color.y = sqrtf(color.y);
+    color.z = sqrtf(color.z);
+  }
+  else {
+    color.x = powf(color.x, exponent);
+    color.y = powf(color.y, exponent);
+    color.z = powf(color.z, exponent);
+  }
+
+  return color;
 }
 
-ccl_device void subsurface_color_bump_blur(KernelGlobals *kg,
-                                           ShaderData *sd,
-                                           ccl_addr_space PathState *state,
-                                           float3 *eval,
-                                           float3 *N)
+ccl_device void subsurface_color_bump_blur(
+    KernelGlobals *kg, ShaderData *sd, ccl_addr_space PathState *state, float3 *eval, float3 *N)
 {
-	/* average color and texture blur at outgoing point */
-	float texture_blur;
-	float3 out_color = shader_bssrdf_sum(sd, NULL, &texture_blur);
-
-	/* do we have bump mapping? */
-	bool bump = (sd->flag & SD_HAS_BSSRDF_BUMP) != 0;
-
-	if(bump || texture_blur > 0.0f) {
-		/* average color and normal at incoming point */
-		shader_eval_surface(kg, sd, state, state->flag);
-		float3 in_color = shader_bssrdf_sum(sd, (bump)? N: NULL, NULL);
-
-		/* we simply divide out the average color and multiply with the average
-		 * of the other one. we could try to do this per closure but it's quite
-		 * tricky to match closures between shader evaluations, their number and
-		 * order may change, this is simpler */
-		if(texture_blur > 0.0f) {
-			out_color = subsurface_color_pow(out_color, texture_blur);
-			in_color = subsurface_color_pow(in_color, texture_blur);
-
-			*eval *= safe_divide_color(in_color, out_color);
-		}
-	}
+  /* average color and texture blur at outgoing point */
+  float texture_blur;
+  float3 out_color = shader_bssrdf_sum(sd, NULL, &texture_blur);
+
+  /* do we have bump mapping? */
+  bool bump = (sd->flag & SD_HAS_BSSRDF_BUMP) != 0;
+
+  if (bump || texture_blur > 0.0f) {
+    /* average color and normal at incoming point */
+    shader_eval_surface(kg, sd, state, state->flag);
+    float3 in_color = shader_bssrdf_sum(sd, (bump) ? N : NULL, NULL);
+
+    /* we simply divide out the average color and multiply with the average
+     * of the other one. we could try to do this per closure but it's quite
+     * tricky to match closures between shader evaluations, their number and
+     * order may change, this is simpler */
+    if (texture_blur > 0.0f) {
+      out_color = subsurface_color_pow(out_color, texture_blur);
+      in_color = subsurface_color_pow(in_color, texture_blur);
+
+      *eval *= safe_divide_color(in_color, out_color);
+    }
+  }
 }
 
 /* Subsurface scattering step, from a point on the surface to other
  * nearby points on the same object.
  */
-ccl_device_inline int subsurface_scatter_disk(
-        KernelGlobals *kg,
-        LocalIntersection *ss_isect,
-        ShaderData *sd,
-        const ShaderClosure *sc,
-        uint *lcg_state,
-        float disk_u,
-        float disk_v,
-        bool all)
+ccl_device_inline int subsurface_scatter_disk(KernelGlobals *kg,
+                                              LocalIntersection *ss_isect,
+                                              ShaderData *sd,
+                                              const ShaderClosure *sc,
+                                              uint *lcg_state,
+                                              float disk_u,
+                                              float disk_v,
+                                              bool all)
 {
-	/* pick random axis in local frame and point on disk */
-	float3 disk_N, disk_T, disk_B;
-	float pick_pdf_N, pick_pdf_T, pick_pdf_B;
-
-	disk_N = sd->Ng;
-	make_orthonormals(disk_N, &disk_T, &disk_B);
-
-	if(disk_v < 0.5f) {
-		pick_pdf_N = 0.5f;
-		pick_pdf_T = 0.25f;
-		pick_pdf_B = 0.25f;
-		disk_v *= 2.0f;
-	}
-	else if(disk_v < 0.75f) {
-		float3 tmp = disk_N;
-		disk_N = disk_T;
-		disk_T = tmp;
-		pick_pdf_N = 0.25f;
-		pick_pdf_T = 0.5f;
-		pick_pdf_B = 0.25f;
-		disk_v = (disk_v - 0.5f)*4.0f;
-	}
-	else {
-		float3 tmp = disk_N;
-		disk_N = disk_B;
-		disk_B = tmp;
-		pick_pdf_N = 0.25f;
-		pick_pdf_T = 0.25f;
-		pick_pdf_B = 0.5f;
-		disk_v = (disk_v - 0.75f)*4.0f;
-	}
-
-	/* sample point on disk */
-	float phi = M_2PI_F * disk_v;
-	float disk_height, disk_r;
-
-	bssrdf_sample(sc, disk_u, &disk_r, &disk_height);
-
-	float3 disk_P = (disk_r*cosf(phi)) * disk_T + (disk_r*sinf(phi)) * disk_B;
-
-	/* create ray */
+  /* pick random axis in local frame and point on disk */
+  float3 disk_N, disk_T, disk_B;
+  float pick_pdf_N, pick_pdf_T, pick_pdf_B;
+
+  disk_N = sd->Ng;
+  make_orthonormals(disk_N, &disk_T, &disk_B);
+
+  if (disk_v < 0.5f) {
+    pick_pdf_N = 0.5f;
+    pick_pdf_T = 0.25f;
+    pick_pdf_B = 0.25f;
+    disk_v *= 2.0f;
+  }
+  else if (disk_v < 0.75f) {
+    float3 tmp = disk_N;
+    disk_N = disk_T;
+    disk_T = tmp;
+    pick_pdf_N = 0.25f;
+    pick_pdf_T = 0.5f;
+    pick_pdf_B = 0.25f;
+    disk_v = (disk_v - 0.5f) * 4.0f;
+  }
+  else {
+    float3 tmp = disk_N;
+    disk_N = disk_B;
+    disk_B = tmp;
+    pick_pdf_N = 0.25f;
+    pick_pdf_T = 0.25f;
+    pick_pdf_B = 0.5f;
+    disk_v = (disk_v - 0.75f) * 4.0f;
+  }
+
+  /* sample point on disk */
+  float phi = M_2PI_F * disk_v;
+  float disk_height, disk_r;
+
+  bssrdf_sample(sc, disk_u, &disk_r, &disk_height);
+
+  float3 disk_P = (disk_r * cosf(phi)) * disk_T + (disk_r * sinf(phi)) * disk_B;
+
+  /* create ray */
 #ifdef __SPLIT_KERNEL__
-	Ray ray_object = ss_isect->ray;
-	Ray *ray = &ray_object;
+  Ray ray_object = ss_isect->ray;
+  Ray *ray = &ray_object;
 #else
-	Ray *ray = &ss_isect->ray;
+  Ray *ray = &ss_isect->ray;
 #endif
-	ray->P = sd->P + disk_N*disk_height + disk_P;
-	ray->D = -disk_N;
-	ray->t = 2.0f*disk_height;
-	ray->dP = sd->dP;
-	ray->dD = differential3_zero();
-	ray->time = sd->time;
-
-	/* intersect with the same object. if multiple intersections are found it
-	 * will use at most BSSRDF_MAX_HITS hits, a random subset of all hits */
-	scene_intersect_local(kg,
-	                      *ray,
-	                      ss_isect,
-	                      sd->object,
-	                      lcg_state,
-	                      BSSRDF_MAX_HITS);
-	int num_eval_hits = min(ss_isect->num_hits, BSSRDF_MAX_HITS);
-
-	for(int hit = 0; hit < num_eval_hits; hit++) {
-		/* Quickly retrieve P and Ng without setting up ShaderData. */
-		float3 hit_P;
-		if(sd->type & PRIMITIVE_TRIANGLE) {
-			hit_P = triangle_refine_local(kg,
-			                              sd,
-			                              &ss_isect->hits[hit],
-			                              ray);
-		}
+  ray->P = sd->P + disk_N * disk_height + disk_P;
+  ray->D = -disk_N;
+  ray->t = 2.0f * disk_height;
+  ray->dP = sd->dP;
+  ray->dD = differential3_zero();
+  ray->time = sd->time;
+
+  /* intersect with the same object. if multiple intersections are found it
+   * will use at most BSSRDF_MAX_HITS hits, a random subset of all hits */
+  scene_intersect_local(kg, *ray, ss_isect, sd->object, lcg_state, BSSRDF_MAX_HITS);
+  int num_eval_hits = min(ss_isect->num_hits, BSSRDF_MAX_HITS);
+
+  for (int hit = 0; hit < num_eval_hits; hit++) {
+    /* Quickly retrieve P and Ng without setting up ShaderData. */
+    float3 hit_P;
+    if (sd->type & PRIMITIVE_TRIANGLE) {
+      hit_P = triangle_refine_local(kg, sd, &ss_isect->hits[hit], ray);
+    }
 #ifdef __OBJECT_MOTION__
-		else  if(sd->type & PRIMITIVE_MOTION_TRIANGLE) {
-			float3 verts[3];
-			motion_triangle_vertices(
-			        kg,
-			        sd->object,
-			        kernel_tex_fetch(__prim_index, ss_isect->hits[hit].prim),
-			        sd->time,
-			        verts);
-			hit_P = motion_triangle_refine_local(kg,
-			                                     sd,
-			                                     &ss_isect->hits[hit],
-			                                     ray,
-			                                     verts);
-		}
-#endif  /* __OBJECT_MOTION__ */
-		else {
-			ss_isect->weight[hit] = make_float3(0.0f, 0.0f, 0.0f);
-			continue;
-		}
-
-		float3 hit_Ng = ss_isect->Ng[hit];
-		if(ss_isect->hits[hit].object != OBJECT_NONE) {
-			object_normal_transform(kg, sd, &hit_Ng);
-		}
-
-		/* Probability densities for local frame axes. */
-		float pdf_N = pick_pdf_N * fabsf(dot(disk_N, hit_Ng));
-		float pdf_T = pick_pdf_T * fabsf(dot(disk_T, hit_Ng));
-		float pdf_B = pick_pdf_B * fabsf(dot(disk_B, hit_Ng));
-
-		/* Multiple importance sample between 3 axes, power heuristic
-		 * found to be slightly better than balance heuristic. pdf_N
-		 * in the MIS weight and denominator cancelled out. */
-		float w = pdf_N / (sqr(pdf_N) + sqr(pdf_T) + sqr(pdf_B));
-		if(ss_isect->num_hits > BSSRDF_MAX_HITS) {
-			w *= ss_isect->num_hits/(float)BSSRDF_MAX_HITS;
-		}
-
-		/* Real distance to sampled point. */
-		float r = len(hit_P - sd->P);
-
-		/* Evaluate profiles. */
-		float3 eval = subsurface_scatter_eval(sd, sc, disk_r, r, all) * w;
-
-		ss_isect->weight[hit] = eval;
-	}
+    else if (sd->type & PRIMITIVE_MOTION_TRIANGLE) {
+      float3 verts[3];
+      motion_triangle_vertices(kg,
+                               sd->object,
+                               kernel_tex_fetch(__prim_index, ss_isect->hits[hit].prim),
+                               sd->time,
+                               verts);
+      hit_P = motion_triangle_refine_local(kg, sd, &ss_isect->hits[hit], ray, verts);
+    }
+#endif /* __OBJECT_MOTION__ */
+    else {
+      ss_isect->weight[hit] = make_float3(0.0f, 0.0f, 0.0f);
+      continue;
+    }
+
+    float3 hit_Ng = ss_isect->Ng[hit];
+    if (ss_isect->hits[hit].object != OBJECT_NONE) {
+      object_normal_transform(kg, sd, &hit_Ng);
+    }
+
+    /* Probability densities for local frame axes. */
+    float pdf_N = pick_pdf_N * fabsf(dot(disk_N, hit_Ng));
+    float pdf_T = pick_pdf_T * fabsf(dot(disk_T, hit_Ng));
+    float pdf_B = pick_pdf_B * fabsf(dot(disk_B, hit_Ng));
+
+    /* Multiple importance sample between 3 axes, power heuristic
+     * found to be slightly better than balance heuristic. pdf_N
+     * in the MIS weight and denominator cancelled out. */
+    float w = pdf_N / (sqr(pdf_N) + sqr(pdf_T) + sqr(pdf_B));
+    if (ss_isect->num_hits > BSSRDF_MAX_HITS) {
+      w *= ss_isect->num_hits / (float)BSSRDF_MAX_HITS;
+    }
+
+    /* Real distance to sampled point. */
+    float r = len(hit_P - sd->P);
+
+    /* Evaluate profiles. */
+    float3 eval = subsurface_scatter_eval(sd, sc, disk_r, r, all) * w;
+
+    ss_isect->weight[hit] = eval;
+  }
 
 #ifdef __SPLIT_KERNEL__
-	ss_isect->ray = *ray;
+  ss_isect->ray = *ray;
 #endif
 
-	return num_eval_hits;
+  return num_eval_hits;
 }
 
-ccl_device_noinline void subsurface_scatter_multi_setup(
-        KernelGlobals *kg,
-        LocalIntersection* ss_isect,
-        int hit,
-        ShaderData *sd,
-        ccl_addr_space PathState *state,
-        ClosureType type,
-        float roughness)
+ccl_device_noinline void subsurface_scatter_multi_setup(KernelGlobals *kg,
+                                                        LocalIntersection *ss_isect,
+                                                        int hit,
+                                                        ShaderData *sd,
+                                                        ccl_addr_space PathState *state,
+                                                        ClosureType type,
+                                                        float roughness)
 {
 #ifdef __SPLIT_KERNEL__
-	Ray ray_object = ss_isect->ray;
-	Ray *ray = &ray_object;
+  Ray ray_object = ss_isect->ray;
+  Ray *ray = &ray_object;
 #else
-	Ray *ray = &ss_isect->ray;
+  Ray *ray = &ss_isect->ray;
 #endif
 
-	/* Workaround for AMD GPU OpenCL compiler. Most probably cache bypass issue. */
+  /* Workaround for AMD GPU OpenCL compiler. Most probably cache bypass issue. */
 #if defined(__SPLIT_KERNEL__) && defined(__KERNEL_OPENCL_AMD__) && defined(__KERNEL_GPU__)
-	kernel_split_params.dummy_sd_flag = sd->flag;
+  kernel_split_params.dummy_sd_flag = sd->flag;
 #endif
 
-	/* Setup new shading point. */
-	shader_setup_from_subsurface(kg, sd, &ss_isect->hits[hit], ray);
+  /* Setup new shading point. */
+  shader_setup_from_subsurface(kg, sd, &ss_isect->hits[hit], ray);
 
-	/* Optionally blur colors and bump mapping. */
-	float3 weight = ss_isect->weight[hit];
-	float3 N = sd->N;
-	subsurface_color_bump_blur(kg, sd, state, &weight, &N);
+  /* Optionally blur colors and bump mapping. */
+  float3 weight = ss_isect->weight[hit];
+  float3 N = sd->N;
+  subsurface_color_bump_blur(kg, sd, state, &weight, &N);
 
-	/* Setup diffuse BSDF. */
-	subsurface_scatter_setup_diffuse_bsdf(kg, sd, type, roughness, weight, N);
+  /* Setup diffuse BSDF. */
+  subsurface_scatter_setup_diffuse_bsdf(kg, sd, type, roughness, weight, N);
 }
 
 /* Random walk subsurface scattering.
@@ -340,196 +318,178 @@ ccl_device_noinline void subsurface_scatter_multi_setup(
  * "Practical and Controllable Subsurface Scattering for Production Path
  *  Tracing". Matt Jen-Yuan Chiang, Peter Kutz, Brent Burley. SIGGRAPH 2016. */
 
-ccl_device void subsurface_random_walk_remap(
-        const float A,
-        const float d,
-        float *sigma_t,
-        float *sigma_s)
+ccl_device void subsurface_random_walk_remap(const float A,
+                                             const float d,
+                                             float *sigma_t,
+                                             float *sigma_s)
 {
-	/* Compute attenuation and scattering coefficients from albedo. */
-	const float a = 1.0f - expf(A * (-5.09406f + A * (2.61188f - A * 4.31805f)));
-	const float s = 1.9f - A + 3.5f * sqr(A - 0.8f);
+  /* Compute attenuation and scattering coefficients from albedo. */
+  const float a = 1.0f - expf(A * (-5.09406f + A * (2.61188f - A * 4.31805f)));
+  const float s = 1.9f - A + 3.5f * sqr(A - 0.8f);
 
-	*sigma_t = 1.0f / fmaxf(d * s, 1e-16f);
-	*sigma_s = *sigma_t * a;
+  *sigma_t = 1.0f / fmaxf(d * s, 1e-16f);
+  *sigma_s = *sigma_t * a;
 }
 
-ccl_device void subsurface_random_walk_coefficients(
-        const ShaderClosure *sc,
-        float3 *sigma_t,
-        float3 *sigma_s,
-        float3 *weight)
+ccl_device void subsurface_random_walk_coefficients(const ShaderClosure *sc,
+                                                    float3 *sigma_t,
+                                                    float3 *sigma_s,
+                                                    float3 *weight)
 {
-	const Bssrdf *bssrdf = (const Bssrdf*)sc;
-	const float3 A = bssrdf->albedo;
-	const float3 d = bssrdf->radius;
-	float sigma_t_x, sigma_t_y, sigma_t_z;
-	float sigma_s_x, sigma_s_y, sigma_s_z;
+  const Bssrdf *bssrdf = (const Bssrdf *)sc;
+  const float3 A = bssrdf->albedo;
+  const float3 d = bssrdf->radius;
+  float sigma_t_x, sigma_t_y, sigma_t_z;
+  float sigma_s_x, sigma_s_y, sigma_s_z;
 
-	subsurface_random_walk_remap(A.x, d.x, &sigma_t_x, &sigma_s_x);
-	subsurface_random_walk_remap(A.y, d.y, &sigma_t_y, &sigma_s_y);
-	subsurface_random_walk_remap(A.z, d.z, &sigma_t_z, &sigma_s_z);
+  subsurface_random_walk_remap(A.x, d.x, &sigma_t_x, &sigma_s_x);
+  subsurface_random_walk_remap(A.y, d.y, &sigma_t_y, &sigma_s_y);
+  subsurface_random_walk_remap(A.z, d.z, &sigma_t_z, &sigma_s_z);
 
-	*sigma_t = make_float3(sigma_t_x, sigma_t_y, sigma_t_z);
-	*sigma_s = make_float3(sigma_s_x, sigma_s_y, sigma_s_z);
+  *sigma_t = make_float3(sigma_t_x, sigma_t_y, sigma_t_z);
+  *sigma_s = make_float3(sigma_s_x, sigma_s_y, sigma_s_z);
 
-	/* Closure mixing and Fresnel weights separate from albedo. */
-	*weight = safe_divide_color(bssrdf->weight, A);
+  /* Closure mixing and Fresnel weights separate from albedo. */
+  *weight = safe_divide_color(bssrdf->weight, A);
 }
 
-ccl_device_noinline bool subsurface_random_walk(
-        KernelGlobals *kg,
-        LocalIntersection *ss_isect,
-        ShaderData *sd,
-        ccl_addr_space PathState *state,
-        const ShaderClosure *sc,
-        const float bssrdf_u,
-        const float bssrdf_v)
+ccl_device_noinline bool subsurface_random_walk(KernelGlobals *kg,
+                                                LocalIntersection *ss_isect,
+                                                ShaderData *sd,
+                                                ccl_addr_space PathState *state,
+                                                const ShaderClosure *sc,
+                                                const float bssrdf_u,
+                                                const float bssrdf_v)
 {
-	/* Sample diffuse surface scatter into the object. */
-	float3 D;
-	float pdf;
-	sample_cos_hemisphere(-sd->N, bssrdf_u, bssrdf_v, &D, &pdf);
-	if(dot(-sd->Ng, D) <= 0.0f) {
-		return 0;
-	}
-
-	/* Convert subsurface to volume coefficients. */
-	float3 sigma_t, sigma_s;
-	float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
-	subsurface_random_walk_coefficients(sc, &sigma_t, &sigma_s, &throughput);
-
-	/* Setup ray. */
+  /* Sample diffuse surface scatter into the object. */
+  float3 D;
+  float pdf;
+  sample_cos_hemisphere(-sd->N, bssrdf_u, bssrdf_v, &D, &pdf);
+  if (dot(-sd->Ng, D) <= 0.0f) {
+    return 0;
+  }
+
+  /* Convert subsurface to volume coefficients. */
+  float3 sigma_t, sigma_s;
+  float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
+  subsurface_random_walk_coefficients(sc, &sigma_t, &sigma_s, &throughput);
+
+  /* Setup ray. */
 #ifdef __SPLIT_KERNEL__
-	Ray ray_object = ss_isect->ray;
-	Ray *ray = &ray_object;
+  Ray ray_object = ss_isect->ray;
+  Ray *ray = &ray_object;
 #else
-	Ray *ray = &ss_isect->ray;
+  Ray *ray = &ss_isect->ray;
 #endif
-	ray->P = ray_offset(sd->P, -sd->Ng);
-	ray->D = D;
-	ray->t = FLT_MAX;
-	ray->time = sd->time;
-
-	/* Modify state for RNGs, decorrelated from other paths. */
-	uint prev_rng_offset = state->rng_offset;
-	uint prev_rng_hash = state->rng_hash;
-	state->rng_hash = cmj_hash(state->rng_hash + state->rng_offset, 0xdeadbeef);
-
-	/* Random walk until we hit the surface again. */
-	bool hit = false;
-
-	for(int bounce = 0; bounce < BSSRDF_MAX_BOUNCES; bounce++) {
-		/* Advance random number offset. */
-		state->rng_offset += PRNG_BOUNCE_NUM;
-
-		if(bounce > 0) {
-			/* Sample scattering direction. */
-			const float anisotropy = 0.0f;
-			float scatter_u, scatter_v;
-			path_state_rng_2D(kg, state, PRNG_BSDF_U, &scatter_u, &scatter_v);
-			ray->D = henyey_greenstrein_sample(ray->D, anisotropy, scatter_u, scatter_v, NULL);
-		}
-
-		/* Sample color channel, use MIS with balance heuristic. */
-		float rphase = path_state_rng_1D(kg, state, PRNG_PHASE_CHANNEL);
-		float3 albedo = safe_divide_color(sigma_s, sigma_t);
-		float3 channel_pdf;
-		int channel = kernel_volume_sample_channel(albedo, throughput, rphase, &channel_pdf);
-
-		/* Distance sampling. */
-		float rdist = path_state_rng_1D(kg, state, PRNG_SCATTER_DISTANCE);
-		float sample_sigma_t = kernel_volume_channel_get(sigma_t, channel);
-		float t = -logf(1.0f - rdist)/sample_sigma_t;
-
-		ray->t = t;
-		scene_intersect_local(kg, *ray, ss_isect, sd->object, NULL, 1);
-		hit = (ss_isect->num_hits > 0);
-
-		if(hit) {
-			/* Compute world space distance to surface hit. */
-			float3 D = ray->D;
-			object_inverse_dir_transform(kg, sd, &D);
-			D = normalize(D) * ss_isect->hits[0].t;
-			object_dir_transform(kg, sd, &D);
-			t = len(D);
-		}
-
-		/* Advance to new scatter location. */
-		ray->P += t * ray->D;
-
-		/* Update throughput. */
-		float3 transmittance = volume_color_transmittance(sigma_t, t);
-		float pdf = dot(channel_pdf, (hit)? transmittance: sigma_t * transmittance);
-		throughput *= ((hit)? transmittance: sigma_s * transmittance) / pdf;
-
-		if(hit) {
-			/* If we hit the surface, we are done. */
-			break;
-		}
-
-		/* Russian roulette. */
-		float terminate = path_state_rng_1D(kg, state, PRNG_TERMINATE);
-		float probability = min(max3(fabs(throughput)), 1.0f);
-		if(terminate >= probability) {
-			break;
-		}
-		throughput /= probability;
-	}
-
-	kernel_assert(isfinite_safe(throughput.x) &&
-	              isfinite_safe(throughput.y) &&
-	              isfinite_safe(throughput.z));
-
-	state->rng_offset = prev_rng_offset;
-	state->rng_hash = prev_rng_hash;
-
-	/* Return number of hits in ss_isect. */
-	if(!hit) {
-		return 0;
-	}
-
-	/* TODO: gain back performance lost from merging with disk BSSRDF. We
-	 * only need to return on hit so this indirect ray push/pop overhead
-	 * is not actually needed, but it does keep the code simpler. */
-	ss_isect->weight[0] = throughput;
+  ray->P = ray_offset(sd->P, -sd->Ng);
+  ray->D = D;
+  ray->t = FLT_MAX;
+  ray->time = sd->time;
+
+  /* Modify state for RNGs, decorrelated from other paths. */
+  uint prev_rng_offset = state->rng_offset;
+  uint prev_rng_hash = state->rng_hash;
+  state->rng_hash = cmj_hash(state->rng_hash + state->rng_offset, 0xdeadbeef);
+
+  /* Random walk until we hit the surface again. */
+  bool hit = false;
+
+  for (int bounce = 0; bounce < BSSRDF_MAX_BOUNCES; bounce++) {
+    /* Advance random number offset. */
+    state->rng_offset += PRNG_BOUNCE_NUM;
+
+    if (bounce > 0) {
+      /* Sample scattering direction. */
+      const float anisotropy = 0.0f;
+      float scatter_u, scatter_v;
+      path_state_rng_2D(kg, state, PRNG_BSDF_U, &scatter_u, &scatter_v);
+      ray->D = henyey_greenstrein_sample(ray->D, anisotropy, scatter_u, scatter_v, NULL);
+    }
+
+    /* Sample color channel, use MIS with balance heuristic. */
+    float rphase = path_state_rng_1D(kg, state, PRNG_PHASE_CHANNEL);
+    float3 albedo = safe_divide_color(sigma_s, sigma_t);
+    float3 channel_pdf;
+    int channel = kernel_volume_sample_channel(albedo, throughput, rphase, &channel_pdf);
+
+    /* Distance sampling. */
+    float rdist = path_state_rng_1D(kg, state, PRNG_SCATTER_DISTANCE);
+    float sample_sigma_t = kernel_volume_channel_get(sigma_t, channel);
+    float t = -logf(1.0f - rdist) / sample_sigma_t;
+
+    ray->t = t;
+    scene_intersect_local(kg, *ray, ss_isect, sd->object, NULL, 1);
+    hit = (ss_isect->num_hits > 0);
+
+    if (hit) {
+      /* Compute world space distance to surface hit. */
+      float3 D = ray->D;
+      object_inverse_dir_transform(kg, sd, &D);
+      D = normalize(D) * ss_isect->hits[0].t;
+      object_dir_transform(kg, sd, &D);
+      t = len(D);
+    }
+
+    /* Advance to new scatter location. */
+    ray->P += t * ray->D;
+
+    /* Update throughput. */
+    float3 transmittance = volume_color_transmittance(sigma_t, t);
+    float pdf = dot(channel_pdf, (hit) ? transmittance : sigma_t * transmittance);
+    throughput *= ((hit) ? transmittance : sigma_s * transmittance) / pdf;
+
+    if (hit) {
+      /* If we hit the surface, we are done. */
+      break;
+    }
+
+    /* Russian roulette. */
+    float terminate = path_state_rng_1D(kg, state, PRNG_TERMINATE);
+    float probability = min(max3(fabs(throughput)), 1.0f);
+    if (terminate >= probability) {
+      break;
+    }
+    throughput /= probability;
+  }
+
+  kernel_assert(isfinite_safe(throughput.x) && isfinite_safe(throughput.y) &&
+                isfinite_safe(throughput.z));
+
+  state->rng_offset = prev_rng_offset;
+  state->rng_hash = prev_rng_hash;
+
+  /* Return number of hits in ss_isect. */
+  if (!hit) {
+    return 0;
+  }
+
+  /* TODO: gain back performance lost from merging with disk BSSRDF. We
+   * only need to return on hit so this indirect ray push/pop overhead
+   * is not actually needed, but it does keep the code simpler. */
+  ss_isect->weight[0] = throughput;
 #ifdef __SPLIT_KERNEL__
-	ss_isect->ray = *ray;
+  ss_isect->ray = *ray;
 #endif
 
-	return 1;
+  return 1;
 }
 
-ccl_device_inline int subsurface_scatter_multi_intersect(
-        KernelGlobals *kg,
-        LocalIntersection *ss_isect,
-        ShaderData *sd,
-        ccl_addr_space PathState *state,
-        const ShaderClosure *sc,
-        uint *lcg_state,
-        float bssrdf_u,
-        float bssrdf_v,
-        bool all)
+ccl_device_inline int subsurface_scatter_multi_intersect(KernelGlobals *kg,
+                                                         LocalIntersection *ss_isect,
+                                                         ShaderData *sd,
+                                                         ccl_addr_space PathState *state,
+                                                         const ShaderClosure *sc,
+                                                         uint *lcg_state,
+                                                         float bssrdf_u,
+                                                         float bssrdf_v,
+                                                         bool all)
 {
-	if(CLOSURE_IS_DISK_BSSRDF(sc->type)) {
-		return subsurface_scatter_disk(kg,
-		                               ss_isect,
-		                               sd,
-		                               sc,
-		                               lcg_state,
-		                               bssrdf_u,
-		                               bssrdf_v,
-		                               all);
-	}
-	else {
-		return subsurface_random_walk(kg,
-		                              ss_isect,
-		                              sd,
-		                              state,
-		                              sc,
-		                              bssrdf_u,
-		                              bssrdf_v);
-	}
+  if (CLOSURE_IS_DISK_BSSRDF(sc->type)) {
+    return subsurface_scatter_disk(kg, ss_isect, sd, sc, lcg_state, bssrdf_u, bssrdf_v, all);
+  }
+  else {
+    return subsurface_random_walk(kg, ss_isect, sd, state, sc, bssrdf_u, bssrdf_v);
+  }
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h
index 4b1c8e82dfa..3f62b726b6a 100644
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ -42,26 +42,26 @@ CCL_NAMESPACE_BEGIN
 
 /* Constants */
 #define OBJECT_MOTION_PASS_SIZE 2
-#define FILTER_TABLE_SIZE       1024
-#define RAMP_TABLE_SIZE         256
-#define SHUTTER_TABLE_SIZE      256
+#define FILTER_TABLE_SIZE 1024
+#define RAMP_TABLE_SIZE 256
+#define SHUTTER_TABLE_SIZE 256
 
-#define BSSRDF_MIN_RADIUS			1e-8f
-#define BSSRDF_MAX_HITS				4
-#define BSSRDF_MAX_BOUNCES			256
-#define LOCAL_MAX_HITS				4
+#define BSSRDF_MIN_RADIUS 1e-8f
+#define BSSRDF_MAX_HITS 4
+#define BSSRDF_MAX_BOUNCES 256
+#define LOCAL_MAX_HITS 4
 
-#define VOLUME_BOUNDS_MAX       1024
+#define VOLUME_BOUNDS_MAX 1024
 
-#define BECKMANN_TABLE_SIZE		256
+#define BECKMANN_TABLE_SIZE 256
 
-#define SHADER_NONE				(~0)
-#define OBJECT_NONE				(~0)
-#define PRIM_NONE				(~0)
-#define LAMP_NONE				(~0)
-#define ID_NONE					(0.0f)
+#define SHADER_NONE (~0)
+#define OBJECT_NONE (~0)
+#define PRIM_NONE (~0)
+#define LAMP_NONE (~0)
+#define ID_NONE (0.0f)
 
-#define VOLUME_STACK_SIZE		32
+#define VOLUME_STACK_SIZE 32
 
 /* Split kernel constants */
 #define WORK_POOL_SIZE_GPU 64
@@ -72,7 +72,6 @@ CCL_NAMESPACE_BEGIN
 #  define WORK_POOL_SIZE WORK_POOL_SIZE_CPU
 #endif
 
-
 #define SHADER_SORT_BLOCK_SIZE 2048
 
 #ifdef __KERNEL_OPENCL__
@@ -137,16 +136,16 @@ CCL_NAMESPACE_BEGIN
 #  endif
 #  define __VOLUME_DECOUPLED__
 #  define __VOLUME_RECORD_ALL__
-#endif  /* __KERNEL_CPU__ */
+#endif /* __KERNEL_CPU__ */
 
 #ifdef __KERNEL_CUDA__
 #  ifdef __SPLIT_KERNEL__
 #    undef __BRANCHED_PATH__
 #  endif
-#endif  /* __KERNEL_CUDA__ */
+#endif /* __KERNEL_CUDA__ */
 
 #ifdef __KERNEL_OPENCL__
-#endif  /* __KERNEL_OPENCL__ */
+#endif /* __KERNEL_OPENCL__ */
 
 /* Scene-based selective features compilation. */
 #ifdef __NO_CAMERA_MOTION__
@@ -202,273 +201,269 @@ CCL_NAMESPACE_BEGIN
 /* Shader Evaluation */
 
 typedef enum ShaderEvalType {
-	SHADER_EVAL_DISPLACE,
-	SHADER_EVAL_BACKGROUND,
-	/* bake types */
-	SHADER_EVAL_BAKE, /* no real shade, it's used in the code to
-	                   * differentiate the type of shader eval from the above
-	                   */
-	/* data passes */
-	SHADER_EVAL_NORMAL,
-	SHADER_EVAL_UV,
-	SHADER_EVAL_ROUGHNESS,
-	SHADER_EVAL_DIFFUSE_COLOR,
-	SHADER_EVAL_GLOSSY_COLOR,
-	SHADER_EVAL_TRANSMISSION_COLOR,
-	SHADER_EVAL_SUBSURFACE_COLOR,
-	SHADER_EVAL_EMISSION,
-
-	/* light passes */
-	SHADER_EVAL_AO,
-	SHADER_EVAL_COMBINED,
-	SHADER_EVAL_SHADOW,
-	SHADER_EVAL_DIFFUSE,
-	SHADER_EVAL_GLOSSY,
-	SHADER_EVAL_TRANSMISSION,
-	SHADER_EVAL_SUBSURFACE,
-
-	/* extra */
-	SHADER_EVAL_ENVIRONMENT,
+  SHADER_EVAL_DISPLACE,
+  SHADER_EVAL_BACKGROUND,
+  /* bake types */
+  SHADER_EVAL_BAKE, /* no real shade, it's used in the code to
+                     * differentiate the type of shader eval from the above
+                     */
+  /* data passes */
+  SHADER_EVAL_NORMAL,
+  SHADER_EVAL_UV,
+  SHADER_EVAL_ROUGHNESS,
+  SHADER_EVAL_DIFFUSE_COLOR,
+  SHADER_EVAL_GLOSSY_COLOR,
+  SHADER_EVAL_TRANSMISSION_COLOR,
+  SHADER_EVAL_SUBSURFACE_COLOR,
+  SHADER_EVAL_EMISSION,
+
+  /* light passes */
+  SHADER_EVAL_AO,
+  SHADER_EVAL_COMBINED,
+  SHADER_EVAL_SHADOW,
+  SHADER_EVAL_DIFFUSE,
+  SHADER_EVAL_GLOSSY,
+  SHADER_EVAL_TRANSMISSION,
+  SHADER_EVAL_SUBSURFACE,
+
+  /* extra */
+  SHADER_EVAL_ENVIRONMENT,
 } ShaderEvalType;
 
 /* Path Tracing
  * note we need to keep the u/v pairs at even values */
 
 enum PathTraceDimension {
-	PRNG_FILTER_U = 0,
-	PRNG_FILTER_V = 1,
-	PRNG_LENS_U = 2,
-	PRNG_LENS_V = 3,
-	PRNG_TIME = 4,
-	PRNG_UNUSED_0 = 5,
-	PRNG_UNUSED_1 = 6,	/* for some reason (6, 7) is a bad sobol pattern */
-	PRNG_UNUSED_2 = 7,  /* with a low number of samples (< 64) */
-	PRNG_BASE_NUM = 10,
-
-	PRNG_BSDF_U = 0,
-	PRNG_BSDF_V = 1,
-	PRNG_LIGHT_U = 2,
-	PRNG_LIGHT_V = 3,
-	PRNG_LIGHT_TERMINATE = 4,
-	PRNG_TERMINATE = 5,
-	PRNG_PHASE_CHANNEL = 6,
-	PRNG_SCATTER_DISTANCE = 7,
-	PRNG_BOUNCE_NUM = 8,
-
-	PRNG_BEVEL_U = 6, /* reuse volume dimension, correlation won't harm */
-	PRNG_BEVEL_V = 7,
+  PRNG_FILTER_U = 0,
+  PRNG_FILTER_V = 1,
+  PRNG_LENS_U = 2,
+  PRNG_LENS_V = 3,
+  PRNG_TIME = 4,
+  PRNG_UNUSED_0 = 5,
+  PRNG_UNUSED_1 = 6, /* for some reason (6, 7) is a bad sobol pattern */
+  PRNG_UNUSED_2 = 7, /* with a low number of samples (< 64) */
+  PRNG_BASE_NUM = 10,
+
+  PRNG_BSDF_U = 0,
+  PRNG_BSDF_V = 1,
+  PRNG_LIGHT_U = 2,
+  PRNG_LIGHT_V = 3,
+  PRNG_LIGHT_TERMINATE = 4,
+  PRNG_TERMINATE = 5,
+  PRNG_PHASE_CHANNEL = 6,
+  PRNG_SCATTER_DISTANCE = 7,
+  PRNG_BOUNCE_NUM = 8,
+
+  PRNG_BEVEL_U = 6, /* reuse volume dimension, correlation won't harm */
+  PRNG_BEVEL_V = 7,
 };
 
 enum SamplingPattern {
-	SAMPLING_PATTERN_SOBOL = 0,
-	SAMPLING_PATTERN_CMJ = 1,
+  SAMPLING_PATTERN_SOBOL = 0,
+  SAMPLING_PATTERN_CMJ = 1,
 
-	SAMPLING_NUM_PATTERNS,
+  SAMPLING_NUM_PATTERNS,
 };
 
 /* these flags values correspond to raytypes in osl.cpp, so keep them in sync! */
 
 enum PathRayFlag {
-	PATH_RAY_CAMERA              = (1 << 0),
-	PATH_RAY_REFLECT             = (1 << 1),
-	PATH_RAY_TRANSMIT            = (1 << 2),
-	PATH_RAY_DIFFUSE             = (1 << 3),
-	PATH_RAY_GLOSSY              = (1 << 4),
-	PATH_RAY_SINGULAR            = (1 << 5),
-	PATH_RAY_TRANSPARENT         = (1 << 6),
-
-	PATH_RAY_SHADOW_OPAQUE_NON_CATCHER       = (1 << 7),
-	PATH_RAY_SHADOW_OPAQUE_CATCHER           = (1 << 8),
-	PATH_RAY_SHADOW_OPAQUE                   = (PATH_RAY_SHADOW_OPAQUE_NON_CATCHER|PATH_RAY_SHADOW_OPAQUE_CATCHER),
-	PATH_RAY_SHADOW_TRANSPARENT_NON_CATCHER  = (1 << 9),
-	PATH_RAY_SHADOW_TRANSPARENT_CATCHER      = (1 << 10),
-	PATH_RAY_SHADOW_TRANSPARENT              = (PATH_RAY_SHADOW_TRANSPARENT_NON_CATCHER|PATH_RAY_SHADOW_TRANSPARENT_CATCHER),
-	PATH_RAY_SHADOW_NON_CATCHER              = (PATH_RAY_SHADOW_OPAQUE_NON_CATCHER|PATH_RAY_SHADOW_TRANSPARENT_NON_CATCHER),
-	PATH_RAY_SHADOW                          = (PATH_RAY_SHADOW_OPAQUE|PATH_RAY_SHADOW_TRANSPARENT),
-
-	PATH_RAY_CURVE               = (1 << 11), /* visibility flag to define curve segments */
-	PATH_RAY_VOLUME_SCATTER      = (1 << 12), /* volume scattering */
-
-	/* Special flag to tag unaligned BVH nodes. */
-	PATH_RAY_NODE_UNALIGNED = (1 << 13),
-
-	PATH_RAY_ALL_VISIBILITY = ((1 << 14)-1),
-
-	/* Don't apply multiple importance sampling weights to emission from
-	 * lamp or surface hits, because they were not direct light sampled. */
-	PATH_RAY_MIS_SKIP                    = (1 << 14),
-	/* Diffuse bounce earlier in the path, skip SSS to improve performance
-	 * and avoid branching twice with disk sampling SSS. */
-	PATH_RAY_DIFFUSE_ANCESTOR            = (1 << 15),
-	/* Single pass has been written. */
-	PATH_RAY_SINGLE_PASS_DONE            = (1 << 16),
-	/* Ray is behind a shadow catcher .*/
-	PATH_RAY_SHADOW_CATCHER              = (1 << 17),
-	/* Store shadow data for shadow catcher or denoising. */
-	PATH_RAY_STORE_SHADOW_INFO           = (1 << 18),
-	/* Zero background alpha, for camera or transparent glass rays. */
-	PATH_RAY_TRANSPARENT_BACKGROUND      = (1 << 19),
-	/* Terminate ray immediately at next bounce. */
-	PATH_RAY_TERMINATE_IMMEDIATE         = (1 << 20),
-	/* Ray is to be terminated, but continue with transparent bounces and
-	 * emission as long as we encounter them. This is required to make the
-	 * MIS between direct and indirect light rays match, as shadow rays go
-	 * through transparent surfaces to reach emisison too. */
-	PATH_RAY_TERMINATE_AFTER_TRANSPARENT = (1 << 21),
-	/* Ray is to be terminated. */
-	PATH_RAY_TERMINATE                   = (PATH_RAY_TERMINATE_IMMEDIATE|PATH_RAY_TERMINATE_AFTER_TRANSPARENT),
-	/* Path and shader is being evaluated for direct lighting emission. */
-	PATH_RAY_EMISSION                    = (1 << 22)
+  PATH_RAY_CAMERA = (1 << 0),
+  PATH_RAY_REFLECT = (1 << 1),
+  PATH_RAY_TRANSMIT = (1 << 2),
+  PATH_RAY_DIFFUSE = (1 << 3),
+  PATH_RAY_GLOSSY = (1 << 4),
+  PATH_RAY_SINGULAR = (1 << 5),
+  PATH_RAY_TRANSPARENT = (1 << 6),
+
+  PATH_RAY_SHADOW_OPAQUE_NON_CATCHER = (1 << 7),
+  PATH_RAY_SHADOW_OPAQUE_CATCHER = (1 << 8),
+  PATH_RAY_SHADOW_OPAQUE = (PATH_RAY_SHADOW_OPAQUE_NON_CATCHER | PATH_RAY_SHADOW_OPAQUE_CATCHER),
+  PATH_RAY_SHADOW_TRANSPARENT_NON_CATCHER = (1 << 9),
+  PATH_RAY_SHADOW_TRANSPARENT_CATCHER = (1 << 10),
+  PATH_RAY_SHADOW_TRANSPARENT = (PATH_RAY_SHADOW_TRANSPARENT_NON_CATCHER |
+                                 PATH_RAY_SHADOW_TRANSPARENT_CATCHER),
+  PATH_RAY_SHADOW_NON_CATCHER = (PATH_RAY_SHADOW_OPAQUE_NON_CATCHER |
+                                 PATH_RAY_SHADOW_TRANSPARENT_NON_CATCHER),
+  PATH_RAY_SHADOW = (PATH_RAY_SHADOW_OPAQUE | PATH_RAY_SHADOW_TRANSPARENT),
+
+  PATH_RAY_CURVE = (1 << 11),          /* visibility flag to define curve segments */
+  PATH_RAY_VOLUME_SCATTER = (1 << 12), /* volume scattering */
+
+  /* Special flag to tag unaligned BVH nodes. */
+  PATH_RAY_NODE_UNALIGNED = (1 << 13),
+
+  PATH_RAY_ALL_VISIBILITY = ((1 << 14) - 1),
+
+  /* Don't apply multiple importance sampling weights to emission from
+   * lamp or surface hits, because they were not direct light sampled. */
+  PATH_RAY_MIS_SKIP = (1 << 14),
+  /* Diffuse bounce earlier in the path, skip SSS to improve performance
+   * and avoid branching twice with disk sampling SSS. */
+  PATH_RAY_DIFFUSE_ANCESTOR = (1 << 15),
+  /* Single pass has been written. */
+  PATH_RAY_SINGLE_PASS_DONE = (1 << 16),
+  /* Ray is behind a shadow catcher. */
+  PATH_RAY_SHADOW_CATCHER = (1 << 17),
+  /* Store shadow data for shadow catcher or denoising. */
+  PATH_RAY_STORE_SHADOW_INFO = (1 << 18),
+  /* Zero background alpha, for camera or transparent glass rays. */
+  PATH_RAY_TRANSPARENT_BACKGROUND = (1 << 19),
+  /* Terminate ray immediately at next bounce. */
+  PATH_RAY_TERMINATE_IMMEDIATE = (1 << 20),
+  /* Ray is to be terminated, but continue with transparent bounces and
+   * emission as long as we encounter them. This is required to make the
+   * MIS between direct and indirect light rays match, as shadow rays go
+   * through transparent surfaces to reach emission too. */
+  PATH_RAY_TERMINATE_AFTER_TRANSPARENT = (1 << 21),
+  /* Ray is to be terminated. */
+  PATH_RAY_TERMINATE = (PATH_RAY_TERMINATE_IMMEDIATE | PATH_RAY_TERMINATE_AFTER_TRANSPARENT),
+  /* Path and shader is being evaluated for direct lighting emission. */
+  PATH_RAY_EMISSION = (1 << 22)
 };
 
 /* Closure Label */
 
 typedef enum ClosureLabel {
-	LABEL_NONE = 0,
-	LABEL_TRANSMIT = 1,
-	LABEL_REFLECT = 2,
-	LABEL_DIFFUSE = 4,
-	LABEL_GLOSSY = 8,
-	LABEL_SINGULAR = 16,
-	LABEL_TRANSPARENT = 32,
-	LABEL_VOLUME_SCATTER = 64,
-	LABEL_TRANSMIT_TRANSPARENT = 128,
+  LABEL_NONE = 0,
+  LABEL_TRANSMIT = 1,
+  LABEL_REFLECT = 2,
+  LABEL_DIFFUSE = 4,
+  LABEL_GLOSSY = 8,
+  LABEL_SINGULAR = 16,
+  LABEL_TRANSPARENT = 32,
+  LABEL_VOLUME_SCATTER = 64,
+  LABEL_TRANSMIT_TRANSPARENT = 128,
 } ClosureLabel;
 
 /* Render Passes */
 
-#define PASS_NAME_JOIN(a, b) a ## _ ## b
+#define PASS_NAME_JOIN(a, b) a##_##b
 #define PASSMASK(pass) (1 << ((PASS_NAME_JOIN(PASS, pass)) % 32))
 
-#define PASSMASK_COMPONENT(comp) (PASSMASK(PASS_NAME_JOIN(comp, DIRECT)) |   \
-                                  PASSMASK(PASS_NAME_JOIN(comp, INDIRECT)) | \
-                                  PASSMASK(PASS_NAME_JOIN(comp, COLOR)))
+#define PASSMASK_COMPONENT(comp) \
+  (PASSMASK(PASS_NAME_JOIN(comp, DIRECT)) | PASSMASK(PASS_NAME_JOIN(comp, INDIRECT)) | \
+   PASSMASK(PASS_NAME_JOIN(comp, COLOR)))
 
 typedef enum PassType {
-	PASS_NONE = 0,
-
-	/* Main passes */
-	PASS_COMBINED = 1,
-	PASS_DEPTH,
-	PASS_NORMAL,
-	PASS_UV,
-	PASS_OBJECT_ID,
-	PASS_MATERIAL_ID,
-	PASS_MOTION,
-	PASS_MOTION_WEIGHT,
+  PASS_NONE = 0,
+
+  /* Main passes */
+  PASS_COMBINED = 1,
+  PASS_DEPTH,
+  PASS_NORMAL,
+  PASS_UV,
+  PASS_OBJECT_ID,
+  PASS_MATERIAL_ID,
+  PASS_MOTION,
+  PASS_MOTION_WEIGHT,
 #ifdef __KERNEL_DEBUG__
-	PASS_BVH_TRAVERSED_NODES,
-	PASS_BVH_TRAVERSED_INSTANCES,
-	PASS_BVH_INTERSECTIONS,
-	PASS_RAY_BOUNCES,
+  PASS_BVH_TRAVERSED_NODES,
+  PASS_BVH_TRAVERSED_INSTANCES,
+  PASS_BVH_INTERSECTIONS,
+  PASS_RAY_BOUNCES,
 #endif
-	PASS_RENDER_TIME,
-	PASS_CRYPTOMATTE,
-	PASS_CATEGORY_MAIN_END = 31,
-
-	PASS_MIST = 32,
-	PASS_EMISSION,
-	PASS_BACKGROUND,
-	PASS_AO,
-	PASS_SHADOW,
-	PASS_LIGHT, /* no real pass, used to force use_light_pass */
-	PASS_DIFFUSE_DIRECT,
-	PASS_DIFFUSE_INDIRECT,
-	PASS_DIFFUSE_COLOR,
-	PASS_GLOSSY_DIRECT,
-	PASS_GLOSSY_INDIRECT,
-	PASS_GLOSSY_COLOR,
-	PASS_TRANSMISSION_DIRECT,
-	PASS_TRANSMISSION_INDIRECT,
-	PASS_TRANSMISSION_COLOR,
-	PASS_SUBSURFACE_DIRECT,
-	PASS_SUBSURFACE_INDIRECT,
-	PASS_SUBSURFACE_COLOR,
-	PASS_VOLUME_DIRECT,
-	PASS_VOLUME_INDIRECT,
-	/* No Scatter color since it's tricky to define what it would even mean. */
-	PASS_CATEGORY_LIGHT_END = 63,
+  PASS_RENDER_TIME,
+  PASS_CRYPTOMATTE,
+  PASS_CATEGORY_MAIN_END = 31,
+
+  PASS_MIST = 32,
+  PASS_EMISSION,
+  PASS_BACKGROUND,
+  PASS_AO,
+  PASS_SHADOW,
+  PASS_LIGHT, /* no real pass, used to force use_light_pass */
+  PASS_DIFFUSE_DIRECT,
+  PASS_DIFFUSE_INDIRECT,
+  PASS_DIFFUSE_COLOR,
+  PASS_GLOSSY_DIRECT,
+  PASS_GLOSSY_INDIRECT,
+  PASS_GLOSSY_COLOR,
+  PASS_TRANSMISSION_DIRECT,
+  PASS_TRANSMISSION_INDIRECT,
+  PASS_TRANSMISSION_COLOR,
+  PASS_SUBSURFACE_DIRECT,
+  PASS_SUBSURFACE_INDIRECT,
+  PASS_SUBSURFACE_COLOR,
+  PASS_VOLUME_DIRECT,
+  PASS_VOLUME_INDIRECT,
+  /* No Scatter color since it's tricky to define what it would even mean. */
+  PASS_CATEGORY_LIGHT_END = 63,
 } PassType;
 
 #define PASS_ANY (~0)
 
 typedef enum CryptomatteType {
-	CRYPT_NONE = 0,
-	CRYPT_OBJECT = (1 << 0),
-	CRYPT_MATERIAL = (1 << 1),
-	CRYPT_ASSET = (1 << 2),
-	CRYPT_ACCURATE = (1 << 3),
+  CRYPT_NONE = 0,
+  CRYPT_OBJECT = (1 << 0),
+  CRYPT_MATERIAL = (1 << 1),
+  CRYPT_ASSET = (1 << 2),
+  CRYPT_ACCURATE = (1 << 3),
 } CryptomatteType;
 
 typedef enum DenoisingPassOffsets {
-	DENOISING_PASS_NORMAL             = 0,
-	DENOISING_PASS_NORMAL_VAR         = 3,
-	DENOISING_PASS_ALBEDO             = 6,
-	DENOISING_PASS_ALBEDO_VAR         = 9,
-	DENOISING_PASS_DEPTH              = 12,
-	DENOISING_PASS_DEPTH_VAR          = 13,
-	DENOISING_PASS_SHADOW_A           = 14,
-	DENOISING_PASS_SHADOW_B           = 17,
-	DENOISING_PASS_COLOR              = 20,
-	DENOISING_PASS_COLOR_VAR          = 23,
-	DENOISING_PASS_CLEAN              = 26,
-
-	DENOISING_PASS_PREFILTERED_DEPTH     = 0,
-	DENOISING_PASS_PREFILTERED_NORMAL    = 1,
-	DENOISING_PASS_PREFILTERED_SHADOWING = 4,
-	DENOISING_PASS_PREFILTERED_ALBEDO    = 5,
-	DENOISING_PASS_PREFILTERED_COLOR     = 8,
-	DENOISING_PASS_PREFILTERED_VARIANCE  = 11,
-	DENOISING_PASS_PREFILTERED_INTENSITY = 14,
-
-	DENOISING_PASS_SIZE_BASE          = 26,
-	DENOISING_PASS_SIZE_CLEAN         = 3,
-	DENOISING_PASS_SIZE_PREFILTERED   = 15,
+  DENOISING_PASS_NORMAL = 0,
+  DENOISING_PASS_NORMAL_VAR = 3,
+  DENOISING_PASS_ALBEDO = 6,
+  DENOISING_PASS_ALBEDO_VAR = 9,
+  DENOISING_PASS_DEPTH = 12,
+  DENOISING_PASS_DEPTH_VAR = 13,
+  DENOISING_PASS_SHADOW_A = 14,
+  DENOISING_PASS_SHADOW_B = 17,
+  DENOISING_PASS_COLOR = 20,
+  DENOISING_PASS_COLOR_VAR = 23,
+  DENOISING_PASS_CLEAN = 26,
+
+  DENOISING_PASS_PREFILTERED_DEPTH = 0,
+  DENOISING_PASS_PREFILTERED_NORMAL = 1,
+  DENOISING_PASS_PREFILTERED_SHADOWING = 4,
+  DENOISING_PASS_PREFILTERED_ALBEDO = 5,
+  DENOISING_PASS_PREFILTERED_COLOR = 8,
+  DENOISING_PASS_PREFILTERED_VARIANCE = 11,
+  DENOISING_PASS_PREFILTERED_INTENSITY = 14,
+
+  DENOISING_PASS_SIZE_BASE = 26,
+  DENOISING_PASS_SIZE_CLEAN = 3,
+  DENOISING_PASS_SIZE_PREFILTERED = 15,
 } DenoisingPassOffsets;
 
 typedef enum eBakePassFilter {
-	BAKE_FILTER_NONE = 0,
-	BAKE_FILTER_DIRECT = (1 << 0),
-	BAKE_FILTER_INDIRECT = (1 << 1),
-	BAKE_FILTER_COLOR = (1 << 2),
-	BAKE_FILTER_DIFFUSE = (1 << 3),
-	BAKE_FILTER_GLOSSY = (1 << 4),
-	BAKE_FILTER_TRANSMISSION = (1 << 5),
-	BAKE_FILTER_SUBSURFACE = (1 << 6),
-	BAKE_FILTER_EMISSION = (1 << 7),
-	BAKE_FILTER_AO = (1 << 8),
+  BAKE_FILTER_NONE = 0,
+  BAKE_FILTER_DIRECT = (1 << 0),
+  BAKE_FILTER_INDIRECT = (1 << 1),
+  BAKE_FILTER_COLOR = (1 << 2),
+  BAKE_FILTER_DIFFUSE = (1 << 3),
+  BAKE_FILTER_GLOSSY = (1 << 4),
+  BAKE_FILTER_TRANSMISSION = (1 << 5),
+  BAKE_FILTER_SUBSURFACE = (1 << 6),
+  BAKE_FILTER_EMISSION = (1 << 7),
+  BAKE_FILTER_AO = (1 << 8),
 } eBakePassFilter;
 
 typedef enum BakePassFilterCombos {
-	BAKE_FILTER_COMBINED = (
-	    BAKE_FILTER_DIRECT |
-	    BAKE_FILTER_INDIRECT |
-	    BAKE_FILTER_DIFFUSE |
-	    BAKE_FILTER_GLOSSY |
-	    BAKE_FILTER_TRANSMISSION |
-	    BAKE_FILTER_SUBSURFACE |
-	    BAKE_FILTER_EMISSION |
-	    BAKE_FILTER_AO),
-	BAKE_FILTER_DIFFUSE_DIRECT = (BAKE_FILTER_DIRECT | BAKE_FILTER_DIFFUSE),
-	BAKE_FILTER_GLOSSY_DIRECT = (BAKE_FILTER_DIRECT | BAKE_FILTER_GLOSSY),
-	BAKE_FILTER_TRANSMISSION_DIRECT = (BAKE_FILTER_DIRECT | BAKE_FILTER_TRANSMISSION),
-	BAKE_FILTER_SUBSURFACE_DIRECT = (BAKE_FILTER_DIRECT | BAKE_FILTER_SUBSURFACE),
-	BAKE_FILTER_DIFFUSE_INDIRECT = (BAKE_FILTER_INDIRECT | BAKE_FILTER_DIFFUSE),
-	BAKE_FILTER_GLOSSY_INDIRECT = (BAKE_FILTER_INDIRECT | BAKE_FILTER_GLOSSY),
-	BAKE_FILTER_TRANSMISSION_INDIRECT = (BAKE_FILTER_INDIRECT | BAKE_FILTER_TRANSMISSION),
-	BAKE_FILTER_SUBSURFACE_INDIRECT = (BAKE_FILTER_INDIRECT | BAKE_FILTER_SUBSURFACE),
+  BAKE_FILTER_COMBINED = (BAKE_FILTER_DIRECT | BAKE_FILTER_INDIRECT | BAKE_FILTER_DIFFUSE |
+                          BAKE_FILTER_GLOSSY | BAKE_FILTER_TRANSMISSION | BAKE_FILTER_SUBSURFACE |
+                          BAKE_FILTER_EMISSION | BAKE_FILTER_AO),
+  BAKE_FILTER_DIFFUSE_DIRECT = (BAKE_FILTER_DIRECT | BAKE_FILTER_DIFFUSE),
+  BAKE_FILTER_GLOSSY_DIRECT = (BAKE_FILTER_DIRECT | BAKE_FILTER_GLOSSY),
+  BAKE_FILTER_TRANSMISSION_DIRECT = (BAKE_FILTER_DIRECT | BAKE_FILTER_TRANSMISSION),
+  BAKE_FILTER_SUBSURFACE_DIRECT = (BAKE_FILTER_DIRECT | BAKE_FILTER_SUBSURFACE),
+  BAKE_FILTER_DIFFUSE_INDIRECT = (BAKE_FILTER_INDIRECT | BAKE_FILTER_DIFFUSE),
+  BAKE_FILTER_GLOSSY_INDIRECT = (BAKE_FILTER_INDIRECT | BAKE_FILTER_GLOSSY),
+  BAKE_FILTER_TRANSMISSION_INDIRECT = (BAKE_FILTER_INDIRECT | BAKE_FILTER_TRANSMISSION),
+  BAKE_FILTER_SUBSURFACE_INDIRECT = (BAKE_FILTER_INDIRECT | BAKE_FILTER_SUBSURFACE),
 } BakePassFilterCombos;
 
 typedef enum DenoiseFlag {
-	DENOISING_CLEAN_DIFFUSE_DIR      = (1 << 0),
-	DENOISING_CLEAN_DIFFUSE_IND      = (1 << 1),
-	DENOISING_CLEAN_GLOSSY_DIR       = (1 << 2),
-	DENOISING_CLEAN_GLOSSY_IND       = (1 << 3),
-	DENOISING_CLEAN_TRANSMISSION_DIR = (1 << 4),
-	DENOISING_CLEAN_TRANSMISSION_IND = (1 << 5),
-	DENOISING_CLEAN_SUBSURFACE_DIR   = (1 << 6),
-	DENOISING_CLEAN_SUBSURFACE_IND   = (1 << 7),
-	DENOISING_CLEAN_ALL_PASSES       = (1 << 8)-1,
+  DENOISING_CLEAN_DIFFUSE_DIR = (1 << 0),
+  DENOISING_CLEAN_DIFFUSE_IND = (1 << 1),
+  DENOISING_CLEAN_GLOSSY_DIR = (1 << 2),
+  DENOISING_CLEAN_GLOSSY_IND = (1 << 3),
+  DENOISING_CLEAN_TRANSMISSION_DIR = (1 << 4),
+  DENOISING_CLEAN_TRANSMISSION_IND = (1 << 5),
+  DENOISING_CLEAN_SUBSURFACE_DIR = (1 << 6),
+  DENOISING_CLEAN_SUBSURFACE_IND = (1 << 7),
+  DENOISING_CLEAN_ALL_PASSES = (1 << 8) - 1,
 } DenoiseFlag;
 
 #ifdef __KERNEL_DEBUG__
@@ -476,173 +471,171 @@ typedef enum DenoiseFlag {
  * really important here.
  */
 typedef struct DebugData {
-	int num_bvh_traversed_nodes;
-	int num_bvh_traversed_instances;
-	int num_bvh_intersections;
-	int num_ray_bounces;
+  int num_bvh_traversed_nodes;
+  int num_bvh_traversed_instances;
+  int num_bvh_intersections;
+  int num_ray_bounces;
 } DebugData;
 #endif
 
 typedef ccl_addr_space struct PathRadianceState {
 #ifdef __PASSES__
-	float3 diffuse;
-	float3 glossy;
-	float3 transmission;
-	float3 subsurface;
-	float3 scatter;
+  float3 diffuse;
+  float3 glossy;
+  float3 transmission;
+  float3 subsurface;
+  float3 scatter;
 
-	float3 direct;
+  float3 direct;
 #endif
 } PathRadianceState;
 
 typedef ccl_addr_space struct PathRadiance {
 #ifdef __PASSES__
-	int use_light_pass;
+  int use_light_pass;
 #endif
 
-	float transparent;
-	float3 emission;
+  float transparent;
+  float3 emission;
 #ifdef __PASSES__
-	float3 background;
-	float3 ao;
-
-	float3 indirect;
-	float3 direct_emission;
-
-	float3 color_diffuse;
-	float3 color_glossy;
-	float3 color_transmission;
-	float3 color_subsurface;
-
-	float3 direct_diffuse;
-	float3 direct_glossy;
-	float3 direct_transmission;
-	float3 direct_subsurface;
-	float3 direct_scatter;
-
-	float3 indirect_diffuse;
-	float3 indirect_glossy;
-	float3 indirect_transmission;
-	float3 indirect_subsurface;
-	float3 indirect_scatter;
-
-	float4 shadow;
-	float mist;
+  float3 background;
+  float3 ao;
+
+  float3 indirect;
+  float3 direct_emission;
+
+  float3 color_diffuse;
+  float3 color_glossy;
+  float3 color_transmission;
+  float3 color_subsurface;
+
+  float3 direct_diffuse;
+  float3 direct_glossy;
+  float3 direct_transmission;
+  float3 direct_subsurface;
+  float3 direct_scatter;
+
+  float3 indirect_diffuse;
+  float3 indirect_glossy;
+  float3 indirect_transmission;
+  float3 indirect_subsurface;
+  float3 indirect_scatter;
+
+  float4 shadow;
+  float mist;
 #endif
 
-	struct PathRadianceState state;
+  struct PathRadianceState state;
 
 #ifdef __SHADOW_TRICKS__
-	/* Total light reachable across the path, ignoring shadow blocked queries. */
-	float3 path_total;
-	/* Total light reachable across the path with shadow blocked queries
-	 * applied here.
-	 *
-	 * Dividing this figure by path_total will give estimate of shadow pass.
-	 */
-	float3 path_total_shaded;
-
-	/* Color of the background on which shadow is alpha-overed. */
-	float3 shadow_background_color;
-
-	/* Path radiance sum and throughput at the moment when ray hits shadow
-	 * catcher object.
-	 */
-	float shadow_throughput;
-
-	/* Accumulated transparency along the path after shadow catcher bounce. */
-	float shadow_transparency;
-
-	/* Indicate if any shadow catcher data is set. */
-	int has_shadow_catcher;
+  /* Total light reachable across the path, ignoring shadow blocked queries. */
+  float3 path_total;
+  /* Total light reachable across the path with shadow blocked queries
+   * applied here.
+   *
+   * Dividing this figure by path_total gives an estimate of the shadow pass.
+   */
+  float3 path_total_shaded;
+
+  /* Color of the background on which shadow is alpha-overed. */
+  float3 shadow_background_color;
+
+  /* Path radiance sum and throughput at the moment when ray hits shadow
+   * catcher object.
+   */
+  float shadow_throughput;
+
+  /* Accumulated transparency along the path after shadow catcher bounce. */
+  float shadow_transparency;
+
+  /* Indicate if any shadow catcher data is set. */
+  int has_shadow_catcher;
 #endif
 
 #ifdef __DENOISING_FEATURES__
-	float3 denoising_normal;
-	float3 denoising_albedo;
-	float denoising_depth;
-#endif  /* __DENOISING_FEATURES__ */
+  float3 denoising_normal;
+  float3 denoising_albedo;
+  float denoising_depth;
+#endif /* __DENOISING_FEATURES__ */
 
 #ifdef __KERNEL_DEBUG__
-	DebugData debug_data;
-#endif  /* __KERNEL_DEBUG__ */
+  DebugData debug_data;
+#endif /* __KERNEL_DEBUG__ */
 } PathRadiance;
 
 typedef struct BsdfEval {
 #ifdef __PASSES__
-	int use_light_pass;
+  int use_light_pass;
 #endif
 
-	float3 diffuse;
+  float3 diffuse;
 #ifdef __PASSES__
-	float3 glossy;
-	float3 transmission;
-	float3 transparent;
-	float3 subsurface;
-	float3 scatter;
+  float3 glossy;
+  float3 transmission;
+  float3 transparent;
+  float3 subsurface;
+  float3 scatter;
 #endif
 #ifdef __SHADOW_TRICKS__
-	float3 sum_no_mis;
+  float3 sum_no_mis;
 #endif
 } BsdfEval;
 
 /* Shader Flag */
 
 typedef enum ShaderFlag {
-	SHADER_SMOOTH_NORMAL = (1 << 31),
-	SHADER_CAST_SHADOW = (1 << 30),
-	SHADER_AREA_LIGHT = (1 << 29),
-	SHADER_USE_MIS = (1 << 28),
-	SHADER_EXCLUDE_DIFFUSE = (1 << 27),
-	SHADER_EXCLUDE_GLOSSY = (1 << 26),
-	SHADER_EXCLUDE_TRANSMIT = (1 << 25),
-	SHADER_EXCLUDE_CAMERA = (1 << 24),
-	SHADER_EXCLUDE_SCATTER = (1 << 23),
-	SHADER_EXCLUDE_ANY = (SHADER_EXCLUDE_DIFFUSE|SHADER_EXCLUDE_GLOSSY|SHADER_EXCLUDE_TRANSMIT|SHADER_EXCLUDE_CAMERA|SHADER_EXCLUDE_SCATTER),
-
-	SHADER_MASK = ~(SHADER_SMOOTH_NORMAL|SHADER_CAST_SHADOW|SHADER_AREA_LIGHT|SHADER_USE_MIS|SHADER_EXCLUDE_ANY)
+  SHADER_SMOOTH_NORMAL = (1 << 31),
+  SHADER_CAST_SHADOW = (1 << 30),
+  SHADER_AREA_LIGHT = (1 << 29),
+  SHADER_USE_MIS = (1 << 28),
+  SHADER_EXCLUDE_DIFFUSE = (1 << 27),
+  SHADER_EXCLUDE_GLOSSY = (1 << 26),
+  SHADER_EXCLUDE_TRANSMIT = (1 << 25),
+  SHADER_EXCLUDE_CAMERA = (1 << 24),
+  SHADER_EXCLUDE_SCATTER = (1 << 23),
+  SHADER_EXCLUDE_ANY = (SHADER_EXCLUDE_DIFFUSE | SHADER_EXCLUDE_GLOSSY | SHADER_EXCLUDE_TRANSMIT |
+                        SHADER_EXCLUDE_CAMERA | SHADER_EXCLUDE_SCATTER),
+
+  SHADER_MASK = ~(SHADER_SMOOTH_NORMAL | SHADER_CAST_SHADOW | SHADER_AREA_LIGHT | SHADER_USE_MIS |
+                  SHADER_EXCLUDE_ANY)
 } ShaderFlag;
 
 /* Light Type */
 
 typedef enum LightType {
-	LIGHT_POINT,
-	LIGHT_DISTANT,
-	LIGHT_BACKGROUND,
-	LIGHT_AREA,
-	LIGHT_SPOT,
-	LIGHT_TRIANGLE
+  LIGHT_POINT,
+  LIGHT_DISTANT,
+  LIGHT_BACKGROUND,
+  LIGHT_AREA,
+  LIGHT_SPOT,
+  LIGHT_TRIANGLE
 } LightType;
 
 /* Camera Type */
 
-enum CameraType {
-	CAMERA_PERSPECTIVE,
-	CAMERA_ORTHOGRAPHIC,
-	CAMERA_PANORAMA
-};
+enum CameraType { CAMERA_PERSPECTIVE, CAMERA_ORTHOGRAPHIC, CAMERA_PANORAMA };
 
 /* Panorama Type */
 
 enum PanoramaType {
-	PANORAMA_EQUIRECTANGULAR = 0,
-	PANORAMA_FISHEYE_EQUIDISTANT = 1,
-	PANORAMA_FISHEYE_EQUISOLID = 2,
-	PANORAMA_MIRRORBALL = 3,
+  PANORAMA_EQUIRECTANGULAR = 0,
+  PANORAMA_FISHEYE_EQUIDISTANT = 1,
+  PANORAMA_FISHEYE_EQUISOLID = 2,
+  PANORAMA_MIRRORBALL = 3,
 
-	PANORAMA_NUM_TYPES,
+  PANORAMA_NUM_TYPES,
 };
 
 /* Differential */
 
 typedef struct differential3 {
-	float3 dx;
-	float3 dy;
+  float3 dx;
+  float3 dy;
 } differential3;
 
 typedef struct differential {
-	float dx;
-	float dy;
+  float dx;
+  float dy;
 } differential;
 
 /* Ray */
@@ -657,21 +650,21 @@ typedef struct Ray {
  * is fixed.
  */
 #ifndef __KERNEL_OPENCL_AMD__
-	float3 P;		/* origin */
-	float3 D;		/* direction */
+  float3 P; /* origin */
+  float3 D; /* direction */
 
-	float t;		/* length of the ray */
-	float time;		/* time (for motion blur) */
+  float t;    /* length of the ray */
+  float time; /* time (for motion blur) */
 #else
-	float t;		/* length of the ray */
-	float time;		/* time (for motion blur) */
-	float3 P;		/* origin */
-	float3 D;		/* direction */
+  float t;    /* length of the ray */
+  float time; /* time (for motion blur) */
+  float3 P;   /* origin */
+  float3 D;   /* direction */
 #endif
 
 #ifdef __RAY_DIFFERENTIALS__
-	differential3 dP;
-	differential3 dD;
+  differential3 dP;
+  differential3 dD;
 #endif
 } Ray;
 
@@ -679,42 +672,42 @@ typedef struct Ray {
 
 typedef struct Intersection {
 #ifdef __EMBREE__
-	float3 Ng;
+  float3 Ng;
 #endif
-	float t, u, v;
-	int prim;
-	int object;
-	int type;
+  float t, u, v;
+  int prim;
+  int object;
+  int type;
 
 #ifdef __KERNEL_DEBUG__
-	int num_traversed_nodes;
-	int num_traversed_instances;
-	int num_intersections;
+  int num_traversed_nodes;
+  int num_traversed_instances;
+  int num_intersections;
 #endif
 } Intersection;
 
 /* Primitives */
 
 typedef enum PrimitiveType {
-	PRIMITIVE_NONE            = 0,
-	PRIMITIVE_TRIANGLE        = (1 << 0),
-	PRIMITIVE_MOTION_TRIANGLE = (1 << 1),
-	PRIMITIVE_CURVE           = (1 << 2),
-	PRIMITIVE_MOTION_CURVE    = (1 << 3),
-	/* Lamp primitive is not included below on purpose,
-	 * since it is no real traceable primitive.
-	 */
-	PRIMITIVE_LAMP            = (1 << 4),
-
-	PRIMITIVE_ALL_TRIANGLE = (PRIMITIVE_TRIANGLE|PRIMITIVE_MOTION_TRIANGLE),
-	PRIMITIVE_ALL_CURVE = (PRIMITIVE_CURVE|PRIMITIVE_MOTION_CURVE),
-	PRIMITIVE_ALL_MOTION = (PRIMITIVE_MOTION_TRIANGLE|PRIMITIVE_MOTION_CURVE),
-	PRIMITIVE_ALL = (PRIMITIVE_ALL_TRIANGLE|PRIMITIVE_ALL_CURVE),
-
-	/* Total number of different traceable primitives.
-	 * NOTE: This is an actual value, not a bitflag.
-	 */
-	PRIMITIVE_NUM_TOTAL = 4,
+  PRIMITIVE_NONE = 0,
+  PRIMITIVE_TRIANGLE = (1 << 0),
+  PRIMITIVE_MOTION_TRIANGLE = (1 << 1),
+  PRIMITIVE_CURVE = (1 << 2),
+  PRIMITIVE_MOTION_CURVE = (1 << 3),
+  /* Lamp primitive is not included below on purpose,
+   * since it is not a real traceable primitive.
+   */
+  PRIMITIVE_LAMP = (1 << 4),
+
+  PRIMITIVE_ALL_TRIANGLE = (PRIMITIVE_TRIANGLE | PRIMITIVE_MOTION_TRIANGLE),
+  PRIMITIVE_ALL_CURVE = (PRIMITIVE_CURVE | PRIMITIVE_MOTION_CURVE),
+  PRIMITIVE_ALL_MOTION = (PRIMITIVE_MOTION_TRIANGLE | PRIMITIVE_MOTION_CURVE),
+  PRIMITIVE_ALL = (PRIMITIVE_ALL_TRIANGLE | PRIMITIVE_ALL_CURVE),
+
+  /* Total number of different traceable primitives.
+   * NOTE: This is an actual value, not a bitflag.
+   */
+  PRIMITIVE_NUM_TOTAL = 4,
 } PrimitiveType;
 
 #define PRIMITIVE_PACK_SEGMENT(type, segment) ((segment << PRIMITIVE_NUM_TOTAL) | (type))
@@ -723,68 +716,68 @@ typedef enum PrimitiveType {
 /* Attributes */
 
 typedef enum AttributePrimitive {
-	ATTR_PRIM_TRIANGLE = 0,
-	ATTR_PRIM_CURVE,
-	ATTR_PRIM_SUBD,
+  ATTR_PRIM_TRIANGLE = 0,
+  ATTR_PRIM_CURVE,
+  ATTR_PRIM_SUBD,
 
-	ATTR_PRIM_TYPES
+  ATTR_PRIM_TYPES
 } AttributePrimitive;
 
 typedef enum AttributeElement {
-	ATTR_ELEMENT_NONE,
-	ATTR_ELEMENT_OBJECT,
-	ATTR_ELEMENT_MESH,
-	ATTR_ELEMENT_FACE,
-	ATTR_ELEMENT_VERTEX,
-	ATTR_ELEMENT_VERTEX_MOTION,
-	ATTR_ELEMENT_CORNER,
-	ATTR_ELEMENT_CORNER_BYTE,
-	ATTR_ELEMENT_CURVE,
-	ATTR_ELEMENT_CURVE_KEY,
-	ATTR_ELEMENT_CURVE_KEY_MOTION,
-	ATTR_ELEMENT_VOXEL
+  ATTR_ELEMENT_NONE,
+  ATTR_ELEMENT_OBJECT,
+  ATTR_ELEMENT_MESH,
+  ATTR_ELEMENT_FACE,
+  ATTR_ELEMENT_VERTEX,
+  ATTR_ELEMENT_VERTEX_MOTION,
+  ATTR_ELEMENT_CORNER,
+  ATTR_ELEMENT_CORNER_BYTE,
+  ATTR_ELEMENT_CURVE,
+  ATTR_ELEMENT_CURVE_KEY,
+  ATTR_ELEMENT_CURVE_KEY_MOTION,
+  ATTR_ELEMENT_VOXEL
 } AttributeElement;
 
 typedef enum AttributeStandard {
-	ATTR_STD_NONE = 0,
-	ATTR_STD_VERTEX_NORMAL,
-	ATTR_STD_FACE_NORMAL,
-	ATTR_STD_UV,
-	ATTR_STD_UV_TANGENT,
-	ATTR_STD_UV_TANGENT_SIGN,
-	ATTR_STD_GENERATED,
-	ATTR_STD_GENERATED_TRANSFORM,
-	ATTR_STD_POSITION_UNDEFORMED,
-	ATTR_STD_POSITION_UNDISPLACED,
-	ATTR_STD_MOTION_VERTEX_POSITION,
-	ATTR_STD_MOTION_VERTEX_NORMAL,
-	ATTR_STD_PARTICLE,
-	ATTR_STD_CURVE_INTERCEPT,
-	ATTR_STD_CURVE_RANDOM,
-	ATTR_STD_PTEX_FACE_ID,
-	ATTR_STD_PTEX_UV,
-	ATTR_STD_VOLUME_DENSITY,
-	ATTR_STD_VOLUME_COLOR,
-	ATTR_STD_VOLUME_FLAME,
-	ATTR_STD_VOLUME_HEAT,
-	ATTR_STD_VOLUME_TEMPERATURE,
-	ATTR_STD_VOLUME_VELOCITY,
-	ATTR_STD_POINTINESS,
-	ATTR_STD_NUM,
-
-	ATTR_STD_NOT_FOUND = ~0
+  ATTR_STD_NONE = 0,
+  ATTR_STD_VERTEX_NORMAL,
+  ATTR_STD_FACE_NORMAL,
+  ATTR_STD_UV,
+  ATTR_STD_UV_TANGENT,
+  ATTR_STD_UV_TANGENT_SIGN,
+  ATTR_STD_GENERATED,
+  ATTR_STD_GENERATED_TRANSFORM,
+  ATTR_STD_POSITION_UNDEFORMED,
+  ATTR_STD_POSITION_UNDISPLACED,
+  ATTR_STD_MOTION_VERTEX_POSITION,
+  ATTR_STD_MOTION_VERTEX_NORMAL,
+  ATTR_STD_PARTICLE,
+  ATTR_STD_CURVE_INTERCEPT,
+  ATTR_STD_CURVE_RANDOM,
+  ATTR_STD_PTEX_FACE_ID,
+  ATTR_STD_PTEX_UV,
+  ATTR_STD_VOLUME_DENSITY,
+  ATTR_STD_VOLUME_COLOR,
+  ATTR_STD_VOLUME_FLAME,
+  ATTR_STD_VOLUME_HEAT,
+  ATTR_STD_VOLUME_TEMPERATURE,
+  ATTR_STD_VOLUME_VELOCITY,
+  ATTR_STD_POINTINESS,
+  ATTR_STD_NUM,
+
+  ATTR_STD_NOT_FOUND = ~0
 } AttributeStandard;
 
 typedef enum AttributeFlag {
-	ATTR_FINAL_SIZE = (1 << 0),
-	ATTR_SUBDIVIDED = (1 << 1),
+  ATTR_FINAL_SIZE = (1 << 0),
+  ATTR_SUBDIVIDED = (1 << 1),
 } AttributeFlag;
 
 typedef struct AttributeDescriptor {
-	AttributeElement element;
-	NodeAttributeType type;
-	uint flags; /* see enum AttributeFlag */
-	int offset;
+  AttributeElement element;
+  NodeAttributeType type;
+  uint flags; /* see enum AttributeFlag */
+  int offset;
 } AttributeDescriptor;
 
 /* Closure data */
@@ -794,7 +787,7 @@ typedef struct AttributeDescriptor {
 #    define MAX_CLOSURE 1
 #  else
 #    ifndef __MAX_CLOSURE__
-#       define MAX_CLOSURE 64
+#      define MAX_CLOSURE 64
 #    else
 #      define MAX_CLOSURE __MAX_CLOSURE__
 #    endif
@@ -815,16 +808,18 @@ typedef struct AttributeDescriptor {
  * we assume to be the maximum required alignment for any struct. */
 
 #define SHADER_CLOSURE_BASE \
-	float3 weight; \
-	ClosureType type; \
-	float sample_weight; \
-	float3 N
+  float3 weight; \
+  ClosureType type; \
+  float sample_weight; \
+  float3 N
 
-typedef ccl_addr_space struct ccl_align(16) ShaderClosure {
-	SHADER_CLOSURE_BASE;
+typedef ccl_addr_space struct ccl_align(16) ShaderClosure
+{
+  SHADER_CLOSURE_BASE;
 
-	float data[10]; /* pad to 80 bytes */
-} ShaderClosure;
+  float data[10]; /* pad to 80 bytes */
+}
+ShaderClosure;
 
 /* Shader Data
  *
@@ -833,272 +828,253 @@ typedef ccl_addr_space struct ccl_align(16) ShaderClosure {
  */
 
 enum ShaderDataFlag {
-	/* Runtime flags. */
-
-	/* Set when ray hits backside of surface. */
-	SD_BACKFACING      = (1 << 0),
-	/* Shader has non-zero emission. */
-	SD_EMISSION        = (1 << 1),
-	/* Shader has BSDF closure. */
-	SD_BSDF            = (1 << 2),
-	/* Shader has non-singular BSDF closure. */
-	SD_BSDF_HAS_EVAL   = (1 << 3),
-	/* Shader has BSSRDF closure. */
-	SD_BSSRDF          = (1 << 4),
-	/* Shader has holdout closure. */
-	SD_HOLDOUT         = (1 << 5),
-	/* Shader has non-zero volume extinction. */
-	SD_EXTINCTION      = (1 << 6),
-	/* Shader has have volume phase (scatter) closure. */
-	SD_SCATTER         = (1 << 7),
-	/* Shader has transparent closure. */
-	SD_TRANSPARENT     = (1 << 9),
-	/* BSDF requires LCG for evaluation. */
-	SD_BSDF_NEEDS_LCG  = (1 << 10),
-
-	SD_CLOSURE_FLAGS = (SD_EMISSION |
-	                    SD_BSDF |
-	                    SD_BSDF_HAS_EVAL |
-	                    SD_BSSRDF |
-	                    SD_HOLDOUT |
-	                    SD_EXTINCTION |
-	                    SD_SCATTER |
-	                    SD_BSDF_NEEDS_LCG),
-
-	/* Shader flags. */
-
-	/* direct light sample */
-	SD_USE_MIS                = (1 << 16),
-	/* Has transparent shadow. */
-	SD_HAS_TRANSPARENT_SHADOW = (1 << 17),
-	/* Has volume shader. */
-	SD_HAS_VOLUME             = (1 << 18),
-	/* Has only volume shader, no surface. */
-	SD_HAS_ONLY_VOLUME        = (1 << 19),
-	/* Has heterogeneous volume. */
-	SD_HETEROGENEOUS_VOLUME   = (1 << 20),
-	/* BSSRDF normal uses bump. */
-	SD_HAS_BSSRDF_BUMP        = (1 << 21),
-	/* Use equiangular volume sampling */
-	SD_VOLUME_EQUIANGULAR     = (1 << 22),
-	/* Use multiple importance volume sampling. */
-	SD_VOLUME_MIS             = (1 << 23),
-	/* Use cubic interpolation for voxels. */
-	SD_VOLUME_CUBIC           = (1 << 24),
-	/* Has data connected to the displacement input or uses bump map. */
-	SD_HAS_BUMP               = (1 << 25),
-	/* Has true displacement. */
-	SD_HAS_DISPLACEMENT       = (1 << 26),
-	/* Has constant emission (value stored in __shaders) */
-	SD_HAS_CONSTANT_EMISSION  = (1 << 27),
-	/* Needs to access attributes */
-	SD_NEED_ATTRIBUTES        = (1 << 28),
-
-	SD_SHADER_FLAGS = (SD_USE_MIS |
-	                   SD_HAS_TRANSPARENT_SHADOW |
-	                   SD_HAS_VOLUME |
-	                   SD_HAS_ONLY_VOLUME |
-	                   SD_HETEROGENEOUS_VOLUME |
-	                   SD_HAS_BSSRDF_BUMP |
-	                   SD_VOLUME_EQUIANGULAR |
-	                   SD_VOLUME_MIS |
-	                   SD_VOLUME_CUBIC |
-	                   SD_HAS_BUMP |
-	                   SD_HAS_DISPLACEMENT |
-	                   SD_HAS_CONSTANT_EMISSION |
-	                   SD_NEED_ATTRIBUTES)
+  /* Runtime flags. */
+
+  /* Set when ray hits backside of surface. */
+  SD_BACKFACING = (1 << 0),
+  /* Shader has non-zero emission. */
+  SD_EMISSION = (1 << 1),
+  /* Shader has BSDF closure. */
+  SD_BSDF = (1 << 2),
+  /* Shader has non-singular BSDF closure. */
+  SD_BSDF_HAS_EVAL = (1 << 3),
+  /* Shader has BSSRDF closure. */
+  SD_BSSRDF = (1 << 4),
+  /* Shader has holdout closure. */
+  SD_HOLDOUT = (1 << 5),
+  /* Shader has non-zero volume extinction. */
+  SD_EXTINCTION = (1 << 6),
+  /* Shader has volume phase (scatter) closure. */
+  SD_SCATTER = (1 << 7),
+  /* Shader has transparent closure. */
+  SD_TRANSPARENT = (1 << 9),
+  /* BSDF requires LCG for evaluation. */
+  SD_BSDF_NEEDS_LCG = (1 << 10),
+
+  SD_CLOSURE_FLAGS = (SD_EMISSION | SD_BSDF | SD_BSDF_HAS_EVAL | SD_BSSRDF | SD_HOLDOUT |
+                      SD_EXTINCTION | SD_SCATTER | SD_BSDF_NEEDS_LCG),
+
+  /* Shader flags. */
+
+  /* direct light sample */
+  SD_USE_MIS = (1 << 16),
+  /* Has transparent shadow. */
+  SD_HAS_TRANSPARENT_SHADOW = (1 << 17),
+  /* Has volume shader. */
+  SD_HAS_VOLUME = (1 << 18),
+  /* Has only volume shader, no surface. */
+  SD_HAS_ONLY_VOLUME = (1 << 19),
+  /* Has heterogeneous volume. */
+  SD_HETEROGENEOUS_VOLUME = (1 << 20),
+  /* BSSRDF normal uses bump. */
+  SD_HAS_BSSRDF_BUMP = (1 << 21),
+  /* Use equiangular volume sampling */
+  SD_VOLUME_EQUIANGULAR = (1 << 22),
+  /* Use multiple importance volume sampling. */
+  SD_VOLUME_MIS = (1 << 23),
+  /* Use cubic interpolation for voxels. */
+  SD_VOLUME_CUBIC = (1 << 24),
+  /* Has data connected to the displacement input or uses bump map. */
+  SD_HAS_BUMP = (1 << 25),
+  /* Has true displacement. */
+  SD_HAS_DISPLACEMENT = (1 << 26),
+  /* Has constant emission (value stored in __shaders) */
+  SD_HAS_CONSTANT_EMISSION = (1 << 27),
+  /* Needs to access attributes */
+  SD_NEED_ATTRIBUTES = (1 << 28),
+
+  SD_SHADER_FLAGS = (SD_USE_MIS | SD_HAS_TRANSPARENT_SHADOW | SD_HAS_VOLUME | SD_HAS_ONLY_VOLUME |
+                     SD_HETEROGENEOUS_VOLUME | SD_HAS_BSSRDF_BUMP | SD_VOLUME_EQUIANGULAR |
+                     SD_VOLUME_MIS | SD_VOLUME_CUBIC | SD_HAS_BUMP | SD_HAS_DISPLACEMENT |
+                     SD_HAS_CONSTANT_EMISSION | SD_NEED_ATTRIBUTES)
 };
 
-	/* Object flags. */
+/* Object flags. */
 enum ShaderDataObjectFlag {
-	/* Holdout for camera rays. */
-	SD_OBJECT_HOLDOUT_MASK           = (1 << 0),
-	/* Has object motion blur. */
-	SD_OBJECT_MOTION                 = (1 << 1),
-	/* Vertices have transform applied. */
-	SD_OBJECT_TRANSFORM_APPLIED      = (1 << 2),
-	/* Vertices have negative scale applied. */
-	SD_OBJECT_NEGATIVE_SCALE_APPLIED = (1 << 3),
-	/* Object has a volume shader. */
-	SD_OBJECT_HAS_VOLUME             = (1 << 4),
-	/* Object intersects AABB of an object with volume shader. */
-	SD_OBJECT_INTERSECTS_VOLUME      = (1 << 5),
-	/* Has position for motion vertices. */
-	SD_OBJECT_HAS_VERTEX_MOTION      = (1 << 6),
-	/* object is used to catch shadows */
-	SD_OBJECT_SHADOW_CATCHER         = (1 << 7),
-	/* object has volume attributes */
-	SD_OBJECT_HAS_VOLUME_ATTRIBUTES  = (1 << 8),
-
-	SD_OBJECT_FLAGS = (SD_OBJECT_HOLDOUT_MASK |
-	                   SD_OBJECT_MOTION |
-	                   SD_OBJECT_TRANSFORM_APPLIED |
-	                   SD_OBJECT_NEGATIVE_SCALE_APPLIED |
-	                   SD_OBJECT_HAS_VOLUME |
-	                   SD_OBJECT_INTERSECTS_VOLUME |
-	                   SD_OBJECT_SHADOW_CATCHER |
-	                   SD_OBJECT_HAS_VOLUME_ATTRIBUTES)
+  /* Holdout for camera rays. */
+  SD_OBJECT_HOLDOUT_MASK = (1 << 0),
+  /* Has object motion blur. */
+  SD_OBJECT_MOTION = (1 << 1),
+  /* Vertices have transform applied. */
+  SD_OBJECT_TRANSFORM_APPLIED = (1 << 2),
+  /* Vertices have negative scale applied. */
+  SD_OBJECT_NEGATIVE_SCALE_APPLIED = (1 << 3),
+  /* Object has a volume shader. */
+  SD_OBJECT_HAS_VOLUME = (1 << 4),
+  /* Object intersects AABB of an object with volume shader. */
+  SD_OBJECT_INTERSECTS_VOLUME = (1 << 5),
+  /* Has position for motion vertices. */
+  SD_OBJECT_HAS_VERTEX_MOTION = (1 << 6),
+  /* object is used to catch shadows */
+  SD_OBJECT_SHADOW_CATCHER = (1 << 7),
+  /* object has volume attributes */
+  SD_OBJECT_HAS_VOLUME_ATTRIBUTES = (1 << 8),
+
+  SD_OBJECT_FLAGS = (SD_OBJECT_HOLDOUT_MASK | SD_OBJECT_MOTION | SD_OBJECT_TRANSFORM_APPLIED |
+                     SD_OBJECT_NEGATIVE_SCALE_APPLIED | SD_OBJECT_HAS_VOLUME |
+                     SD_OBJECT_INTERSECTS_VOLUME | SD_OBJECT_SHADOW_CATCHER |
+                     SD_OBJECT_HAS_VOLUME_ATTRIBUTES)
 };
 
 typedef ccl_addr_space struct ShaderData {
-	/* position */
-	float3 P;
-	/* smooth normal for shading */
-	float3 N;
-	/* true geometric normal */
-	float3 Ng;
-	/* view/incoming direction */
-	float3 I;
-	/* shader id */
-	int shader;
-	/* booleans describing shader, see ShaderDataFlag */
-	int flag;
-	/* booleans describing object of the shader, see ShaderDataObjectFlag */
-	int object_flag;
-
-	/* primitive id if there is one, ~0 otherwise */
-	int prim;
-
-	/* combined type and curve segment for hair */
-	int type;
-
-	/* parametric coordinates
-	 * - barycentric weights for triangles */
-	float u;
-	float v;
-	/* object id if there is one, ~0 otherwise */
-	int object;
-	/* lamp id if there is one, ~0 otherwise */
-	int lamp;
-
-	/* motion blur sample time */
-	float time;
-
-	/* length of the ray being shaded */
-	float ray_length;
+  /* position */
+  float3 P;
+  /* smooth normal for shading */
+  float3 N;
+  /* true geometric normal */
+  float3 Ng;
+  /* view/incoming direction */
+  float3 I;
+  /* shader id */
+  int shader;
+  /* booleans describing shader, see ShaderDataFlag */
+  int flag;
+  /* booleans describing object of the shader, see ShaderDataObjectFlag */
+  int object_flag;
+
+  /* primitive id if there is one, ~0 otherwise */
+  int prim;
+
+  /* combined type and curve segment for hair */
+  int type;
+
+  /* parametric coordinates
+   * - barycentric weights for triangles */
+  float u;
+  float v;
+  /* object id if there is one, ~0 otherwise */
+  int object;
+  /* lamp id if there is one, ~0 otherwise */
+  int lamp;
+
+  /* motion blur sample time */
+  float time;
+
+  /* length of the ray being shaded */
+  float ray_length;
 
 #ifdef __RAY_DIFFERENTIALS__
-	/* differential of P. these are orthogonal to Ng, not N */
-	differential3 dP;
-	/* differential of I */
-	differential3 dI;
-	/* differential of u, v */
-	differential du;
-	differential dv;
+  /* differential of P. these are orthogonal to Ng, not N */
+  differential3 dP;
+  /* differential of I */
+  differential3 dI;
+  /* differential of u, v */
+  differential du;
+  differential dv;
 #endif
 #ifdef __DPDU__
-	/* differential of P w.r.t. parametric coordinates. note that dPdu is
-	 * not readily suitable as a tangent for shading on triangles. */
-	float3 dPdu;
-	float3 dPdv;
+  /* differential of P w.r.t. parametric coordinates. note that dPdu is
+   * not readily suitable as a tangent for shading on triangles. */
+  float3 dPdu;
+  float3 dPdv;
 #endif
 
 #ifdef __OBJECT_MOTION__
-	/* object <-> world space transformations, cached to avoid
-	 * re-interpolating them constantly for shading */
-	Transform ob_tfm;
-	Transform ob_itfm;
+  /* object <-> world space transformations, cached to avoid
+   * re-interpolating them constantly for shading */
+  Transform ob_tfm;
+  Transform ob_itfm;
 #endif
 
-	/* ray start position, only set for backgrounds */
-	float3 ray_P;
-	differential3 ray_dP;
+  /* ray start position, only set for backgrounds */
+  float3 ray_P;
+  differential3 ray_dP;
 
 #ifdef __OSL__
-	struct KernelGlobals *osl_globals;
-	struct PathState *osl_path_state;
+  struct KernelGlobals *osl_globals;
+  struct PathState *osl_path_state;
 #endif
 
-	/* LCG state for closures that require additional random numbers. */
-	uint lcg_state;
+  /* LCG state for closures that require additional random numbers. */
+  uint lcg_state;
 
-	/* Closure data, we store a fixed array of closures */
-	int num_closure;
-	int num_closure_left;
-	float randb_closure;
-	float3 svm_closure_weight;
+  /* Closure data, we store a fixed array of closures */
+  int num_closure;
+  int num_closure_left;
+  float randb_closure;
+  float3 svm_closure_weight;
 
-	/* Closure weights summed directly, so we can evaluate
-	 * emission and shadow transparency with MAX_CLOSURE 0. */
-	float3 closure_emission_background;
-	float3 closure_transparent_extinction;
+  /* Closure weights summed directly, so we can evaluate
+   * emission and shadow transparency with MAX_CLOSURE 0. */
+  float3 closure_emission_background;
+  float3 closure_transparent_extinction;
 
-	/* At the end so we can adjust size in ShaderDataTinyStorage. */
-	struct ShaderClosure closure[MAX_CLOSURE];
+  /* At the end so we can adjust size in ShaderDataTinyStorage. */
+  struct ShaderClosure closure[MAX_CLOSURE];
 } ShaderData;
 
 typedef ccl_addr_space struct ShaderDataTinyStorage {
-	char pad[sizeof(ShaderData) - sizeof(ShaderClosure) * MAX_CLOSURE];
+  char pad[sizeof(ShaderData) - sizeof(ShaderClosure) * MAX_CLOSURE];
 } ShaderDataTinyStorage;
-#define AS_SHADER_DATA(shader_data_tiny_storage) ((ShaderData*)shader_data_tiny_storage)
+#define AS_SHADER_DATA(shader_data_tiny_storage) ((ShaderData *)shader_data_tiny_storage)
 
 /* Path State */
 
 #ifdef __VOLUME__
 typedef struct VolumeStack {
-	int object;
-	int shader;
+  int object;
+  int shader;
 } VolumeStack;
 #endif
 
 typedef struct PathState {
-	/* see enum PathRayFlag */
-	int flag;
-
-	/* random number generator state */
-	uint rng_hash;          /* per pixel hash */
-	int rng_offset;         /* dimension offset */
-	int sample;             /* path sample number */
-	int num_samples;        /* total number of times this path will be sampled */
-	float branch_factor;    /* number of branches in indirect paths */
-
-	/* bounce counting */
-	int bounce;
-	int diffuse_bounce;
-	int glossy_bounce;
-	int transmission_bounce;
-	int transparent_bounce;
+  /* see enum PathRayFlag */
+  int flag;
+
+  /* random number generator state */
+  uint rng_hash;       /* per pixel hash */
+  int rng_offset;      /* dimension offset */
+  int sample;          /* path sample number */
+  int num_samples;     /* total number of times this path will be sampled */
+  float branch_factor; /* number of branches in indirect paths */
+
+  /* bounce counting */
+  int bounce;
+  int diffuse_bounce;
+  int glossy_bounce;
+  int transmission_bounce;
+  int transparent_bounce;
 
 #ifdef __DENOISING_FEATURES__
-	float denoising_feature_weight;
-#endif  /* __DENOISING_FEATURES__ */
+  float denoising_feature_weight;
+#endif /* __DENOISING_FEATURES__ */
 
-	/* multiple importance sampling */
-	float min_ray_pdf; /* smallest bounce pdf over entire path up to now */
-	float ray_pdf;     /* last bounce pdf */
+  /* multiple importance sampling */
+  float min_ray_pdf; /* smallest bounce pdf over entire path up to now */
+  float ray_pdf;     /* last bounce pdf */
 #ifdef __LAMP_MIS__
-	float ray_t;       /* accumulated distance through transparent surfaces */
+  float ray_t; /* accumulated distance through transparent surfaces */
 #endif
 
-	/* volume rendering */
+  /* volume rendering */
 #ifdef __VOLUME__
-	int volume_bounce;
-	int volume_bounds_bounce;
-	VolumeStack volume_stack[VOLUME_STACK_SIZE];
+  int volume_bounce;
+  int volume_bounds_bounce;
+  VolumeStack volume_stack[VOLUME_STACK_SIZE];
 #endif
 } PathState;
 
 /* Struct to gather multiple nearby intersections. */
 typedef struct LocalIntersection {
-	Ray ray;
-	float3 weight[LOCAL_MAX_HITS];
+  Ray ray;
+  float3 weight[LOCAL_MAX_HITS];
 
-	int num_hits;
-	struct Intersection hits[LOCAL_MAX_HITS];
-	float3 Ng[LOCAL_MAX_HITS];
+  int num_hits;
+  struct Intersection hits[LOCAL_MAX_HITS];
+  float3 Ng[LOCAL_MAX_HITS];
 } LocalIntersection;
 
 /* Subsurface */
 
 /* Struct to gather SSS indirect rays and delay tracing them. */
 typedef struct SubsurfaceIndirectRays {
-	PathState state[BSSRDF_MAX_HITS];
+  PathState state[BSSRDF_MAX_HITS];
 
-	int num_rays;
+  int num_rays;
 
-	struct Ray rays[BSSRDF_MAX_HITS];
-	float3 throughputs[BSSRDF_MAX_HITS];
-	struct PathRadianceState L_state[BSSRDF_MAX_HITS];
+  struct Ray rays[BSSRDF_MAX_HITS];
+  float3 throughputs[BSSRDF_MAX_HITS];
+  struct PathRadianceState L_state[BSSRDF_MAX_HITS];
 } SubsurfaceIndirectRays;
 static_assert(BSSRDF_MAX_HITS <= LOCAL_MAX_HITS, "BSSRDF hits too high.");
 
@@ -1109,424 +1085,424 @@ static_assert(BSSRDF_MAX_HITS <= LOCAL_MAX_HITS, "BSSRDF hits too high.");
  * do not use float3 because its size may not be the same on all devices. */
 
 typedef struct KernelCamera {
-	/* type */
-	int type;
-
-	/* panorama */
-	int panorama_type;
-	float fisheye_fov;
-	float fisheye_lens;
-	float4 equirectangular_range;
-
-	/* stereo */
-	float interocular_offset;
-	float convergence_distance;
-	float pole_merge_angle_from;
-	float pole_merge_angle_to;
-
-	/* matrices */
-	Transform cameratoworld;
-	ProjectionTransform rastertocamera;
-
-	/* differentials */
-	float4 dx;
-	float4 dy;
-
-	/* depth of field */
-	float aperturesize;
-	float blades;
-	float bladesrotation;
-	float focaldistance;
-
-	/* motion blur */
-	float shuttertime;
-	int num_motion_steps, have_perspective_motion;
-
-	/* clipping */
-	float nearclip;
-	float cliplength;
-
-	/* sensor size */
-	float sensorwidth;
-	float sensorheight;
-
-	/* render size */
-	float width, height;
-	int resolution;
-
-	/* anamorphic lens bokeh */
-	float inv_aperture_ratio;
-
-	int is_inside_volume;
-
-	/* more matrices */
-	ProjectionTransform screentoworld;
-	ProjectionTransform rastertoworld;
-	ProjectionTransform ndctoworld;
-	ProjectionTransform worldtoscreen;
-	ProjectionTransform worldtoraster;
-	ProjectionTransform worldtondc;
-	Transform worldtocamera;
-
-	/* Stores changes in the projeciton matrix. Use for camera zoom motion
-	 * blur and motion pass output for perspective camera. */
-	ProjectionTransform perspective_pre;
-	ProjectionTransform perspective_post;
-
-	/* Transforms for motion pass. */
-	Transform motion_pass_pre;
-	Transform motion_pass_post;
-
-	int shutter_table_offset;
-
-	/* Rolling shutter */
-	int rolling_shutter_type;
-	float rolling_shutter_duration;
-
-	int pad;
+  /* type */
+  int type;
+
+  /* panorama */
+  int panorama_type;
+  float fisheye_fov;
+  float fisheye_lens;
+  float4 equirectangular_range;
+
+  /* stereo */
+  float interocular_offset;
+  float convergence_distance;
+  float pole_merge_angle_from;
+  float pole_merge_angle_to;
+
+  /* matrices */
+  Transform cameratoworld;
+  ProjectionTransform rastertocamera;
+
+  /* differentials */
+  float4 dx;
+  float4 dy;
+
+  /* depth of field */
+  float aperturesize;
+  float blades;
+  float bladesrotation;
+  float focaldistance;
+
+  /* motion blur */
+  float shuttertime;
+  int num_motion_steps, have_perspective_motion;
+
+  /* clipping */
+  float nearclip;
+  float cliplength;
+
+  /* sensor size */
+  float sensorwidth;
+  float sensorheight;
+
+  /* render size */
+  float width, height;
+  int resolution;
+
+  /* anamorphic lens bokeh */
+  float inv_aperture_ratio;
+
+  int is_inside_volume;
+
+  /* more matrices */
+  ProjectionTransform screentoworld;
+  ProjectionTransform rastertoworld;
+  ProjectionTransform ndctoworld;
+  ProjectionTransform worldtoscreen;
+  ProjectionTransform worldtoraster;
+  ProjectionTransform worldtondc;
+  Transform worldtocamera;
+
+  /* Stores changes in the projection matrix. Used for camera zoom motion
+   * blur and motion pass output for perspective camera. */
+  ProjectionTransform perspective_pre;
+  ProjectionTransform perspective_post;
+
+  /* Transforms for motion pass. */
+  Transform motion_pass_pre;
+  Transform motion_pass_post;
+
+  int shutter_table_offset;
+
+  /* Rolling shutter */
+  int rolling_shutter_type;
+  float rolling_shutter_duration;
+
+  int pad;
 } KernelCamera;
 static_assert_align(KernelCamera, 16);
 
 typedef struct KernelFilm {
-	float exposure;
-	int pass_flag;
-	int light_pass_flag;
-	int pass_stride;
-	int use_light_pass;
-
-	int pass_combined;
-	int pass_depth;
-	int pass_normal;
-	int pass_motion;
-
-	int pass_motion_weight;
-	int pass_uv;
-	int pass_object_id;
-	int pass_material_id;
-
-	int pass_diffuse_color;
-	int pass_glossy_color;
-	int pass_transmission_color;
-	int pass_subsurface_color;
-
-	int pass_diffuse_indirect;
-	int pass_glossy_indirect;
-	int pass_transmission_indirect;
-	int pass_subsurface_indirect;
-	int pass_volume_indirect;
-
-	int pass_diffuse_direct;
-	int pass_glossy_direct;
-	int pass_transmission_direct;
-	int pass_subsurface_direct;
-	int pass_volume_direct;
-
-	int pass_emission;
-	int pass_background;
-	int pass_ao;
-	float pass_alpha_threshold;
-
-	int pass_shadow;
-	float pass_shadow_scale;
-	int filter_table_offset;
-	int cryptomatte_passes;
-	int cryptomatte_depth;
-	int pass_cryptomatte;
-
-	int pass_mist;
-	float mist_start;
-	float mist_inv_depth;
-	float mist_falloff;
-
-	int pass_denoising_data;
-	int pass_denoising_clean;
-	int denoising_flags;
-
-	/* XYZ to rendering color space transform. float4 instead of float3 to
-	 * ensure consistent padding/alignment across devices. */
-	float4 xyz_to_r;
-	float4 xyz_to_g;
-	float4 xyz_to_b;
-	float4 rgb_to_y;
+  float exposure;
+  int pass_flag;
+  int light_pass_flag;
+  int pass_stride;
+  int use_light_pass;
+
+  int pass_combined;
+  int pass_depth;
+  int pass_normal;
+  int pass_motion;
+
+  int pass_motion_weight;
+  int pass_uv;
+  int pass_object_id;
+  int pass_material_id;
+
+  int pass_diffuse_color;
+  int pass_glossy_color;
+  int pass_transmission_color;
+  int pass_subsurface_color;
+
+  int pass_diffuse_indirect;
+  int pass_glossy_indirect;
+  int pass_transmission_indirect;
+  int pass_subsurface_indirect;
+  int pass_volume_indirect;
+
+  int pass_diffuse_direct;
+  int pass_glossy_direct;
+  int pass_transmission_direct;
+  int pass_subsurface_direct;
+  int pass_volume_direct;
+
+  int pass_emission;
+  int pass_background;
+  int pass_ao;
+  float pass_alpha_threshold;
+
+  int pass_shadow;
+  float pass_shadow_scale;
+  int filter_table_offset;
+  int cryptomatte_passes;
+  int cryptomatte_depth;
+  int pass_cryptomatte;
+
+  int pass_mist;
+  float mist_start;
+  float mist_inv_depth;
+  float mist_falloff;
+
+  int pass_denoising_data;
+  int pass_denoising_clean;
+  int denoising_flags;
+
+  /* XYZ to rendering color space transform. float4 instead of float3 to
+   * ensure consistent padding/alignment across devices. */
+  float4 xyz_to_r;
+  float4 xyz_to_g;
+  float4 xyz_to_b;
+  float4 rgb_to_y;
 
 #ifdef __KERNEL_DEBUG__
-	int pass_bvh_traversed_nodes;
-	int pass_bvh_traversed_instances;
-	int pass_bvh_intersections;
-	int pass_ray_bounces;
+  int pass_bvh_traversed_nodes;
+  int pass_bvh_traversed_instances;
+  int pass_bvh_intersections;
+  int pass_ray_bounces;
 #endif
 } KernelFilm;
 static_assert_align(KernelFilm, 16);
 
 typedef struct KernelBackground {
-	/* only shader index */
-	int surface_shader;
-	int volume_shader;
-	int transparent;
-	float transparent_roughness_squared_threshold;
-
-	/* ambient occlusion */
-	float ao_factor;
-	float ao_distance;
-	float ao_bounces_factor;
-	float ao_pad;
+  /* only shader index */
+  int surface_shader;
+  int volume_shader;
+  int transparent;
+  float transparent_roughness_squared_threshold;
+
+  /* ambient occlusion */
+  float ao_factor;
+  float ao_distance;
+  float ao_bounces_factor;
+  float ao_pad;
 } KernelBackground;
 static_assert_align(KernelBackground, 16);
 
 typedef struct KernelIntegrator {
-	/* emission */
-	int use_direct_light;
-	int use_ambient_occlusion;
-	int num_distribution;
-	int num_all_lights;
-	float pdf_triangles;
-	float pdf_lights;
-	int pdf_background_res_x;
-	int pdf_background_res_y;
-	float light_inv_rr_threshold;
-
-	/* light portals */
-	float portal_pdf;
-	int num_portals;
-	int portal_offset;
-
-	/* bounces */
-	int max_bounce;
-
-	int max_diffuse_bounce;
-	int max_glossy_bounce;
-	int max_transmission_bounce;
-	int max_volume_bounce;
-
-	int ao_bounces;
-
-	/* transparent */
-	int transparent_max_bounce;
-	int transparent_shadows;
-
-	/* caustics */
-	int caustics_reflective;
-	int caustics_refractive;
-	float filter_glossy;
-
-	/* seed */
-	int seed;
-
-	/* clamp */
-	float sample_clamp_direct;
-	float sample_clamp_indirect;
-
-	/* branched path */
-	int branched;
-	int volume_decoupled;
-	int diffuse_samples;
-	int glossy_samples;
-	int transmission_samples;
-	int ao_samples;
-	int mesh_light_samples;
-	int subsurface_samples;
-	int sample_all_lights_direct;
-	int sample_all_lights_indirect;
-
-	/* mis */
-	int use_lamp_mis;
-
-	/* sampler */
-	int sampling_pattern;
-	int aa_samples;
-
-	/* volume render */
-	int use_volumes;
-	int volume_max_steps;
-	float volume_step_size;
-	int volume_samples;
-
-	int start_sample;
-
-	int max_closures;
-
-	int pad1, pad2, pad3;
+  /* emission */
+  int use_direct_light;
+  int use_ambient_occlusion;
+  int num_distribution;
+  int num_all_lights;
+  float pdf_triangles;
+  float pdf_lights;
+  int pdf_background_res_x;
+  int pdf_background_res_y;
+  float light_inv_rr_threshold;
+
+  /* light portals */
+  float portal_pdf;
+  int num_portals;
+  int portal_offset;
+
+  /* bounces */
+  int max_bounce;
+
+  int max_diffuse_bounce;
+  int max_glossy_bounce;
+  int max_transmission_bounce;
+  int max_volume_bounce;
+
+  int ao_bounces;
+
+  /* transparent */
+  int transparent_max_bounce;
+  int transparent_shadows;
+
+  /* caustics */
+  int caustics_reflective;
+  int caustics_refractive;
+  float filter_glossy;
+
+  /* seed */
+  int seed;
+
+  /* clamp */
+  float sample_clamp_direct;
+  float sample_clamp_indirect;
+
+  /* branched path */
+  int branched;
+  int volume_decoupled;
+  int diffuse_samples;
+  int glossy_samples;
+  int transmission_samples;
+  int ao_samples;
+  int mesh_light_samples;
+  int subsurface_samples;
+  int sample_all_lights_direct;
+  int sample_all_lights_indirect;
+
+  /* mis */
+  int use_lamp_mis;
+
+  /* sampler */
+  int sampling_pattern;
+  int aa_samples;
+
+  /* volume render */
+  int use_volumes;
+  int volume_max_steps;
+  float volume_step_size;
+  int volume_samples;
+
+  int start_sample;
+
+  int max_closures;
+
+  int pad1, pad2, pad3;
 } KernelIntegrator;
 static_assert_align(KernelIntegrator, 16);
 
 typedef enum KernelBVHLayout {
-	BVH_LAYOUT_NONE = 0,
-
-	BVH_LAYOUT_BVH2 = (1 << 0),
-	BVH_LAYOUT_BVH4 = (1 << 1),
-	BVH_LAYOUT_BVH8 = (1 << 2),
-	BVH_LAYOUT_EMBREE = (1 << 3),
-	BVH_LAYOUT_DEFAULT = BVH_LAYOUT_BVH8,
-	BVH_LAYOUT_ALL = (unsigned int)(-1),
+  BVH_LAYOUT_NONE = 0,
+
+  BVH_LAYOUT_BVH2 = (1 << 0),
+  BVH_LAYOUT_BVH4 = (1 << 1),
+  BVH_LAYOUT_BVH8 = (1 << 2),
+  BVH_LAYOUT_EMBREE = (1 << 3),
+  BVH_LAYOUT_DEFAULT = BVH_LAYOUT_BVH8,
+  BVH_LAYOUT_ALL = (unsigned int)(-1),
 } KernelBVHLayout;
 
 typedef struct KernelBVH {
-	/* Own BVH */
-	int root;
-	int have_motion;
-	int have_curves;
-	int have_instancing;
-	int bvh_layout;
-	int use_bvh_steps;
-
-	/* Embree */
+  /* Own BVH */
+  int root;
+  int have_motion;
+  int have_curves;
+  int have_instancing;
+  int bvh_layout;
+  int use_bvh_steps;
+
+  /* Embree */
 #ifdef __EMBREE__
-	RTCScene scene;
+  RTCScene scene;
 #  ifndef __KERNEL_64_BIT__
-	int pad1;
+  int pad1;
 #  endif
 #else
-	int pad1, pad2;
+  int pad1, pad2;
 #endif
 } KernelBVH;
 static_assert_align(KernelBVH, 16);
 
 typedef enum CurveFlag {
-	/* runtime flags */
-	CURVE_KN_BACKFACING = 1,				/* backside of cylinder? */
-	CURVE_KN_ENCLOSEFILTER = 2,				/* don't consider strands surrounding start point? */
-	CURVE_KN_INTERPOLATE = 4,				/* render as a curve? */
-	CURVE_KN_ACCURATE = 8,					/* use accurate intersections test? */
-	CURVE_KN_INTERSECTCORRECTION = 16,		/* correct for width after determing closest midpoint? */
-	CURVE_KN_TRUETANGENTGNORMAL = 32,		/* use tangent normal for geometry? */
-	CURVE_KN_RIBBONS = 64,					/* use flat curve ribbons */
+  /* runtime flags */
+  CURVE_KN_BACKFACING = 1,           /* backside of cylinder? */
+  CURVE_KN_ENCLOSEFILTER = 2,        /* don't consider strands surrounding start point? */
+  CURVE_KN_INTERPOLATE = 4,          /* render as a curve? */
+  CURVE_KN_ACCURATE = 8,             /* use accurate intersections test? */
+  CURVE_KN_INTERSECTCORRECTION = 16, /* correct for width after determining closest midpoint? */
+  CURVE_KN_TRUETANGENTGNORMAL = 32,  /* use tangent normal for geometry? */
+  CURVE_KN_RIBBONS = 64,             /* use flat curve ribbons */
 } CurveFlag;
 
 typedef struct KernelCurves {
-	int curveflags;
-	int subdivisions;
+  int curveflags;
+  int subdivisions;
 
-	float minimum_width;
-	float maximum_width;
+  float minimum_width;
+  float maximum_width;
 } KernelCurves;
 static_assert_align(KernelCurves, 16);
 
 typedef struct KernelTables {
-	int beckmann_offset;
-	int pad1, pad2, pad3;
+  int beckmann_offset;
+  int pad1, pad2, pad3;
 } KernelTables;
 static_assert_align(KernelTables, 16);
 
 typedef struct KernelData {
-	KernelCamera cam;
-	KernelFilm film;
-	KernelBackground background;
-	KernelIntegrator integrator;
-	KernelBVH bvh;
-	KernelCurves curve;
-	KernelTables tables;
+  KernelCamera cam;
+  KernelFilm film;
+  KernelBackground background;
+  KernelIntegrator integrator;
+  KernelBVH bvh;
+  KernelCurves curve;
+  KernelTables tables;
 } KernelData;
 static_assert_align(KernelData, 16);
 
 /* Kernel data structures. */
 
 typedef struct KernelObject {
-	Transform tfm;
-	Transform itfm;
+  Transform tfm;
+  Transform itfm;
 
-	float surface_area;
-	float pass_id;
-	float random_number;
-	int particle_index;
+  float surface_area;
+  float pass_id;
+  float random_number;
+  int particle_index;
 
-	float dupli_generated[3];
-	float dupli_uv[2];
+  float dupli_generated[3];
+  float dupli_uv[2];
 
-	int numkeys;
-	int numsteps;
-	int numverts;
+  int numkeys;
+  int numsteps;
+  int numverts;
 
-	uint patch_map_offset;
-	uint attribute_map_offset;
-	uint motion_offset;
-	uint pad1;
+  uint patch_map_offset;
+  uint attribute_map_offset;
+  uint motion_offset;
+  uint pad1;
 
-	float cryptomatte_object;
-	float cryptomatte_asset;
-	float pad2, pad3;
+  float cryptomatte_object;
+  float cryptomatte_asset;
+  float pad2, pad3;
 } KernelObject;
 static_assert_align(KernelObject, 16);
 
 typedef struct KernelSpotLight {
-	float radius;
-	float invarea;
-	float spot_angle;
-	float spot_smooth;
-	float dir[3];
-	float pad;
+  float radius;
+  float invarea;
+  float spot_angle;
+  float spot_smooth;
+  float dir[3];
+  float pad;
 } KernelSpotLight;
 
 /* PointLight is SpotLight with only radius and invarea being used. */
 
 typedef struct KernelAreaLight {
-	float axisu[3];
-	float invarea;
-	float axisv[3];
-	float pad1;
-	float dir[3];
-	float pad2;
+  float axisu[3];
+  float invarea;
+  float axisv[3];
+  float pad1;
+  float dir[3];
+  float pad2;
 } KernelAreaLight;
 
 typedef struct KernelDistantLight {
-	float radius;
-	float cosangle;
-	float invarea;
-	float pad;
+  float radius;
+  float cosangle;
+  float invarea;
+  float pad;
 } KernelDistantLight;
 
 typedef struct KernelLight {
-	int type;
-	float co[3];
-	int shader_id;
-	int samples;
-	float max_bounces;
-	float random;
-	Transform tfm;
-	Transform itfm;
-	union {
-		KernelSpotLight spot;
-		KernelAreaLight area;
-		KernelDistantLight distant;
-	};
+  int type;
+  float co[3];
+  int shader_id;
+  int samples;
+  float max_bounces;
+  float random;
+  Transform tfm;
+  Transform itfm;
+  union {
+    KernelSpotLight spot;
+    KernelAreaLight area;
+    KernelDistantLight distant;
+  };
 } KernelLight;
 static_assert_align(KernelLight, 16);
 
 typedef struct KernelLightDistribution {
-	float totarea;
-	int prim;
-	union {
-		struct {
-			int shader_flag;
-			int object_id;
-		} mesh_light;
-		struct {
-			float pad;
-			float size;
-		} lamp;
-	};
+  float totarea;
+  int prim;
+  union {
+    struct {
+      int shader_flag;
+      int object_id;
+    } mesh_light;
+    struct {
+      float pad;
+      float size;
+    } lamp;
+  };
 } KernelLightDistribution;
 static_assert_align(KernelLightDistribution, 16);
 
 typedef struct KernelParticle {
-	int index;
-	float age;
-	float lifetime;
-	float size;
-	float4 rotation;
-	/* Only xyz are used of the following. float4 instead of float3 are used
-	 * to ensure consistent padding/alignment across devices. */
-	float4 location;
-	float4 velocity;
-	float4 angular_velocity;
+  int index;
+  float age;
+  float lifetime;
+  float size;
+  float4 rotation;
+  /* Only xyz are used of the following. float4 instead of float3 are used
+   * to ensure consistent padding/alignment across devices. */
+  float4 location;
+  float4 velocity;
+  float4 angular_velocity;
 } KernelParticle;
 static_assert_align(KernelParticle, 16);
 
 typedef struct KernelShader {
-	float constant_emission[3];
-	float cryptomatte_id;
-	int flags;
-	int pass_id;
-	int pad2, pad3;
+  float constant_emission[3];
+  float cryptomatte_id;
+  int flags;
+  int pass_id;
+  int pad2, pad3;
 } KernelShader;
 static_assert_align(KernelShader, 16);
 
@@ -1545,88 +1521,93 @@ static_assert_align(KernelShader, 16);
 
 /* Queue names */
 enum QueueNumber {
-	/* All active rays and regenerated rays are enqueued here. */
-	QUEUE_ACTIVE_AND_REGENERATED_RAYS = 0,
-
-	/* All
-	 * 1. Background-hit rays,
-	 * 2. Rays that has exited path-iteration but needs to update output buffer
-	 * 3. Rays to be regenerated
-	 * are enqueued here.
-	 */
-	QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
-
-	/* All rays for which a shadow ray should be cast to determine radiance
-	 * contribution for AO are enqueued here.
-	 */
-	QUEUE_SHADOW_RAY_CAST_AO_RAYS,
-
-	/* All rays for which a shadow ray should be cast to determine radiance
-	 * contributing for direct lighting are enqueued here.
-	 */
-	QUEUE_SHADOW_RAY_CAST_DL_RAYS,
-
-	/* Rays sorted according to shader->id */
-	QUEUE_SHADER_SORTED_RAYS,
+  /* All active rays and regenerated rays are enqueued here. */
+  QUEUE_ACTIVE_AND_REGENERATED_RAYS = 0,
+
+  /* All
+   * 1. Background-hit rays,
+   * 2. Rays that have exited path-iteration but need to update output buffer
+   * 3. Rays to be regenerated
+   * are enqueued here.
+   */
+  QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
+
+  /* All rays for which a shadow ray should be cast to determine radiance
+   * contribution for AO are enqueued here.
+   */
+  QUEUE_SHADOW_RAY_CAST_AO_RAYS,
+
+  /* All rays for which a shadow ray should be cast to determine radiance
+   * contributing for direct lighting are enqueued here.
+   */
+  QUEUE_SHADOW_RAY_CAST_DL_RAYS,
+
+  /* Rays sorted according to shader->id */
+  QUEUE_SHADER_SORTED_RAYS,
 
 #ifdef __BRANCHED_PATH__
-	/* All rays moving to next iteration of the indirect loop for light */
-	QUEUE_LIGHT_INDIRECT_ITER,
-	/* Queue of all inactive rays. These are candidates for sharing work of indirect loops */
-	QUEUE_INACTIVE_RAYS,
+  /* All rays moving to next iteration of the indirect loop for light */
+  QUEUE_LIGHT_INDIRECT_ITER,
+  /* Queue of all inactive rays. These are candidates for sharing work of indirect loops */
+  QUEUE_INACTIVE_RAYS,
 #  ifdef __VOLUME__
-	/* All rays moving to next iteration of the indirect loop for volumes */
-	QUEUE_VOLUME_INDIRECT_ITER,
+  /* All rays moving to next iteration of the indirect loop for volumes */
+  QUEUE_VOLUME_INDIRECT_ITER,
 #  endif
 #  ifdef __SUBSURFACE__
-	/* All rays moving to next iteration of the indirect loop for subsurface */
-	QUEUE_SUBSURFACE_INDIRECT_ITER,
+  /* All rays moving to next iteration of the indirect loop for subsurface */
+  QUEUE_SUBSURFACE_INDIRECT_ITER,
 #  endif
-#endif  /* __BRANCHED_PATH__ */
+#endif /* __BRANCHED_PATH__ */
 
-	NUM_QUEUES
+  NUM_QUEUES
 };
 
 /* We use RAY_STATE_MASK to get ray_state */
 #define RAY_STATE_MASK 0x0F
 #define RAY_FLAG_MASK 0xF0
 enum RayState {
-	RAY_INVALID = 0,
-	/* Denotes ray is actively involved in path-iteration. */
-	RAY_ACTIVE,
-	/* Denotes ray has completed processing all samples and is inactive. */
-	RAY_INACTIVE,
-	/* Denotes ray has exited path-iteration and needs to update output buffer. */
-	RAY_UPDATE_BUFFER,
-	/* Denotes ray needs to skip most surface shader work. */
-	RAY_HAS_ONLY_VOLUME,
-	/* Donotes ray has hit background */
-	RAY_HIT_BACKGROUND,
-	/* Denotes ray has to be regenerated */
-	RAY_TO_REGENERATE,
-	/* Denotes ray has been regenerated */
-	RAY_REGENERATED,
-	/* Denotes ray is moving to next iteration of the branched indirect loop */
-	RAY_LIGHT_INDIRECT_NEXT_ITER,
-	RAY_VOLUME_INDIRECT_NEXT_ITER,
-	RAY_SUBSURFACE_INDIRECT_NEXT_ITER,
-
-	/* Ray flags */
-
-	/* Flags to denote that the ray is currently evaluating the branched indirect loop */
-	RAY_BRANCHED_LIGHT_INDIRECT = (1 << 4),
-	RAY_BRANCHED_VOLUME_INDIRECT = (1 << 5),
-	RAY_BRANCHED_SUBSURFACE_INDIRECT = (1 << 6),
-	RAY_BRANCHED_INDIRECT = (RAY_BRANCHED_LIGHT_INDIRECT | RAY_BRANCHED_VOLUME_INDIRECT | RAY_BRANCHED_SUBSURFACE_INDIRECT),
-
-	/* Ray is evaluating an iteration of an indirect loop for another thread */
-	RAY_BRANCHED_INDIRECT_SHARED = (1 << 7),
+  RAY_INVALID = 0,
+  /* Denotes ray is actively involved in path-iteration. */
+  RAY_ACTIVE,
+  /* Denotes ray has completed processing all samples and is inactive. */
+  RAY_INACTIVE,
+  /* Denotes ray has exited path-iteration and needs to update output buffer. */
+  RAY_UPDATE_BUFFER,
+  /* Denotes ray needs to skip most surface shader work. */
+  RAY_HAS_ONLY_VOLUME,
+  /* Denotes ray has hit background */
+  RAY_HIT_BACKGROUND,
+  /* Denotes ray has to be regenerated */
+  RAY_TO_REGENERATE,
+  /* Denotes ray has been regenerated */
+  RAY_REGENERATED,
+  /* Denotes ray is moving to next iteration of the branched indirect loop */
+  RAY_LIGHT_INDIRECT_NEXT_ITER,
+  RAY_VOLUME_INDIRECT_NEXT_ITER,
+  RAY_SUBSURFACE_INDIRECT_NEXT_ITER,
+
+  /* Ray flags */
+
+  /* Flags to denote that the ray is currently evaluating the branched indirect loop */
+  RAY_BRANCHED_LIGHT_INDIRECT = (1 << 4),
+  RAY_BRANCHED_VOLUME_INDIRECT = (1 << 5),
+  RAY_BRANCHED_SUBSURFACE_INDIRECT = (1 << 6),
+  RAY_BRANCHED_INDIRECT = (RAY_BRANCHED_LIGHT_INDIRECT | RAY_BRANCHED_VOLUME_INDIRECT |
+                           RAY_BRANCHED_SUBSURFACE_INDIRECT),
+
+  /* Ray is evaluating an iteration of an indirect loop for another thread */
+  RAY_BRANCHED_INDIRECT_SHARED = (1 << 7),
 };
 
-#define ASSIGN_RAY_STATE(ray_state, ray_index, state) (ray_state[ray_index] = ((ray_state[ray_index] & RAY_FLAG_MASK) | state))
-#define IS_STATE(ray_state, ray_index, state) ((ray_index) != QUEUE_EMPTY_SLOT && ((ray_state)[(ray_index)] & RAY_STATE_MASK) == (state))
-#define ADD_RAY_FLAG(ray_state, ray_index, flag) (ray_state[ray_index] = (ray_state[ray_index] | flag))
-#define REMOVE_RAY_FLAG(ray_state, ray_index, flag) (ray_state[ray_index] = (ray_state[ray_index] & (~flag)))
+#define ASSIGN_RAY_STATE(ray_state, ray_index, state) \
+  (ray_state[ray_index] = ((ray_state[ray_index] & RAY_FLAG_MASK) | state))
+#define IS_STATE(ray_state, ray_index, state) \
+  ((ray_index) != QUEUE_EMPTY_SLOT && ((ray_state)[(ray_index)] & RAY_STATE_MASK) == (state))
+#define ADD_RAY_FLAG(ray_state, ray_index, flag) \
+  (ray_state[ray_index] = (ray_state[ray_index] | flag))
+#define REMOVE_RAY_FLAG(ray_state, ray_index, flag) \
+  (ray_state[ray_index] = (ray_state[ray_index] & (~flag)))
 #define IS_FLAG(ray_state, ray_index, flag) (ray_state[ray_index] & flag)
 
 /* Patches */
@@ -1642,17 +1623,17 @@ enum RayState {
 /* Work Tiles */
 
 typedef struct WorkTile {
-	uint x, y, w, h;
+  uint x, y, w, h;
 
-	uint start_sample;
-	uint num_samples;
+  uint start_sample;
+  uint num_samples;
 
-	uint offset;
-	uint stride;
+  uint offset;
+  uint stride;
 
-	ccl_global float *buffer;
+  ccl_global float *buffer;
 } WorkTile;
 
 CCL_NAMESPACE_END
 
-#endif  /*  __KERNEL_TYPES_H__ */
+#endif /*  __KERNEL_TYPES_H__ */
diff --git a/intern/cycles/kernel/kernel_volume.h b/intern/cycles/kernel/kernel_volume.h
index 44c8f795d2c..e024003252f 100644
--- a/intern/cycles/kernel/kernel_volume.h
+++ b/intern/cycles/kernel/kernel_volume.h
@@ -19,9 +19,9 @@ CCL_NAMESPACE_BEGIN
 /* Events for probalistic scattering */
 
 typedef enum VolumeIntegrateResult {
-	VOLUME_PATH_SCATTERED = 0,
-	VOLUME_PATH_ATTENUATED = 1,
-	VOLUME_PATH_MISSED = 2
+  VOLUME_PATH_SCATTERED = 0,
+  VOLUME_PATH_ATTENUATED = 1,
+  VOLUME_PATH_MISSED = 2
 } VolumeIntegrateResult;
 
 /* Volume shader properties
@@ -30,9 +30,9 @@ typedef enum VolumeIntegrateResult {
  * sigma_t = sigma_a + sigma_s */
 
 typedef struct VolumeShaderCoefficients {
-	float3 sigma_t;
-	float3 sigma_s;
-	float3 emission;
+  float3 sigma_t;
+  float3 sigma_s;
+  float3 emission;
 } VolumeShaderCoefficients;
 
 #ifdef __VOLUME__
@@ -44,16 +44,16 @@ ccl_device_inline bool volume_shader_extinction_sample(KernelGlobals *kg,
                                                        float3 P,
                                                        float3 *extinction)
 {
-	sd->P = P;
-	shader_eval_volume(kg, sd, state, state->volume_stack, PATH_RAY_SHADOW);
-
-	if(sd->flag & SD_EXTINCTION) {
-		*extinction = sd->closure_transparent_extinction;
-		return true;
-	}
-	else {
-		return false;
-	}
+  sd->P = P;
+  shader_eval_volume(kg, sd, state, state->volume_stack, PATH_RAY_SHADOW);
+
+  if (sd->flag & SD_EXTINCTION) {
+    *extinction = sd->closure_transparent_extinction;
+    return true;
+  }
+  else {
+    return false;
+  }
 }
 
 /* evaluate shader to get absorption, scattering and emission at P */
@@ -63,97 +63,97 @@ ccl_device_inline bool volume_shader_sample(KernelGlobals *kg,
                                             float3 P,
                                             VolumeShaderCoefficients *coeff)
 {
-	sd->P = P;
-	shader_eval_volume(kg, sd, state, state->volume_stack, state->flag);
+  sd->P = P;
+  shader_eval_volume(kg, sd, state, state->volume_stack, state->flag);
 
-	if(!(sd->flag & (SD_EXTINCTION|SD_SCATTER|SD_EMISSION)))
-		return false;
+  if (!(sd->flag & (SD_EXTINCTION | SD_SCATTER | SD_EMISSION)))
+    return false;
 
-	coeff->sigma_s = make_float3(0.0f, 0.0f, 0.0f);
-	coeff->sigma_t = (sd->flag & SD_EXTINCTION)? sd->closure_transparent_extinction:
-	                                             make_float3(0.0f, 0.0f, 0.0f);
-	coeff->emission = (sd->flag & SD_EMISSION)? sd->closure_emission_background:
-	                                            make_float3(0.0f, 0.0f, 0.0f);
+  coeff->sigma_s = make_float3(0.0f, 0.0f, 0.0f);
+  coeff->sigma_t = (sd->flag & SD_EXTINCTION) ? sd->closure_transparent_extinction :
+                                                make_float3(0.0f, 0.0f, 0.0f);
+  coeff->emission = (sd->flag & SD_EMISSION) ? sd->closure_emission_background :
+                                               make_float3(0.0f, 0.0f, 0.0f);
 
-	if(sd->flag & SD_SCATTER) {
-		for(int i = 0; i < sd->num_closure; i++) {
-			const ShaderClosure *sc = &sd->closure[i];
+  if (sd->flag & SD_SCATTER) {
+    for (int i = 0; i < sd->num_closure; i++) {
+      const ShaderClosure *sc = &sd->closure[i];
 
-			if(CLOSURE_IS_VOLUME(sc->type))
-				coeff->sigma_s += sc->weight;
-		}
-	}
+      if (CLOSURE_IS_VOLUME(sc->type))
+        coeff->sigma_s += sc->weight;
+    }
+  }
 
-	return true;
+  return true;
 }
 
-#endif  /* __VOLUME__ */
+#endif /* __VOLUME__ */
 
 ccl_device float3 volume_color_transmittance(float3 sigma, float t)
 {
-	return exp3(-sigma * t);
+  return exp3(-sigma * t);
 }
 
 ccl_device float kernel_volume_channel_get(float3 value, int channel)
 {
-	return (channel == 0)? value.x: ((channel == 1)? value.y: value.z);
+  return (channel == 0) ? value.x : ((channel == 1) ? value.y : value.z);
 }
 
 #ifdef __VOLUME__
 
 ccl_device bool volume_stack_is_heterogeneous(KernelGlobals *kg, ccl_addr_space VolumeStack *stack)
 {
-	for(int i = 0; stack[i].shader != SHADER_NONE; i++) {
-		int shader_flag = kernel_tex_fetch(__shaders, (stack[i].shader & SHADER_MASK)).flags;
-
-		if(shader_flag & SD_HETEROGENEOUS_VOLUME) {
-			return true;
-		}
-		else if(shader_flag & SD_NEED_ATTRIBUTES) {
-			/* We want to render world or objects without any volume grids
-			 * as homogenous, but can only verify this at runtime since other
-			 * heterogenous volume objects may be using the same shader. */
-			int object = stack[i].object;
-			if(object != OBJECT_NONE) {
-				int object_flag = kernel_tex_fetch(__object_flag, object);
-				if(object_flag & SD_OBJECT_HAS_VOLUME_ATTRIBUTES) {
-					return true;
-				}
-			}
-		}
-	}
-
-	return false;
+  for (int i = 0; stack[i].shader != SHADER_NONE; i++) {
+    int shader_flag = kernel_tex_fetch(__shaders, (stack[i].shader & SHADER_MASK)).flags;
+
+    if (shader_flag & SD_HETEROGENEOUS_VOLUME) {
+      return true;
+    }
+    else if (shader_flag & SD_NEED_ATTRIBUTES) {
+      /* We want to render world or objects without any volume grids
+       * as homogeneous, but can only verify this at runtime since other
+       * heterogeneous volume objects may be using the same shader. */
+      int object = stack[i].object;
+      if (object != OBJECT_NONE) {
+        int object_flag = kernel_tex_fetch(__object_flag, object);
+        if (object_flag & SD_OBJECT_HAS_VOLUME_ATTRIBUTES) {
+          return true;
+        }
+      }
+    }
+  }
+
+  return false;
 }
 
 ccl_device int volume_stack_sampling_method(KernelGlobals *kg, VolumeStack *stack)
 {
-	if(kernel_data.integrator.num_all_lights == 0)
-		return 0;
+  if (kernel_data.integrator.num_all_lights == 0)
+    return 0;
 
-	int method = -1;
+  int method = -1;
 
-	for(int i = 0; stack[i].shader != SHADER_NONE; i++) {
-		int shader_flag = kernel_tex_fetch(__shaders, (stack[i].shader & SHADER_MASK)).flags;
+  for (int i = 0; stack[i].shader != SHADER_NONE; i++) {
+    int shader_flag = kernel_tex_fetch(__shaders, (stack[i].shader & SHADER_MASK)).flags;
 
-		if(shader_flag & SD_VOLUME_MIS) {
-			return SD_VOLUME_MIS;
-		}
-		else if(shader_flag & SD_VOLUME_EQUIANGULAR) {
-			if(method == 0)
-				return SD_VOLUME_MIS;
+    if (shader_flag & SD_VOLUME_MIS) {
+      return SD_VOLUME_MIS;
+    }
+    else if (shader_flag & SD_VOLUME_EQUIANGULAR) {
+      if (method == 0)
+        return SD_VOLUME_MIS;
 
-			method = SD_VOLUME_EQUIANGULAR;
-		}
-		else {
-			if(method == SD_VOLUME_EQUIANGULAR)
-				return SD_VOLUME_MIS;
+      method = SD_VOLUME_EQUIANGULAR;
+    }
+    else {
+      if (method == SD_VOLUME_EQUIANGULAR)
+        return SD_VOLUME_MIS;
 
-			method = 0;
-		}
-	}
+      method = 0;
+    }
+  }
 
-	return method;
+  return method;
 }
 
 ccl_device_inline void kernel_volume_step_init(KernelGlobals *kg,
@@ -162,16 +162,16 @@ ccl_device_inline void kernel_volume_step_init(KernelGlobals *kg,
                                                float *step_size,
                                                float *step_offset)
 {
-	const int max_steps = kernel_data.integrator.volume_max_steps;
-	float step = min(kernel_data.integrator.volume_step_size, t);
+  const int max_steps = kernel_data.integrator.volume_max_steps;
+  float step = min(kernel_data.integrator.volume_step_size, t);
 
-	/* compute exact steps in advance for malloc */
-	if(t > max_steps * step) {
-		step = t / (float)max_steps;
-	}
+  /* compute exact steps in advance for malloc */
+  if (t > max_steps * step) {
+    step = t / (float)max_steps;
+  }
 
-	*step_size = step;
-	*step_offset = path_state_rng_1D_hash(kg, state, 0x1e31d8a4) * step;
+  *step_size = step;
+  *step_offset = path_state_rng_1D_hash(kg, state, 0x1e31d8a4) * step;
 }
 
 /* Volume Shadows
@@ -187,10 +187,10 @@ ccl_device void kernel_volume_shadow_homogeneous(KernelGlobals *kg,
                                                  ShaderData *sd,
                                                  float3 *throughput)
 {
-	float3 sigma_t;
+  float3 sigma_t;
 
-	if(volume_shader_extinction_sample(kg, sd, state, ray->P, &sigma_t))
-		*throughput *= volume_color_transmittance(sigma_t, ray->t);
+  if (volume_shader_extinction_sample(kg, sd, state, ray->P, &sigma_t))
+    *throughput *= volume_color_transmittance(sigma_t, ray->t);
 }
 
 /* heterogeneous volume: integrate stepping through the volume until we
@@ -201,57 +201,57 @@ ccl_device void kernel_volume_shadow_heterogeneous(KernelGlobals *kg,
                                                    ShaderData *sd,
                                                    float3 *throughput)
 {
-	float3 tp = *throughput;
-	const float tp_eps = 1e-6f; /* todo: this is likely not the right value */
-
-	/* prepare for stepping */
-	int max_steps = kernel_data.integrator.volume_max_steps;
-	float step_offset, step_size;
-	kernel_volume_step_init(kg, state, ray->t, &step_size, &step_offset);
-
-	/* compute extinction at the start */
-	float t = 0.0f;
-
-	float3 sum = make_float3(0.0f, 0.0f, 0.0f);
-
-	for(int i = 0; i < max_steps; i++) {
-		/* advance to new position */
-		float new_t = min(ray->t, (i+1) * step_size);
-
-		/* use random position inside this segment to sample shader, adjust
-		 * for last step that is shorter than other steps. */
-		if(new_t == ray->t) {
-			step_offset *= (new_t - t) / step_size;
-		}
-
-		float3 new_P = ray->P + ray->D * (t + step_offset);
-		float3 sigma_t;
-
-		/* compute attenuation over segment */
-		if(volume_shader_extinction_sample(kg, sd, state, new_P, &sigma_t)) {
-			/* Compute expf() only for every Nth step, to save some calculations
-			 * because exp(a)*exp(b) = exp(a+b), also do a quick tp_eps check then. */
-
-			sum += (-sigma_t * (new_t - t));
-			if((i & 0x07) == 0) { /* ToDo: Other interval? */
-				tp = *throughput * exp3(sum);
-
-				/* stop if nearly all light is blocked */
-				if(tp.x < tp_eps && tp.y < tp_eps && tp.z < tp_eps)
-					break;
-			}
-		}
-
-		/* stop if at the end of the volume */
-		t = new_t;
-		if(t == ray->t) {
-			/* Update throughput in case we haven't done it above */
-			tp = *throughput * exp3(sum);
-			break;
-		}
-	}
-
-	*throughput = tp;
+  float3 tp = *throughput;
+  const float tp_eps = 1e-6f; /* todo: this is likely not the right value */
+
+  /* prepare for stepping */
+  int max_steps = kernel_data.integrator.volume_max_steps;
+  float step_offset, step_size;
+  kernel_volume_step_init(kg, state, ray->t, &step_size, &step_offset);
+
+  /* compute extinction at the start */
+  float t = 0.0f;
+
+  float3 sum = make_float3(0.0f, 0.0f, 0.0f);
+
+  for (int i = 0; i < max_steps; i++) {
+    /* advance to new position */
+    float new_t = min(ray->t, (i + 1) * step_size);
+
+    /* use random position inside this segment to sample shader, adjust
+     * for last step that is shorter than other steps. */
+    if (new_t == ray->t) {
+      step_offset *= (new_t - t) / step_size;
+    }
+
+    float3 new_P = ray->P + ray->D * (t + step_offset);
+    float3 sigma_t;
+
+    /* compute attenuation over segment */
+    if (volume_shader_extinction_sample(kg, sd, state, new_P, &sigma_t)) {
+      /* Compute expf() only for every Nth step, to save some calculations
+       * because exp(a)*exp(b) = exp(a+b), also do a quick tp_eps check then. */
+
+      sum += (-sigma_t * (new_t - t));
+      if ((i & 0x07) == 0) { /* ToDo: Other interval? */
+        tp = *throughput * exp3(sum);
+
+        /* stop if nearly all light is blocked */
+        if (tp.x < tp_eps && tp.y < tp_eps && tp.z < tp_eps)
+          break;
+      }
+    }
+
+    /* stop if at the end of the volume */
+    t = new_t;
+    if (t == ray->t) {
+      /* Update throughput in case we haven't done it above */
+      tp = *throughput * exp3(sum);
+      break;
+    }
+  }
+
+  *throughput = tp;
 }
 
 /* get the volume attenuation over line segment defined by ray, with the
@@ -262,422 +262,433 @@ ccl_device_noinline void kernel_volume_shadow(KernelGlobals *kg,
                                               Ray *ray,
                                               float3 *throughput)
 {
-	shader_setup_from_volume(kg, shadow_sd, ray);
+  shader_setup_from_volume(kg, shadow_sd, ray);
 
-	if(volume_stack_is_heterogeneous(kg, state->volume_stack))
-		kernel_volume_shadow_heterogeneous(kg, state, ray, shadow_sd, throughput);
-	else
-		kernel_volume_shadow_homogeneous(kg, state, ray, shadow_sd, throughput);
+  if (volume_stack_is_heterogeneous(kg, state->volume_stack))
+    kernel_volume_shadow_heterogeneous(kg, state, ray, shadow_sd, throughput);
+  else
+    kernel_volume_shadow_homogeneous(kg, state, ray, shadow_sd, throughput);
 }
 
-#endif  /* __VOLUME__ */
+#endif /* __VOLUME__ */
 
 /* Equi-angular sampling as in:
  * "Importance Sampling Techniques for Path Tracing in Participating Media" */
 
 ccl_device float kernel_volume_equiangular_sample(Ray *ray, float3 light_P, float xi, float *pdf)
 {
-	float t = ray->t;
-
-	float delta = dot((light_P - ray->P) , ray->D);
-	float D = safe_sqrtf(len_squared(light_P - ray->P) - delta * delta);
-	if(UNLIKELY(D == 0.0f)) {
-		*pdf = 0.0f;
-		return 0.0f;
-	}
-	float theta_a = -atan2f(delta, D);
-	float theta_b = atan2f(t - delta, D);
-	float t_ = D * tanf((xi * theta_b) + (1 - xi) * theta_a);
-	if(UNLIKELY(theta_b == theta_a)) {
-		*pdf = 0.0f;
-		return 0.0f;
-	}
-	*pdf = D / ((theta_b - theta_a) * (D * D + t_ * t_));
-
-	return min(t, delta + t_); /* min is only for float precision errors */
+  float t = ray->t;
+
+  float delta = dot((light_P - ray->P), ray->D);
+  float D = safe_sqrtf(len_squared(light_P - ray->P) - delta * delta);
+  if (UNLIKELY(D == 0.0f)) {
+    *pdf = 0.0f;
+    return 0.0f;
+  }
+  float theta_a = -atan2f(delta, D);
+  float theta_b = atan2f(t - delta, D);
+  float t_ = D * tanf((xi * theta_b) + (1 - xi) * theta_a);
+  if (UNLIKELY(theta_b == theta_a)) {
+    *pdf = 0.0f;
+    return 0.0f;
+  }
+  *pdf = D / ((theta_b - theta_a) * (D * D + t_ * t_));
+
+  return min(t, delta + t_); /* min is only for float precision errors */
 }
 
 ccl_device float kernel_volume_equiangular_pdf(Ray *ray, float3 light_P, float sample_t)
 {
-	float delta = dot((light_P - ray->P) , ray->D);
-	float D = safe_sqrtf(len_squared(light_P - ray->P) - delta * delta);
-	if(UNLIKELY(D == 0.0f)) {
-		return 0.0f;
-	}
+  float delta = dot((light_P - ray->P), ray->D);
+  float D = safe_sqrtf(len_squared(light_P - ray->P) - delta * delta);
+  if (UNLIKELY(D == 0.0f)) {
+    return 0.0f;
+  }
 
-	float t = ray->t;
-	float t_ = sample_t - delta;
+  float t = ray->t;
+  float t_ = sample_t - delta;
 
-	float theta_a = -atan2f(delta, D);
-	float theta_b = atan2f(t - delta, D);
-	if(UNLIKELY(theta_b == theta_a)) {
-		return 0.0f;
-	}
+  float theta_a = -atan2f(delta, D);
+  float theta_b = atan2f(t - delta, D);
+  if (UNLIKELY(theta_b == theta_a)) {
+    return 0.0f;
+  }
 
-	float pdf = D / ((theta_b - theta_a) * (D * D + t_ * t_));
+  float pdf = D / ((theta_b - theta_a) * (D * D + t_ * t_));
 
-	return pdf;
+  return pdf;
 }
 
 /* Distance sampling */
 
-ccl_device float kernel_volume_distance_sample(float max_t, float3 sigma_t, int channel, float xi, float3 *transmittance, float3 *pdf)
+ccl_device float kernel_volume_distance_sample(
+    float max_t, float3 sigma_t, int channel, float xi, float3 *transmittance, float3 *pdf)
 {
-	/* xi is [0, 1[ so log(0) should never happen, division by zero is
-	 * avoided because sample_sigma_t > 0 when SD_SCATTER is set */
-	float sample_sigma_t = kernel_volume_channel_get(sigma_t, channel);
-	float3 full_transmittance = volume_color_transmittance(sigma_t, max_t);
-	float sample_transmittance = kernel_volume_channel_get(full_transmittance, channel);
+  /* xi is [0, 1[ so log(0) should never happen, division by zero is
+   * avoided because sample_sigma_t > 0 when SD_SCATTER is set */
+  float sample_sigma_t = kernel_volume_channel_get(sigma_t, channel);
+  float3 full_transmittance = volume_color_transmittance(sigma_t, max_t);
+  float sample_transmittance = kernel_volume_channel_get(full_transmittance, channel);
 
-	float sample_t = min(max_t, -logf(1.0f - xi*(1.0f - sample_transmittance))/sample_sigma_t);
+  float sample_t = min(max_t, -logf(1.0f - xi * (1.0f - sample_transmittance)) / sample_sigma_t);
 
-	*transmittance = volume_color_transmittance(sigma_t, sample_t);
-	*pdf = safe_divide_color(sigma_t * *transmittance, make_float3(1.0f, 1.0f, 1.0f) - full_transmittance);
+  *transmittance = volume_color_transmittance(sigma_t, sample_t);
+  *pdf = safe_divide_color(sigma_t * *transmittance,
+                           make_float3(1.0f, 1.0f, 1.0f) - full_transmittance);
 
-	/* todo: optimization: when taken together with hit/miss decision,
-	 * the full_transmittance cancels out drops out and xi does not
-	 * need to be remapped */
+  /* todo: optimization: when taken together with hit/miss decision,
+   * the full_transmittance cancels out drops out and xi does not
+   * need to be remapped */
 
-	return sample_t;
+  return sample_t;
 }
 
 ccl_device float3 kernel_volume_distance_pdf(float max_t, float3 sigma_t, float sample_t)
 {
-	float3 full_transmittance = volume_color_transmittance(sigma_t, max_t);
-	float3 transmittance = volume_color_transmittance(sigma_t, sample_t);
+  float3 full_transmittance = volume_color_transmittance(sigma_t, max_t);
+  float3 transmittance = volume_color_transmittance(sigma_t, sample_t);
 
-	return safe_divide_color(sigma_t * transmittance, make_float3(1.0f, 1.0f, 1.0f) - full_transmittance);
+  return safe_divide_color(sigma_t * transmittance,
+                           make_float3(1.0f, 1.0f, 1.0f) - full_transmittance);
 }
 
 /* Emission */
 
-ccl_device float3 kernel_volume_emission_integrate(VolumeShaderCoefficients *coeff, int closure_flag, float3 transmittance, float t)
+ccl_device float3 kernel_volume_emission_integrate(VolumeShaderCoefficients *coeff,
+                                                   int closure_flag,
+                                                   float3 transmittance,
+                                                   float t)
 {
-	/* integral E * exp(-sigma_t * t) from 0 to t = E * (1 - exp(-sigma_t * t))/sigma_t
-	 * this goes to E * t as sigma_t goes to zero
-	 *
-	 * todo: we should use an epsilon to avoid precision issues near zero sigma_t */
-	float3 emission = coeff->emission;
-
-	if(closure_flag & SD_EXTINCTION) {
-		float3 sigma_t = coeff->sigma_t;
-
-		emission.x *= (sigma_t.x > 0.0f)? (1.0f - transmittance.x)/sigma_t.x: t;
-		emission.y *= (sigma_t.y > 0.0f)? (1.0f - transmittance.y)/sigma_t.y: t;
-		emission.z *= (sigma_t.z > 0.0f)? (1.0f - transmittance.z)/sigma_t.z: t;
-	}
-	else
-		emission *= t;
-
-	return emission;
+  /* integral E * exp(-sigma_t * t) from 0 to t = E * (1 - exp(-sigma_t * t))/sigma_t
+   * this goes to E * t as sigma_t goes to zero
+   *
+   * todo: we should use an epsilon to avoid precision issues near zero sigma_t */
+  float3 emission = coeff->emission;
+
+  if (closure_flag & SD_EXTINCTION) {
+    float3 sigma_t = coeff->sigma_t;
+
+    emission.x *= (sigma_t.x > 0.0f) ? (1.0f - transmittance.x) / sigma_t.x : t;
+    emission.y *= (sigma_t.y > 0.0f) ? (1.0f - transmittance.y) / sigma_t.y : t;
+    emission.z *= (sigma_t.z > 0.0f) ? (1.0f - transmittance.z) / sigma_t.z : t;
+  }
+  else
+    emission *= t;
+
+  return emission;
 }
 
 /* Volume Path */
 
-ccl_device int kernel_volume_sample_channel(float3 albedo, float3 throughput, float rand, float3 *pdf)
+ccl_device int kernel_volume_sample_channel(float3 albedo,
+                                            float3 throughput,
+                                            float rand,
+                                            float3 *pdf)
 {
-	/* Sample color channel proportional to throughput and single scattering
-	 * albedo, to significantly reduce noise with many bounce, following:
-	 *
-	 * "Practical and Controllable Subsurface Scattering for Production Path
-	 *  Tracing". Matt Jen-Yuan Chiang, Peter Kutz, Brent Burley. SIGGRAPH 2016. */
-	float3 weights = fabs(throughput * albedo);
-	float sum_weights = weights.x + weights.y + weights.z;
-	float3 weights_pdf;
-
-	if(sum_weights > 0.0f) {
-		weights_pdf = weights/sum_weights;
-	}
-	else {
-		weights_pdf = make_float3(1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f);
-	}
-
-	*pdf = weights_pdf;
-
-	/* OpenCL does not support -> on float3, so don't use pdf->x. */
-	if(rand < weights_pdf.x) {
-		return 0;
-	}
-	else if(rand < weights_pdf.x + weights_pdf.y) {
-		return 1;
-	}
-	else {
-		return 2;
-	}
+  /* Sample color channel proportional to throughput and single scattering
+   * albedo, to significantly reduce noise with many bounce, following:
+   *
+   * "Practical and Controllable Subsurface Scattering for Production Path
+   *  Tracing". Matt Jen-Yuan Chiang, Peter Kutz, Brent Burley. SIGGRAPH 2016. */
+  float3 weights = fabs(throughput * albedo);
+  float sum_weights = weights.x + weights.y + weights.z;
+  float3 weights_pdf;
+
+  if (sum_weights > 0.0f) {
+    weights_pdf = weights / sum_weights;
+  }
+  else {
+    weights_pdf = make_float3(1.0f / 3.0f, 1.0f / 3.0f, 1.0f / 3.0f);
+  }
+
+  *pdf = weights_pdf;
+
+  /* OpenCL does not support -> on float3, so don't use pdf->x. */
+  if (rand < weights_pdf.x) {
+    return 0;
+  }
+  else if (rand < weights_pdf.x + weights_pdf.y) {
+    return 1;
+  }
+  else {
+    return 2;
+  }
 }
 
 #ifdef __VOLUME__
 
 /* homogeneous volume: assume shader evaluation at the start gives
  * the volume shading coefficient for the entire line segment */
-ccl_device VolumeIntegrateResult kernel_volume_integrate_homogeneous(
-    KernelGlobals *kg,
-    ccl_addr_space PathState *state,
-    Ray *ray,
-    ShaderData *sd,
-    PathRadiance *L,
-    ccl_addr_space float3 *throughput,
-    bool probalistic_scatter)
+ccl_device VolumeIntegrateResult
+kernel_volume_integrate_homogeneous(KernelGlobals *kg,
+                                    ccl_addr_space PathState *state,
+                                    Ray *ray,
+                                    ShaderData *sd,
+                                    PathRadiance *L,
+                                    ccl_addr_space float3 *throughput,
+                                    bool probalistic_scatter)
 {
-	VolumeShaderCoefficients coeff;
-
-	if(!volume_shader_sample(kg, sd, state, ray->P, &coeff))
-		return VOLUME_PATH_MISSED;
-
-	int closure_flag = sd->flag;
-	float t = ray->t;
-	float3 new_tp;
-
-#ifdef __VOLUME_SCATTER__
-	/* randomly scatter, and if we do t is shortened */
-	if(closure_flag & SD_SCATTER) {
-		/* Sample channel, use MIS with balance heuristic. */
-		float rphase = path_state_rng_1D(kg, state, PRNG_PHASE_CHANNEL);
-		float3 albedo = safe_divide_color(coeff.sigma_s, coeff.sigma_t);
-		float3 channel_pdf;
-		int channel = kernel_volume_sample_channel(albedo, *throughput, rphase, &channel_pdf);
-
-		/* decide if we will hit or miss */
-		bool scatter = true;
-		float xi = path_state_rng_1D(kg, state, PRNG_SCATTER_DISTANCE);
-
-		if(probalistic_scatter) {
-			float sample_sigma_t = kernel_volume_channel_get(coeff.sigma_t, channel);
-			float sample_transmittance = expf(-sample_sigma_t * t);
-
-			if(1.0f - xi >= sample_transmittance) {
-				scatter = true;
-
-				/* rescale random number so we can reuse it */
-				xi = 1.0f - (1.0f - xi - sample_transmittance)/(1.0f - sample_transmittance);
-
-			}
-			else
-				scatter = false;
-		}
-
-		if(scatter) {
-			/* scattering */
-			float3 pdf;
-			float3 transmittance;
-			float sample_t;
-
-			/* distance sampling */
-			sample_t = kernel_volume_distance_sample(ray->t, coeff.sigma_t, channel, xi, &transmittance, &pdf);
-
-			/* modify pdf for hit/miss decision */
-			if(probalistic_scatter)
-				pdf *= make_float3(1.0f, 1.0f, 1.0f) - volume_color_transmittance(coeff.sigma_t, t);
-
-			new_tp = *throughput * coeff.sigma_s * transmittance / dot(channel_pdf, pdf);
-			t = sample_t;
-		}
-		else {
-			/* no scattering */
-			float3 transmittance = volume_color_transmittance(coeff.sigma_t, t);
-			float pdf = dot(channel_pdf, transmittance);
-			new_tp = *throughput * transmittance / pdf;
-		}
-	}
-	else
-#endif
-	if(closure_flag & SD_EXTINCTION) {
-		/* absorption only, no sampling needed */
-		float3 transmittance = volume_color_transmittance(coeff.sigma_t, t);
-		new_tp = *throughput * transmittance;
-	}
-	else {
-		new_tp = *throughput;
-	}
-
-	/* integrate emission attenuated by extinction */
-	if(L && (closure_flag & SD_EMISSION)) {
-		float3 transmittance = volume_color_transmittance(coeff.sigma_t, ray->t);
-		float3 emission = kernel_volume_emission_integrate(&coeff, closure_flag, transmittance, ray->t);
-		path_radiance_accum_emission(L, state, *throughput, emission);
-	}
-
-	/* modify throughput */
-	if(closure_flag & SD_EXTINCTION) {
-		*throughput = new_tp;
-
-		/* prepare to scatter to new direction */
-		if(t < ray->t) {
-			/* adjust throughput and move to new location */
-			sd->P = ray->P + t*ray->D;
-
-			return VOLUME_PATH_SCATTERED;
-		}
-	}
-
-	return VOLUME_PATH_ATTENUATED;
+  VolumeShaderCoefficients coeff;
+
+  if (!volume_shader_sample(kg, sd, state, ray->P, &coeff))
+    return VOLUME_PATH_MISSED;
+
+  int closure_flag = sd->flag;
+  float t = ray->t;
+  float3 new_tp;
+
+#  ifdef __VOLUME_SCATTER__
+  /* randomly scatter, and if we do t is shortened */
+  if (closure_flag & SD_SCATTER) {
+    /* Sample channel, use MIS with balance heuristic. */
+    float rphase = path_state_rng_1D(kg, state, PRNG_PHASE_CHANNEL);
+    float3 albedo = safe_divide_color(coeff.sigma_s, coeff.sigma_t);
+    float3 channel_pdf;
+    int channel = kernel_volume_sample_channel(albedo, *throughput, rphase, &channel_pdf);
+
+    /* decide if we will hit or miss */
+    bool scatter = true;
+    float xi = path_state_rng_1D(kg, state, PRNG_SCATTER_DISTANCE);
+
+    if (probalistic_scatter) {
+      float sample_sigma_t = kernel_volume_channel_get(coeff.sigma_t, channel);
+      float sample_transmittance = expf(-sample_sigma_t * t);
+
+      if (1.0f - xi >= sample_transmittance) {
+        scatter = true;
+
+        /* rescale random number so we can reuse it */
+        xi = 1.0f - (1.0f - xi - sample_transmittance) / (1.0f - sample_transmittance);
+      }
+      else
+        scatter = false;
+    }
+
+    if (scatter) {
+      /* scattering */
+      float3 pdf;
+      float3 transmittance;
+      float sample_t;
+
+      /* distance sampling */
+      sample_t = kernel_volume_distance_sample(
+          ray->t, coeff.sigma_t, channel, xi, &transmittance, &pdf);
+
+      /* modify pdf for hit/miss decision */
+      if (probalistic_scatter)
+        pdf *= make_float3(1.0f, 1.0f, 1.0f) - volume_color_transmittance(coeff.sigma_t, t);
+
+      new_tp = *throughput * coeff.sigma_s * transmittance / dot(channel_pdf, pdf);
+      t = sample_t;
+    }
+    else {
+      /* no scattering */
+      float3 transmittance = volume_color_transmittance(coeff.sigma_t, t);
+      float pdf = dot(channel_pdf, transmittance);
+      new_tp = *throughput * transmittance / pdf;
+    }
+  }
+  else
+#  endif
+      if (closure_flag & SD_EXTINCTION) {
+    /* absorption only, no sampling needed */
+    float3 transmittance = volume_color_transmittance(coeff.sigma_t, t);
+    new_tp = *throughput * transmittance;
+  }
+  else {
+    new_tp = *throughput;
+  }
+
+  /* integrate emission attenuated by extinction */
+  if (L && (closure_flag & SD_EMISSION)) {
+    float3 transmittance = volume_color_transmittance(coeff.sigma_t, ray->t);
+    float3 emission = kernel_volume_emission_integrate(
+        &coeff, closure_flag, transmittance, ray->t);
+    path_radiance_accum_emission(L, state, *throughput, emission);
+  }
+
+  /* modify throughput */
+  if (closure_flag & SD_EXTINCTION) {
+    *throughput = new_tp;
+
+    /* prepare to scatter to new direction */
+    if (t < ray->t) {
+      /* adjust throughput and move to new location */
+      sd->P = ray->P + t * ray->D;
+
+      return VOLUME_PATH_SCATTERED;
+    }
+  }
+
+  return VOLUME_PATH_ATTENUATED;
 }
 
 /* heterogeneous volume distance sampling: integrate stepping through the
  * volume until we reach the end, get absorbed entirely, or run out of
  * iterations. this does probabilistically scatter or get transmitted through
  * for path tracing where we don't want to branch. */
-ccl_device VolumeIntegrateResult kernel_volume_integrate_heterogeneous_distance(
-    KernelGlobals *kg,
-    ccl_addr_space PathState *state,
-    Ray *ray,
-    ShaderData *sd,
-    PathRadiance *L,
-    ccl_addr_space float3 *throughput)
+ccl_device VolumeIntegrateResult
+kernel_volume_integrate_heterogeneous_distance(KernelGlobals *kg,
+                                               ccl_addr_space PathState *state,
+                                               Ray *ray,
+                                               ShaderData *sd,
+                                               PathRadiance *L,
+                                               ccl_addr_space float3 *throughput)
 {
-	float3 tp = *throughput;
-	const float tp_eps = 1e-6f; /* todo: this is likely not the right value */
-
-	/* prepare for stepping */
-	int max_steps = kernel_data.integrator.volume_max_steps;
-	float step_offset, step_size;
-	kernel_volume_step_init(kg, state, ray->t, &step_size, &step_offset);
-
-	/* compute coefficients at the start */
-	float t = 0.0f;
-	float3 accum_transmittance = make_float3(1.0f, 1.0f, 1.0f);
-
-	/* pick random color channel, we use the Veach one-sample
-	 * model with balance heuristic for the channels */
-	float xi = path_state_rng_1D(kg, state, PRNG_SCATTER_DISTANCE);
-	float rphase = path_state_rng_1D(kg, state, PRNG_PHASE_CHANNEL);
-	bool has_scatter = false;
-
-	for(int i = 0; i < max_steps; i++) {
-		/* advance to new position */
-		float new_t = min(ray->t, (i+1) * step_size);
-		float dt = new_t - t;
-
-		/* use random position inside this segment to sample shader,
-		* for last shorter step we remap it to fit within the segment. */
-		if(new_t == ray->t) {
-			step_offset *= (new_t - t) / step_size;
-		}
-
-		float3 new_P = ray->P + ray->D * (t + step_offset);
-		VolumeShaderCoefficients coeff;
-
-		/* compute segment */
-		if(volume_shader_sample(kg, sd, state, new_P, &coeff)) {
-			int closure_flag = sd->flag;
-			float3 new_tp;
-			float3 transmittance;
-			bool scatter = false;
-
-			/* distance sampling */
-#ifdef __VOLUME_SCATTER__
-			if((closure_flag & SD_SCATTER) || (has_scatter && (closure_flag & SD_EXTINCTION))) {
-				has_scatter = true;
-
-				/* Sample channel, use MIS with balance heuristic. */
-				float3 albedo = safe_divide_color(coeff.sigma_s, coeff.sigma_t);
-				float3 channel_pdf;
-				int channel = kernel_volume_sample_channel(albedo, tp, rphase, &channel_pdf);
-
-				/* compute transmittance over full step */
-				transmittance = volume_color_transmittance(coeff.sigma_t, dt);
-
-				/* decide if we will scatter or continue */
-				float sample_transmittance = kernel_volume_channel_get(transmittance, channel);
-
-				if(1.0f - xi >= sample_transmittance) {
-					/* compute sampling distance */
-					float sample_sigma_t = kernel_volume_channel_get(coeff.sigma_t, channel);
-					float new_dt = -logf(1.0f - xi)/sample_sigma_t;
-					new_t = t + new_dt;
-
-					/* transmittance and pdf */
-					float3 new_transmittance = volume_color_transmittance(coeff.sigma_t, new_dt);
-					float3 pdf = coeff.sigma_t * new_transmittance;
-
-					/* throughput */
-					new_tp = tp * coeff.sigma_s * new_transmittance / dot(channel_pdf, pdf);
-					scatter = true;
-				}
-				else {
-					/* throughput */
-					float pdf = dot(channel_pdf, transmittance);
-					new_tp = tp * transmittance / pdf;
-
-					/* remap xi so we can reuse it and keep thing stratified */
-					xi = 1.0f - (1.0f - xi)/sample_transmittance;
-				}
-			}
-			else
-#endif
-			if(closure_flag & SD_EXTINCTION) {
-				/* absorption only, no sampling needed */
-				transmittance = volume_color_transmittance(coeff.sigma_t, dt);
-				new_tp = tp * transmittance;
-			}
-			else {
-				new_tp = tp;
-			}
-
-			/* integrate emission attenuated by absorption */
-			if(L && (closure_flag & SD_EMISSION)) {
-				float3 emission = kernel_volume_emission_integrate(&coeff, closure_flag, transmittance, dt);
-				path_radiance_accum_emission(L, state, tp, emission);
-			}
-
-			/* modify throughput */
-			if(closure_flag & SD_EXTINCTION) {
-				tp = new_tp;
-
-				/* stop if nearly all light blocked */
-				if(tp.x < tp_eps && tp.y < tp_eps && tp.z < tp_eps) {
-					tp = make_float3(0.0f, 0.0f, 0.0f);
-					break;
-				}
-			}
-
-			/* prepare to scatter to new direction */
-			if(scatter) {
-				/* adjust throughput and move to new location */
-				sd->P = ray->P + new_t*ray->D;
-				*throughput = tp;
-
-				return VOLUME_PATH_SCATTERED;
-			}
-			else {
-				/* accumulate transmittance */
-				accum_transmittance *= transmittance;
-			}
-		}
-
-		/* stop if at the end of the volume */
-		t = new_t;
-		if(t == ray->t)
-			break;
-	}
-
-	*throughput = tp;
-
-	return VOLUME_PATH_ATTENUATED;
+  float3 tp = *throughput;
+  const float tp_eps = 1e-6f; /* todo: this is likely not the right value */
+
+  /* prepare for stepping */
+  int max_steps = kernel_data.integrator.volume_max_steps;
+  float step_offset, step_size;
+  kernel_volume_step_init(kg, state, ray->t, &step_size, &step_offset);
+
+  /* compute coefficients at the start */
+  float t = 0.0f;
+  float3 accum_transmittance = make_float3(1.0f, 1.0f, 1.0f);
+
+  /* pick random color channel, we use the Veach one-sample
+   * model with balance heuristic for the channels */
+  float xi = path_state_rng_1D(kg, state, PRNG_SCATTER_DISTANCE);
+  float rphase = path_state_rng_1D(kg, state, PRNG_PHASE_CHANNEL);
+  bool has_scatter = false;
+
+  for (int i = 0; i < max_steps; i++) {
+    /* advance to new position */
+    float new_t = min(ray->t, (i + 1) * step_size);
+    float dt = new_t - t;
+
+    /* use random position inside this segment to sample shader,
+    * for last shorter step we remap it to fit within the segment. */
+    if (new_t == ray->t) {
+      step_offset *= (new_t - t) / step_size;
+    }
+
+    float3 new_P = ray->P + ray->D * (t + step_offset);
+    VolumeShaderCoefficients coeff;
+
+    /* compute segment */
+    if (volume_shader_sample(kg, sd, state, new_P, &coeff)) {
+      int closure_flag = sd->flag;
+      float3 new_tp;
+      float3 transmittance;
+      bool scatter = false;
+
+      /* distance sampling */
+#  ifdef __VOLUME_SCATTER__
+      if ((closure_flag & SD_SCATTER) || (has_scatter && (closure_flag & SD_EXTINCTION))) {
+        has_scatter = true;
+
+        /* Sample channel, use MIS with balance heuristic. */
+        float3 albedo = safe_divide_color(coeff.sigma_s, coeff.sigma_t);
+        float3 channel_pdf;
+        int channel = kernel_volume_sample_channel(albedo, tp, rphase, &channel_pdf);
+
+        /* compute transmittance over full step */
+        transmittance = volume_color_transmittance(coeff.sigma_t, dt);
+
+        /* decide if we will scatter or continue */
+        float sample_transmittance = kernel_volume_channel_get(transmittance, channel);
+
+        if (1.0f - xi >= sample_transmittance) {
+          /* compute sampling distance */
+          float sample_sigma_t = kernel_volume_channel_get(coeff.sigma_t, channel);
+          float new_dt = -logf(1.0f - xi) / sample_sigma_t;
+          new_t = t + new_dt;
+
+          /* transmittance and pdf */
+          float3 new_transmittance = volume_color_transmittance(coeff.sigma_t, new_dt);
+          float3 pdf = coeff.sigma_t * new_transmittance;
+
+          /* throughput */
+          new_tp = tp * coeff.sigma_s * new_transmittance / dot(channel_pdf, pdf);
+          scatter = true;
+        }
+        else {
+          /* throughput */
+          float pdf = dot(channel_pdf, transmittance);
+          new_tp = tp * transmittance / pdf;
+
+          /* remap xi so we can reuse it and keep thing stratified */
+          xi = 1.0f - (1.0f - xi) / sample_transmittance;
+        }
+      }
+      else
+#  endif
+          if (closure_flag & SD_EXTINCTION) {
+        /* absorption only, no sampling needed */
+        transmittance = volume_color_transmittance(coeff.sigma_t, dt);
+        new_tp = tp * transmittance;
+      }
+      else {
+        new_tp = tp;
+      }
+
+      /* integrate emission attenuated by absorption */
+      if (L && (closure_flag & SD_EMISSION)) {
+        float3 emission = kernel_volume_emission_integrate(
+            &coeff, closure_flag, transmittance, dt);
+        path_radiance_accum_emission(L, state, tp, emission);
+      }
+
+      /* modify throughput */
+      if (closure_flag & SD_EXTINCTION) {
+        tp = new_tp;
+
+        /* stop if nearly all light blocked */
+        if (tp.x < tp_eps && tp.y < tp_eps && tp.z < tp_eps) {
+          tp = make_float3(0.0f, 0.0f, 0.0f);
+          break;
+        }
+      }
+
+      /* prepare to scatter to new direction */
+      if (scatter) {
+        /* adjust throughput and move to new location */
+        sd->P = ray->P + new_t * ray->D;
+        *throughput = tp;
+
+        return VOLUME_PATH_SCATTERED;
+      }
+      else {
+        /* accumulate transmittance */
+        accum_transmittance *= transmittance;
+      }
+    }
+
+    /* stop if at the end of the volume */
+    t = new_t;
+    if (t == ray->t)
+      break;
+  }
+
+  *throughput = tp;
+
+  return VOLUME_PATH_ATTENUATED;
 }
 
 /* get the volume attenuation and emission over line segment defined by
  * ray, with the assumption that there are no surfaces blocking light
  * between the endpoints. distance sampling is used to decide if we will
  * scatter or not. */
-ccl_device_noinline VolumeIntegrateResult kernel_volume_integrate(
-    KernelGlobals *kg,
-    ccl_addr_space PathState *state,
-    ShaderData *sd,
-    Ray *ray,
-    PathRadiance *L,
-    ccl_addr_space float3 *throughput,
-    bool heterogeneous)
+ccl_device_noinline VolumeIntegrateResult
+kernel_volume_integrate(KernelGlobals *kg,
+                        ccl_addr_space PathState *state,
+                        ShaderData *sd,
+                        Ray *ray,
+                        PathRadiance *L,
+                        ccl_addr_space float3 *throughput,
+                        bool heterogeneous)
 {
-	shader_setup_from_volume(kg, sd, ray);
+  shader_setup_from_volume(kg, sd, ray);
 
-	if(heterogeneous)
-		return kernel_volume_integrate_heterogeneous_distance(kg, state, ray, sd, L, throughput);
-	else
-		return kernel_volume_integrate_homogeneous(kg, state, ray, sd, L, throughput, true);
+  if (heterogeneous)
+    return kernel_volume_integrate_heterogeneous_distance(kg, state, ray, sd, L, throughput);
+  else
+    return kernel_volume_integrate_homogeneous(kg, state, ray, sd, L, throughput, true);
 }
 
-#ifndef __SPLIT_KERNEL__
+#  ifndef __SPLIT_KERNEL__
 /* Decoupled Volume Sampling
  *
  * VolumeSegment is list of coefficients and transmittance stored at all steps
@@ -689,26 +700,26 @@ ccl_device_noinline VolumeIntegrateResult kernel_volume_integrate(
  * no support for malloc/free and too much stack usage with a fix size array. */
 
 typedef struct VolumeStep {
-	float3 sigma_s;				/* scatter coefficient */
-	float3 sigma_t;				/* extinction coefficient */
-	float3 accum_transmittance;	/* accumulated transmittance including this step */
-	float3 cdf_distance;		/* cumulative density function for distance sampling */
-	float t;					/* distance at end of this step */
-	float shade_t;				/* jittered distance where shading was done in step */
-	int closure_flag;			/* shader evaluation closure flags */
+  float3 sigma_s;             /* scatter coefficient */
+  float3 sigma_t;             /* extinction coefficient */
+  float3 accum_transmittance; /* accumulated transmittance including this step */
+  float3 cdf_distance;        /* cumulative density function for distance sampling */
+  float t;                    /* distance at end of this step */
+  float shade_t;              /* jittered distance where shading was done in step */
+  int closure_flag;           /* shader evaluation closure flags */
 } VolumeStep;
 
 typedef struct VolumeSegment {
-	VolumeStep stack_step;      /* stack storage for homogeneous step, to avoid malloc */
-	VolumeStep *steps;			/* recorded steps */
-	int numsteps;				/* number of steps */
-	int closure_flag;			/* accumulated closure flags from all steps */
+  VolumeStep stack_step; /* stack storage for homogeneous step, to avoid malloc */
+  VolumeStep *steps;     /* recorded steps */
+  int numsteps;          /* number of steps */
+  int closure_flag;      /* accumulated closure flags from all steps */
 
-	float3 accum_emission;		/* accumulated emission at end of segment */
-	float3 accum_transmittance;	/* accumulated transmittance at end of segment */
-	float3 accum_albedo;        /* accumulated average albedo over segment */
+  float3 accum_emission;      /* accumulated emission at end of segment */
+  float3 accum_transmittance; /* accumulated transmittance at end of segment */
+  float3 accum_albedo;        /* accumulated average albedo over segment */
 
-	int sampling_method;		/* volume sampling method */
+  int sampling_method; /* volume sampling method */
 } VolumeSegment;
 
 /* record volume steps to the end of the volume.
@@ -717,400 +728,412 @@ typedef struct VolumeSegment {
  * but the entire segment is needed to do always scattering, rather than probabilistically
  * hitting or missing the volume. if we don't know the transmittance at the end of the
  * volume we can't generate stratified distance samples up to that transmittance */
-#ifdef __VOLUME_DECOUPLED__
-ccl_device void kernel_volume_decoupled_record(KernelGlobals *kg, PathState *state,
-	Ray *ray, ShaderData *sd, VolumeSegment *segment, bool heterogeneous)
+#    ifdef __VOLUME_DECOUPLED__
+ccl_device void kernel_volume_decoupled_record(KernelGlobals *kg,
+                                               PathState *state,
+                                               Ray *ray,
+                                               ShaderData *sd,
+                                               VolumeSegment *segment,
+                                               bool heterogeneous)
 {
-	const float tp_eps = 1e-6f; /* todo: this is likely not the right value */
-
-	/* prepare for volume stepping */
-	int max_steps;
-	float step_size, step_offset;
-
-	if(heterogeneous) {
-		max_steps = kernel_data.integrator.volume_max_steps;
-		kernel_volume_step_init(kg, state, ray->t, &step_size, &step_offset);
-
-#ifdef __KERNEL_CPU__
-		/* NOTE: For the branched path tracing it's possible to have direct
-		 * and indirect light integration both having volume segments allocated.
-		 * We detect this using index in the pre-allocated memory. Currently we
-		 * only support two segments allocated at a time, if more needed some
-		 * modifications to the KernelGlobals will be needed.
-		 *
-		 * This gives us restrictions that decoupled record should only happen
-		 * in the stack manner, meaning if there's subsequent call of decoupled
-		 * record it'll need to free memory before it's caller frees memory.
-		 */
-		const int index = kg->decoupled_volume_steps_index;
-		assert(index < sizeof(kg->decoupled_volume_steps) /
-		               sizeof(*kg->decoupled_volume_steps));
-		if(kg->decoupled_volume_steps[index] == NULL) {
-			kg->decoupled_volume_steps[index] =
-			        (VolumeStep*)malloc(sizeof(VolumeStep)*max_steps);
-		}
-		segment->steps = kg->decoupled_volume_steps[index];
-		++kg->decoupled_volume_steps_index;
-#else
-		segment->steps = (VolumeStep*)malloc(sizeof(VolumeStep)*max_steps);
-#endif
-	}
-	else {
-		max_steps = 1;
-		step_size = ray->t;
-		step_offset = 0.0f;
-		segment->steps = &segment->stack_step;
-	}
-
-	/* init accumulation variables */
-	float3 accum_emission = make_float3(0.0f, 0.0f, 0.0f);
-	float3 accum_transmittance = make_float3(1.0f, 1.0f, 1.0f);
-	float3 accum_albedo = make_float3(0.0f, 0.0f, 0.0f);
-	float3 cdf_distance = make_float3(0.0f, 0.0f, 0.0f);
-	float t = 0.0f;
-
-	segment->numsteps = 0;
-	segment->closure_flag = 0;
-	bool is_last_step_empty = false;
-
-	VolumeStep *step = segment->steps;
-
-	for(int i = 0; i < max_steps; i++, step++) {
-		/* advance to new position */
-		float new_t = min(ray->t, (i+1) * step_size);
-		float dt = new_t - t;
-
-		/* use random position inside this segment to sample shader,
-		* for last shorter step we remap it to fit within the segment. */
-		if(new_t == ray->t) {
-			step_offset *= (new_t - t) / step_size;
-		}
-
-		float3 new_P = ray->P + ray->D * (t + step_offset);
-		VolumeShaderCoefficients coeff;
-
-		/* compute segment */
-		if(volume_shader_sample(kg, sd, state, new_P, &coeff)) {
-			int closure_flag = sd->flag;
-			float3 sigma_t = coeff.sigma_t;
-
-			/* compute average albedo for channel sampling */
-			if(closure_flag & SD_SCATTER) {
-				accum_albedo += dt * safe_divide_color(coeff.sigma_s, sigma_t);
-			}
-
-			/* compute accumulated transmittance */
-			float3 transmittance = volume_color_transmittance(sigma_t, dt);
-
-			/* compute emission attenuated by absorption */
-			if(closure_flag & SD_EMISSION) {
-				float3 emission = kernel_volume_emission_integrate(&coeff, closure_flag, transmittance, dt);
-				accum_emission += accum_transmittance * emission;
-			}
-
-			accum_transmittance *= transmittance;
-
-			/* compute pdf for distance sampling */
-			float3 pdf_distance = dt * accum_transmittance * coeff.sigma_s;
-			cdf_distance = cdf_distance + pdf_distance;
-
-			/* write step data */
-			step->sigma_t = sigma_t;
-			step->sigma_s = coeff.sigma_s;
-			step->closure_flag = closure_flag;
-
-			segment->closure_flag |= closure_flag;
-
-			is_last_step_empty = false;
-			segment->numsteps++;
-		}
-		else {
-			if(is_last_step_empty) {
-				/* consecutive empty step, merge */
-				step--;
-			}
-			else {
-				/* store empty step */
-				step->sigma_t = make_float3(0.0f, 0.0f, 0.0f);
-				step->sigma_s = make_float3(0.0f, 0.0f, 0.0f);
-				step->closure_flag = 0;
-
-				segment->numsteps++;
-				is_last_step_empty = true;
-			}
-		}
-
-		step->accum_transmittance = accum_transmittance;
-		step->cdf_distance = cdf_distance;
-		step->t = new_t;
-		step->shade_t = t + step_offset;
-
-		/* stop if at the end of the volume */
-		t = new_t;
-		if(t == ray->t)
-			break;
-
-		/* stop if nearly all light blocked */
-		if(accum_transmittance.x < tp_eps && accum_transmittance.y < tp_eps && accum_transmittance.z < tp_eps)
-			break;
-	}
-
-	/* store total emission and transmittance */
-	segment->accum_emission = accum_emission;
-	segment->accum_transmittance = accum_transmittance;
-	segment->accum_albedo = accum_albedo;
-
-	/* normalize cumulative density function for distance sampling */
-	VolumeStep *last_step = segment->steps + segment->numsteps - 1;
-
-	if(!is_zero(last_step->cdf_distance)) {
-		VolumeStep *step = &segment->steps[0];
-		int numsteps = segment->numsteps;
-		float3 inv_cdf_distance_sum = safe_invert_color(last_step->cdf_distance);
-
-		for(int i = 0; i < numsteps; i++, step++)
-			step->cdf_distance *= inv_cdf_distance_sum;
-	}
+  const float tp_eps = 1e-6f; /* todo: this is likely not the right value */
+
+  /* prepare for volume stepping */
+  int max_steps;
+  float step_size, step_offset;
+
+  if (heterogeneous) {
+    max_steps = kernel_data.integrator.volume_max_steps;
+    kernel_volume_step_init(kg, state, ray->t, &step_size, &step_offset);
+
+#      ifdef __KERNEL_CPU__
+    /* NOTE: For the branched path tracing it's possible to have direct
+     * and indirect light integration both having volume segments allocated.
+     * We detect this using index in the pre-allocated memory. Currently we
+     * only support two segments allocated at a time, if more needed some
+     * modifications to the KernelGlobals will be needed.
+     *
+     * This gives us restrictions that decoupled record should only happen
+     * in the stack manner, meaning if there's subsequent call of decoupled
+     * record it'll need to free memory before it's caller frees memory.
+     */
+    const int index = kg->decoupled_volume_steps_index;
+    assert(index < sizeof(kg->decoupled_volume_steps) / sizeof(*kg->decoupled_volume_steps));
+    if (kg->decoupled_volume_steps[index] == NULL) {
+      kg->decoupled_volume_steps[index] = (VolumeStep *)malloc(sizeof(VolumeStep) * max_steps);
+    }
+    segment->steps = kg->decoupled_volume_steps[index];
+    ++kg->decoupled_volume_steps_index;
+#      else
+    segment->steps = (VolumeStep *)malloc(sizeof(VolumeStep) * max_steps);
+#      endif
+  }
+  else {
+    max_steps = 1;
+    step_size = ray->t;
+    step_offset = 0.0f;
+    segment->steps = &segment->stack_step;
+  }
+
+  /* init accumulation variables */
+  float3 accum_emission = make_float3(0.0f, 0.0f, 0.0f);
+  float3 accum_transmittance = make_float3(1.0f, 1.0f, 1.0f);
+  float3 accum_albedo = make_float3(0.0f, 0.0f, 0.0f);
+  float3 cdf_distance = make_float3(0.0f, 0.0f, 0.0f);
+  float t = 0.0f;
+
+  segment->numsteps = 0;
+  segment->closure_flag = 0;
+  bool is_last_step_empty = false;
+
+  VolumeStep *step = segment->steps;
+
+  for (int i = 0; i < max_steps; i++, step++) {
+    /* advance to new position */
+    float new_t = min(ray->t, (i + 1) * step_size);
+    float dt = new_t - t;
+
+    /* use random position inside this segment to sample shader,
+    * for last shorter step we remap it to fit within the segment. */
+    if (new_t == ray->t) {
+      step_offset *= (new_t - t) / step_size;
+    }
+
+    float3 new_P = ray->P + ray->D * (t + step_offset);
+    VolumeShaderCoefficients coeff;
+
+    /* compute segment */
+    if (volume_shader_sample(kg, sd, state, new_P, &coeff)) {
+      int closure_flag = sd->flag;
+      float3 sigma_t = coeff.sigma_t;
+
+      /* compute average albedo for channel sampling */
+      if (closure_flag & SD_SCATTER) {
+        accum_albedo += dt * safe_divide_color(coeff.sigma_s, sigma_t);
+      }
+
+      /* compute accumulated transmittance */
+      float3 transmittance = volume_color_transmittance(sigma_t, dt);
+
+      /* compute emission attenuated by absorption */
+      if (closure_flag & SD_EMISSION) {
+        float3 emission = kernel_volume_emission_integrate(
+            &coeff, closure_flag, transmittance, dt);
+        accum_emission += accum_transmittance * emission;
+      }
+
+      accum_transmittance *= transmittance;
+
+      /* compute pdf for distance sampling */
+      float3 pdf_distance = dt * accum_transmittance * coeff.sigma_s;
+      cdf_distance = cdf_distance + pdf_distance;
+
+      /* write step data */
+      step->sigma_t = sigma_t;
+      step->sigma_s = coeff.sigma_s;
+      step->closure_flag = closure_flag;
+
+      segment->closure_flag |= closure_flag;
+
+      is_last_step_empty = false;
+      segment->numsteps++;
+    }
+    else {
+      if (is_last_step_empty) {
+        /* consecutive empty step, merge */
+        step--;
+      }
+      else {
+        /* store empty step */
+        step->sigma_t = make_float3(0.0f, 0.0f, 0.0f);
+        step->sigma_s = make_float3(0.0f, 0.0f, 0.0f);
+        step->closure_flag = 0;
+
+        segment->numsteps++;
+        is_last_step_empty = true;
+      }
+    }
+
+    step->accum_transmittance = accum_transmittance;
+    step->cdf_distance = cdf_distance;
+    step->t = new_t;
+    step->shade_t = t + step_offset;
+
+    /* stop if at the end of the volume */
+    t = new_t;
+    if (t == ray->t)
+      break;
+
+    /* stop if nearly all light blocked */
+    if (accum_transmittance.x < tp_eps && accum_transmittance.y < tp_eps &&
+        accum_transmittance.z < tp_eps)
+      break;
+  }
+
+  /* store total emission and transmittance */
+  segment->accum_emission = accum_emission;
+  segment->accum_transmittance = accum_transmittance;
+  segment->accum_albedo = accum_albedo;
+
+  /* normalize cumulative density function for distance sampling */
+  VolumeStep *last_step = segment->steps + segment->numsteps - 1;
+
+  if (!is_zero(last_step->cdf_distance)) {
+    VolumeStep *step = &segment->steps[0];
+    int numsteps = segment->numsteps;
+    float3 inv_cdf_distance_sum = safe_invert_color(last_step->cdf_distance);
+
+    for (int i = 0; i < numsteps; i++, step++)
+      step->cdf_distance *= inv_cdf_distance_sum;
+  }
 }
 
 ccl_device void kernel_volume_decoupled_free(KernelGlobals *kg, VolumeSegment *segment)
 {
-	if(segment->steps != &segment->stack_step) {
-#ifdef __KERNEL_CPU__
-		/* NOTE: We only allow free last allocated segment.
-		 * No random order of alloc/free is supported.
-		 */
-		assert(kg->decoupled_volume_steps_index > 0);
-		assert(segment->steps == kg->decoupled_volume_steps[kg->decoupled_volume_steps_index - 1]);
-		--kg->decoupled_volume_steps_index;
-#else
-		free(segment->steps);
-#endif
-	}
+  if (segment->steps != &segment->stack_step) {
+#      ifdef __KERNEL_CPU__
+    /* NOTE: We only allow free last allocated segment.
+     * No random order of alloc/free is supported.
+     */
+    assert(kg->decoupled_volume_steps_index > 0);
+    assert(segment->steps == kg->decoupled_volume_steps[kg->decoupled_volume_steps_index - 1]);
+    --kg->decoupled_volume_steps_index;
+#      else
+    free(segment->steps);
+#      endif
+  }
 }
-#endif  /* __VOLUME_DECOUPLED__ */
+#    endif /* __VOLUME_DECOUPLED__ */
 
 /* scattering for homogeneous and heterogeneous volumes, using decoupled ray
  * marching.
  *
  * function is expected to return VOLUME_PATH_SCATTERED when probalistic_scatter is false */
-ccl_device VolumeIntegrateResult kernel_volume_decoupled_scatter(
-	KernelGlobals *kg, PathState *state, Ray *ray, ShaderData *sd,
-	float3 *throughput, float rphase, float rscatter,
-	const VolumeSegment *segment, const float3 *light_P, bool probalistic_scatter)
+ccl_device VolumeIntegrateResult kernel_volume_decoupled_scatter(KernelGlobals *kg,
+                                                                 PathState *state,
+                                                                 Ray *ray,
+                                                                 ShaderData *sd,
+                                                                 float3 *throughput,
+                                                                 float rphase,
+                                                                 float rscatter,
+                                                                 const VolumeSegment *segment,
+                                                                 const float3 *light_P,
+                                                                 bool probalistic_scatter)
 {
-	kernel_assert(segment->closure_flag & SD_SCATTER);
-
-	/* Sample color channel, use MIS with balance heuristic. */
-	float3 channel_pdf;
-	int channel = kernel_volume_sample_channel(segment->accum_albedo,
-	                                           *throughput,
-	                                           rphase,
-	                                           &channel_pdf);
-
-	float xi = rscatter;
-
-	/* probabilistic scattering decision based on transmittance */
-	if(probalistic_scatter) {
-		float sample_transmittance = kernel_volume_channel_get(segment->accum_transmittance, channel);
-
-		if(1.0f - xi >= sample_transmittance) {
-			/* rescale random number so we can reuse it */
-			xi = 1.0f - (1.0f - xi - sample_transmittance)/(1.0f - sample_transmittance);
-		}
-		else {
-			*throughput /= sample_transmittance;
-			return VOLUME_PATH_MISSED;
-		}
-	}
-
-	VolumeStep *step;
-	float3 transmittance;
-	float pdf, sample_t;
-	float mis_weight = 1.0f;
-	bool distance_sample = true;
-	bool use_mis = false;
-
-	if(segment->sampling_method && light_P) {
-		if(segment->sampling_method == SD_VOLUME_MIS) {
-			/* multiple importance sample: randomly pick between
-			 * equiangular and distance sampling strategy */
-			if(xi < 0.5f) {
-				xi *= 2.0f;
-			}
-			else {
-				xi = (xi - 0.5f)*2.0f;
-				distance_sample = false;
-			}
-
-			use_mis = true;
-		}
-		else {
-			/* only equiangular sampling */
-			distance_sample = false;
-		}
-	}
-
-	/* distance sampling */
-	if(distance_sample) {
-		/* find step in cdf */
-		step = segment->steps;
-
-		float prev_t = 0.0f;
-		float3 step_pdf_distance = make_float3(1.0f, 1.0f, 1.0f);
-
-		if(segment->numsteps > 1) {
-			float prev_cdf = 0.0f;
-			float step_cdf = 1.0f;
-			float3 prev_cdf_distance = make_float3(0.0f, 0.0f, 0.0f);
-
-			for(int i = 0; ; i++, step++) {
-				/* todo: optimize using binary search */
-				step_cdf = kernel_volume_channel_get(step->cdf_distance, channel);
-
-				if(xi < step_cdf || i == segment->numsteps-1)
-					break;
-
-				prev_cdf = step_cdf;
-				prev_t = step->t;
-				prev_cdf_distance = step->cdf_distance;
-			}
-
-			/* remap xi so we can reuse it */
-			xi = (xi - prev_cdf)/(step_cdf - prev_cdf);
-
-			/* pdf for picking step */
-			step_pdf_distance = step->cdf_distance - prev_cdf_distance;
-		}
-
-		/* determine range in which we will sample */
-		float step_t = step->t - prev_t;
-
-		/* sample distance and compute transmittance */
-		float3 distance_pdf;
-		sample_t = prev_t + kernel_volume_distance_sample(step_t, step->sigma_t, channel, xi, &transmittance, &distance_pdf);
-
-		/* modify pdf for hit/miss decision */
-		if(probalistic_scatter)
-			distance_pdf *= make_float3(1.0f, 1.0f, 1.0f) - segment->accum_transmittance;
-
-		pdf = dot(channel_pdf, distance_pdf * step_pdf_distance);
-
-		/* multiple importance sampling */
-		if(use_mis) {
-			float equi_pdf = kernel_volume_equiangular_pdf(ray, *light_P, sample_t);
-			mis_weight = 2.0f*power_heuristic(pdf, equi_pdf);
-		}
-	}
-	/* equi-angular sampling */
-	else {
-		/* sample distance */
-		sample_t = kernel_volume_equiangular_sample(ray, *light_P, xi, &pdf);
-
-		/* find step in which sampled distance is located */
-		step = segment->steps;
-
-		float prev_t = 0.0f;
-		float3 step_pdf_distance = make_float3(1.0f, 1.0f, 1.0f);
-
-		if(segment->numsteps > 1) {
-			float3 prev_cdf_distance = make_float3(0.0f, 0.0f, 0.0f);
-
-			int numsteps = segment->numsteps;
-			int high = numsteps - 1;
-			int low = 0;
-			int mid;
-
-			while(low < high) {
-				mid = (low + high) >> 1;
-
-				if(sample_t < step[mid].t)
-					high = mid;
-				else if(sample_t >= step[mid + 1].t)
-					low = mid + 1;
-				else {
-					/* found our interval in step[mid] .. step[mid+1] */
-					prev_t = step[mid].t;
-					prev_cdf_distance = step[mid].cdf_distance;
-					step += mid+1;
-					break;
-				}
-			}
-
-			if(low >= numsteps - 1) {
-				prev_t = step[numsteps - 1].t;
-				prev_cdf_distance = step[numsteps-1].cdf_distance;
-				step += numsteps - 1;
-			}
-
-			/* pdf for picking step with distance sampling */
-			step_pdf_distance = step->cdf_distance - prev_cdf_distance;
-		}
-
-		/* determine range in which we will sample */
-		float step_t = step->t - prev_t;
-		float step_sample_t = sample_t - prev_t;
-
-		/* compute transmittance */
-		transmittance = volume_color_transmittance(step->sigma_t, step_sample_t);
-
-		/* multiple importance sampling */
-		if(use_mis) {
-			float3 distance_pdf3 = kernel_volume_distance_pdf(step_t, step->sigma_t, step_sample_t);
-			float distance_pdf = dot(channel_pdf, distance_pdf3 * step_pdf_distance);
-			mis_weight = 2.0f*power_heuristic(pdf, distance_pdf);
-		}
-	}
-	if(sample_t < 0.0f || pdf == 0.0f) {
-		return VOLUME_PATH_MISSED;
-	}
-
-	/* compute transmittance up to this step */
-	if(step != segment->steps)
-		transmittance *= (step-1)->accum_transmittance;
-
-	/* modify throughput */
-	*throughput *= step->sigma_s * transmittance * (mis_weight / pdf);
-
-	/* evaluate shader to create closures at shading point */
-	if(segment->numsteps > 1) {
-		sd->P = ray->P + step->shade_t*ray->D;
-
-		VolumeShaderCoefficients coeff;
-		volume_shader_sample(kg, sd, state, sd->P, &coeff);
-	}
-
-	/* move to new position */
-	sd->P = ray->P + sample_t*ray->D;
-
-	return VOLUME_PATH_SCATTERED;
+  kernel_assert(segment->closure_flag & SD_SCATTER);
+
+  /* Sample color channel, use MIS with balance heuristic. */
+  float3 channel_pdf;
+  int channel = kernel_volume_sample_channel(
+      segment->accum_albedo, *throughput, rphase, &channel_pdf);
+
+  float xi = rscatter;
+
+  /* probabilistic scattering decision based on transmittance */
+  if (probalistic_scatter) {
+    float sample_transmittance = kernel_volume_channel_get(segment->accum_transmittance, channel);
+
+    if (1.0f - xi >= sample_transmittance) {
+      /* rescale random number so we can reuse it */
+      xi = 1.0f - (1.0f - xi - sample_transmittance) / (1.0f - sample_transmittance);
+    }
+    else {
+      *throughput /= sample_transmittance;
+      return VOLUME_PATH_MISSED;
+    }
+  }
+
+  VolumeStep *step;
+  float3 transmittance;
+  float pdf, sample_t;
+  float mis_weight = 1.0f;
+  bool distance_sample = true;
+  bool use_mis = false;
+
+  if (segment->sampling_method && light_P) {
+    if (segment->sampling_method == SD_VOLUME_MIS) {
+      /* multiple importance sample: randomly pick between
+       * equiangular and distance sampling strategy */
+      if (xi < 0.5f) {
+        xi *= 2.0f;
+      }
+      else {
+        xi = (xi - 0.5f) * 2.0f;
+        distance_sample = false;
+      }
+
+      use_mis = true;
+    }
+    else {
+      /* only equiangular sampling */
+      distance_sample = false;
+    }
+  }
+
+  /* distance sampling */
+  if (distance_sample) {
+    /* find step in cdf */
+    step = segment->steps;
+
+    float prev_t = 0.0f;
+    float3 step_pdf_distance = make_float3(1.0f, 1.0f, 1.0f);
+
+    if (segment->numsteps > 1) {
+      float prev_cdf = 0.0f;
+      float step_cdf = 1.0f;
+      float3 prev_cdf_distance = make_float3(0.0f, 0.0f, 0.0f);
+
+      for (int i = 0;; i++, step++) {
+        /* todo: optimize using binary search */
+        step_cdf = kernel_volume_channel_get(step->cdf_distance, channel);
+
+        if (xi < step_cdf || i == segment->numsteps - 1)
+          break;
+
+        prev_cdf = step_cdf;
+        prev_t = step->t;
+        prev_cdf_distance = step->cdf_distance;
+      }
+
+      /* remap xi so we can reuse it */
+      xi = (xi - prev_cdf) / (step_cdf - prev_cdf);
+
+      /* pdf for picking step */
+      step_pdf_distance = step->cdf_distance - prev_cdf_distance;
+    }
+
+    /* determine range in which we will sample */
+    float step_t = step->t - prev_t;
+
+    /* sample distance and compute transmittance */
+    float3 distance_pdf;
+    sample_t = prev_t + kernel_volume_distance_sample(
+                            step_t, step->sigma_t, channel, xi, &transmittance, &distance_pdf);
+
+    /* modify pdf for hit/miss decision */
+    if (probalistic_scatter)
+      distance_pdf *= make_float3(1.0f, 1.0f, 1.0f) - segment->accum_transmittance;
+
+    pdf = dot(channel_pdf, distance_pdf * step_pdf_distance);
+
+    /* multiple importance sampling */
+    if (use_mis) {
+      float equi_pdf = kernel_volume_equiangular_pdf(ray, *light_P, sample_t);
+      mis_weight = 2.0f * power_heuristic(pdf, equi_pdf);
+    }
+  }
+  /* equi-angular sampling */
+  else {
+    /* sample distance */
+    sample_t = kernel_volume_equiangular_sample(ray, *light_P, xi, &pdf);
+
+    /* find step in which sampled distance is located */
+    step = segment->steps;
+
+    float prev_t = 0.0f;
+    float3 step_pdf_distance = make_float3(1.0f, 1.0f, 1.0f);
+
+    if (segment->numsteps > 1) {
+      float3 prev_cdf_distance = make_float3(0.0f, 0.0f, 0.0f);
+
+      int numsteps = segment->numsteps;
+      int high = numsteps - 1;
+      int low = 0;
+      int mid;
+
+      while (low < high) {
+        mid = (low + high) >> 1;
+
+        if (sample_t < step[mid].t)
+          high = mid;
+        else if (sample_t >= step[mid + 1].t)
+          low = mid + 1;
+        else {
+          /* found our interval in step[mid] .. step[mid+1] */
+          prev_t = step[mid].t;
+          prev_cdf_distance = step[mid].cdf_distance;
+          step += mid + 1;
+          break;
+        }
+      }
+
+      if (low >= numsteps - 1) {
+        prev_t = step[numsteps - 1].t;
+        prev_cdf_distance = step[numsteps - 1].cdf_distance;
+        step += numsteps - 1;
+      }
+
+      /* pdf for picking step with distance sampling */
+      step_pdf_distance = step->cdf_distance - prev_cdf_distance;
+    }
+
+    /* determine range in which we will sample */
+    float step_t = step->t - prev_t;
+    float step_sample_t = sample_t - prev_t;
+
+    /* compute transmittance */
+    transmittance = volume_color_transmittance(step->sigma_t, step_sample_t);
+
+    /* multiple importance sampling */
+    if (use_mis) {
+      float3 distance_pdf3 = kernel_volume_distance_pdf(step_t, step->sigma_t, step_sample_t);
+      float distance_pdf = dot(channel_pdf, distance_pdf3 * step_pdf_distance);
+      mis_weight = 2.0f * power_heuristic(pdf, distance_pdf);
+    }
+  }
+  if (sample_t < 0.0f || pdf == 0.0f) {
+    return VOLUME_PATH_MISSED;
+  }
+
+  /* compute transmittance up to this step */
+  if (step != segment->steps)
+    transmittance *= (step - 1)->accum_transmittance;
+
+  /* modify throughput */
+  *throughput *= step->sigma_s * transmittance * (mis_weight / pdf);
+
+  /* evaluate shader to create closures at shading point */
+  if (segment->numsteps > 1) {
+    sd->P = ray->P + step->shade_t * ray->D;
+
+    VolumeShaderCoefficients coeff;
+    volume_shader_sample(kg, sd, state, sd->P, &coeff);
+  }
+
+  /* move to new position */
+  sd->P = ray->P + sample_t * ray->D;
+
+  return VOLUME_PATH_SCATTERED;
 }
-#endif  /* __SPLIT_KERNEL */
+#  endif /* __SPLIT_KERNEL */
 
 /* decide if we need to use decoupled or not */
-ccl_device bool kernel_volume_use_decoupled(KernelGlobals *kg, bool heterogeneous, bool direct, int sampling_method)
+ccl_device bool kernel_volume_use_decoupled(KernelGlobals *kg,
+                                            bool heterogeneous,
+                                            bool direct,
+                                            int sampling_method)
 {
-	/* decoupled ray marching for heterogeneous volumes not supported on the GPU,
-	 * which also means equiangular and multiple importance sampling is not
-	 * support for that case */
-	if(!kernel_data.integrator.volume_decoupled)
-		return false;
-
-#ifdef __KERNEL_GPU__
-	if(heterogeneous)
-		return false;
-#endif
-
-	/* equiangular and multiple importance sampling only implemented for decoupled */
-	if(sampling_method != 0)
-		return true;
-
-	/* for all light sampling use decoupled, reusing shader evaluations is
-	 * typically faster in that case */
-	if(direct)
-		return kernel_data.integrator.sample_all_lights_direct;
-	else
-		return kernel_data.integrator.sample_all_lights_indirect;
+  /* decoupled ray marching for heterogeneous volumes not supported on the GPU,
+   * which also means equiangular and multiple importance sampling is not
+   * support for that case */
+  if (!kernel_data.integrator.volume_decoupled)
+    return false;
+
+#  ifdef __KERNEL_GPU__
+  if (heterogeneous)
+    return false;
+#  endif
+
+  /* equiangular and multiple importance sampling only implemented for decoupled */
+  if (sampling_method != 0)
+    return true;
+
+  /* for all light sampling use decoupled, reusing shader evaluations is
+   * typically faster in that case */
+  if (direct)
+    return kernel_data.integrator.sample_all_lights_direct;
+  else
+    return kernel_data.integrator.sample_all_lights_indirect;
 }
 
 /* Volume Stack
@@ -1124,242 +1147,231 @@ ccl_device void kernel_volume_stack_init(KernelGlobals *kg,
                                          ccl_addr_space const Ray *ray,
                                          ccl_addr_space VolumeStack *stack)
 {
-	/* NULL ray happens in the baker, does it need proper initialization of
-	 * camera in volume?
-	 */
-	if(!kernel_data.cam.is_inside_volume || ray == NULL) {
-		/* Camera is guaranteed to be in the air, only take background volume
-		 * into account in this case.
-		 */
-		if(kernel_data.background.volume_shader != SHADER_NONE) {
-			stack[0].shader = kernel_data.background.volume_shader;
-			stack[0].object = PRIM_NONE;
-			stack[1].shader = SHADER_NONE;
-		}
-		else {
-			stack[0].shader = SHADER_NONE;
-		}
-		return;
-	}
-
-	kernel_assert(state->flag & PATH_RAY_CAMERA);
-
-	Ray volume_ray = *ray;
-	volume_ray.t = FLT_MAX;
-
-	const uint visibility = (state->flag & PATH_RAY_ALL_VISIBILITY);
-	int stack_index = 0, enclosed_index = 0;
-
-#ifdef __VOLUME_RECORD_ALL__
-	Intersection hits[2*VOLUME_STACK_SIZE + 1];
-	uint num_hits = scene_intersect_volume_all(kg,
-	                                           &volume_ray,
-	                                           hits,
-	                                           2*VOLUME_STACK_SIZE,
-	                                           visibility);
-	if(num_hits > 0) {
-		int enclosed_volumes[VOLUME_STACK_SIZE];
-		Intersection *isect = hits;
-
-		qsort(hits, num_hits, sizeof(Intersection), intersections_compare);
-
-		for(uint hit = 0; hit < num_hits; ++hit, ++isect) {
-			shader_setup_from_ray(kg, stack_sd, isect, &volume_ray);
-			if(stack_sd->flag & SD_BACKFACING) {
-				bool need_add = true;
-				for(int i = 0; i < enclosed_index && need_add; ++i) {
-					/* If ray exited the volume and never entered to that volume
-					 * it means that camera is inside such a volume.
-					 */
-					if(enclosed_volumes[i] == stack_sd->object) {
-						need_add = false;
-					}
-				}
-				for(int i = 0; i < stack_index && need_add; ++i) {
-					/* Don't add intersections twice. */
-					if(stack[i].object == stack_sd->object) {
-						need_add = false;
-						break;
-					}
-				}
-				if(need_add && stack_index < VOLUME_STACK_SIZE - 1) {
-					stack[stack_index].object = stack_sd->object;
-					stack[stack_index].shader = stack_sd->shader;
-					++stack_index;
-				}
-			}
-			else {
-				/* If ray from camera enters the volume, this volume shouldn't
-				 * be added to the stack on exit.
-				 */
-				enclosed_volumes[enclosed_index++] = stack_sd->object;
-			}
-		}
-	}
-#else
-	int enclosed_volumes[VOLUME_STACK_SIZE];
-	int step = 0;
-
-	while(stack_index < VOLUME_STACK_SIZE - 1 &&
-	      enclosed_index < VOLUME_STACK_SIZE - 1 &&
-	      step < 2 * VOLUME_STACK_SIZE)
-	{
-		Intersection isect;
-		if(!scene_intersect_volume(kg, &volume_ray, &isect, visibility)) {
-			break;
-		}
-
-		shader_setup_from_ray(kg, stack_sd, &isect, &volume_ray);
-		if(stack_sd->flag & SD_BACKFACING) {
-			/* If ray exited the volume and never entered to that volume
-			 * it means that camera is inside such a volume.
-			 */
-			bool need_add = true;
-			for(int i = 0; i < enclosed_index && need_add; ++i) {
-				/* If ray exited the volume and never entered to that volume
-				 * it means that camera is inside such a volume.
-				 */
-				if(enclosed_volumes[i] == stack_sd->object) {
-					need_add = false;
-				}
-			}
-			for(int i = 0; i < stack_index && need_add; ++i) {
-				/* Don't add intersections twice. */
-				if(stack[i].object == stack_sd->object) {
-					need_add = false;
-					break;
-				}
-			}
-			if(need_add) {
-				stack[stack_index].object = stack_sd->object;
-				stack[stack_index].shader = stack_sd->shader;
-				++stack_index;
-			}
-		}
-		else {
-			/* If ray from camera enters the volume, this volume shouldn't
-			 * be added to the stack on exit.
-			 */
-			enclosed_volumes[enclosed_index++] = stack_sd->object;
-		}
-
-		/* Move ray forward. */
-		volume_ray.P = ray_offset(stack_sd->P, -stack_sd->Ng);
-		++step;
-	}
-#endif
-	/* stack_index of 0 means quick checks outside of the kernel gave false
-	 * positive, nothing to worry about, just we've wasted quite a few of
-	 * ticks just to come into conclusion that camera is in the air.
-	 *
-	 * In this case we're doing the same above -- check whether background has
-	 * volume.
-	 */
-	if(stack_index == 0 && kernel_data.background.volume_shader == SHADER_NONE) {
-		stack[0].shader = kernel_data.background.volume_shader;
-		stack[0].object = PRIM_NONE;
-		stack[1].shader = SHADER_NONE;
-	}
-	else {
-		stack[stack_index].shader = SHADER_NONE;
-	}
+  /* NULL ray happens in the baker, does it need proper initialization of
+   * camera in volume?
+   */
+  if (!kernel_data.cam.is_inside_volume || ray == NULL) {
+    /* Camera is guaranteed to be in the air, only take background volume
+     * into account in this case.
+     */
+    if (kernel_data.background.volume_shader != SHADER_NONE) {
+      stack[0].shader = kernel_data.background.volume_shader;
+      stack[0].object = PRIM_NONE;
+      stack[1].shader = SHADER_NONE;
+    }
+    else {
+      stack[0].shader = SHADER_NONE;
+    }
+    return;
+  }
+
+  kernel_assert(state->flag & PATH_RAY_CAMERA);
+
+  Ray volume_ray = *ray;
+  volume_ray.t = FLT_MAX;
+
+  const uint visibility = (state->flag & PATH_RAY_ALL_VISIBILITY);
+  int stack_index = 0, enclosed_index = 0;
+
+#  ifdef __VOLUME_RECORD_ALL__
+  Intersection hits[2 * VOLUME_STACK_SIZE + 1];
+  uint num_hits = scene_intersect_volume_all(
+      kg, &volume_ray, hits, 2 * VOLUME_STACK_SIZE, visibility);
+  if (num_hits > 0) {
+    int enclosed_volumes[VOLUME_STACK_SIZE];
+    Intersection *isect = hits;
+
+    qsort(hits, num_hits, sizeof(Intersection), intersections_compare);
+
+    for (uint hit = 0; hit < num_hits; ++hit, ++isect) {
+      shader_setup_from_ray(kg, stack_sd, isect, &volume_ray);
+      if (stack_sd->flag & SD_BACKFACING) {
+        bool need_add = true;
+        for (int i = 0; i < enclosed_index && need_add; ++i) {
+          /* If ray exited the volume and never entered to that volume
+           * it means that camera is inside such a volume.
+           */
+          if (enclosed_volumes[i] == stack_sd->object) {
+            need_add = false;
+          }
+        }
+        for (int i = 0; i < stack_index && need_add; ++i) {
+          /* Don't add intersections twice. */
+          if (stack[i].object == stack_sd->object) {
+            need_add = false;
+            break;
+          }
+        }
+        if (need_add && stack_index < VOLUME_STACK_SIZE - 1) {
+          stack[stack_index].object = stack_sd->object;
+          stack[stack_index].shader = stack_sd->shader;
+          ++stack_index;
+        }
+      }
+      else {
+        /* If ray from camera enters the volume, this volume shouldn't
+         * be added to the stack on exit.
+         */
+        enclosed_volumes[enclosed_index++] = stack_sd->object;
+      }
+    }
+  }
+#  else
+  int enclosed_volumes[VOLUME_STACK_SIZE];
+  int step = 0;
+
+  while (stack_index < VOLUME_STACK_SIZE - 1 && enclosed_index < VOLUME_STACK_SIZE - 1 &&
+         step < 2 * VOLUME_STACK_SIZE) {
+    Intersection isect;
+    if (!scene_intersect_volume(kg, &volume_ray, &isect, visibility)) {
+      break;
+    }
+
+    shader_setup_from_ray(kg, stack_sd, &isect, &volume_ray);
+    if (stack_sd->flag & SD_BACKFACING) {
+      /* If ray exited the volume and never entered to that volume
+       * it means that camera is inside such a volume.
+       */
+      bool need_add = true;
+      for (int i = 0; i < enclosed_index && need_add; ++i) {
+        /* If ray exited the volume and never entered to that volume
+         * it means that camera is inside such a volume.
+         */
+        if (enclosed_volumes[i] == stack_sd->object) {
+          need_add = false;
+        }
+      }
+      for (int i = 0; i < stack_index && need_add; ++i) {
+        /* Don't add intersections twice. */
+        if (stack[i].object == stack_sd->object) {
+          need_add = false;
+          break;
+        }
+      }
+      if (need_add) {
+        stack[stack_index].object = stack_sd->object;
+        stack[stack_index].shader = stack_sd->shader;
+        ++stack_index;
+      }
+    }
+    else {
+      /* If ray from camera enters the volume, this volume shouldn't
+       * be added to the stack on exit.
+       */
+      enclosed_volumes[enclosed_index++] = stack_sd->object;
+    }
+
+    /* Move ray forward. */
+    volume_ray.P = ray_offset(stack_sd->P, -stack_sd->Ng);
+    ++step;
+  }
+#  endif
+  /* stack_index of 0 means quick checks outside of the kernel gave false
+   * positive, nothing to worry about, just we've wasted quite a few of
+   * ticks just to come into conclusion that camera is in the air.
+   *
+   * In this case we're doing the same above -- check whether background has
+   * volume.
+   */
+  if (stack_index == 0 && kernel_data.background.volume_shader == SHADER_NONE) {
+    stack[0].shader = kernel_data.background.volume_shader;
+    stack[0].object = PRIM_NONE;
+    stack[1].shader = SHADER_NONE;
+  }
+  else {
+    stack[stack_index].shader = SHADER_NONE;
+  }
 }
 
-ccl_device void kernel_volume_stack_enter_exit(KernelGlobals *kg, ShaderData *sd, ccl_addr_space VolumeStack *stack)
+ccl_device void kernel_volume_stack_enter_exit(KernelGlobals *kg,
+                                               ShaderData *sd,
+                                               ccl_addr_space VolumeStack *stack)
 {
-	/* todo: we should have some way for objects to indicate if they want the
-	 * world shader to work inside them. excluding it by default is problematic
-	 * because non-volume objects can't be assumed to be closed manifolds */
-
-	if(!(sd->flag & SD_HAS_VOLUME))
-		return;
-
-	if(sd->flag & SD_BACKFACING) {
-		/* exit volume object: remove from stack */
-		for(int i = 0; stack[i].shader != SHADER_NONE; i++) {
-			if(stack[i].object == sd->object) {
-				/* shift back next stack entries */
-				do {
-					stack[i] = stack[i+1];
-					i++;
-				}
-				while(stack[i].shader != SHADER_NONE);
-
-				return;
-			}
-		}
-	}
-	else {
-		/* enter volume object: add to stack */
-		int i;
-
-		for(i = 0; stack[i].shader != SHADER_NONE; i++) {
-			/* already in the stack? then we have nothing to do */
-			if(stack[i].object == sd->object)
-				return;
-		}
-
-		/* if we exceed the stack limit, ignore */
-		if(i >= VOLUME_STACK_SIZE-1)
-			return;
-
-		/* add to the end of the stack */
-		stack[i].shader = sd->shader;
-		stack[i].object = sd->object;
-		stack[i+1].shader = SHADER_NONE;
-	}
+  /* todo: we should have some way for objects to indicate if they want the
+   * world shader to work inside them. excluding it by default is problematic
+   * because non-volume objects can't be assumed to be closed manifolds */
+
+  if (!(sd->flag & SD_HAS_VOLUME))
+    return;
+
+  if (sd->flag & SD_BACKFACING) {
+    /* exit volume object: remove from stack */
+    for (int i = 0; stack[i].shader != SHADER_NONE; i++) {
+      if (stack[i].object == sd->object) {
+        /* shift back next stack entries */
+        do {
+          stack[i] = stack[i + 1];
+          i++;
+        } while (stack[i].shader != SHADER_NONE);
+
+        return;
+      }
+    }
+  }
+  else {
+    /* enter volume object: add to stack */
+    int i;
+
+    for (i = 0; stack[i].shader != SHADER_NONE; i++) {
+      /* already in the stack? then we have nothing to do */
+      if (stack[i].object == sd->object)
+        return;
+    }
+
+    /* if we exceed the stack limit, ignore */
+    if (i >= VOLUME_STACK_SIZE - 1)
+      return;
+
+    /* add to the end of the stack */
+    stack[i].shader = sd->shader;
+    stack[i].object = sd->object;
+    stack[i + 1].shader = SHADER_NONE;
+  }
 }
 
-#ifdef __SUBSURFACE__
+#  ifdef __SUBSURFACE__
 ccl_device void kernel_volume_stack_update_for_subsurface(KernelGlobals *kg,
                                                           ShaderData *stack_sd,
                                                           Ray *ray,
                                                           ccl_addr_space VolumeStack *stack)
 {
-	kernel_assert(kernel_data.integrator.use_volumes);
-
-	Ray volume_ray = *ray;
-
-#  ifdef __VOLUME_RECORD_ALL__
-	Intersection hits[2*VOLUME_STACK_SIZE + 1];
-	uint num_hits = scene_intersect_volume_all(kg,
-	                                           &volume_ray,
-	                                           hits,
-	                                           2*VOLUME_STACK_SIZE,
-	                                           PATH_RAY_ALL_VISIBILITY);
-	if(num_hits > 0) {
-		Intersection *isect = hits;
-
-		qsort(hits, num_hits, sizeof(Intersection), intersections_compare);
-
-		for(uint hit = 0; hit < num_hits; ++hit, ++isect) {
-			shader_setup_from_ray(kg, stack_sd, isect, &volume_ray);
-			kernel_volume_stack_enter_exit(kg, stack_sd, stack);
-		}
-	}
-#  else
-	Intersection isect;
-	int step = 0;
-	float3 Pend = ray->P + ray->D*ray->t;
-	while(step < 2 * VOLUME_STACK_SIZE &&
-	      scene_intersect_volume(kg,
-	                             &volume_ray,
-	                             &isect,
-	                             PATH_RAY_ALL_VISIBILITY))
-	{
-		shader_setup_from_ray(kg, stack_sd, &isect, &volume_ray);
-		kernel_volume_stack_enter_exit(kg, stack_sd, stack);
-
-		/* Move ray forward. */
-		volume_ray.P = ray_offset(stack_sd->P, -stack_sd->Ng);
-		if(volume_ray.t != FLT_MAX) {
-			volume_ray.D = normalize_len(Pend - volume_ray.P, &volume_ray.t);
-		}
-		++step;
-	}
-#  endif
+  kernel_assert(kernel_data.integrator.use_volumes);
+
+  Ray volume_ray = *ray;
+
+#    ifdef __VOLUME_RECORD_ALL__
+  Intersection hits[2 * VOLUME_STACK_SIZE + 1];
+  uint num_hits = scene_intersect_volume_all(
+      kg, &volume_ray, hits, 2 * VOLUME_STACK_SIZE, PATH_RAY_ALL_VISIBILITY);
+  if (num_hits > 0) {
+    Intersection *isect = hits;
+
+    qsort(hits, num_hits, sizeof(Intersection), intersections_compare);
+
+    for (uint hit = 0; hit < num_hits; ++hit, ++isect) {
+      shader_setup_from_ray(kg, stack_sd, isect, &volume_ray);
+      kernel_volume_stack_enter_exit(kg, stack_sd, stack);
+    }
+  }
+#    else
+  Intersection isect;
+  int step = 0;
+  float3 Pend = ray->P + ray->D * ray->t;
+  while (step < 2 * VOLUME_STACK_SIZE &&
+         scene_intersect_volume(kg, &volume_ray, &isect, PATH_RAY_ALL_VISIBILITY)) {
+    shader_setup_from_ray(kg, stack_sd, &isect, &volume_ray);
+    kernel_volume_stack_enter_exit(kg, stack_sd, stack);
+
+    /* Move ray forward. */
+    volume_ray.P = ray_offset(stack_sd->P, -stack_sd->Ng);
+    if (volume_ray.t != FLT_MAX) {
+      volume_ray.D = normalize_len(Pend - volume_ray.P, &volume_ray.t);
+    }
+    ++step;
+  }
+#    endif
 }
-#endif
+#  endif
 
 /* Clean stack after the last bounce.
  *
@@ -1378,15 +1390,15 @@ ccl_device void kernel_volume_stack_update_for_subsurface(KernelGlobals *kg,
 ccl_device_inline void kernel_volume_clean_stack(KernelGlobals *kg,
                                                  ccl_addr_space VolumeStack *volume_stack)
 {
-	if(kernel_data.background.volume_shader != SHADER_NONE) {
-		/* Keep the world's volume in stack. */
-		volume_stack[1].shader = SHADER_NONE;
-	}
-	else {
-		volume_stack[0].shader = SHADER_NONE;
-	}
+  if (kernel_data.background.volume_shader != SHADER_NONE) {
+    /* Keep the world's volume in stack. */
+    volume_stack[1].shader = SHADER_NONE;
+  }
+  else {
+    volume_stack[0].shader = SHADER_NONE;
+  }
 }
 
-#endif  /* __VOLUME__ */
+#endif /* __VOLUME__ */
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_work_stealing.h b/intern/cycles/kernel/kernel_work_stealing.h
index 9667156eaf5..799561a7466 100644
--- a/intern/cycles/kernel/kernel_work_stealing.h
+++ b/intern/cycles/kernel/kernel_work_stealing.h
@@ -35,27 +35,26 @@ ccl_device bool get_next_work(KernelGlobals *kg,
                               uint ray_index,
                               ccl_private uint *global_work_index)
 {
-	/* With a small amount of work there may be more threads than work due to
-	 * rounding up of global size, stop such threads immediately. */
-	if(ray_index >= total_work_size) {
-		return false;
-	}
+  /* With a small amount of work there may be more threads than work due to
+   * rounding up of global size, stop such threads immediately. */
+  if (ray_index >= total_work_size) {
+    return false;
+  }
 
-	/* Increase atomic work index counter in pool. */
-	uint pool = ray_index / WORK_POOL_SIZE;
-	uint work_index = atomic_fetch_and_inc_uint32(&work_pools[pool]);
+  /* Increase atomic work index counter in pool. */
+  uint pool = ray_index / WORK_POOL_SIZE;
+  uint work_index = atomic_fetch_and_inc_uint32(&work_pools[pool]);
 
-	/* Map per-pool work index to a global work index. */
-	uint global_size = ccl_global_size(0) * ccl_global_size(1);
-	kernel_assert(global_size % WORK_POOL_SIZE == 0);
-	kernel_assert(ray_index < global_size);
+  /* Map per-pool work index to a global work index. */
+  uint global_size = ccl_global_size(0) * ccl_global_size(1);
+  kernel_assert(global_size % WORK_POOL_SIZE == 0);
+  kernel_assert(ray_index < global_size);
 
-	*global_work_index = (work_index / WORK_POOL_SIZE) * global_size
-	                   + (pool * WORK_POOL_SIZE)
-	                   + (work_index % WORK_POOL_SIZE);
+  *global_work_index = (work_index / WORK_POOL_SIZE) * global_size + (pool * WORK_POOL_SIZE) +
+                       (work_index % WORK_POOL_SIZE);
 
-	/* Test if all work for this pool is done. */
-	return (*global_work_index < total_work_size);
+  /* Test if all work for this pool is done. */
+  return (*global_work_index < total_work_size);
 }
 #endif
 
@@ -67,22 +66,22 @@ ccl_device_inline void get_work_pixel(ccl_global const WorkTile *tile,
                                       ccl_private uint *sample)
 {
 #ifdef __KERNEL_CUDA__
-	/* Keeping threads for the same pixel together improves performance on CUDA. */
-	uint sample_offset = global_work_index % tile->num_samples;
-	uint pixel_offset = global_work_index / tile->num_samples;
-#else /* __KERNEL_CUDA__ */
-	uint tile_pixels = tile->w * tile->h;
-	uint sample_offset = global_work_index / tile_pixels;
-	uint pixel_offset = global_work_index - sample_offset * tile_pixels;
+  /* Keeping threads for the same pixel together improves performance on CUDA. */
+  uint sample_offset = global_work_index % tile->num_samples;
+  uint pixel_offset = global_work_index / tile->num_samples;
+#else  /* __KERNEL_CUDA__ */
+  uint tile_pixels = tile->w * tile->h;
+  uint sample_offset = global_work_index / tile_pixels;
+  uint pixel_offset = global_work_index - sample_offset * tile_pixels;
 #endif /* __KERNEL_CUDA__ */
-	uint y_offset = pixel_offset / tile->w;
-	uint x_offset = pixel_offset - y_offset * tile->w;
+  uint y_offset = pixel_offset / tile->w;
+  uint x_offset = pixel_offset - y_offset * tile->w;
 
-	*x = tile->x + x_offset;
-	*y = tile->y + y_offset;
-	*sample = tile->start_sample + sample_offset;
+  *x = tile->x + x_offset;
+  *y = tile->y + y_offset;
+  *sample = tile->start_sample + sample_offset;
 }
 
 CCL_NAMESPACE_END
 
-#endif  /* __KERNEL_WORK_STEALING_H__ */
+#endif /* __KERNEL_WORK_STEALING_H__ */
diff --git a/intern/cycles/kernel/kernels/cpu/filter.cpp b/intern/cycles/kernel/kernels/cpu/filter.cpp
index 2ff1a392dc3..145a6b6ac40 100644
--- a/intern/cycles/kernel/kernels/cpu/filter.cpp
+++ b/intern/cycles/kernel/kernels/cpu/filter.cpp
@@ -53,7 +53,7 @@
 
 /* quiet unused define warnings */
 #if defined(__KERNEL_SSE2__)
-    /* do nothing */
+/* do nothing */
 #endif
 
 #include "kernel/filter/filter.h"
diff --git a/intern/cycles/kernel/kernels/cpu/filter_avx.cpp b/intern/cycles/kernel/kernels/cpu/filter_avx.cpp
index 4a9e6047ecf..1d68214c8e7 100644
--- a/intern/cycles/kernel/kernels/cpu/filter_avx.cpp
+++ b/intern/cycles/kernel/kernels/cpu/filter_avx.cpp
@@ -32,7 +32,7 @@
 #    define __KERNEL_SSE41__
 #    define __KERNEL_AVX__
 #  endif
-#endif  /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX */
+#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX */
 
 #include "kernel/filter/filter.h"
 #define KERNEL_ARCH cpu_avx
diff --git a/intern/cycles/kernel/kernels/cpu/filter_avx2.cpp b/intern/cycles/kernel/kernels/cpu/filter_avx2.cpp
index c22ec576254..b6709fbc529 100644
--- a/intern/cycles/kernel/kernels/cpu/filter_avx2.cpp
+++ b/intern/cycles/kernel/kernels/cpu/filter_avx2.cpp
@@ -33,7 +33,7 @@
 #    define __KERNEL_AVX__
 #    define __KERNEL_AVX2__
 #  endif
-#endif  /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 */
+#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 */
 
 #include "kernel/filter/filter.h"
 #define KERNEL_ARCH cpu_avx2
diff --git a/intern/cycles/kernel/kernels/cpu/filter_cpu.h b/intern/cycles/kernel/kernels/cpu/filter_cpu.h
index 02c85562db8..1423b182ab8 100644
--- a/intern/cycles/kernel/kernels/cpu/filter_cpu.h
+++ b/intern/cycles/kernel/kernels/cpu/filter_cpu.h
@@ -25,7 +25,7 @@ void KERNEL_FUNCTION_FULL_NAME(filter_divide_shadow)(int sample,
                                                      float *sampleV,
                                                      float *sampleVV,
                                                      float *bufferV,
-                                                     int* prefilter_rect,
+                                                     int *prefilter_rect,
                                                      int buffer_pass_stride,
                                                      int buffer_denoising_offset);
 
@@ -38,7 +38,7 @@ void KERNEL_FUNCTION_FULL_NAME(filter_get_feature)(int sample,
                                                    float *mean,
                                                    float *variance,
                                                    float scale,
-                                                   int* prefilter_rect,
+                                                   int *prefilter_rect,
                                                    int buffer_pass_stride,
                                                    int buffer_denoising_offset);
 
@@ -49,9 +49,10 @@ void KERNEL_FUNCTION_FULL_NAME(filter_write_feature)(int sample,
                                                      float *from,
                                                      float *buffer,
                                                      int out_offset,
-                                                     int* prefilter_rect);
+                                                     int *prefilter_rect);
 
-void KERNEL_FUNCTION_FULL_NAME(filter_detect_outliers)(int x, int y,
+void KERNEL_FUNCTION_FULL_NAME(filter_detect_outliers)(int x,
+                                                       int y,
                                                        ccl_global float *image,
                                                        ccl_global float *variance,
                                                        ccl_global float *depth,
@@ -59,22 +60,17 @@ void KERNEL_FUNCTION_FULL_NAME(filter_detect_outliers)(int x, int y,
                                                        int *rect,
                                                        int pass_stride);
 
-void KERNEL_FUNCTION_FULL_NAME(filter_combine_halves)(int x, int y,
-                                                      float *mean,
-                                                      float *variance,
-                                                      float *a,
-                                                      float *b,
-                                                      int* prefilter_rect,
-                                                      int r);
+void KERNEL_FUNCTION_FULL_NAME(filter_combine_halves)(
+    int x, int y, float *mean, float *variance, float *a, float *b, int *prefilter_rect, int r);
 
-void KERNEL_FUNCTION_FULL_NAME(filter_construct_transform)(float* buffer,
+void KERNEL_FUNCTION_FULL_NAME(filter_construct_transform)(float *buffer,
                                                            TileInfo *tiles,
                                                            int x,
                                                            int y,
                                                            int storage_ofs,
                                                            float *transform,
                                                            int *rank,
-                                                           int* rect,
+                                                           int *rect,
                                                            int pass_stride,
                                                            int frame_stride,
                                                            bool use_time,
@@ -87,24 +83,18 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_calc_difference)(int dx,
                                                            float *variance_image,
                                                            float *scale_image,
                                                            float *difference_image,
-                                                           int* rect,
+                                                           int *rect,
                                                            int stride,
                                                            int channel_offset,
                                                            int frame_offset,
                                                            float a,
                                                            float k_2);
 
-void KERNEL_FUNCTION_FULL_NAME(filter_nlm_blur)(float *difference_image,
-                                                float *out_image,
-                                                int* rect,
-                                                int stride,
-                                                int f);
+void KERNEL_FUNCTION_FULL_NAME(filter_nlm_blur)(
+    float *difference_image, float *out_image, int *rect, int stride, int f);
 
-void KERNEL_FUNCTION_FULL_NAME(filter_nlm_calc_weight)(float *difference_image,
-                                                       float *out_image,
-                                                       int* rect,
-                                                       int stride,
-                                                       int f);
+void KERNEL_FUNCTION_FULL_NAME(filter_nlm_calc_weight)(
+    float *difference_image, float *out_image, int *rect, int stride, int f);
 
 void KERNEL_FUNCTION_FULL_NAME(filter_nlm_update_output)(int dx,
                                                          int dy,
@@ -113,7 +103,7 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_update_output)(int dx,
                                                          float *temp_image,
                                                          float *out_image,
                                                          float *accum_image,
-                                                         int* rect,
+                                                         int *rect,
                                                          int channel_offset,
                                                          int stride,
                                                          int f);
@@ -137,7 +127,7 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_construct_gramian)(int dx,
 
 void KERNEL_FUNCTION_FULL_NAME(filter_nlm_normalize)(float *out_image,
                                                      float *accum_image,
-                                                     int* rect,
+                                                     int *rect,
                                                      int stride);
 
 void KERNEL_FUNCTION_FULL_NAME(filter_finalize)(int x,
diff --git a/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h
index c29505880cb..3d4cb87e104 100644
--- a/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h
+++ b/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h
@@ -25,12 +25,12 @@
 #include "kernel/filter/filter_kernel.h"
 
 #ifdef KERNEL_STUB
-#  define STUB_ASSERT(arch, name) assert(!(#name " kernel stub for architecture " #arch " was called!"))
+#  define STUB_ASSERT(arch, name) \
+    assert(!(#name " kernel stub for architecture " #arch " was called!"))
 #endif
 
 CCL_NAMESPACE_BEGIN
 
-
 /* Denoise filter */
 
 void KERNEL_FUNCTION_FULL_NAME(filter_divide_shadow)(int sample,
@@ -42,23 +42,25 @@ void KERNEL_FUNCTION_FULL_NAME(filter_divide_shadow)(int sample,
                                                      float *sampleVariance,
                                                      float *sampleVarianceV,
                                                      float *bufferVariance,
-                                                     int* prefilter_rect,
+                                                     int *prefilter_rect,
                                                      int buffer_pass_stride,
                                                      int buffer_denoising_offset)
 {
 #ifdef KERNEL_STUB
-	STUB_ASSERT(KERNEL_ARCH, filter_divide_shadow);
+  STUB_ASSERT(KERNEL_ARCH, filter_divide_shadow);
 #else
-	kernel_filter_divide_shadow(sample, tile_info,
-	                            x, y,
-	                            unfilteredA,
-	                            unfilteredB,
-	                            sampleVariance,
-	                            sampleVarianceV,
-	                            bufferVariance,
-	                            load_int4(prefilter_rect),
-	                            buffer_pass_stride,
-	                            buffer_denoising_offset);
+  kernel_filter_divide_shadow(sample,
+                              tile_info,
+                              x,
+                              y,
+                              unfilteredA,
+                              unfilteredB,
+                              sampleVariance,
+                              sampleVarianceV,
+                              bufferVariance,
+                              load_int4(prefilter_rect),
+                              buffer_pass_stride,
+                              buffer_denoising_offset);
 #endif
 }
 
@@ -68,23 +70,28 @@ void KERNEL_FUNCTION_FULL_NAME(filter_get_feature)(int sample,
                                                    int v_offset,
                                                    int x,
                                                    int y,
-                                                   float *mean, float *variance,
+                                                   float *mean,
+                                                   float *variance,
                                                    float scale,
-                                                   int* prefilter_rect,
+                                                   int *prefilter_rect,
                                                    int buffer_pass_stride,
                                                    int buffer_denoising_offset)
 {
 #ifdef KERNEL_STUB
-	STUB_ASSERT(KERNEL_ARCH, filter_get_feature);
+  STUB_ASSERT(KERNEL_ARCH, filter_get_feature);
 #else
-	kernel_filter_get_feature(sample, tile_info,
-	                          m_offset, v_offset,
-	                          x, y,
-	                          mean, variance,
-	                          scale,
-	                          load_int4(prefilter_rect),
-	                          buffer_pass_stride,
-	                          buffer_denoising_offset);
+  kernel_filter_get_feature(sample,
+                            tile_info,
+                            m_offset,
+                            v_offset,
+                            x,
+                            y,
+                            mean,
+                            variance,
+                            scale,
+                            load_int4(prefilter_rect),
+                            buffer_pass_stride,
+                            buffer_denoising_offset);
 #endif
 }
 
@@ -95,16 +102,18 @@ void KERNEL_FUNCTION_FULL_NAME(filter_write_feature)(int sample,
                                                      float *from,
                                                      float *buffer,
                                                      int out_offset,
-                                                     int* prefilter_rect)
+                                                     int *prefilter_rect)
 {
 #ifdef KERNEL_STUB
-	STUB_ASSERT(KERNEL_ARCH, filter_write_feature);
+  STUB_ASSERT(KERNEL_ARCH, filter_write_feature);
 #else
-	kernel_filter_write_feature(sample, x, y, load_int4(buffer_params), from, buffer, out_offset, load_int4(prefilter_rect));
+  kernel_filter_write_feature(
+      sample, x, y, load_int4(buffer_params), from, buffer, out_offset, load_int4(prefilter_rect));
 #endif
 }
 
-void KERNEL_FUNCTION_FULL_NAME(filter_detect_outliers)(int x, int y,
+void KERNEL_FUNCTION_FULL_NAME(filter_detect_outliers)(int x,
+                                                       int y,
                                                        ccl_global float *image,
                                                        ccl_global float *variance,
                                                        ccl_global float *depth,
@@ -113,35 +122,31 @@ void KERNEL_FUNCTION_FULL_NAME(filter_detect_outliers)(int x, int y,
                                                        int pass_stride)
 {
 #ifdef KERNEL_STUB
-	STUB_ASSERT(KERNEL_ARCH, filter_detect_outliers);
+  STUB_ASSERT(KERNEL_ARCH, filter_detect_outliers);
 #else
-	kernel_filter_detect_outliers(x, y, image, variance, depth, output, load_int4(rect), pass_stride);
+  kernel_filter_detect_outliers(
+      x, y, image, variance, depth, output, load_int4(rect), pass_stride);
 #endif
 }
 
-void KERNEL_FUNCTION_FULL_NAME(filter_combine_halves)(int x, int y,
-                                                      float *mean,
-                                                      float *variance,
-                                                      float *a,
-                                                      float *b,
-                                                      int* prefilter_rect,
-                                                      int r)
+void KERNEL_FUNCTION_FULL_NAME(filter_combine_halves)(
+    int x, int y, float *mean, float *variance, float *a, float *b, int *prefilter_rect, int r)
 {
 #ifdef KERNEL_STUB
-	STUB_ASSERT(KERNEL_ARCH, filter_combine_halves);
+  STUB_ASSERT(KERNEL_ARCH, filter_combine_halves);
 #else
-	kernel_filter_combine_halves(x, y, mean, variance, a, b, load_int4(prefilter_rect), r);
+  kernel_filter_combine_halves(x, y, mean, variance, a, b, load_int4(prefilter_rect), r);
 #endif
 }
 
-void KERNEL_FUNCTION_FULL_NAME(filter_construct_transform)(float* buffer,
+void KERNEL_FUNCTION_FULL_NAME(filter_construct_transform)(float *buffer,
                                                            TileInfo *tile_info,
                                                            int x,
                                                            int y,
                                                            int storage_ofs,
                                                            float *transform,
                                                            int *rank,
-                                                           int* prefilter_rect,
+                                                           int *prefilter_rect,
                                                            int pass_stride,
                                                            int frame_stride,
                                                            bool use_time,
@@ -149,21 +154,22 @@ void KERNEL_FUNCTION_FULL_NAME(filter_construct_transform)(float* buffer,
                                                            float pca_threshold)
 {
 #ifdef KERNEL_STUB
-	STUB_ASSERT(KERNEL_ARCH, filter_construct_transform);
+  STUB_ASSERT(KERNEL_ARCH, filter_construct_transform);
 #else
-	rank += storage_ofs;
-	transform += storage_ofs*TRANSFORM_SIZE;
-	kernel_filter_construct_transform(buffer,
-	                                  tile_info,
-	                                  x, y,
-	                                  load_int4(prefilter_rect),
-	                                  pass_stride,
-	                                  frame_stride,
-	                                  use_time,
-	                                  transform,
-	                                  rank,
-	                                  radius,
-	                                  pca_threshold);
+  rank += storage_ofs;
+  transform += storage_ofs * TRANSFORM_SIZE;
+  kernel_filter_construct_transform(buffer,
+                                    tile_info,
+                                    x,
+                                    y,
+                                    load_int4(prefilter_rect),
+                                    pass_stride,
+                                    frame_stride,
+                                    use_time,
+                                    transform,
+                                    rank,
+                                    radius,
+                                    pca_threshold);
 #endif
 }
 
@@ -181,44 +187,40 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_calc_difference)(int dx,
                                                            float k_2)
 {
 #ifdef KERNEL_STUB
-	STUB_ASSERT(KERNEL_ARCH, filter_nlm_calc_difference);
+  STUB_ASSERT(KERNEL_ARCH, filter_nlm_calc_difference);
 #else
-	kernel_filter_nlm_calc_difference(dx, dy,
-	                                  weight_image,
-	                                  variance_image,
-	                                  scale_image,
-	                                  difference_image,
-	                                  load_int4(rect),
-	                                  stride,
-	                                  channel_offset,
-	                                  frame_offset,
-	                                  a, k_2);
+  kernel_filter_nlm_calc_difference(dx,
+                                    dy,
+                                    weight_image,
+                                    variance_image,
+                                    scale_image,
+                                    difference_image,
+                                    load_int4(rect),
+                                    stride,
+                                    channel_offset,
+                                    frame_offset,
+                                    a,
+                                    k_2);
 #endif
 }
 
-void KERNEL_FUNCTION_FULL_NAME(filter_nlm_blur)(float *difference_image,
-                                                float *out_image,
-                                                int *rect,
-                                                int stride,
-                                                int f)
+void KERNEL_FUNCTION_FULL_NAME(filter_nlm_blur)(
+    float *difference_image, float *out_image, int *rect, int stride, int f)
 {
 #ifdef KERNEL_STUB
-	STUB_ASSERT(KERNEL_ARCH, filter_nlm_blur);
+  STUB_ASSERT(KERNEL_ARCH, filter_nlm_blur);
 #else
-	kernel_filter_nlm_blur(difference_image, out_image, load_int4(rect), stride, f);
+  kernel_filter_nlm_blur(difference_image, out_image, load_int4(rect), stride, f);
 #endif
 }
 
-void KERNEL_FUNCTION_FULL_NAME(filter_nlm_calc_weight)(float *difference_image,
-                                                       float *out_image,
-                                                       int *rect,
-                                                       int stride,
-                                                       int f)
+void KERNEL_FUNCTION_FULL_NAME(filter_nlm_calc_weight)(
+    float *difference_image, float *out_image, int *rect, int stride, int f)
 {
 #ifdef KERNEL_STUB
-	STUB_ASSERT(KERNEL_ARCH, filter_nlm_calc_weight);
+  STUB_ASSERT(KERNEL_ARCH, filter_nlm_calc_weight);
 #else
-	kernel_filter_nlm_calc_weight(difference_image, out_image, load_int4(rect), stride, f);
+  kernel_filter_nlm_calc_weight(difference_image, out_image, load_int4(rect), stride, f);
 #endif
 }
 
@@ -235,17 +237,19 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_update_output)(int dx,
                                                          int f)
 {
 #ifdef KERNEL_STUB
-	STUB_ASSERT(KERNEL_ARCH, filter_nlm_update_output);
+  STUB_ASSERT(KERNEL_ARCH, filter_nlm_update_output);
 #else
-	kernel_filter_nlm_update_output(dx, dy,
-	                                difference_image,
-	                                image,
-	                                temp_image,
-	                                out_image,
-	                                accum_image,
-	                                load_int4(rect),
-	                                channel_offset,
-	                                stride, f);
+  kernel_filter_nlm_update_output(dx,
+                                  dy,
+                                  difference_image,
+                                  image,
+                                  temp_image,
+                                  out_image,
+                                  accum_image,
+                                  load_int4(rect),
+                                  channel_offset,
+                                  stride,
+                                  f);
 #endif
 }
 
@@ -267,19 +271,24 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_construct_gramian)(int dx,
                                                              bool use_time)
 {
 #ifdef KERNEL_STUB
-	STUB_ASSERT(KERNEL_ARCH, filter_nlm_construct_gramian);
+  STUB_ASSERT(KERNEL_ARCH, filter_nlm_construct_gramian);
 #else
-	kernel_filter_nlm_construct_gramian(dx, dy, t,
-	                                    difference_image,
-	                                    buffer,
-	                                    transform, rank,
-	                                    XtWX, XtWY,
-	                                    load_int4(rect),
-	                                    load_int4(filter_window),
-	                                    stride, f,
-	                                    pass_stride,
-	                                    frame_offset,
-	                                    use_time);
+  kernel_filter_nlm_construct_gramian(dx,
+                                      dy,
+                                      t,
+                                      difference_image,
+                                      buffer,
+                                      transform,
+                                      rank,
+                                      XtWX,
+                                      XtWY,
+                                      load_int4(rect),
+                                      load_int4(filter_window),
+                                      stride,
+                                      f,
+                                      pass_stride,
+                                      frame_offset,
+                                      use_time);
 #endif
 }
 
@@ -289,9 +298,9 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_normalize)(float *out_image,
                                                      int stride)
 {
 #ifdef KERNEL_STUB
-	STUB_ASSERT(KERNEL_ARCH, filter_nlm_normalize);
+  STUB_ASSERT(KERNEL_ARCH, filter_nlm_normalize);
 #else
-	kernel_filter_nlm_normalize(out_image, accum_image, load_int4(rect), stride);
+  kernel_filter_nlm_normalize(out_image, accum_image, load_int4(rect), stride);
 #endif
 }
 
@@ -306,12 +315,12 @@ void KERNEL_FUNCTION_FULL_NAME(filter_finalize)(int x,
                                                 int sample)
 {
 #ifdef KERNEL_STUB
-	STUB_ASSERT(KERNEL_ARCH, filter_finalize);
+  STUB_ASSERT(KERNEL_ARCH, filter_finalize);
 #else
-	XtWX += storage_ofs*XTWX_SIZE;
-	XtWY += storage_ofs*XTWY_SIZE;
-	rank += storage_ofs;
-	kernel_filter_finalize(x, y, buffer, rank, 1, XtWX, XtWY, load_int4(buffer_params), sample);
+  XtWX += storage_ofs * XTWX_SIZE;
+  XtWY += storage_ofs * XTWY_SIZE;
+  rank += storage_ofs;
+  kernel_filter_finalize(x, y, buffer, rank, 1, XtWX, XtWY, load_int4(buffer_params), sample);
 #endif
 }
 
diff --git a/intern/cycles/kernel/kernels/cpu/filter_sse2.cpp b/intern/cycles/kernel/kernels/cpu/filter_sse2.cpp
index f7c9935f1d0..6c6c3e78696 100644
--- a/intern/cycles/kernel/kernels/cpu/filter_sse2.cpp
+++ b/intern/cycles/kernel/kernels/cpu/filter_sse2.cpp
@@ -27,7 +27,7 @@
 #  if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86)))
 #    define __KERNEL_SSE2__
 #  endif
-#endif  /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 */
+#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 */
 
 #include "kernel/filter/filter.h"
 #define KERNEL_ARCH cpu_sse2
diff --git a/intern/cycles/kernel/kernels/cpu/filter_sse3.cpp b/intern/cycles/kernel/kernels/cpu/filter_sse3.cpp
index 070b95a3505..e2243000331 100644
--- a/intern/cycles/kernel/kernels/cpu/filter_sse3.cpp
+++ b/intern/cycles/kernel/kernels/cpu/filter_sse3.cpp
@@ -29,7 +29,7 @@
 #    define __KERNEL_SSE3__
 #    define __KERNEL_SSSE3__
 #  endif
-#endif  /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 */
+#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 */
 
 #include "kernel/filter/filter.h"
 #define KERNEL_ARCH cpu_sse3
diff --git a/intern/cycles/kernel/kernels/cpu/filter_sse41.cpp b/intern/cycles/kernel/kernels/cpu/filter_sse41.cpp
index 254025be4e2..068889365e3 100644
--- a/intern/cycles/kernel/kernels/cpu/filter_sse41.cpp
+++ b/intern/cycles/kernel/kernels/cpu/filter_sse41.cpp
@@ -31,7 +31,7 @@
 #    define __KERNEL_SSSE3__
 #    define __KERNEL_SSE41__
 #  endif
-#endif  /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 */
+#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 */
 
 #include "kernel/filter/filter.h"
 #define KERNEL_ARCH cpu_sse41
diff --git a/intern/cycles/kernel/kernels/cpu/kernel.cpp b/intern/cycles/kernel/kernels/cpu/kernel.cpp
index de487f6123f..f2146302a27 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel.cpp
+++ b/intern/cycles/kernel/kernels/cpu/kernel.cpp
@@ -53,7 +53,7 @@
 
 /* quiet unused define warnings */
 #if defined(__KERNEL_SSE2__)
-    /* do nothing */
+/* do nothing */
 #endif
 
 #include "kernel/kernel.h"
@@ -66,29 +66,27 @@ CCL_NAMESPACE_BEGIN
 
 void kernel_const_copy(KernelGlobals *kg, const char *name, void *host, size_t size)
 {
-	if(strcmp(name, "__data") == 0)
-		memcpy(&kg->__data, host, size);
-	else
-		assert(0);
+  if (strcmp(name, "__data") == 0)
+    memcpy(&kg->__data, host, size);
+  else
+    assert(0);
 }
 
-void kernel_tex_copy(KernelGlobals *kg,
-                     const char *name,
-                     void *mem,
-                     size_t size)
+void kernel_tex_copy(KernelGlobals *kg, const char *name, void *mem, size_t size)
 {
-	if(0) {
-	}
+  if (0) {
+  }
 
 #define KERNEL_TEX(type, tname) \
-	else if(strcmp(name, #tname) == 0) { \
-		kg->tname.data = (type*)mem; \
-		kg->tname.width = size; \
-	}
+  else if (strcmp(name, #tname) == 0) \
+  { \
+    kg->tname.data = (type *)mem; \
+    kg->tname.width = size; \
+  }
 #include "kernel/kernel_textures.h"
-	else {
-		assert(0);
-	}
+  else {
+    assert(0);
+  }
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_avx.cpp b/intern/cycles/kernel/kernels/cpu/kernel_avx.cpp
index a645fb4d8dd..0656fc9dd00 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_avx.cpp
+++ b/intern/cycles/kernel/kernels/cpu/kernel_avx.cpp
@@ -32,7 +32,7 @@
 #    define __KERNEL_SSE41__
 #    define __KERNEL_AVX__
 #  endif
-#endif  /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX */
+#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX */
 
 #include "kernel/kernel.h"
 #define KERNEL_ARCH cpu_avx
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_avx2.cpp b/intern/cycles/kernel/kernels/cpu/kernel_avx2.cpp
index 6bbb87727b9..5baafdc699e 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_avx2.cpp
+++ b/intern/cycles/kernel/kernels/cpu/kernel_avx2.cpp
@@ -33,7 +33,7 @@
 #    define __KERNEL_AVX__
 #    define __KERNEL_AVX2__
 #  endif
-#endif  /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 */
+#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 */
 
 #include "kernel/kernel.h"
 #define KERNEL_ARCH cpu_avx2
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
index 6bdb8546a24..f5d981fb71a 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
@@ -16,25 +16,24 @@
 
 /* Templated common declaration part of all CPU kernels. */
 
-void KERNEL_FUNCTION_FULL_NAME(path_trace)(KernelGlobals *kg,
-                                           float *buffer,
-                                           int sample,
-                                           int x, int y,
-                                           int offset,
-                                           int stride);
+void KERNEL_FUNCTION_FULL_NAME(path_trace)(
+    KernelGlobals *kg, float *buffer, int sample, int x, int y, int offset, int stride);
 
 void KERNEL_FUNCTION_FULL_NAME(convert_to_byte)(KernelGlobals *kg,
                                                 uchar4 *rgba,
                                                 float *buffer,
                                                 float sample_scale,
-                                                int x, int y,
-                                                int offset, int stride);
+                                                int x,
+                                                int y,
+                                                int offset,
+                                                int stride);
 
 void KERNEL_FUNCTION_FULL_NAME(convert_to_half_float)(KernelGlobals *kg,
                                                       uchar4 *rgba,
                                                       float *buffer,
                                                       float sample_scale,
-                                                      int x, int y,
+                                                      int x,
+                                                      int y,
                                                       int offset,
                                                       int stride);
 
@@ -49,24 +48,28 @@ void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg,
 
 /* Split kernels */
 
-void KERNEL_FUNCTION_FULL_NAME(data_init)(
-        KernelGlobals *kg,
-        ccl_constant KernelData *data,
-        ccl_global void *split_data_buffer,
-        int num_elements,
-        ccl_global char *ray_state,
-        int start_sample,
-        int end_sample,
-        int sx, int sy, int sw, int sh, int offset, int stride,
-        ccl_global int *Queue_index,
-        int queuesize,
-        ccl_global char *use_queues_flag,
-        ccl_global unsigned int *work_pool_wgs,
-        unsigned int num_samples,
-        ccl_global float *buffer);
+void KERNEL_FUNCTION_FULL_NAME(data_init)(KernelGlobals *kg,
+                                          ccl_constant KernelData *data,
+                                          ccl_global void *split_data_buffer,
+                                          int num_elements,
+                                          ccl_global char *ray_state,
+                                          int start_sample,
+                                          int end_sample,
+                                          int sx,
+                                          int sy,
+                                          int sw,
+                                          int sh,
+                                          int offset,
+                                          int stride,
+                                          ccl_global int *Queue_index,
+                                          int queuesize,
+                                          ccl_global char *use_queues_flag,
+                                          ccl_global unsigned int *work_pool_wgs,
+                                          unsigned int num_samples,
+                                          ccl_global float *buffer);
 
 #define DECLARE_SPLIT_KERNEL_FUNCTION(name) \
-	void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals *kg, KernelData *data);
+  void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals * kg, KernelData * data);
 
 DECLARE_SPLIT_KERNEL_FUNCTION(path_init)
 DECLARE_SPLIT_KERNEL_FUNCTION(scene_intersect)
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h
index ae4fd85780d..4289e2bbb85 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h
@@ -19,523 +19,508 @@
 
 CCL_NAMESPACE_BEGIN
 
-template<typename T> struct TextureInterpolator  {
+template<typename T> struct TextureInterpolator {
 #define SET_CUBIC_SPLINE_WEIGHTS(u, t) \
-	{ \
-		u[0] = (((-1.0f/6.0f)* t + 0.5f) * t - 0.5f) * t + (1.0f/6.0f); \
-		u[1] =  ((      0.5f * t - 1.0f) * t       ) * t + (2.0f/3.0f); \
-		u[2] =  ((     -0.5f * t + 0.5f) * t + 0.5f) * t + (1.0f/6.0f); \
-		u[3] = (1.0f / 6.0f) * t * t * t; \
-	} (void) 0
-
-	static ccl_always_inline float4 read(float4 r)
-	{
-		return r;
-	}
-
-	static ccl_always_inline float4 read(uchar4 r)
-	{
-		float f = 1.0f / 255.0f;
-		return make_float4(r.x*f, r.y*f, r.z*f, r.w*f);
-	}
-
-	static ccl_always_inline float4 read(uchar r)
-	{
-		float f = r * (1.0f / 255.0f);
-		return make_float4(f, f, f, 1.0f);
-	}
-
-	static ccl_always_inline float4 read(float r)
-	{
-		/* TODO(dingto): Optimize this, so interpolation
-		 * happens on float instead of float4 */
-		return make_float4(r, r, r, 1.0f);
-	}
-
-	static ccl_always_inline float4 read(half4 r)
-	{
-		return half4_to_float4(r);
-	}
-
-	static ccl_always_inline float4 read(half r)
-	{
-		float f = half_to_float(r);
-		return make_float4(f, f, f, 1.0f);
-	}
-
-	static ccl_always_inline float4 read(uint16_t r)
-	{
-		float f = r*(1.0f/65535.0f);
-		return make_float4(f, f, f, 1.0f);
-	}
-
-	static ccl_always_inline float4 read(ushort4 r)
-	{
-		float f = 1.0f/65535.0f;
-		return make_float4(r.x*f, r.y*f, r.z*f, r.w*f);
-	}
-
-	static ccl_always_inline float4 read(const T *data,
-	                                     int x, int y,
-	                                     int width, int height)
-	{
-		if(x < 0 || y < 0 || x >= width || y >= height) {
-			return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-		}
-		return read(data[y * width + x]);
-	}
-
-	static ccl_always_inline int wrap_periodic(int x, int width)
-	{
-		x %= width;
-		if(x < 0)
-			x += width;
-		return x;
-	}
-
-	static ccl_always_inline int wrap_clamp(int x, int width)
-	{
-		return clamp(x, 0, width-1);
-	}
-
-	static ccl_always_inline float frac(float x, int *ix)
-	{
-		int i = float_to_int(x) - ((x < 0.0f)? 1: 0);
-		*ix = i;
-		return x - (float)i;
-	}
-
-	/* ********  2D interpolation ******** */
-
-	static ccl_always_inline float4 interp_closest(const TextureInfo& info,
-	                                               float x, float y)
-	{
-		const T *data = (const T*)info.data;
-		const int width = info.width;
-		const int height = info.height;
-		int ix, iy;
-		frac(x*(float)width, &ix);
-		frac(y*(float)height, &iy);
-		switch(info.extension) {
-			case EXTENSION_REPEAT:
-				ix = wrap_periodic(ix, width);
-				iy = wrap_periodic(iy, height);
-				break;
-			case EXTENSION_CLIP:
-				if(x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) {
-					return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-				}
-				ATTR_FALLTHROUGH;
-			case EXTENSION_EXTEND:
-				ix = wrap_clamp(ix, width);
-				iy = wrap_clamp(iy, height);
-				break;
-			default:
-				kernel_assert(0);
-				return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-		}
-		return read(data[ix + iy*width]);
-	}
-
-	static ccl_always_inline float4 interp_linear(const TextureInfo& info,
-	                                              float x, float y)
-	{
-		const T *data = (const T*)info.data;
-		const int width = info.width;
-		const int height = info.height;
-		int ix, iy, nix, niy;
-		const float tx = frac(x*(float)width - 0.5f, &ix);
-		const float ty = frac(y*(float)height - 0.5f, &iy);
-		switch(info.extension) {
-			case EXTENSION_REPEAT:
-				ix = wrap_periodic(ix, width);
-				iy = wrap_periodic(iy, height);
-				nix = wrap_periodic(ix+1, width);
-				niy = wrap_periodic(iy+1, height);
-				break;
-			case EXTENSION_CLIP:
-				nix = ix + 1;
-				niy = iy + 1;
-				break;
-			case EXTENSION_EXTEND:
-				nix = wrap_clamp(ix+1, width);
-				niy = wrap_clamp(iy+1, height);
-				ix = wrap_clamp(ix, width);
-				iy = wrap_clamp(iy, height);
-				break;
-			default:
-				kernel_assert(0);
-				return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-		}
-		return (1.0f - ty) * (1.0f - tx) * read(data, ix, iy, width, height) +
-		       (1.0f - ty) * tx * read(data, nix, iy, width, height) +
-		       ty * (1.0f - tx) * read(data, ix, niy, width, height) +
-		       ty * tx * read(data, nix, niy, width, height);
-	}
-
-	static ccl_always_inline float4 interp_cubic(const TextureInfo& info,
-	                                             float x, float y)
-	{
-		const T *data = (const T*)info.data;
-		const int width = info.width;
-		const int height = info.height;
-		int ix, iy, nix, niy;
-		const float tx = frac(x*(float)width - 0.5f, &ix);
-		const float ty = frac(y*(float)height - 0.5f, &iy);
-		int pix, piy, nnix, nniy;
-		switch(info.extension) {
-			case EXTENSION_REPEAT:
-				ix = wrap_periodic(ix, width);
-				iy = wrap_periodic(iy, height);
-				pix = wrap_periodic(ix-1, width);
-				piy = wrap_periodic(iy-1, height);
-				nix = wrap_periodic(ix+1, width);
-				niy = wrap_periodic(iy+1, height);
-				nnix = wrap_periodic(ix+2, width);
-				nniy = wrap_periodic(iy+2, height);
-				break;
-			case EXTENSION_CLIP:
-				pix = ix - 1;
-				piy = iy - 1;
-				nix = ix + 1;
-				niy = iy + 1;
-				nnix = ix + 2;
-				nniy = iy + 2;
-				break;
-			case EXTENSION_EXTEND:
-				pix = wrap_clamp(ix-1, width);
-				piy = wrap_clamp(iy-1, height);
-				nix = wrap_clamp(ix+1, width);
-				niy = wrap_clamp(iy+1, height);
-				nnix = wrap_clamp(ix+2, width);
-				nniy = wrap_clamp(iy+2, height);
-				ix = wrap_clamp(ix, width);
-				iy = wrap_clamp(iy, height);
-				break;
-			default:
-				kernel_assert(0);
-				return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-		}
-		const int xc[4] = {pix, ix, nix, nnix};
-		const int yc[4] = {piy, iy, niy, nniy};
-		float u[4], v[4];
-		/* Some helper macro to keep code reasonable size,
-		 * let compiler to inline all the matrix multiplications.
-		 */
+  { \
+    u[0] = (((-1.0f / 6.0f) * t + 0.5f) * t - 0.5f) * t + (1.0f / 6.0f); \
+    u[1] = ((0.5f * t - 1.0f) * t) * t + (2.0f / 3.0f); \
+    u[2] = ((-0.5f * t + 0.5f) * t + 0.5f) * t + (1.0f / 6.0f); \
+    u[3] = (1.0f / 6.0f) * t * t * t; \
+  } \
+  (void)0
+
+  static ccl_always_inline float4 read(float4 r)
+  {
+    return r;
+  }
+
+  static ccl_always_inline float4 read(uchar4 r)
+  {
+    float f = 1.0f / 255.0f;
+    return make_float4(r.x * f, r.y * f, r.z * f, r.w * f);
+  }
+
+  static ccl_always_inline float4 read(uchar r)
+  {
+    float f = r * (1.0f / 255.0f);
+    return make_float4(f, f, f, 1.0f);
+  }
+
+  static ccl_always_inline float4 read(float r)
+  {
+    /* TODO(dingto): Optimize this, so interpolation
+     * happens on float instead of float4 */
+    return make_float4(r, r, r, 1.0f);
+  }
+
+  static ccl_always_inline float4 read(half4 r)
+  {
+    return half4_to_float4(r);
+  }
+
+  static ccl_always_inline float4 read(half r)
+  {
+    float f = half_to_float(r);
+    return make_float4(f, f, f, 1.0f);
+  }
+
+  static ccl_always_inline float4 read(uint16_t r)
+  {
+    float f = r * (1.0f / 65535.0f);
+    return make_float4(f, f, f, 1.0f);
+  }
+
+  static ccl_always_inline float4 read(ushort4 r)
+  {
+    float f = 1.0f / 65535.0f;
+    return make_float4(r.x * f, r.y * f, r.z * f, r.w * f);
+  }
+
+  static ccl_always_inline float4 read(const T *data, int x, int y, int width, int height)
+  {
+    if (x < 0 || y < 0 || x >= width || y >= height) {
+      return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+    }
+    return read(data[y * width + x]);
+  }
+
+  static ccl_always_inline int wrap_periodic(int x, int width)
+  {
+    x %= width;
+    if (x < 0)
+      x += width;
+    return x;
+  }
+
+  static ccl_always_inline int wrap_clamp(int x, int width)
+  {
+    return clamp(x, 0, width - 1);
+  }
+
+  static ccl_always_inline float frac(float x, int *ix)
+  {
+    int i = float_to_int(x) - ((x < 0.0f) ? 1 : 0);
+    *ix = i;
+    return x - (float)i;
+  }
+
+  /* ********  2D interpolation ******** */
+
+  static ccl_always_inline float4 interp_closest(const TextureInfo &info, float x, float y)
+  {
+    const T *data = (const T *)info.data;
+    const int width = info.width;
+    const int height = info.height;
+    int ix, iy;
+    frac(x * (float)width, &ix);
+    frac(y * (float)height, &iy);
+    switch (info.extension) {
+      case EXTENSION_REPEAT:
+        ix = wrap_periodic(ix, width);
+        iy = wrap_periodic(iy, height);
+        break;
+      case EXTENSION_CLIP:
+        if (x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) {
+          return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+        }
+        ATTR_FALLTHROUGH;
+      case EXTENSION_EXTEND:
+        ix = wrap_clamp(ix, width);
+        iy = wrap_clamp(iy, height);
+        break;
+      default:
+        kernel_assert(0);
+        return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+    }
+    return read(data[ix + iy * width]);
+  }
+
+  static ccl_always_inline float4 interp_linear(const TextureInfo &info, float x, float y)
+  {
+    const T *data = (const T *)info.data;
+    const int width = info.width;
+    const int height = info.height;
+    int ix, iy, nix, niy;
+    const float tx = frac(x * (float)width - 0.5f, &ix);
+    const float ty = frac(y * (float)height - 0.5f, &iy);
+    switch (info.extension) {
+      case EXTENSION_REPEAT:
+        ix = wrap_periodic(ix, width);
+        iy = wrap_periodic(iy, height);
+        nix = wrap_periodic(ix + 1, width);
+        niy = wrap_periodic(iy + 1, height);
+        break;
+      case EXTENSION_CLIP:
+        nix = ix + 1;
+        niy = iy + 1;
+        break;
+      case EXTENSION_EXTEND:
+        nix = wrap_clamp(ix + 1, width);
+        niy = wrap_clamp(iy + 1, height);
+        ix = wrap_clamp(ix, width);
+        iy = wrap_clamp(iy, height);
+        break;
+      default:
+        kernel_assert(0);
+        return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+    }
+    return (1.0f - ty) * (1.0f - tx) * read(data, ix, iy, width, height) +
+           (1.0f - ty) * tx * read(data, nix, iy, width, height) +
+           ty * (1.0f - tx) * read(data, ix, niy, width, height) +
+           ty * tx * read(data, nix, niy, width, height);
+  }
+
+  static ccl_always_inline float4 interp_cubic(const TextureInfo &info, float x, float y)
+  {
+    const T *data = (const T *)info.data;
+    const int width = info.width;
+    const int height = info.height;
+    int ix, iy, nix, niy;
+    const float tx = frac(x * (float)width - 0.5f, &ix);
+    const float ty = frac(y * (float)height - 0.5f, &iy);
+    int pix, piy, nnix, nniy;
+    switch (info.extension) {
+      case EXTENSION_REPEAT:
+        ix = wrap_periodic(ix, width);
+        iy = wrap_periodic(iy, height);
+        pix = wrap_periodic(ix - 1, width);
+        piy = wrap_periodic(iy - 1, height);
+        nix = wrap_periodic(ix + 1, width);
+        niy = wrap_periodic(iy + 1, height);
+        nnix = wrap_periodic(ix + 2, width);
+        nniy = wrap_periodic(iy + 2, height);
+        break;
+      case EXTENSION_CLIP:
+        pix = ix - 1;
+        piy = iy - 1;
+        nix = ix + 1;
+        niy = iy + 1;
+        nnix = ix + 2;
+        nniy = iy + 2;
+        break;
+      case EXTENSION_EXTEND:
+        pix = wrap_clamp(ix - 1, width);
+        piy = wrap_clamp(iy - 1, height);
+        nix = wrap_clamp(ix + 1, width);
+        niy = wrap_clamp(iy + 1, height);
+        nnix = wrap_clamp(ix + 2, width);
+        nniy = wrap_clamp(iy + 2, height);
+        ix = wrap_clamp(ix, width);
+        iy = wrap_clamp(iy, height);
+        break;
+      default:
+        kernel_assert(0);
+        return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+    }
+    const int xc[4] = {pix, ix, nix, nnix};
+    const int yc[4] = {piy, iy, niy, nniy};
+    float u[4], v[4];
+    /* Some helper macro to keep code reasonable size,
+     * let compiler to inline all the matrix multiplications.
+     */
 #define DATA(x, y) (read(data, xc[x], yc[y], width, height))
 #define TERM(col) \
-		(v[col] * (u[0] * DATA(0, col) + \
-		           u[1] * DATA(1, col) + \
-		           u[2] * DATA(2, col) + \
-		           u[3] * DATA(3, col)))
+  (v[col] * \
+   (u[0] * DATA(0, col) + u[1] * DATA(1, col) + u[2] * DATA(2, col) + u[3] * DATA(3, col)))
 
-		SET_CUBIC_SPLINE_WEIGHTS(u, tx);
-		SET_CUBIC_SPLINE_WEIGHTS(v, ty);
+    SET_CUBIC_SPLINE_WEIGHTS(u, tx);
+    SET_CUBIC_SPLINE_WEIGHTS(v, ty);
 
-		/* Actual interpolation. */
-		return TERM(0) + TERM(1) + TERM(2) + TERM(3);
+    /* Actual interpolation. */
+    return TERM(0) + TERM(1) + TERM(2) + TERM(3);
 #undef TERM
 #undef DATA
-	}
-
-	static ccl_always_inline float4 interp(const TextureInfo& info,
-	                                       float x, float y)
-	{
-		if(UNLIKELY(!info.data)) {
-			return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-		}
-		switch(info.interpolation) {
-			case INTERPOLATION_CLOSEST:
-				return interp_closest(info, x, y);
-			case INTERPOLATION_LINEAR:
-				return interp_linear(info, x, y);
-			default:
-				return interp_cubic(info, x, y);
-		}
-	}
-
-	/* ********  3D interpolation ******** */
-
-	static ccl_always_inline float4 interp_3d_closest(const TextureInfo& info,
-	                                                  float x, float y, float z)
-	{
-		int width = info.width;
-		int height = info.height;
-		int depth = info.depth;
-		int ix, iy, iz;
-
-		frac(x*(float)width, &ix);
-		frac(y*(float)height, &iy);
-		frac(z*(float)depth, &iz);
-
-		switch(info.extension) {
-			case EXTENSION_REPEAT:
-				ix = wrap_periodic(ix, width);
-				iy = wrap_periodic(iy, height);
-				iz = wrap_periodic(iz, depth);
-				break;
-			case EXTENSION_CLIP:
-				if(x < 0.0f || y < 0.0f || z < 0.0f ||
-				   x > 1.0f || y > 1.0f || z > 1.0f)
-				{
-					return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-				}
-				ATTR_FALLTHROUGH;
-			case EXTENSION_EXTEND:
-				ix = wrap_clamp(ix, width);
-				iy = wrap_clamp(iy, height);
-				iz = wrap_clamp(iz, depth);
-				break;
-			default:
-				kernel_assert(0);
-				return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-		}
-
-		const T *data = (const T*)info.data;
-		return read(data[ix + iy*width + iz*width*height]);
-	}
-
-	static ccl_always_inline float4 interp_3d_linear(const TextureInfo& info,
-	                                                 float x, float y, float z)
-	{
-		int width = info.width;
-		int height = info.height;
-		int depth = info.depth;
-		int ix, iy, iz;
-		int nix, niy, niz;
-
-		float tx = frac(x*(float)width - 0.5f, &ix);
-		float ty = frac(y*(float)height - 0.5f, &iy);
-		float tz = frac(z*(float)depth - 0.5f, &iz);
-
-		switch(info.extension) {
-			case EXTENSION_REPEAT:
-				ix = wrap_periodic(ix, width);
-				iy = wrap_periodic(iy, height);
-				iz = wrap_periodic(iz, depth);
-
-				nix = wrap_periodic(ix+1, width);
-				niy = wrap_periodic(iy+1, height);
-				niz = wrap_periodic(iz+1, depth);
-				break;
-			case EXTENSION_CLIP:
-				if(x < 0.0f || y < 0.0f || z < 0.0f ||
-				   x > 1.0f || y > 1.0f || z > 1.0f)
-				{
-					return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-				}
-				ATTR_FALLTHROUGH;
-			case EXTENSION_EXTEND:
-				nix = wrap_clamp(ix+1, width);
-				niy = wrap_clamp(iy+1, height);
-				niz = wrap_clamp(iz+1, depth);
-
-				ix = wrap_clamp(ix, width);
-				iy = wrap_clamp(iy, height);
-				iz = wrap_clamp(iz, depth);
-				break;
-			default:
-				kernel_assert(0);
-				return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-		}
-
-		const T *data = (const T*)info.data;
-		float4 r;
-
-		r  = (1.0f - tz)*(1.0f - ty)*(1.0f - tx)*read(data[ix + iy*width + iz*width*height]);
-		r += (1.0f - tz)*(1.0f - ty)*tx*read(data[nix + iy*width + iz*width*height]);
-		r += (1.0f - tz)*ty*(1.0f - tx)*read(data[ix + niy*width + iz*width*height]);
-		r += (1.0f - tz)*ty*tx*read(data[nix + niy*width + iz*width*height]);
-
-		r += tz*(1.0f - ty)*(1.0f - tx)*read(data[ix + iy*width + niz*width*height]);
-		r += tz*(1.0f - ty)*tx*read(data[nix + iy*width + niz*width*height]);
-		r += tz*ty*(1.0f - tx)*read(data[ix + niy*width + niz*width*height]);
-		r += tz*ty*tx*read(data[nix + niy*width + niz*width*height]);
-
-		return r;
-	}
-
-	/* TODO(sergey): For some unspeakable reason both GCC-6 and Clang-3.9 are
-	 * causing stack overflow issue in this function unless it is inlined.
-	 *
-	 * Only happens for AVX2 kernel and global __KERNEL_SSE__ vectorization
-	 * enabled.
-	 */
+  }
+
+  static ccl_always_inline float4 interp(const TextureInfo &info, float x, float y)
+  {
+    if (UNLIKELY(!info.data)) {
+      return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+    }
+    switch (info.interpolation) {
+      case INTERPOLATION_CLOSEST:
+        return interp_closest(info, x, y);
+      case INTERPOLATION_LINEAR:
+        return interp_linear(info, x, y);
+      default:
+        return interp_cubic(info, x, y);
+    }
+  }
+
+  /* ********  3D interpolation ******** */
+
+  static ccl_always_inline float4 interp_3d_closest(const TextureInfo &info,
+                                                    float x,
+                                                    float y,
+                                                    float z)
+  {
+    int width = info.width;
+    int height = info.height;
+    int depth = info.depth;
+    int ix, iy, iz;
+
+    frac(x * (float)width, &ix);
+    frac(y * (float)height, &iy);
+    frac(z * (float)depth, &iz);
+
+    switch (info.extension) {
+      case EXTENSION_REPEAT:
+        ix = wrap_periodic(ix, width);
+        iy = wrap_periodic(iy, height);
+        iz = wrap_periodic(iz, depth);
+        break;
+      case EXTENSION_CLIP:
+        if (x < 0.0f || y < 0.0f || z < 0.0f || x > 1.0f || y > 1.0f || z > 1.0f) {
+          return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+        }
+        ATTR_FALLTHROUGH;
+      case EXTENSION_EXTEND:
+        ix = wrap_clamp(ix, width);
+        iy = wrap_clamp(iy, height);
+        iz = wrap_clamp(iz, depth);
+        break;
+      default:
+        kernel_assert(0);
+        return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+    }
+
+    const T *data = (const T *)info.data;
+    return read(data[ix + iy * width + iz * width * height]);
+  }
+
+  static ccl_always_inline float4 interp_3d_linear(const TextureInfo &info,
+                                                   float x,
+                                                   float y,
+                                                   float z)
+  {
+    int width = info.width;
+    int height = info.height;
+    int depth = info.depth;
+    int ix, iy, iz;
+    int nix, niy, niz;
+
+    float tx = frac(x * (float)width - 0.5f, &ix);
+    float ty = frac(y * (float)height - 0.5f, &iy);
+    float tz = frac(z * (float)depth - 0.5f, &iz);
+
+    switch (info.extension) {
+      case EXTENSION_REPEAT:
+        ix = wrap_periodic(ix, width);
+        iy = wrap_periodic(iy, height);
+        iz = wrap_periodic(iz, depth);
+
+        nix = wrap_periodic(ix + 1, width);
+        niy = wrap_periodic(iy + 1, height);
+        niz = wrap_periodic(iz + 1, depth);
+        break;
+      case EXTENSION_CLIP:
+        if (x < 0.0f || y < 0.0f || z < 0.0f || x > 1.0f || y > 1.0f || z > 1.0f) {
+          return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+        }
+        ATTR_FALLTHROUGH;
+      case EXTENSION_EXTEND:
+        nix = wrap_clamp(ix + 1, width);
+        niy = wrap_clamp(iy + 1, height);
+        niz = wrap_clamp(iz + 1, depth);
+
+        ix = wrap_clamp(ix, width);
+        iy = wrap_clamp(iy, height);
+        iz = wrap_clamp(iz, depth);
+        break;
+      default:
+        kernel_assert(0);
+        return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+    }
+
+    const T *data = (const T *)info.data;
+    float4 r;
+
+    r = (1.0f - tz) * (1.0f - ty) * (1.0f - tx) *
+        read(data[ix + iy * width + iz * width * height]);
+    r += (1.0f - tz) * (1.0f - ty) * tx * read(data[nix + iy * width + iz * width * height]);
+    r += (1.0f - tz) * ty * (1.0f - tx) * read(data[ix + niy * width + iz * width * height]);
+    r += (1.0f - tz) * ty * tx * read(data[nix + niy * width + iz * width * height]);
+
+    r += tz * (1.0f - ty) * (1.0f - tx) * read(data[ix + iy * width + niz * width * height]);
+    r += tz * (1.0f - ty) * tx * read(data[nix + iy * width + niz * width * height]);
+    r += tz * ty * (1.0f - tx) * read(data[ix + niy * width + niz * width * height]);
+    r += tz * ty * tx * read(data[nix + niy * width + niz * width * height]);
+
+    return r;
+  }
+
+  /* TODO(sergey): For some unspeakable reason both GCC-6 and Clang-3.9 are
+   * causing stack overflow issue in this function unless it is inlined.
+   *
+   * Only happens for AVX2 kernel and global __KERNEL_SSE__ vectorization
+   * enabled.
+   */
 #if defined(__GNUC__) || defined(__clang__)
-	static ccl_always_inline
+  static ccl_always_inline
 #else
-	static ccl_never_inline
+  static ccl_never_inline
 #endif
-	float4 interp_3d_tricubic(const TextureInfo& info, float x, float y, float z)
-	{
-		int width = info.width;
-		int height = info.height;
-		int depth = info.depth;
-		int ix, iy, iz;
-		int nix, niy, niz;
-		/* Tricubic b-spline interpolation. */
-		const float tx = frac(x*(float)width - 0.5f, &ix);
-		const float ty = frac(y*(float)height - 0.5f, &iy);
-		const float tz = frac(z*(float)depth - 0.5f, &iz);
-		int pix, piy, piz, nnix, nniy, nniz;
-
-		switch(info.extension) {
-			case EXTENSION_REPEAT:
-				ix = wrap_periodic(ix, width);
-				iy = wrap_periodic(iy, height);
-				iz = wrap_periodic(iz, depth);
-
-				pix = wrap_periodic(ix-1, width);
-				piy = wrap_periodic(iy-1, height);
-				piz = wrap_periodic(iz-1, depth);
-
-				nix = wrap_periodic(ix+1, width);
-				niy = wrap_periodic(iy+1, height);
-				niz = wrap_periodic(iz+1, depth);
-
-				nnix = wrap_periodic(ix+2, width);
-				nniy = wrap_periodic(iy+2, height);
-				nniz = wrap_periodic(iz+2, depth);
-				break;
-			case EXTENSION_CLIP:
-				if(x < 0.0f || y < 0.0f || z < 0.0f ||
-				   x > 1.0f || y > 1.0f || z > 1.0f)
-				{
-					return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-				}
-				ATTR_FALLTHROUGH;
-			case EXTENSION_EXTEND:
-				pix = wrap_clamp(ix-1, width);
-				piy = wrap_clamp(iy-1, height);
-				piz = wrap_clamp(iz-1, depth);
-
-				nix = wrap_clamp(ix+1, width);
-				niy = wrap_clamp(iy+1, height);
-				niz = wrap_clamp(iz+1, depth);
-
-				nnix = wrap_clamp(ix+2, width);
-				nniy = wrap_clamp(iy+2, height);
-				nniz = wrap_clamp(iz+2, depth);
-
-				ix = wrap_clamp(ix, width);
-				iy = wrap_clamp(iy, height);
-				iz = wrap_clamp(iz, depth);
-				break;
-			default:
-				kernel_assert(0);
-				return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-		}
-
-		const int xc[4] = {pix, ix, nix, nnix};
-		const int yc[4] = {width * piy,
-		                   width * iy,
-		                   width * niy,
-		                   width * nniy};
-		const int zc[4] = {width * height * piz,
-		                   width * height * iz,
-		                   width * height * niz,
-		                   width * height * nniz};
-		float u[4], v[4], w[4];
-
-		/* Some helper macro to keep code reasonable size,
-		 * let compiler to inline all the matrix multiplications.
-		 */
+      float4
+      interp_3d_tricubic(const TextureInfo &info, float x, float y, float z)
+  {
+    int width = info.width;
+    int height = info.height;
+    int depth = info.depth;
+    int ix, iy, iz;
+    int nix, niy, niz;
+    /* Tricubic b-spline interpolation. */
+    const float tx = frac(x * (float)width - 0.5f, &ix);
+    const float ty = frac(y * (float)height - 0.5f, &iy);
+    const float tz = frac(z * (float)depth - 0.5f, &iz);
+    int pix, piy, piz, nnix, nniy, nniz;
+
+    switch (info.extension) {
+      case EXTENSION_REPEAT:
+        ix = wrap_periodic(ix, width);
+        iy = wrap_periodic(iy, height);
+        iz = wrap_periodic(iz, depth);
+
+        pix = wrap_periodic(ix - 1, width);
+        piy = wrap_periodic(iy - 1, height);
+        piz = wrap_periodic(iz - 1, depth);
+
+        nix = wrap_periodic(ix + 1, width);
+        niy = wrap_periodic(iy + 1, height);
+        niz = wrap_periodic(iz + 1, depth);
+
+        nnix = wrap_periodic(ix + 2, width);
+        nniy = wrap_periodic(iy + 2, height);
+        nniz = wrap_periodic(iz + 2, depth);
+        break;
+      case EXTENSION_CLIP:
+        if (x < 0.0f || y < 0.0f || z < 0.0f || x > 1.0f || y > 1.0f || z > 1.0f) {
+          return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+        }
+        ATTR_FALLTHROUGH;
+      case EXTENSION_EXTEND:
+        pix = wrap_clamp(ix - 1, width);
+        piy = wrap_clamp(iy - 1, height);
+        piz = wrap_clamp(iz - 1, depth);
+
+        nix = wrap_clamp(ix + 1, width);
+        niy = wrap_clamp(iy + 1, height);
+        niz = wrap_clamp(iz + 1, depth);
+
+        nnix = wrap_clamp(ix + 2, width);
+        nniy = wrap_clamp(iy + 2, height);
+        nniz = wrap_clamp(iz + 2, depth);
+
+        ix = wrap_clamp(ix, width);
+        iy = wrap_clamp(iy, height);
+        iz = wrap_clamp(iz, depth);
+        break;
+      default:
+        kernel_assert(0);
+        return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+    }
+
+    const int xc[4] = {pix, ix, nix, nnix};
+    const int yc[4] = {width * piy, width * iy, width * niy, width * nniy};
+    const int zc[4] = {
+        width * height * piz, width * height * iz, width * height * niz, width * height * nniz};
+    float u[4], v[4], w[4];
+
+    /* Some helper macro to keep code reasonable size,
+     * let compiler to inline all the matrix multiplications.
+     */
 #define DATA(x, y, z) (read(data[xc[x] + yc[y] + zc[z]]))
 #define COL_TERM(col, row) \
-		(v[col] * (u[0] * DATA(0, col, row) + \
-		           u[1] * DATA(1, col, row) + \
-		           u[2] * DATA(2, col, row) + \
-		           u[3] * DATA(3, col, row)))
+  (v[col] * (u[0] * DATA(0, col, row) + u[1] * DATA(1, col, row) + u[2] * DATA(2, col, row) + \
+             u[3] * DATA(3, col, row)))
 #define ROW_TERM(row) \
-		(w[row] * (COL_TERM(0, row) + \
-		           COL_TERM(1, row) + \
-		           COL_TERM(2, row) + \
-		           COL_TERM(3, row)))
+  (w[row] * (COL_TERM(0, row) + COL_TERM(1, row) + COL_TERM(2, row) + COL_TERM(3, row)))
 
-		SET_CUBIC_SPLINE_WEIGHTS(u, tx);
-		SET_CUBIC_SPLINE_WEIGHTS(v, ty);
-		SET_CUBIC_SPLINE_WEIGHTS(w, tz);
+    SET_CUBIC_SPLINE_WEIGHTS(u, tx);
+    SET_CUBIC_SPLINE_WEIGHTS(v, ty);
+    SET_CUBIC_SPLINE_WEIGHTS(w, tz);
 
-		/* Actual interpolation. */
-		const T *data = (const T*)info.data;
-		return ROW_TERM(0) + ROW_TERM(1) + ROW_TERM(2) + ROW_TERM(3);
+    /* Actual interpolation. */
+    const T *data = (const T *)info.data;
+    return ROW_TERM(0) + ROW_TERM(1) + ROW_TERM(2) + ROW_TERM(3);
 
 #undef COL_TERM
 #undef ROW_TERM
 #undef DATA
-	}
-
-	static ccl_always_inline float4 interp_3d(const TextureInfo& info,
-	                                          float x, float y, float z,
-	                                          InterpolationType interp)
-	{
-		if(UNLIKELY(!info.data))
-			return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-
-		switch((interp == INTERPOLATION_NONE)? info.interpolation: interp) {
-			case INTERPOLATION_CLOSEST:
-				return interp_3d_closest(info, x, y, z);
-			case INTERPOLATION_LINEAR:
-				return interp_3d_linear(info, x, y, z);
-			default:
-				return interp_3d_tricubic(info, x, y, z);
-		}
-	}
+  }
+
+  static ccl_always_inline float4
+  interp_3d(const TextureInfo &info, float x, float y, float z, InterpolationType interp)
+  {
+    if (UNLIKELY(!info.data))
+      return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+
+    switch ((interp == INTERPOLATION_NONE) ? info.interpolation : interp) {
+      case INTERPOLATION_CLOSEST:
+        return interp_3d_closest(info, x, y, z);
+      case INTERPOLATION_LINEAR:
+        return interp_3d_linear(info, x, y, z);
+      default:
+        return interp_3d_tricubic(info, x, y, z);
+    }
+  }
 #undef SET_CUBIC_SPLINE_WEIGHTS
 };
 
 ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, float y)
 {
-	const TextureInfo& info = kernel_tex_fetch(__texture_info, id);
-
-	switch(kernel_tex_type(id)) {
-		case IMAGE_DATA_TYPE_HALF:
-			return TextureInterpolator<half>::interp(info, x, y);
-		case IMAGE_DATA_TYPE_BYTE:
-			return TextureInterpolator<uchar>::interp(info, x, y);
-		case IMAGE_DATA_TYPE_USHORT:
-			return TextureInterpolator<uint16_t>::interp(info, x, y);
-		case IMAGE_DATA_TYPE_FLOAT:
-			return TextureInterpolator<float>::interp(info, x, y);
-		case IMAGE_DATA_TYPE_HALF4:
-			return TextureInterpolator<half4>::interp(info, x, y);
-		case IMAGE_DATA_TYPE_BYTE4:
-			return TextureInterpolator<uchar4>::interp(info, x, y);
-		case IMAGE_DATA_TYPE_USHORT4:
-			return TextureInterpolator<ushort4>::interp(info, x, y);
-		case IMAGE_DATA_TYPE_FLOAT4:
-			return TextureInterpolator<float4>::interp(info, x, y);
-		default:
-			assert(0);
-			return make_float4(TEX_IMAGE_MISSING_R, TEX_IMAGE_MISSING_G, TEX_IMAGE_MISSING_B, TEX_IMAGE_MISSING_A);
-	}
+  const TextureInfo &info = kernel_tex_fetch(__texture_info, id);
+
+  switch (kernel_tex_type(id)) {
+    case IMAGE_DATA_TYPE_HALF:
+      return TextureInterpolator<half>::interp(info, x, y);
+    case IMAGE_DATA_TYPE_BYTE:
+      return TextureInterpolator<uchar>::interp(info, x, y);
+    case IMAGE_DATA_TYPE_USHORT:
+      return TextureInterpolator<uint16_t>::interp(info, x, y);
+    case IMAGE_DATA_TYPE_FLOAT:
+      return TextureInterpolator<float>::interp(info, x, y);
+    case IMAGE_DATA_TYPE_HALF4:
+      return TextureInterpolator<half4>::interp(info, x, y);
+    case IMAGE_DATA_TYPE_BYTE4:
+      return TextureInterpolator<uchar4>::interp(info, x, y);
+    case IMAGE_DATA_TYPE_USHORT4:
+      return TextureInterpolator<ushort4>::interp(info, x, y);
+    case IMAGE_DATA_TYPE_FLOAT4:
+      return TextureInterpolator<float4>::interp(info, x, y);
+    default:
+      assert(0);
+      return make_float4(
+          TEX_IMAGE_MISSING_R, TEX_IMAGE_MISSING_G, TEX_IMAGE_MISSING_B, TEX_IMAGE_MISSING_A);
+  }
 }
 
-ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float x, float y, float z, InterpolationType interp)
+ccl_device float4 kernel_tex_image_interp_3d(
+    KernelGlobals *kg, int id, float x, float y, float z, InterpolationType interp)
 {
-	const TextureInfo& info = kernel_tex_fetch(__texture_info, id);
-
-	switch(kernel_tex_type(id)) {
-		case IMAGE_DATA_TYPE_HALF:
-			return TextureInterpolator<half>::interp_3d(info, x, y, z, interp);
-		case IMAGE_DATA_TYPE_BYTE:
-			return TextureInterpolator<uchar>::interp_3d(info, x, y, z, interp);
-		case IMAGE_DATA_TYPE_USHORT:
-			return TextureInterpolator<uint16_t>::interp_3d(info, x, y, z, interp);
-		case IMAGE_DATA_TYPE_FLOAT:
-			return TextureInterpolator<float>::interp_3d(info, x, y, z, interp);
-		case IMAGE_DATA_TYPE_HALF4:
-			return TextureInterpolator<half4>::interp_3d(info, x, y, z, interp);
-		case IMAGE_DATA_TYPE_BYTE4:
-			return TextureInterpolator<uchar4>::interp_3d(info, x, y, z, interp);
-		case IMAGE_DATA_TYPE_USHORT4:
-			return TextureInterpolator<ushort4>::interp_3d(info, x, y, z, interp);
-		case IMAGE_DATA_TYPE_FLOAT4:
-			return TextureInterpolator<float4>::interp_3d(info, x, y, z, interp);
-		default:
-			assert(0);
-			return make_float4(TEX_IMAGE_MISSING_R, TEX_IMAGE_MISSING_G, TEX_IMAGE_MISSING_B, TEX_IMAGE_MISSING_A);
-	}
+  const TextureInfo &info = kernel_tex_fetch(__texture_info, id);
+
+  switch (kernel_tex_type(id)) {
+    case IMAGE_DATA_TYPE_HALF:
+      return TextureInterpolator<half>::interp_3d(info, x, y, z, interp);
+    case IMAGE_DATA_TYPE_BYTE:
+      return TextureInterpolator<uchar>::interp_3d(info, x, y, z, interp);
+    case IMAGE_DATA_TYPE_USHORT:
+      return TextureInterpolator<uint16_t>::interp_3d(info, x, y, z, interp);
+    case IMAGE_DATA_TYPE_FLOAT:
+      return TextureInterpolator<float>::interp_3d(info, x, y, z, interp);
+    case IMAGE_DATA_TYPE_HALF4:
+      return TextureInterpolator<half4>::interp_3d(info, x, y, z, interp);
+    case IMAGE_DATA_TYPE_BYTE4:
+      return TextureInterpolator<uchar4>::interp_3d(info, x, y, z, interp);
+    case IMAGE_DATA_TYPE_USHORT4:
+      return TextureInterpolator<ushort4>::interp_3d(info, x, y, z, interp);
+    case IMAGE_DATA_TYPE_FLOAT4:
+      return TextureInterpolator<float4>::interp_3d(info, x, y, z, interp);
+    default:
+      assert(0);
+      return make_float4(
+          TEX_IMAGE_MISSING_R, TEX_IMAGE_MISSING_G, TEX_IMAGE_MISSING_B, TEX_IMAGE_MISSING_A);
+  }
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
index 759b7e4c20d..9ca3f46b5b6 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
@@ -58,14 +58,15 @@
 #    include "kernel/split/kernel_next_iteration_setup.h"
 #    include "kernel/split/kernel_indirect_subsurface.h"
 #    include "kernel/split/kernel_buffer_update.h"
-#  endif  /* __SPLIT_KERNEL__ */
+#  endif /* __SPLIT_KERNEL__ */
 #else
-#  define STUB_ASSERT(arch, name) assert(!(#name " kernel stub for architecture " #arch " was called!"))
+#  define STUB_ASSERT(arch, name) \
+    assert(!(#name " kernel stub for architecture " #arch " was called!"))
 
 #  ifdef __SPLIT_KERNEL__
 #    include "kernel/split/kernel_data_init.h"
-#  endif  /* __SPLIT_KERNEL__ */
-#endif  /* KERNEL_STUB */
+#  endif /* __SPLIT_KERNEL__ */
+#endif   /* KERNEL_STUB */
 
 CCL_NAMESPACE_BEGIN
 
@@ -73,31 +74,22 @@ CCL_NAMESPACE_BEGIN
 
 /* Path Tracing */
 
-void KERNEL_FUNCTION_FULL_NAME(path_trace)(KernelGlobals *kg,
-                                           float *buffer,
-                                           int sample,
-                                           int x, int y,
-                                           int offset,
-                                           int stride)
+void KERNEL_FUNCTION_FULL_NAME(path_trace)(
+    KernelGlobals *kg, float *buffer, int sample, int x, int y, int offset, int stride)
 {
-#ifdef KERNEL_STUB
-	STUB_ASSERT(KERNEL_ARCH, path_trace);
-#else
-#  ifdef __BRANCHED_PATH__
-	if(kernel_data.integrator.branched) {
-		kernel_branched_path_trace(kg,
-		                           buffer,
-		                           sample,
-		                           x, y,
-		                           offset,
-		                           stride);
-	}
-	else
-#  endif
-	{
-		kernel_path_trace(kg, buffer, sample, x, y, offset, stride);
-	}
-#endif  /* KERNEL_STUB */
+#  ifdef KERNEL_STUB
+  STUB_ASSERT(KERNEL_ARCH, path_trace);
+#  else
+#    ifdef __BRANCHED_PATH__
+  if (kernel_data.integrator.branched) {
+    kernel_branched_path_trace(kg, buffer, sample, x, y, offset, stride);
+  }
+  else
+#    endif
+  {
+    kernel_path_trace(kg, buffer, sample, x, y, offset, stride);
+  }
+#  endif /* KERNEL_STUB */
 }
 
 /* Film */
@@ -106,42 +98,32 @@ void KERNEL_FUNCTION_FULL_NAME(convert_to_byte)(KernelGlobals *kg,
                                                 uchar4 *rgba,
                                                 float *buffer,
                                                 float sample_scale,
-                                                int x, int y,
+                                                int x,
+                                                int y,
                                                 int offset,
                                                 int stride)
 {
-#ifdef KERNEL_STUB
-	STUB_ASSERT(KERNEL_ARCH, convert_to_byte);
-#else
-	kernel_film_convert_to_byte(kg,
-	                            rgba,
-	                            buffer,
-	                            sample_scale,
-	                            x, y,
-	                            offset,
-	                            stride);
-#endif  /* KERNEL_STUB */
+#  ifdef KERNEL_STUB
+  STUB_ASSERT(KERNEL_ARCH, convert_to_byte);
+#  else
+  kernel_film_convert_to_byte(kg, rgba, buffer, sample_scale, x, y, offset, stride);
+#  endif /* KERNEL_STUB */
 }
 
 void KERNEL_FUNCTION_FULL_NAME(convert_to_half_float)(KernelGlobals *kg,
                                                       uchar4 *rgba,
                                                       float *buffer,
                                                       float sample_scale,
-                                                      int x, int y,
+                                                      int x,
+                                                      int y,
                                                       int offset,
                                                       int stride)
 {
-#ifdef KERNEL_STUB
-	STUB_ASSERT(KERNEL_ARCH, convert_to_half_float);
-#else
-	kernel_film_convert_to_half_float(kg,
-	                                  rgba,
-	                                  buffer,
-	                                  sample_scale,
-	                                  x, y,
-	                                  offset,
-	                                  stride);
-#endif  /* KERNEL_STUB */
+#  ifdef KERNEL_STUB
+  STUB_ASSERT(KERNEL_ARCH, convert_to_half_float);
+#  else
+  kernel_film_convert_to_half_float(kg, rgba, buffer, sample_scale, x, y, offset, stride);
+#  endif /* KERNEL_STUB */
 }
 
 /* Shader Evaluate */
@@ -155,60 +137,53 @@ void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg,
                                        int offset,
                                        int sample)
 {
-#ifdef KERNEL_STUB
-	STUB_ASSERT(KERNEL_ARCH, shader);
-#else
-	if(type >= SHADER_EVAL_BAKE) {
-#  ifdef __BAKING__
-		kernel_bake_evaluate(kg,
-		                     input,
-		                     output,
-		                     (ShaderEvalType)type,
-		                     filter,
-		                     i,
-		                     offset,
-		                     sample);
-#  endif
-	}
-	else if(type == SHADER_EVAL_DISPLACE) {
-		kernel_displace_evaluate(kg, input, output, i);
-	}
-	else {
-		kernel_background_evaluate(kg, input, output, i);
-	}
-#endif  /* KERNEL_STUB */
+#  ifdef KERNEL_STUB
+  STUB_ASSERT(KERNEL_ARCH, shader);
+#  else
+  if (type >= SHADER_EVAL_BAKE) {
+#    ifdef __BAKING__
+    kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, filter, i, offset, sample);
+#    endif
+  }
+  else if (type == SHADER_EVAL_DISPLACE) {
+    kernel_displace_evaluate(kg, input, output, i);
+  }
+  else {
+    kernel_background_evaluate(kg, input, output, i);
+  }
+#  endif /* KERNEL_STUB */
 }
 
-#else  /* __SPLIT_KERNEL__ */
+#else /* __SPLIT_KERNEL__ */
 
 /* Split Kernel Path Tracing */
 
-#ifdef KERNEL_STUB
-#  define DEFINE_SPLIT_KERNEL_FUNCTION(name) \
-	void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals *kg, KernelData* /*data*/) \
-	{ \
-		STUB_ASSERT(KERNEL_ARCH, name); \
-	}
-
-#  define DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(name, type) \
-	void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals *kg, KernelData* /*data*/) \
-	{ \
-		STUB_ASSERT(KERNEL_ARCH, name); \
-	}
-#else
-#  define DEFINE_SPLIT_KERNEL_FUNCTION(name) \
-	void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals *kg, KernelData* /*data*/) \
-	{ \
-		kernel_##name(kg); \
-	}
-
-#  define DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(name, type) \
-	void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals *kg, KernelData* /*data*/) \
-	{ \
-		ccl_local type locals; \
-		kernel_##name(kg, &locals); \
-	}
-#endif  /* KERNEL_STUB */
+#  ifdef KERNEL_STUB
+#    define DEFINE_SPLIT_KERNEL_FUNCTION(name) \
+      void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals * kg, KernelData * /*data*/) \
+      { \
+        STUB_ASSERT(KERNEL_ARCH, name); \
+      }
+
+#    define DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(name, type) \
+      void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals * kg, KernelData * /*data*/) \
+      { \
+        STUB_ASSERT(KERNEL_ARCH, name); \
+      }
+#  else
+#    define DEFINE_SPLIT_KERNEL_FUNCTION(name) \
+      void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals * kg, KernelData * /*data*/) \
+      { \
+        kernel_##name(kg); \
+      }
+
+#    define DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(name, type) \
+      void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals * kg, KernelData * /*data*/) \
+      { \
+        ccl_local type locals; \
+        kernel_##name(kg, &locals); \
+      }
+#  endif /* KERNEL_STUB */
 
 DEFINE_SPLIT_KERNEL_FUNCTION(path_init)
 DEFINE_SPLIT_KERNEL_FUNCTION(scene_intersect)
@@ -219,7 +194,8 @@ DEFINE_SPLIT_KERNEL_FUNCTION(indirect_background)
 DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(shader_setup, uint)
 DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(shader_sort, ShaderSortLocals)
 DEFINE_SPLIT_KERNEL_FUNCTION(shader_eval)
-DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(holdout_emission_blurring_pathtermination_ao, BackgroundAOLocals)
+DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(holdout_emission_blurring_pathtermination_ao,
+                                    BackgroundAOLocals)
 DEFINE_SPLIT_KERNEL_FUNCTION(subsurface_scatter)
 DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(direct_lighting, uint)
 DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_ao)
@@ -228,7 +204,7 @@ DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(enqueue_inactive, uint)
 DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(next_iteration_setup, uint)
 DEFINE_SPLIT_KERNEL_FUNCTION(indirect_subsurface)
 DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(buffer_update, uint)
-#endif  /* __SPLIT_KERNEL__ */
+#endif   /* __SPLIT_KERNEL__ */
 
 #undef KERNEL_STUB
 #undef STUB_ASSERT
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_split.cpp b/intern/cycles/kernel/kernels/cpu/kernel_split.cpp
index c5e199b0a69..989f5e5aaa8 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_split.cpp
+++ b/intern/cycles/kernel/kernels/cpu/kernel_split.cpp
@@ -54,7 +54,7 @@
 
 /* quiet unused define warnings */
 #if defined(__KERNEL_SSE2__)
-    /* do nothing */
+/* do nothing */
 #endif
 
 #include "kernel/kernel.h"
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_split_avx.cpp b/intern/cycles/kernel/kernels/cpu/kernel_split_avx.cpp
index 6ba3425a343..1b2e2516751 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_split_avx.cpp
+++ b/intern/cycles/kernel/kernels/cpu/kernel_split_avx.cpp
@@ -34,7 +34,7 @@
 #    define __KERNEL_SSE41__
 #    define __KERNEL_AVX__
 #  endif
-#endif  /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX */
+#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX */
 
 #include "kernel/kernel.h"
 #define KERNEL_ARCH cpu_avx
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_split_avx2.cpp b/intern/cycles/kernel/kernels/cpu/kernel_split_avx2.cpp
index 76b2d77ebb8..43b8bfbf864 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_split_avx2.cpp
+++ b/intern/cycles/kernel/kernels/cpu/kernel_split_avx2.cpp
@@ -35,7 +35,7 @@
 #    define __KERNEL_AVX__
 #    define __KERNEL_AVX2__
 #  endif
-#endif  /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 */
+#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 */
 
 #include "kernel/kernel.h"
 #define KERNEL_ARCH cpu_avx2
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_split_sse2.cpp b/intern/cycles/kernel/kernels/cpu/kernel_split_sse2.cpp
index b468b6f44c8..9743789179d 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_split_sse2.cpp
+++ b/intern/cycles/kernel/kernels/cpu/kernel_split_sse2.cpp
@@ -29,7 +29,7 @@
 #  if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86)))
 #    define __KERNEL_SSE2__
 #  endif
-#endif  /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 */
+#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 */
 
 #include "kernel/kernel.h"
 #define KERNEL_ARCH cpu_sse2
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_split_sse3.cpp b/intern/cycles/kernel/kernels/cpu/kernel_split_sse3.cpp
index 3e5792d0b17..1bec7633500 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_split_sse3.cpp
+++ b/intern/cycles/kernel/kernels/cpu/kernel_split_sse3.cpp
@@ -31,7 +31,7 @@
 #    define __KERNEL_SSE3__
 #    define __KERNEL_SSSE3__
 #  endif
-#endif  /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 */
+#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 */
 
 #include "kernel/kernel.h"
 #define KERNEL_ARCH cpu_sse3
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_split_sse41.cpp b/intern/cycles/kernel/kernels/cpu/kernel_split_sse41.cpp
index 3629f21cd29..c0efc2350e9 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_split_sse41.cpp
+++ b/intern/cycles/kernel/kernels/cpu/kernel_split_sse41.cpp
@@ -32,7 +32,7 @@
 #    define __KERNEL_SSSE3__
 #    define __KERNEL_SSE41__
 #  endif
-#endif  /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 */
+#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 */
 
 #include "kernel/kernel.h"
 #define KERNEL_ARCH cpu_sse41
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_sse2.cpp b/intern/cycles/kernel/kernels/cpu/kernel_sse2.cpp
index 57530c88710..173be8e93ce 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_sse2.cpp
+++ b/intern/cycles/kernel/kernels/cpu/kernel_sse2.cpp
@@ -27,7 +27,7 @@
 #  if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86)))
 #    define __KERNEL_SSE2__
 #  endif
-#endif  /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 */
+#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 */
 
 #include "kernel/kernel.h"
 #define KERNEL_ARCH cpu_sse2
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_sse3.cpp b/intern/cycles/kernel/kernels/cpu/kernel_sse3.cpp
index c607753bc4b..31273fe3344 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_sse3.cpp
+++ b/intern/cycles/kernel/kernels/cpu/kernel_sse3.cpp
@@ -29,7 +29,7 @@
 #    define __KERNEL_SSE3__
 #    define __KERNEL_SSSE3__
 #  endif
-#endif  /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 */
+#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 */
 
 #include "kernel/kernel.h"
 #define KERNEL_ARCH cpu_sse3
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_sse41.cpp b/intern/cycles/kernel/kernels/cpu/kernel_sse41.cpp
index a278554731c..1d020b7fee6 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_sse41.cpp
+++ b/intern/cycles/kernel/kernels/cpu/kernel_sse41.cpp
@@ -30,7 +30,7 @@
 #    define __KERNEL_SSSE3__
 #    define __KERNEL_SSE41__
 #  endif
-#endif  /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 */
+#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 */
 
 #include "kernel/kernel.h"
 #define KERNEL_ARCH cpu_sse41
diff --git a/intern/cycles/kernel/kernels/cuda/kernel_config.h b/intern/cycles/kernel/kernels/cuda/kernel_config.h
index 6d41dc15785..d9f349837a8 100644
--- a/intern/cycles/kernel/kernels/cuda/kernel_config.h
+++ b/intern/cycles/kernel/kernels/cuda/kernel_config.h
@@ -81,7 +81,6 @@
 #  define CUDA_KERNEL_MAX_REGISTERS 64
 #  define CUDA_KERNEL_BRANCHED_MAX_REGISTERS 72
 
-
 /* unknown architecture */
 #else
 #  error "Unknown or unsupported CUDA architecture, can't determine launch bounds"
@@ -96,18 +95,19 @@
  * given the maximum number of registers per thread. */
 
 #define CUDA_LAUNCH_BOUNDS(threads_block_width, thread_num_registers) \
-	__launch_bounds__( \
-		threads_block_width*threads_block_width, \
-		CUDA_MULTIPRESSOR_MAX_REGISTERS/(threads_block_width*threads_block_width*thread_num_registers) \
-		)
+  __launch_bounds__(threads_block_width *threads_block_width, \
+                    CUDA_MULTIPRESSOR_MAX_REGISTERS / \
+                        (threads_block_width * threads_block_width * thread_num_registers))
 
 /* sanity checks */
 
-#if CUDA_THREADS_BLOCK_WIDTH*CUDA_THREADS_BLOCK_WIDTH > CUDA_BLOCK_MAX_THREADS
+#if CUDA_THREADS_BLOCK_WIDTH * CUDA_THREADS_BLOCK_WIDTH > CUDA_BLOCK_MAX_THREADS
 #  error "Maximum number of threads per block exceeded"
 #endif
 
-#if CUDA_MULTIPRESSOR_MAX_REGISTERS/(CUDA_THREADS_BLOCK_WIDTH*CUDA_THREADS_BLOCK_WIDTH*CUDA_KERNEL_MAX_REGISTERS) > CUDA_MULTIPROCESSOR_MAX_BLOCKS
+#if CUDA_MULTIPRESSOR_MAX_REGISTERS / \
+        (CUDA_THREADS_BLOCK_WIDTH * CUDA_THREADS_BLOCK_WIDTH * CUDA_KERNEL_MAX_REGISTERS) > \
+    CUDA_MULTIPROCESSOR_MAX_BLOCKS
 #  error "Maximum number of blocks per multiprocessor exceeded"
 #endif
 
diff --git a/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h b/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h
index 37cfbbcb235..7c68f08ea10 100644
--- a/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h
+++ b/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h
@@ -17,174 +17,165 @@
 /* w0, w1, w2, and w3 are the four cubic B-spline basis functions. */
 ccl_device float cubic_w0(float a)
 {
-	return (1.0f/6.0f)*(a*(a*(-a + 3.0f) - 3.0f) + 1.0f);
+  return (1.0f / 6.0f) * (a * (a * (-a + 3.0f) - 3.0f) + 1.0f);
 }
 
 ccl_device float cubic_w1(float a)
 {
-	return (1.0f/6.0f)*(a*a*(3.0f*a - 6.0f) + 4.0f);
+  return (1.0f / 6.0f) * (a * a * (3.0f * a - 6.0f) + 4.0f);
 }
 
 ccl_device float cubic_w2(float a)
 {
-	return (1.0f/6.0f)*(a*(a*(-3.0f*a + 3.0f) + 3.0f) + 1.0f);
+  return (1.0f / 6.0f) * (a * (a * (-3.0f * a + 3.0f) + 3.0f) + 1.0f);
 }
 
 ccl_device float cubic_w3(float a)
 {
-	return (1.0f/6.0f)*(a*a*a);
+  return (1.0f / 6.0f) * (a * a * a);
 }
 
 /* g0 and g1 are the two amplitude functions. */
 ccl_device float cubic_g0(float a)
 {
-	return cubic_w0(a) + cubic_w1(a);
+  return cubic_w0(a) + cubic_w1(a);
 }
 
 ccl_device float cubic_g1(float a)
 {
-	return cubic_w2(a) + cubic_w3(a);
+  return cubic_w2(a) + cubic_w3(a);
 }
 
 /* h0 and h1 are the two offset functions */
 ccl_device float cubic_h0(float a)
 {
-	/* Note +0.5 offset to compensate for CUDA linear filtering convention. */
-	return -1.0f + cubic_w1(a) / (cubic_w0(a) + cubic_w1(a)) + 0.5f;
+  /* Note +0.5 offset to compensate for CUDA linear filtering convention. */
+  return -1.0f + cubic_w1(a) / (cubic_w0(a) + cubic_w1(a)) + 0.5f;
 }
 
 ccl_device float cubic_h1(float a)
 {
-	return 1.0f + cubic_w3(a) / (cubic_w2(a) + cubic_w3(a)) + 0.5f;
+  return 1.0f + cubic_w3(a) / (cubic_w2(a) + cubic_w3(a)) + 0.5f;
 }
 
 /* Fast bicubic texture lookup using 4 bilinear lookups, adapted from CUDA samples. */
 template<typename T>
-ccl_device T kernel_tex_image_interp_bicubic(const TextureInfo& info, CUtexObject tex, float x, float y)
+ccl_device T
+kernel_tex_image_interp_bicubic(const TextureInfo &info, CUtexObject tex, float x, float y)
 {
-	x = (x * info.width) - 0.5f;
-	y = (y * info.height) - 0.5f;
-
-	float px = floor(x);
-	float py = floor(y);
-	float fx = x - px;
-	float fy = y - py;
-
-	float g0x = cubic_g0(fx);
-	float g1x = cubic_g1(fx);
-	float x0 = (px + cubic_h0(fx)) / info.width;
-	float x1 = (px + cubic_h1(fx)) / info.width;
-	float y0 = (py + cubic_h0(fy)) / info.height;
-	float y1 = (py + cubic_h1(fy)) / info.height;
-
-	return cubic_g0(fy) * (g0x * tex2D<T>(tex, x0, y0) +
-	                       g1x * tex2D<T>(tex, x1, y0)) +
-	       cubic_g1(fy) * (g0x * tex2D<T>(tex, x0, y1) +
-	                       g1x * tex2D<T>(tex, x1, y1));
+  x = (x * info.width) - 0.5f;
+  y = (y * info.height) - 0.5f;
+
+  float px = floor(x);
+  float py = floor(y);
+  float fx = x - px;
+  float fy = y - py;
+
+  float g0x = cubic_g0(fx);
+  float g1x = cubic_g1(fx);
+  float x0 = (px + cubic_h0(fx)) / info.width;
+  float x1 = (px + cubic_h1(fx)) / info.width;
+  float y0 = (py + cubic_h0(fy)) / info.height;
+  float y1 = (py + cubic_h1(fy)) / info.height;
+
+  return cubic_g0(fy) * (g0x * tex2D<T>(tex, x0, y0) + g1x * tex2D<T>(tex, x1, y0)) +
+         cubic_g1(fy) * (g0x * tex2D<T>(tex, x0, y1) + g1x * tex2D<T>(tex, x1, y1));
 }
 
 /* Fast tricubic texture lookup using 8 trilinear lookups. */
 template<typename T>
-ccl_device T kernel_tex_image_interp_bicubic_3d(const TextureInfo& info, CUtexObject tex, float x, float y, float z)
+ccl_device T kernel_tex_image_interp_bicubic_3d(
+    const TextureInfo &info, CUtexObject tex, float x, float y, float z)
 {
-	x = (x * info.width) - 0.5f;
-	y = (y * info.height) - 0.5f;
-	z = (z * info.depth) - 0.5f;
-
-	float px = floor(x);
-	float py = floor(y);
-	float pz = floor(z);
-	float fx = x - px;
-	float fy = y - py;
-	float fz = z - pz;
-
-	float g0x = cubic_g0(fx);
-	float g1x = cubic_g1(fx);
-	float g0y = cubic_g0(fy);
-	float g1y = cubic_g1(fy);
-	float g0z = cubic_g0(fz);
-	float g1z = cubic_g1(fz);
-
-	float x0 = (px + cubic_h0(fx)) / info.width;
-	float x1 = (px + cubic_h1(fx)) / info.width;
-	float y0 = (py + cubic_h0(fy)) / info.height;
-	float y1 = (py + cubic_h1(fy)) / info.height;
-	float z0 = (pz + cubic_h0(fz)) / info.depth;
-	float z1 = (pz + cubic_h1(fz)) / info.depth;
-
-	return g0z * (g0y * (g0x * tex3D<T>(tex, x0, y0, z0) +
-	                     g1x * tex3D<T>(tex, x1, y0, z0)) +
-	              g1y * (g0x * tex3D<T>(tex, x0, y1, z0) +
-	                     g1x * tex3D<T>(tex, x1, y1, z0))) +
-	       g1z * (g0y * (g0x * tex3D<T>(tex, x0, y0, z1) +
-	                     g1x * tex3D<T>(tex, x1, y0, z1)) +
-	              g1y * (g0x * tex3D<T>(tex, x0, y1, z1) +
-	                     g1x * tex3D<T>(tex, x1, y1, z1)));
+  x = (x * info.width) - 0.5f;
+  y = (y * info.height) - 0.5f;
+  z = (z * info.depth) - 0.5f;
+
+  float px = floor(x);
+  float py = floor(y);
+  float pz = floor(z);
+  float fx = x - px;
+  float fy = y - py;
+  float fz = z - pz;
+
+  float g0x = cubic_g0(fx);
+  float g1x = cubic_g1(fx);
+  float g0y = cubic_g0(fy);
+  float g1y = cubic_g1(fy);
+  float g0z = cubic_g0(fz);
+  float g1z = cubic_g1(fz);
+
+  float x0 = (px + cubic_h0(fx)) / info.width;
+  float x1 = (px + cubic_h1(fx)) / info.width;
+  float y0 = (py + cubic_h0(fy)) / info.height;
+  float y1 = (py + cubic_h1(fy)) / info.height;
+  float z0 = (pz + cubic_h0(fz)) / info.depth;
+  float z1 = (pz + cubic_h1(fz)) / info.depth;
+
+  return g0z * (g0y * (g0x * tex3D<T>(tex, x0, y0, z0) + g1x * tex3D<T>(tex, x1, y0, z0)) +
+                g1y * (g0x * tex3D<T>(tex, x0, y1, z0) + g1x * tex3D<T>(tex, x1, y1, z0))) +
+         g1z * (g0y * (g0x * tex3D<T>(tex, x0, y0, z1) + g1x * tex3D<T>(tex, x1, y0, z1)) +
+                g1y * (g0x * tex3D<T>(tex, x0, y1, z1) + g1x * tex3D<T>(tex, x1, y1, z1)));
 }
 
 ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, float y)
 {
-	const TextureInfo& info = kernel_tex_fetch(__texture_info, id);
-	CUtexObject tex = (CUtexObject)info.data;
-
-	/* float4, byte4, ushort4 and half4 */
-	const int texture_type = kernel_tex_type(id);
-	if(texture_type == IMAGE_DATA_TYPE_FLOAT4 ||
-	   texture_type == IMAGE_DATA_TYPE_BYTE4 ||
-	   texture_type == IMAGE_DATA_TYPE_HALF4 ||
-	   texture_type == IMAGE_DATA_TYPE_USHORT4)
-	{
-		if(info.interpolation == INTERPOLATION_CUBIC) {
-			return kernel_tex_image_interp_bicubic<float4>(info, tex, x, y);
-		}
-		else {
-			return tex2D<float4>(tex, x, y);
-		}
-	}
-	/* float, byte and half */
-	else {
-		float f;
-
-		if(info.interpolation == INTERPOLATION_CUBIC) {
-			f = kernel_tex_image_interp_bicubic<float>(info, tex, x, y);
-		}
-		else {
-			f = tex2D<float>(tex, x, y);
-		}
-
-		return make_float4(f, f, f, 1.0f);
-	}
+  const TextureInfo &info = kernel_tex_fetch(__texture_info, id);
+  CUtexObject tex = (CUtexObject)info.data;
+
+  /* float4, byte4, ushort4 and half4 */
+  const int texture_type = kernel_tex_type(id);
+  if (texture_type == IMAGE_DATA_TYPE_FLOAT4 || texture_type == IMAGE_DATA_TYPE_BYTE4 ||
+      texture_type == IMAGE_DATA_TYPE_HALF4 || texture_type == IMAGE_DATA_TYPE_USHORT4) {
+    if (info.interpolation == INTERPOLATION_CUBIC) {
+      return kernel_tex_image_interp_bicubic<float4>(info, tex, x, y);
+    }
+    else {
+      return tex2D<float4>(tex, x, y);
+    }
+  }
+  /* float, byte and half */
+  else {
+    float f;
+
+    if (info.interpolation == INTERPOLATION_CUBIC) {
+      f = kernel_tex_image_interp_bicubic<float>(info, tex, x, y);
+    }
+    else {
+      f = tex2D<float>(tex, x, y);
+    }
+
+    return make_float4(f, f, f, 1.0f);
+  }
 }
 
-ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float x, float y, float z, InterpolationType interp)
+ccl_device float4 kernel_tex_image_interp_3d(
+    KernelGlobals *kg, int id, float x, float y, float z, InterpolationType interp)
 {
-	const TextureInfo& info = kernel_tex_fetch(__texture_info, id);
-	CUtexObject tex = (CUtexObject)info.data;
-	uint interpolation = (interp == INTERPOLATION_NONE)? info.interpolation: interp;
-
-	const int texture_type = kernel_tex_type(id);
-	if(texture_type == IMAGE_DATA_TYPE_FLOAT4 ||
-	   texture_type == IMAGE_DATA_TYPE_BYTE4 ||
-	   texture_type == IMAGE_DATA_TYPE_HALF4 ||
-	   texture_type == IMAGE_DATA_TYPE_USHORT4)
-	{
-		if(interpolation == INTERPOLATION_CUBIC) {
-			return kernel_tex_image_interp_bicubic_3d<float4>(info, tex, x, y, z);
-		}
-		else {
-			return tex3D<float4>(tex, x, y, z);
-		}
-	}
-	else {
-		float f;
-
-		if(interpolation == INTERPOLATION_CUBIC) {
-			f = kernel_tex_image_interp_bicubic_3d<float>(info, tex, x, y, z);
-		}
-		else {
-			f = tex3D<float>(tex, x, y, z);
-		}
-
-		return make_float4(f, f, f, 1.0f);
-	}
+  const TextureInfo &info = kernel_tex_fetch(__texture_info, id);
+  CUtexObject tex = (CUtexObject)info.data;
+  uint interpolation = (interp == INTERPOLATION_NONE) ? info.interpolation : interp;
+
+  const int texture_type = kernel_tex_type(id);
+  if (texture_type == IMAGE_DATA_TYPE_FLOAT4 || texture_type == IMAGE_DATA_TYPE_BYTE4 ||
+      texture_type == IMAGE_DATA_TYPE_HALF4 || texture_type == IMAGE_DATA_TYPE_USHORT4) {
+    if (interpolation == INTERPOLATION_CUBIC) {
+      return kernel_tex_image_interp_bicubic_3d<float4>(info, tex, x, y, z);
+    }
+    else {
+      return tex3D<float4>(tex, x, y, z);
+    }
+  }
+  else {
+    float f;
+
+    if (interpolation == INTERPOLATION_CUBIC) {
+      f = kernel_tex_image_interp_bicubic_3d<float>(info, tex, x, y, z);
+    }
+    else {
+      f = tex3D<float>(tex, x, y, z);
+    }
+
+    return make_float4(f, f, f, 1.0f);
+  }
 }
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h b/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h
index 79af831c2fb..b6390679331 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h
+++ b/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h
@@ -16,254 +16,257 @@
 
 /* For OpenCL we do manual lookup and interpolation. */
 
-ccl_device_inline ccl_global TextureInfo* kernel_tex_info(KernelGlobals *kg, uint id) {
-	const uint tex_offset = id
-#define KERNEL_TEX(type, name) + 1
+ccl_device_inline ccl_global TextureInfo *kernel_tex_info(KernelGlobals *kg, uint id)
+{
+  const uint tex_offset = id
+#define KERNEL_TEX(type, name) +1
 #include "kernel/kernel_textures.h"
-	;
+      ;
 
-	return &((ccl_global TextureInfo*)kg->buffers[0])[tex_offset];
+  return &((ccl_global TextureInfo *)kg->buffers[0])[tex_offset];
 }
 
-#define tex_fetch(type, info, index) ((ccl_global type*)(kg->buffers[info->cl_buffer] + info->data))[(index)]
+#define tex_fetch(type, info, index) \
+  ((ccl_global type *)(kg->buffers[info->cl_buffer] + info->data))[(index)]
 
 ccl_device_inline int svm_image_texture_wrap_periodic(int x, int width)
 {
-	x %= width;
-	if(x < 0)
-		x += width;
-	return x;
+  x %= width;
+  if (x < 0)
+    x += width;
+  return x;
 }
 
 ccl_device_inline int svm_image_texture_wrap_clamp(int x, int width)
 {
-	return clamp(x, 0, width-1);
+  return clamp(x, 0, width - 1);
 }
 
-ccl_device_inline float4 svm_image_texture_read(KernelGlobals *kg, const ccl_global TextureInfo *info, int id, int offset)
+ccl_device_inline float4 svm_image_texture_read(KernelGlobals *kg,
+                                                const ccl_global TextureInfo *info,
+                                                int id,
+                                                int offset)
 {
-	const int texture_type = kernel_tex_type(id);
-
-	/* Float4 */
-	if(texture_type == IMAGE_DATA_TYPE_FLOAT4) {
-		return tex_fetch(float4, info, offset);
-	}
-	/* Byte4 */
-	else if(texture_type == IMAGE_DATA_TYPE_BYTE4) {
-		uchar4 r = tex_fetch(uchar4, info, offset);
-		float f = 1.0f/255.0f;
-		return make_float4(r.x*f, r.y*f, r.z*f, r.w*f);
-	}
-	/* Ushort4 */
-	else if(texture_type == IMAGE_DATA_TYPE_USHORT4) {
-		ushort4 r = tex_fetch(ushort4, info, offset);
-		float f = 1.0f/65535.f;
-		return make_float4(r.x*f, r.y*f, r.z*f, r.w*f);
-	}
-	/* Float */
-	else if(texture_type == IMAGE_DATA_TYPE_FLOAT) {
-		float f = tex_fetch(float, info, offset);
-		return make_float4(f, f, f, 1.0f);
-	}
-	/* UShort */
-	else if(texture_type == IMAGE_DATA_TYPE_USHORT) {
-		ushort r = tex_fetch(ushort, info, offset);
-		float f = r * (1.0f / 65535.0f);
-		return make_float4(f, f, f, 1.0f);
-	}
-	/* Byte */
+  const int texture_type = kernel_tex_type(id);
+
+  /* Float4 */
+  if (texture_type == IMAGE_DATA_TYPE_FLOAT4) {
+    return tex_fetch(float4, info, offset);
+  }
+  /* Byte4 */
+  else if (texture_type == IMAGE_DATA_TYPE_BYTE4) {
+    uchar4 r = tex_fetch(uchar4, info, offset);
+    float f = 1.0f / 255.0f;
+    return make_float4(r.x * f, r.y * f, r.z * f, r.w * f);
+  }
+  /* Ushort4 */
+  else if (texture_type == IMAGE_DATA_TYPE_USHORT4) {
+    ushort4 r = tex_fetch(ushort4, info, offset);
+    float f = 1.0f / 65535.f;
+    return make_float4(r.x * f, r.y * f, r.z * f, r.w * f);
+  }
+  /* Float */
+  else if (texture_type == IMAGE_DATA_TYPE_FLOAT) {
+    float f = tex_fetch(float, info, offset);
+    return make_float4(f, f, f, 1.0f);
+  }
+  /* UShort */
+  else if (texture_type == IMAGE_DATA_TYPE_USHORT) {
+    ushort r = tex_fetch(ushort, info, offset);
+    float f = r * (1.0f / 65535.0f);
+    return make_float4(f, f, f, 1.0f);
+  }
+  /* Byte */
 #ifdef cl_khr_fp16
-	/* half and half4 are optional in OpenCL */
-	else if(texture_type == IMAGE_DATA_TYPE_HALF) {
-		float f = tex_fetch(half, info, offset);
-		return make_float4(f, f, f, 1.0f);
-	}
-	else if(texture_type == IMAGE_DATA_TYPE_HALF4) {
-		half4 r = tex_fetch(half4, info, offset);
-		return make_float4(r.x, r.y, r.z, r.w);
-	}
+  /* half and half4 are optional in OpenCL */
+  else if (texture_type == IMAGE_DATA_TYPE_HALF) {
+    float f = tex_fetch(half, info, offset);
+    return make_float4(f, f, f, 1.0f);
+  }
+  else if (texture_type == IMAGE_DATA_TYPE_HALF4) {
+    half4 r = tex_fetch(half4, info, offset);
+    return make_float4(r.x, r.y, r.z, r.w);
+  }
 #endif
-	else {
-		uchar r = tex_fetch(uchar, info, offset);
-		float f = r * (1.0f/255.0f);
-		return make_float4(f, f, f, 1.0f);
-	}
+  else {
+    uchar r = tex_fetch(uchar, info, offset);
+    float f = r * (1.0f / 255.0f);
+    return make_float4(f, f, f, 1.0f);
+  }
 }
 
 ccl_device_inline float4 svm_image_texture_read_2d(KernelGlobals *kg, int id, int x, int y)
 {
-	const ccl_global TextureInfo *info = kernel_tex_info(kg, id);
-
-	/* Wrap */
-	if(info->extension == EXTENSION_REPEAT) {
-		x = svm_image_texture_wrap_periodic(x, info->width);
-		y = svm_image_texture_wrap_periodic(y, info->height);
-	}
-	else {
-		x = svm_image_texture_wrap_clamp(x, info->width);
-		y = svm_image_texture_wrap_clamp(y, info->height);
-	}
-
-	int offset = x + info->width * y;
-	return svm_image_texture_read(kg, info, id, offset);
+  const ccl_global TextureInfo *info = kernel_tex_info(kg, id);
+
+  /* Wrap */
+  if (info->extension == EXTENSION_REPEAT) {
+    x = svm_image_texture_wrap_periodic(x, info->width);
+    y = svm_image_texture_wrap_periodic(y, info->height);
+  }
+  else {
+    x = svm_image_texture_wrap_clamp(x, info->width);
+    y = svm_image_texture_wrap_clamp(y, info->height);
+  }
+
+  int offset = x + info->width * y;
+  return svm_image_texture_read(kg, info, id, offset);
 }
 
 ccl_device_inline float4 svm_image_texture_read_3d(KernelGlobals *kg, int id, int x, int y, int z)
 {
-	const ccl_global TextureInfo *info = kernel_tex_info(kg, id);
-
-	/* Wrap */
-	if(info->extension == EXTENSION_REPEAT) {
-		x = svm_image_texture_wrap_periodic(x, info->width);
-		y = svm_image_texture_wrap_periodic(y, info->height);
-		z = svm_image_texture_wrap_periodic(z, info->depth);
-	}
-	else {
-		x = svm_image_texture_wrap_clamp(x, info->width);
-		y = svm_image_texture_wrap_clamp(y, info->height);
-		z = svm_image_texture_wrap_clamp(z, info->depth);
-	}
-
-	int offset = x + info->width * y + info->width * info->height * z;
-	return svm_image_texture_read(kg, info, id, offset);
+  const ccl_global TextureInfo *info = kernel_tex_info(kg, id);
+
+  /* Wrap */
+  if (info->extension == EXTENSION_REPEAT) {
+    x = svm_image_texture_wrap_periodic(x, info->width);
+    y = svm_image_texture_wrap_periodic(y, info->height);
+    z = svm_image_texture_wrap_periodic(z, info->depth);
+  }
+  else {
+    x = svm_image_texture_wrap_clamp(x, info->width);
+    y = svm_image_texture_wrap_clamp(y, info->height);
+    z = svm_image_texture_wrap_clamp(z, info->depth);
+  }
+
+  int offset = x + info->width * y + info->width * info->height * z;
+  return svm_image_texture_read(kg, info, id, offset);
 }
 
-
 ccl_device_inline float svm_image_texture_frac(float x, int *ix)
 {
-	int i = float_to_int(x) - ((x < 0.0f)? 1: 0);
-	*ix = i;
-	return x - (float)i;
+  int i = float_to_int(x) - ((x < 0.0f) ? 1 : 0);
+  *ix = i;
+  return x - (float)i;
 }
 
 #define SET_CUBIC_SPLINE_WEIGHTS(u, t) \
-	{ \
-		u[0] = (((-1.0f/6.0f)* t + 0.5f) * t - 0.5f) * t + (1.0f/6.0f); \
-		u[1] =  ((      0.5f * t - 1.0f) * t       ) * t + (2.0f/3.0f); \
-		u[2] =  ((     -0.5f * t + 0.5f) * t + 0.5f) * t + (1.0f/6.0f); \
-		u[3] = (1.0f / 6.0f) * t * t * t; \
-	} (void) 0
+  { \
+    u[0] = (((-1.0f / 6.0f) * t + 0.5f) * t - 0.5f) * t + (1.0f / 6.0f); \
+    u[1] = ((0.5f * t - 1.0f) * t) * t + (2.0f / 3.0f); \
+    u[2] = ((-0.5f * t + 0.5f) * t + 0.5f) * t + (1.0f / 6.0f); \
+    u[3] = (1.0f / 6.0f) * t * t * t; \
+  } \
+  (void)0
 
 ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, float y)
 {
-	const ccl_global TextureInfo *info = kernel_tex_info(kg, id);
-
-	if(info->extension == EXTENSION_CLIP) {
-		if(x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) {
-			return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-		}
-	}
-
-	if(info->interpolation == INTERPOLATION_CLOSEST) {
-		/* Closest interpolation. */
-		int ix, iy;
-		svm_image_texture_frac(x*info->width, &ix);
-		svm_image_texture_frac(y*info->height, &iy);
-
-		return svm_image_texture_read_2d(kg, id, ix, iy);
-	}
-	else if(info->interpolation == INTERPOLATION_LINEAR) {
-		/* Bilinear interpolation. */
-		int ix, iy;
-		float tx = svm_image_texture_frac(x*info->width - 0.5f, &ix);
-		float ty = svm_image_texture_frac(y*info->height - 0.5f, &iy);
-
-		float4 r;
-		r =  (1.0f - ty)*(1.0f - tx)*svm_image_texture_read_2d(kg, id, ix, iy);
-		r += (1.0f - ty)*tx*svm_image_texture_read_2d(kg, id, ix+1, iy);
-		r += ty*(1.0f - tx)*svm_image_texture_read_2d(kg, id, ix, iy+1);
-		r += ty*tx*svm_image_texture_read_2d(kg, id, ix+1, iy+1);
-		return r;
-	}
-	else {
-		/* Bicubic interpolation. */
-		int ix, iy;
-		float tx = svm_image_texture_frac(x*info->width - 0.5f, &ix);
-		float ty = svm_image_texture_frac(y*info->height - 0.5f, &iy);
-
-		float u[4], v[4];
-		SET_CUBIC_SPLINE_WEIGHTS(u, tx);
-		SET_CUBIC_SPLINE_WEIGHTS(v, ty);
-
-		float4 r = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-
-		for(int y = 0; y < 4; y++) {
-			for(int x = 0; x < 4; x++) {
-				float weight = u[x]*v[y];
-				r += weight*svm_image_texture_read_2d(kg, id, ix+x-1, iy+y-1);
-			}
-		}
-		return r;
-	}
+  const ccl_global TextureInfo *info = kernel_tex_info(kg, id);
+
+  if (info->extension == EXTENSION_CLIP) {
+    if (x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) {
+      return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+    }
+  }
+
+  if (info->interpolation == INTERPOLATION_CLOSEST) {
+    /* Closest interpolation. */
+    int ix, iy;
+    svm_image_texture_frac(x * info->width, &ix);
+    svm_image_texture_frac(y * info->height, &iy);
+
+    return svm_image_texture_read_2d(kg, id, ix, iy);
+  }
+  else if (info->interpolation == INTERPOLATION_LINEAR) {
+    /* Bilinear interpolation. */
+    int ix, iy;
+    float tx = svm_image_texture_frac(x * info->width - 0.5f, &ix);
+    float ty = svm_image_texture_frac(y * info->height - 0.5f, &iy);
+
+    float4 r;
+    r = (1.0f - ty) * (1.0f - tx) * svm_image_texture_read_2d(kg, id, ix, iy);
+    r += (1.0f - ty) * tx * svm_image_texture_read_2d(kg, id, ix + 1, iy);
+    r += ty * (1.0f - tx) * svm_image_texture_read_2d(kg, id, ix, iy + 1);
+    r += ty * tx * svm_image_texture_read_2d(kg, id, ix + 1, iy + 1);
+    return r;
+  }
+  else {
+    /* Bicubic interpolation. */
+    int ix, iy;
+    float tx = svm_image_texture_frac(x * info->width - 0.5f, &ix);
+    float ty = svm_image_texture_frac(y * info->height - 0.5f, &iy);
+
+    float u[4], v[4];
+    SET_CUBIC_SPLINE_WEIGHTS(u, tx);
+    SET_CUBIC_SPLINE_WEIGHTS(v, ty);
+
+    float4 r = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+
+    for (int y = 0; y < 4; y++) {
+      for (int x = 0; x < 4; x++) {
+        float weight = u[x] * v[y];
+        r += weight * svm_image_texture_read_2d(kg, id, ix + x - 1, iy + y - 1);
+      }
+    }
+    return r;
+  }
 }
 
-
-ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float x, float y, float z, int interp)
+ccl_device float4
+kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float x, float y, float z, int interp)
 {
-	const ccl_global TextureInfo *info = kernel_tex_info(kg, id);
-
-	if(info->extension == EXTENSION_CLIP) {
-		if(x < 0.0f || y < 0.0f || z < 0.0f ||
-		   x > 1.0f || y > 1.0f || z > 1.0f)
-		{
-			return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-		}
-	}
-
-	uint interpolation = (interp == INTERPOLATION_NONE)? info->interpolation: interp;
-
-	if(interpolation == INTERPOLATION_CLOSEST) {
-		/* Closest interpolation. */
-		int ix, iy, iz;
-		svm_image_texture_frac(x*info->width, &ix);
-		svm_image_texture_frac(y*info->height, &iy);
-		svm_image_texture_frac(z*info->depth, &iz);
-
-		return svm_image_texture_read_3d(kg, id, ix, iy, iz);
-	}
-	else if(interpolation == INTERPOLATION_LINEAR) {
-		/* Bilinear interpolation. */
-		int ix, iy, iz;
-		float tx = svm_image_texture_frac(x*info->width - 0.5f, &ix);
-		float ty = svm_image_texture_frac(y*info->height - 0.5f, &iy);
-		float tz = svm_image_texture_frac(z*info->depth - 0.5f, &iz);
-
-		float4 r;
-		r  = (1.0f - tz)*(1.0f - ty)*(1.0f - tx)*svm_image_texture_read_3d(kg, id, ix, iy, iz);
-		r += (1.0f - tz)*(1.0f - ty)*tx*svm_image_texture_read_3d(kg, id, ix+1, iy, iz);
-		r += (1.0f - tz)*ty*(1.0f - tx)*svm_image_texture_read_3d(kg, id, ix, iy+1, iz);
-		r += (1.0f - tz)*ty*tx*svm_image_texture_read_3d(kg, id, ix+1, iy+1, iz);
-
-		r += tz*(1.0f - ty)*(1.0f - tx)*svm_image_texture_read_3d(kg, id, ix, iy, iz+1);
-		r += tz*(1.0f - ty)*tx*svm_image_texture_read_3d(kg, id, ix+1, iy, iz+1);
-		r += tz*ty*(1.0f - tx)*svm_image_texture_read_3d(kg, id, ix, iy+1, iz+1);
-		r += tz*ty*tx*svm_image_texture_read_3d(kg, id, ix+1, iy+1, iz+1);
-		return r;
-	}
-	else {
-		/* Bicubic interpolation. */
-		int ix, iy, iz;
-		float tx = svm_image_texture_frac(x*info->width - 0.5f, &ix);
-		float ty = svm_image_texture_frac(y*info->height - 0.5f, &iy);
-		float tz = svm_image_texture_frac(z*info->depth - 0.5f, &iz);
-
-		float u[4], v[4], w[4];
-		SET_CUBIC_SPLINE_WEIGHTS(u, tx);
-		SET_CUBIC_SPLINE_WEIGHTS(v, ty);
-		SET_CUBIC_SPLINE_WEIGHTS(w, tz);
-
-		float4 r = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-
-		for(int z = 0; z < 4; z++) {
-			for(int y = 0; y < 4; y++) {
-				for(int x = 0; x < 4; x++) {
-					float weight = u[x]*v[y]*w[z];
-					r += weight*svm_image_texture_read_3d(kg, id, ix+x-1, iy+y-1, iz+z-1);
-				}
-			}
-		}
-		return r;
-	}
+  const ccl_global TextureInfo *info = kernel_tex_info(kg, id);
+
+  if (info->extension == EXTENSION_CLIP) {
+    if (x < 0.0f || y < 0.0f || z < 0.0f || x > 1.0f || y > 1.0f || z > 1.0f) {
+      return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+    }
+  }
+
+  uint interpolation = (interp == INTERPOLATION_NONE) ? info->interpolation : interp;
+
+  if (interpolation == INTERPOLATION_CLOSEST) {
+    /* Closest interpolation. */
+    int ix, iy, iz;
+    svm_image_texture_frac(x * info->width, &ix);
+    svm_image_texture_frac(y * info->height, &iy);
+    svm_image_texture_frac(z * info->depth, &iz);
+
+    return svm_image_texture_read_3d(kg, id, ix, iy, iz);
+  }
+  else if (interpolation == INTERPOLATION_LINEAR) {
+    /* Bilinear interpolation. */
+    int ix, iy, iz;
+    float tx = svm_image_texture_frac(x * info->width - 0.5f, &ix);
+    float ty = svm_image_texture_frac(y * info->height - 0.5f, &iy);
+    float tz = svm_image_texture_frac(z * info->depth - 0.5f, &iz);
+
+    float4 r;
+    r = (1.0f - tz) * (1.0f - ty) * (1.0f - tx) * svm_image_texture_read_3d(kg, id, ix, iy, iz);
+    r += (1.0f - tz) * (1.0f - ty) * tx * svm_image_texture_read_3d(kg, id, ix + 1, iy, iz);
+    r += (1.0f - tz) * ty * (1.0f - tx) * svm_image_texture_read_3d(kg, id, ix, iy + 1, iz);
+    r += (1.0f - tz) * ty * tx * svm_image_texture_read_3d(kg, id, ix + 1, iy + 1, iz);
+
+    r += tz * (1.0f - ty) * (1.0f - tx) * svm_image_texture_read_3d(kg, id, ix, iy, iz + 1);
+    r += tz * (1.0f - ty) * tx * svm_image_texture_read_3d(kg, id, ix + 1, iy, iz + 1);
+    r += tz * ty * (1.0f - tx) * svm_image_texture_read_3d(kg, id, ix, iy + 1, iz + 1);
+    r += tz * ty * tx * svm_image_texture_read_3d(kg, id, ix + 1, iy + 1, iz + 1);
+    return r;
+  }
+  else {
+    /* Bicubic interpolation. */
+    int ix, iy, iz;
+    float tx = svm_image_texture_frac(x * info->width - 0.5f, &ix);
+    float ty = svm_image_texture_frac(y * info->height - 0.5f, &iy);
+    float tz = svm_image_texture_frac(z * info->depth - 0.5f, &iz);
+
+    float u[4], v[4], w[4];
+    SET_CUBIC_SPLINE_WEIGHTS(u, tx);
+    SET_CUBIC_SPLINE_WEIGHTS(v, ty);
+    SET_CUBIC_SPLINE_WEIGHTS(w, tz);
+
+    float4 r = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+
+    for (int z = 0; z < 4; z++) {
+      for (int y = 0; y < 4; y++) {
+        for (int x = 0; x < 4; x++) {
+          float weight = u[x] * v[y] * w[z];
+          r += weight * svm_image_texture_read_3d(kg, id, ix + x - 1, iy + y - 1, iz + z - 1);
+        }
+      }
+    }
+    return r;
+  }
 }
 
 #undef SET_CUBIC_SPLINE_WEIGHTS
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_split_function.h b/intern/cycles/kernel/kernels/opencl/kernel_split_function.h
index 05e1ddf6da2..e123b4cd6ec 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_split_function.h
+++ b/intern/cycles/kernel/kernels/opencl/kernel_split_function.h
@@ -14,50 +14,53 @@
  * limitations under the License.
  */
 
-#define KERNEL_NAME_JOIN(a, b) a ## _ ## b
+#define KERNEL_NAME_JOIN(a, b) a##_##b /* Pastes a, an underscore and b into one identifier; KERNEL_NAME_EVAL wraps this so macro arguments are expanded before pasting. */
 #define KERNEL_NAME_EVAL(a, b) KERNEL_NAME_JOIN(a, b)
 
-__kernel void KERNEL_NAME_EVAL(kernel_ocl_path_trace, KERNEL_NAME)(
-		ccl_global char *kg_global,
-		ccl_constant KernelData *data,
+__kernel void KERNEL_NAME_EVAL(kernel_ocl_path_trace, /* Entry point is named kernel_ocl_path_trace_<KERNEL_NAME>. */
+                               KERNEL_NAME)(ccl_global char *kg_global,
+                                            ccl_constant KernelData *data,
 
-		ccl_global void *split_data_buffer,
-		ccl_global char *ray_state,
+                                            ccl_global void *split_data_buffer,
+                                            ccl_global char *ray_state,
 
-		KERNEL_BUFFER_PARAMS,
+                                            KERNEL_BUFFER_PARAMS,
 
-		ccl_global int *queue_index,
-		ccl_global char *use_queues_flag,
-		ccl_global unsigned int *work_pools,
-		ccl_global float *buffer
-	)
+                                            ccl_global int *queue_index,
+                                            ccl_global char *use_queues_flag,
+                                            ccl_global unsigned int *work_pools,
+                                            ccl_global float *buffer)
 {
 #ifdef LOCALS_TYPE
-	ccl_local LOCALS_TYPE locals;
+  ccl_local LOCALS_TYPE locals; /* Declared only when the including file defines LOCALS_TYPE. */
 #endif
 
-	KernelGlobals *kg = (KernelGlobals*)kg_global;
+  KernelGlobals *kg = (KernelGlobals *)kg_global; /* The char buffer argument is reinterpreted as the globals struct. */
 
-	if(ccl_local_id(0) + ccl_local_id(1) == 0) {
-		kg->data = data;
+  if (ccl_local_id(0) + ccl_local_id(1) == 0) { /* True only when both local ids are zero (ids are presumably non-negative). */
+    kg->data = data;
 
-		kernel_split_params.queue_index = queue_index;
-		kernel_split_params.use_queues_flag = use_queues_flag;
-		kernel_split_params.work_pools = work_pools;
-		kernel_split_params.tile.buffer = buffer;
+    kernel_split_params.queue_index = queue_index;
+    kernel_split_params.use_queues_flag = use_queues_flag;
+    kernel_split_params.work_pools = work_pools;
+    kernel_split_params.tile.buffer = buffer;
 
-		split_data_init(kg, &kernel_split_state, ccl_global_size(0)*ccl_global_size(1), split_data_buffer, ray_state);
+    split_data_init(kg,
+                    &kernel_split_state,
+                    ccl_global_size(0) * ccl_global_size(1), /* Product of the two global sizes. */
+                    split_data_buffer,
+                    ray_state);
+  }
 
-	}
+  kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS); /* Outside the guarded setup above, so it runs for every work-item. */
 
-	kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS);
-
-	KERNEL_NAME_EVAL(kernel, KERNEL_NAME)(
-			kg
+  KERNEL_NAME_EVAL(kernel, KERNEL_NAME) /* Expands to kernel_<KERNEL_NAME>; its call arguments follow on the next lines. */
+  (kg
 #ifdef LOCALS_TYPE
-			, &locals
+   ,
+   &locals /* Second argument exists only in LOCALS_TYPE builds. */
 #endif
-		);
+  );
 }
 
 #undef KERNEL_NAME_JOIN
diff --git a/intern/cycles/kernel/osl/CMakeLists.txt b/intern/cycles/kernel/osl/CMakeLists.txt
index 0a3d0b974cb..28d9ca854db 100644
--- a/intern/cycles/kernel/osl/CMakeLists.txt
+++ b/intern/cycles/kernel/osl/CMakeLists.txt
@@ -1,6 +1,6 @@
 
 set(INC
-	../..
+  ../..  # Relative to this file's directory (intern/cycles/kernel/osl) this is intern/cycles.
 )
 
 set(INC_SYS
@@ -8,25 +8,25 @@ set(INC_SYS
 )
 
 set(SRC
-	background.cpp
-	bsdf_diffuse_ramp.cpp
-	bsdf_phong_ramp.cpp
-	emissive.cpp
-	osl_bssrdf.cpp
-	osl_closures.cpp
-	osl_services.cpp
-	osl_shader.cpp
+  background.cpp  # SRC: .cpp sources, named relative to this CMakeLists.txt.
+  bsdf_diffuse_ramp.cpp
+  bsdf_phong_ramp.cpp
+  emissive.cpp
+  osl_bssrdf.cpp
+  osl_closures.cpp
+  osl_services.cpp
+  osl_shader.cpp
 )
 
 set(HEADER_SRC
-	osl_closures.h
-	osl_globals.h
-	osl_services.h
-	osl_shader.h
+  osl_closures.h  # HEADER_SRC: the osl_*.h headers kept in a separate list from SRC.
+  osl_globals.h
+  osl_services.h
+  osl_shader.h
 )
 
 set(LIB
-	cycles_render
+  cycles_render  # NOTE(review): presumably a library to link with; where LIB is consumed is not visible in this hunk.
 )
 
 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${RTTI_DISABLE_FLAGS}")
diff --git a/intern/cycles/kernel/osl/background.cpp b/intern/cycles/kernel/osl/background.cpp
index 6924a4144c5..b395227845d 100644
--- a/intern/cycles/kernel/osl/background.cpp
+++ b/intern/cycles/kernel/osl/background.cpp
@@ -51,11 +51,11 @@ using namespace OSL;
 /// only the weight is taking into account
 ///
 class GenericBackgroundClosure : public CClosurePrimitive {
-public:
-	void setup(ShaderData *sd, int /* path_flag */, float3 weight)
-	{
-		background_setup(sd, weight);
-	}
+ public:
+  void setup(ShaderData *sd, int /* path_flag */, float3 weight) /* The int argument is unnamed and unused. */
+  {
+    background_setup(sd, weight); /* Only sd and weight are forwarded. */
+  }
 };
 
 /// Holdout closure
@@ -66,31 +66,28 @@ public:
 /// used
 ///
 class HoldoutClosure : CClosurePrimitive {
-public:
-	void setup(ShaderData *sd, int /* path_flag */, float3 weight)
-	{
-		closure_alloc(sd, sizeof(ShaderClosure), CLOSURE_HOLDOUT_ID, weight);
-		sd->flag |= SD_HOLDOUT;
-	}
+ public: /* NOTE(review): the base class is inherited privately here (no access specifier on a class), unlike the sibling closures - confirm this is intended. */
+  void setup(ShaderData *sd, int /* path_flag */, float3 weight) /* The int argument is unnamed and unused. */
+  {
+    closure_alloc(sd, sizeof(ShaderClosure), CLOSURE_HOLDOUT_ID, weight); /* Return value is not checked. */
+    sd->flag |= SD_HOLDOUT; /* Set regardless of the allocation result. */
+  }
 };
 
 ClosureParam *closure_background_params()
 {
-	static ClosureParam params[] = {
-		CLOSURE_STRING_KEYPARAM(GenericBackgroundClosure, label, "label"),
-		CLOSURE_FINISH_PARAM(GenericBackgroundClosure)
-	};
-	return params;
+  static ClosureParam params[] = { /* Function-local static table: a "label" string key entry, then the finish entry. */
+      CLOSURE_STRING_KEYPARAM(GenericBackgroundClosure, label, "label"),
+      CLOSURE_FINISH_PARAM(GenericBackgroundClosure)};
+  return params; /* The same array is returned on every call. */
 }
 
 CCLOSURE_PREPARE(closure_background_prepare, GenericBackgroundClosure)
 
 ClosureParam *closure_holdout_params()
 {
-	static ClosureParam params[] = {
-		CLOSURE_FINISH_PARAM(HoldoutClosure)
-	};
-	return params;
+  static ClosureParam params[] = {CLOSURE_FINISH_PARAM(HoldoutClosure)}; /* Only the finish entry: no parameters are listed for the holdout closure. */
+  return params; /* The same function-local static array is returned on every call. */
 }
 
 CCLOSURE_PREPARE(closure_holdout_prepare, HoldoutClosure)
diff --git a/intern/cycles/kernel/osl/bsdf_diffuse_ramp.cpp b/intern/cycles/kernel/osl/bsdf_diffuse_ramp.cpp
index ed5d5235a34..c5edc7c9be3 100644
--- a/intern/cycles/kernel/osl/bsdf_diffuse_ramp.cpp
+++ b/intern/cycles/kernel/osl/bsdf_diffuse_ramp.cpp
@@ -47,36 +47,35 @@ CCL_NAMESPACE_BEGIN
 using namespace OSL;
 
 class DiffuseRampClosure : public CBSDFClosure {
-public:
-	DiffuseRampBsdf params;
-	Color3 colors[8];
+ public:
+  DiffuseRampBsdf params;
+  Color3 colors[8]; /* Eight ramp colors; setup() converts and copies them into bsdf->colors. */
 
-	void setup(ShaderData *sd, int /* path_flag */, float3 weight)
-	{
-	    DiffuseRampBsdf *bsdf = (DiffuseRampBsdf*)bsdf_alloc_osl(sd, sizeof(DiffuseRampBsdf), weight, &params);
+  void setup(ShaderData *sd, int /* path_flag */, float3 weight) /* The int argument is unnamed and unused. */
+  {
+    DiffuseRampBsdf *bsdf = (DiffuseRampBsdf *)bsdf_alloc_osl(
+        sd, sizeof(DiffuseRampBsdf), weight, &params);
 
-		if(bsdf) {
-			bsdf->colors = (float3*)closure_alloc_extra(sd, sizeof(float3)*8);
+    if (bsdf) { /* Nothing further happens when the BSDF allocation returned NULL. */
+      bsdf->colors = (float3 *)closure_alloc_extra(sd, sizeof(float3) * 8); /* Room for the same 8 entries as colors[]. */
 
-			if(bsdf->colors) {
-				for(int i = 0; i < 8; i++)
-					bsdf->colors[i] = TO_FLOAT3(colors[i]);
+      if (bsdf->colors) { /* sd->flag is only updated when this second allocation succeeded too. */
+        for (int i = 0; i < 8; i++)
+          bsdf->colors[i] = TO_FLOAT3(colors[i]);
 
-				sd->flag |= bsdf_diffuse_ramp_setup(bsdf);
-			}
-		}
-	}
+        sd->flag |= bsdf_diffuse_ramp_setup(bsdf);
+      }
+    }
+  }
 };
 
 ClosureParam *closure_bsdf_diffuse_ramp_params()
 {
-	static ClosureParam params[] = {
-		CLOSURE_FLOAT3_PARAM(DiffuseRampClosure, params.N),
-		CLOSURE_COLOR_ARRAY_PARAM(DiffuseRampClosure, colors, 8),
-		CLOSURE_STRING_KEYPARAM(DiffuseRampClosure, label, "label"),
-		CLOSURE_FINISH_PARAM(DiffuseRampClosure)
-	};
-	return params;
+  static ClosureParam params[] = {CLOSURE_FLOAT3_PARAM(DiffuseRampClosure, params.N),
+                                  CLOSURE_COLOR_ARRAY_PARAM(DiffuseRampClosure, colors, 8), /* 8 matches the size of DiffuseRampClosure::colors. */
+                                  CLOSURE_STRING_KEYPARAM(DiffuseRampClosure, label, "label"),
+                                  CLOSURE_FINISH_PARAM(DiffuseRampClosure)};
+  return params; /* The same function-local static array is returned on every call. */
 }
 
 CCLOSURE_PREPARE(closure_bsdf_diffuse_ramp_prepare, DiffuseRampClosure)
diff --git a/intern/cycles/kernel/osl/bsdf_phong_ramp.cpp b/intern/cycles/kernel/osl/bsdf_phong_ramp.cpp
index a8acdb8e342..4b7e59ff932 100644
--- a/intern/cycles/kernel/osl/bsdf_phong_ramp.cpp
+++ b/intern/cycles/kernel/osl/bsdf_phong_ramp.cpp
@@ -46,37 +46,36 @@ CCL_NAMESPACE_BEGIN
 using namespace OSL;
 
 class PhongRampClosure : public CBSDFClosure {
-public:
-	PhongRampBsdf params;
-	Color3 colors[8];
+ public:
+  PhongRampBsdf params;
+  Color3 colors[8]; /* Eight ramp colors; setup() converts and copies them into bsdf->colors. */
 
-	void setup(ShaderData *sd, int /* path_flag */, float3 weight)
-	{
-	    PhongRampBsdf *bsdf = (PhongRampBsdf*)bsdf_alloc_osl(sd, sizeof(PhongRampBsdf), weight, &params);
+  void setup(ShaderData *sd, int /* path_flag */, float3 weight) /* The int argument is unnamed and unused. */
+  {
+    PhongRampBsdf *bsdf = (PhongRampBsdf *)bsdf_alloc_osl(
+        sd, sizeof(PhongRampBsdf), weight, &params);
 
-		if(bsdf) {
-			bsdf->colors = (float3*)closure_alloc_extra(sd, sizeof(float3)*8);
+    if (bsdf) { /* Nothing further happens when the BSDF allocation returned NULL. */
+      bsdf->colors = (float3 *)closure_alloc_extra(sd, sizeof(float3) * 8); /* Room for the same 8 entries as colors[]. */
 
-			if(bsdf->colors) {
-				for(int i = 0; i < 8; i++)
-					bsdf->colors[i] = TO_FLOAT3(colors[i]);
+      if (bsdf->colors) { /* sd->flag is only updated when this second allocation succeeded too. */
+        for (int i = 0; i < 8; i++)
+          bsdf->colors[i] = TO_FLOAT3(colors[i]);
 
-				sd->flag |= bsdf_phong_ramp_setup(bsdf);
-			}
-		}
-	}
+        sd->flag |= bsdf_phong_ramp_setup(bsdf);
+      }
+    }
+  }
 };
 
 ClosureParam *closure_bsdf_phong_ramp_params()
 {
-	static ClosureParam params[] = {
-		CLOSURE_FLOAT3_PARAM(PhongRampClosure, params.N),
-		CLOSURE_FLOAT_PARAM(PhongRampClosure, params.exponent),
-		CLOSURE_COLOR_ARRAY_PARAM(PhongRampClosure, colors, 8),
-		CLOSURE_STRING_KEYPARAM(PhongRampClosure, label, "label"),
-		CLOSURE_FINISH_PARAM(PhongRampClosure)
-	};
-	return params;
+  static ClosureParam params[] = {CLOSURE_FLOAT3_PARAM(PhongRampClosure, params.N),
+                                  CLOSURE_FLOAT_PARAM(PhongRampClosure, params.exponent),
+                                  CLOSURE_COLOR_ARRAY_PARAM(PhongRampClosure, colors, 8), /* 8 matches the size of PhongRampClosure::colors. */
+                                  CLOSURE_STRING_KEYPARAM(PhongRampClosure, label, "label"),
+                                  CLOSURE_FINISH_PARAM(PhongRampClosure)};
+  return params; /* The same function-local static array is returned on every call. */
 }
 
 CCLOSURE_PREPARE(closure_bsdf_phong_ramp_prepare, PhongRampClosure)
diff --git a/intern/cycles/kernel/osl/emissive.cpp b/intern/cycles/kernel/osl/emissive.cpp
index c2a848231f2..c29ddb13e2e 100644
--- a/intern/cycles/kernel/osl/emissive.cpp
+++ b/intern/cycles/kernel/osl/emissive.cpp
@@ -53,20 +53,18 @@ using namespace OSL;
 /// if the provided angles are PI/2, which is the default
 ///
 class GenericEmissiveClosure : public CClosurePrimitive {
-public:
-	void setup(ShaderData *sd, int /* path_flag */, float3 weight)
-	{
-		emission_setup(sd, weight);
-	}
+ public:
+  void setup(ShaderData *sd, int /* path_flag */, float3 weight) /* The int argument is unnamed and unused. */
+  {
+    emission_setup(sd, weight); /* Only sd and weight are forwarded. */
+  }
 };
 
 ClosureParam *closure_emission_params()
 {
-	static ClosureParam params[] = {
-		CLOSURE_STRING_KEYPARAM(GenericEmissiveClosure, label, "label"),
-		CLOSURE_FINISH_PARAM(GenericEmissiveClosure)
-	};
-	return params;
+  static ClosureParam params[] = {CLOSURE_STRING_KEYPARAM(GenericEmissiveClosure, label, "label"), /* A "label" string key entry, then the finish entry. */
+                                  CLOSURE_FINISH_PARAM(GenericEmissiveClosure)};
+  return params; /* The same function-local static array is returned on every call. */
 }
 
 CCLOSURE_PREPARE(closure_emission_prepare, GenericEmissiveClosure)
diff --git a/intern/cycles/kernel/osl/osl_bssrdf.cpp b/intern/cycles/kernel/osl/osl_bssrdf.cpp
index 66ec8a996ca..dd52c33071c 100644
--- a/intern/cycles/kernel/osl/osl_bssrdf.cpp
+++ b/intern/cycles/kernel/osl/osl_bssrdf.cpp
@@ -56,77 +56,76 @@ static ustring u_random_walk("random_walk");
 static ustring u_principled_random_walk("principled_random_walk");
 
 class CBSSRDFClosure : public CClosurePrimitive {
-public:
-	Bssrdf params;
-	ustring method;
-
-	CBSSRDFClosure()
-	{
-		params.texture_blur = 0.0f;
-		params.sharpness = 0.0f;
-		params.roughness = 0.0f;
-	}
-
-	void setup(ShaderData *sd, int path_flag, float3 weight)
-	{
-		if(method == u_cubic) {
-			alloc(sd, path_flag, weight, CLOSURE_BSSRDF_CUBIC_ID);
-		}
-		else if(method == u_gaussian) {
-			alloc(sd, path_flag, weight, CLOSURE_BSSRDF_GAUSSIAN_ID);
-		}
-		else if(method == u_burley) {
-			alloc(sd, path_flag, weight, CLOSURE_BSSRDF_BURLEY_ID);
-		}
-		else if(method == u_principled) {
-			alloc(sd, path_flag, weight, CLOSURE_BSSRDF_PRINCIPLED_ID);
-		}
-		else if(method == u_random_walk) {
-			alloc(sd, path_flag, weight, CLOSURE_BSSRDF_RANDOM_WALK_ID);
-		}
-		else if(method == u_principled_random_walk) {
-			alloc(sd, path_flag, weight, CLOSURE_BSSRDF_PRINCIPLED_RANDOM_WALK_ID);
-		}
-	}
-
-	void alloc(ShaderData *sd, int path_flag, float3 weight, ClosureType type)
-	{
-		Bssrdf *bssrdf = bssrdf_alloc(sd, weight);
-
-		if(bssrdf) {
-			/* disable in case of diffuse ancestor, can't see it well then and
-			 * adds considerably noise due to probabilities of continuing path
-			 * getting lower and lower */
-			if(path_flag & PATH_RAY_DIFFUSE_ANCESTOR) {
-				params.radius = make_float3(0.0f, 0.0f, 0.0f);
-			}
-
-			/* create one closure per color channel */
-			bssrdf->radius = params.radius;
-			bssrdf->albedo = params.albedo;
-			bssrdf->texture_blur = params.texture_blur;
-			bssrdf->sharpness = params.sharpness;
-			bssrdf->N = params.N;
-			bssrdf->roughness = params.roughness;
-			sd->flag |= bssrdf_setup(sd, bssrdf, (ClosureType)type);
-		}
-	}
+ public:
+  Bssrdf params;
+  ustring method;
+
+  CBSSRDFClosure() /* Zeroes the three params fields that closure_bssrdf_params() registers with CLOSURE_FLOAT_KEYPARAM. */
+  {
+    params.texture_blur = 0.0f;
+    params.sharpness = 0.0f;
+    params.roughness = 0.0f;
+  }
+
+  void setup(ShaderData *sd, int path_flag, float3 weight) /* Maps the method string to a closure type id and hands off to alloc(). */
+  {
+    if (method == u_cubic) {
+      alloc(sd, path_flag, weight, CLOSURE_BSSRDF_CUBIC_ID);
+    }
+    else if (method == u_gaussian) {
+      alloc(sd, path_flag, weight, CLOSURE_BSSRDF_GAUSSIAN_ID);
+    }
+    else if (method == u_burley) {
+      alloc(sd, path_flag, weight, CLOSURE_BSSRDF_BURLEY_ID);
+    }
+    else if (method == u_principled) {
+      alloc(sd, path_flag, weight, CLOSURE_BSSRDF_PRINCIPLED_ID);
+    }
+    else if (method == u_random_walk) {
+      alloc(sd, path_flag, weight, CLOSURE_BSSRDF_RANDOM_WALK_ID);
+    }
+    else if (method == u_principled_random_walk) {
+      alloc(sd, path_flag, weight, CLOSURE_BSSRDF_PRINCIPLED_RANDOM_WALK_ID);
+    } /* No final else: a method matching none of the six names allocates nothing and reports nothing. */
+  }
+
+  void alloc(ShaderData *sd, int path_flag, float3 weight, ClosureType type) /* Allocates a Bssrdf, copies params into it and runs bssrdf_setup(); does nothing when allocation fails. */
+  {
+    Bssrdf *bssrdf = bssrdf_alloc(sd, weight);
+
+    if (bssrdf) {
+      /* disable in case of diffuse ancestor, can't see it well then and
+       * adds considerable noise due to probabilities of continuing path
+       * getting lower and lower */
+      if (path_flag & PATH_RAY_DIFFUSE_ANCESTOR) {
+        params.radius = make_float3(0.0f, 0.0f, 0.0f); /* NOTE(review): this overwrites the member params.radius, not a local copy. */
+      }
+
+      /* create one closure per color channel */
+      bssrdf->radius = params.radius;
+      bssrdf->albedo = params.albedo;
+      bssrdf->texture_blur = params.texture_blur;
+      bssrdf->sharpness = params.sharpness;
+      bssrdf->N = params.N;
+      bssrdf->roughness = params.roughness;
+      sd->flag |= bssrdf_setup(sd, bssrdf, type); /* type is already a ClosureType, so no cast is needed. */
+    }
+  }
 };
 
 ClosureParam *closure_bssrdf_params()
 {
-	static ClosureParam params[] = {
-		CLOSURE_STRING_PARAM(CBSSRDFClosure, method),
-		CLOSURE_FLOAT3_PARAM(CBSSRDFClosure, params.N),
-		CLOSURE_FLOAT3_PARAM(CBSSRDFClosure, params.radius),
-		CLOSURE_FLOAT3_PARAM(CBSSRDFClosure, params.albedo),
-		CLOSURE_FLOAT_KEYPARAM(CBSSRDFClosure, params.texture_blur, "texture_blur"),
-		CLOSURE_FLOAT_KEYPARAM(CBSSRDFClosure, params.sharpness, "sharpness"),
-		CLOSURE_FLOAT_KEYPARAM(CBSSRDFClosure, params.roughness, "roughness"),
-		CLOSURE_STRING_KEYPARAM(CBSSRDFClosure, label, "label"),
-		CLOSURE_FINISH_PARAM(CBSSRDFClosure)
-	};
-	return params;
+  static ClosureParam params[] = { /* Function-local static table for CBSSRDFClosure. */
+      CLOSURE_STRING_PARAM(CBSSRDFClosure, method), /* The string that CBSSRDFClosure::setup() compares against the u_* names. */
+      CLOSURE_FLOAT3_PARAM(CBSSRDFClosure, params.N),
+      CLOSURE_FLOAT3_PARAM(CBSSRDFClosure, params.radius),
+      CLOSURE_FLOAT3_PARAM(CBSSRDFClosure, params.albedo),
+      CLOSURE_FLOAT_KEYPARAM(CBSSRDFClosure, params.texture_blur, "texture_blur"), /* The three keyed floats are the fields the constructor zeroes. */
+      CLOSURE_FLOAT_KEYPARAM(CBSSRDFClosure, params.sharpness, "sharpness"),
+      CLOSURE_FLOAT_KEYPARAM(CBSSRDFClosure, params.roughness, "roughness"),
+      CLOSURE_STRING_KEYPARAM(CBSSRDFClosure, label, "label"),
+      CLOSURE_FINISH_PARAM(CBSSRDFClosure)};
+  return params; /* The same array is returned on every call. */
 }
 
 CCLOSURE_PREPARE(closure_bssrdf_prepare, CBSSRDFClosure)
diff --git a/intern/cycles/kernel/osl/osl_closures.cpp b/intern/cycles/kernel/osl/osl_closures.cpp
index 169351d5ad9..aa7e2727577 100644
--- a/intern/cycles/kernel/osl/osl_closures.cpp
+++ b/intern/cycles/kernel/osl/osl_closures.cpp
@@ -71,706 +71,787 @@ using namespace OSL;
 /* BSDF class definitions */
 
 BSDF_CLOSURE_CLASS_BEGIN(Diffuse, diffuse, DiffuseBsdf, LABEL_DIFFUSE)
-	CLOSURE_FLOAT3_PARAM(DiffuseClosure, params.N),
-BSDF_CLOSURE_CLASS_END(Diffuse, diffuse)
-
-BSDF_CLOSURE_CLASS_BEGIN(Translucent, translucent, DiffuseBsdf, LABEL_DIFFUSE)
-	CLOSURE_FLOAT3_PARAM(TranslucentClosure, params.N),
-BSDF_CLOSURE_CLASS_END(Translucent, translucent)
-
-BSDF_CLOSURE_CLASS_BEGIN(OrenNayar, oren_nayar, OrenNayarBsdf, LABEL_DIFFUSE)
-	CLOSURE_FLOAT3_PARAM(OrenNayarClosure, params.N),
-	CLOSURE_FLOAT_PARAM(OrenNayarClosure, params.roughness),
-BSDF_CLOSURE_CLASS_END(OrenNayar, oren_nayar)
-
-BSDF_CLOSURE_CLASS_BEGIN(Reflection, reflection, MicrofacetBsdf, LABEL_SINGULAR)
-	CLOSURE_FLOAT3_PARAM(ReflectionClosure, params.N),
-BSDF_CLOSURE_CLASS_END(Reflection, reflection)
-
-BSDF_CLOSURE_CLASS_BEGIN(Refraction, refraction, MicrofacetBsdf, LABEL_SINGULAR)
-	CLOSURE_FLOAT3_PARAM(RefractionClosure, params.N),
-	CLOSURE_FLOAT_PARAM(RefractionClosure, params.ior),
-BSDF_CLOSURE_CLASS_END(Refraction, refraction)
-
-BSDF_CLOSURE_CLASS_BEGIN(AshikhminVelvet, ashikhmin_velvet, VelvetBsdf, LABEL_DIFFUSE)
-	CLOSURE_FLOAT3_PARAM(AshikhminVelvetClosure, params.N),
-	CLOSURE_FLOAT_PARAM(AshikhminVelvetClosure, params.sigma),
-BSDF_CLOSURE_CLASS_END(AshikhminVelvet, ashikhmin_velvet)
-
-BSDF_CLOSURE_CLASS_BEGIN(AshikhminShirley, ashikhmin_shirley_aniso, MicrofacetBsdf, LABEL_GLOSSY|LABEL_REFLECT)
-	CLOSURE_FLOAT3_PARAM(AshikhminShirleyClosure, params.N),
-	CLOSURE_FLOAT3_PARAM(AshikhminShirleyClosure, params.T),
-	CLOSURE_FLOAT_PARAM(AshikhminShirleyClosure, params.alpha_x),
-	CLOSURE_FLOAT_PARAM(AshikhminShirleyClosure, params.alpha_y),
-BSDF_CLOSURE_CLASS_END(AshikhminShirley, ashikhmin_shirley_aniso)
-
-BSDF_CLOSURE_CLASS_BEGIN(DiffuseToon, diffuse_toon, ToonBsdf, LABEL_DIFFUSE)
-	CLOSURE_FLOAT3_PARAM(DiffuseToonClosure, params.N),
-	CLOSURE_FLOAT_PARAM(DiffuseToonClosure, params.size),
-	CLOSURE_FLOAT_PARAM(DiffuseToonClosure, params.smooth),
-BSDF_CLOSURE_CLASS_END(DiffuseToon, diffuse_toon)
-
-BSDF_CLOSURE_CLASS_BEGIN(GlossyToon, glossy_toon, ToonBsdf, LABEL_GLOSSY)
-	CLOSURE_FLOAT3_PARAM(GlossyToonClosure, params.N),
-	CLOSURE_FLOAT_PARAM(GlossyToonClosure, params.size),
-	CLOSURE_FLOAT_PARAM(GlossyToonClosure, params.smooth),
-BSDF_CLOSURE_CLASS_END(GlossyToon, glossy_toon)
-
-BSDF_CLOSURE_CLASS_BEGIN(MicrofacetGGX, microfacet_ggx, MicrofacetBsdf, LABEL_GLOSSY|LABEL_REFLECT)
-	CLOSURE_FLOAT3_PARAM(MicrofacetGGXClosure, params.N),
-	CLOSURE_FLOAT_PARAM(MicrofacetGGXClosure, params.alpha_x),
-BSDF_CLOSURE_CLASS_END(MicrofacetGGX, microfacet_ggx)
-
-BSDF_CLOSURE_CLASS_BEGIN(MicrofacetGGXAniso, microfacet_ggx_aniso, MicrofacetBsdf, LABEL_GLOSSY|LABEL_REFLECT)
-	CLOSURE_FLOAT3_PARAM(MicrofacetGGXAnisoClosure, params.N),
-	CLOSURE_FLOAT3_PARAM(MicrofacetGGXAnisoClosure, params.T),
-	CLOSURE_FLOAT_PARAM(MicrofacetGGXAnisoClosure, params.alpha_x),
-	CLOSURE_FLOAT_PARAM(MicrofacetGGXAnisoClosure, params.alpha_y),
-BSDF_CLOSURE_CLASS_END(MicrofacetGGXAniso, microfacet_ggx_aniso)
-
-BSDF_CLOSURE_CLASS_BEGIN(MicrofacetBeckmann, microfacet_beckmann, MicrofacetBsdf, LABEL_GLOSSY|LABEL_REFLECT)
-	CLOSURE_FLOAT3_PARAM(MicrofacetBeckmannClosure, params.N),
-	CLOSURE_FLOAT_PARAM(MicrofacetBeckmannClosure, params.alpha_x),
-BSDF_CLOSURE_CLASS_END(MicrofacetBeckmann, microfacet_beckmann)
-
-BSDF_CLOSURE_CLASS_BEGIN(MicrofacetBeckmannAniso, microfacet_beckmann_aniso, MicrofacetBsdf, LABEL_GLOSSY|LABEL_REFLECT)
-	CLOSURE_FLOAT3_PARAM(MicrofacetBeckmannAnisoClosure, params.N),
-	CLOSURE_FLOAT3_PARAM(MicrofacetBeckmannAnisoClosure, params.T),
-	CLOSURE_FLOAT_PARAM(MicrofacetBeckmannAnisoClosure, params.alpha_x),
-	CLOSURE_FLOAT_PARAM(MicrofacetBeckmannAnisoClosure, params.alpha_y),
-BSDF_CLOSURE_CLASS_END(MicrofacetBeckmannAniso, microfacet_beckmann_aniso)
-
-BSDF_CLOSURE_CLASS_BEGIN(MicrofacetGGXRefraction, microfacet_ggx_refraction, MicrofacetBsdf, LABEL_GLOSSY|LABEL_TRANSMIT)
-	CLOSURE_FLOAT3_PARAM(MicrofacetGGXRefractionClosure, params.N),
-	CLOSURE_FLOAT_PARAM(MicrofacetGGXRefractionClosure, params.alpha_x),
-	CLOSURE_FLOAT_PARAM(MicrofacetGGXRefractionClosure, params.ior),
-BSDF_CLOSURE_CLASS_END(MicrofacetGGXRefraction, microfacet_ggx_refraction)
-
-BSDF_CLOSURE_CLASS_BEGIN(MicrofacetBeckmannRefraction, microfacet_beckmann_refraction, MicrofacetBsdf, LABEL_GLOSSY|LABEL_TRANSMIT)
-	CLOSURE_FLOAT3_PARAM(MicrofacetBeckmannRefractionClosure, params.N),
-	CLOSURE_FLOAT_PARAM(MicrofacetBeckmannRefractionClosure, params.alpha_x),
-	CLOSURE_FLOAT_PARAM(MicrofacetBeckmannRefractionClosure, params.ior),
-BSDF_CLOSURE_CLASS_END(MicrofacetBeckmannRefraction, microfacet_beckmann_refraction)
-
-BSDF_CLOSURE_CLASS_BEGIN(HairReflection, hair_reflection, HairBsdf, LABEL_GLOSSY)
-	CLOSURE_FLOAT3_PARAM(HairReflectionClosure, params.N),
-	CLOSURE_FLOAT_PARAM(HairReflectionClosure, params.roughness1),
-	CLOSURE_FLOAT_PARAM(HairReflectionClosure, params.roughness2),
-	CLOSURE_FLOAT3_PARAM(HairReflectionClosure, params.T),
-	CLOSURE_FLOAT_PARAM(HairReflectionClosure, params.offset),
-BSDF_CLOSURE_CLASS_END(HairReflection, hair_reflection)
-
-BSDF_CLOSURE_CLASS_BEGIN(HairTransmission, hair_transmission, HairBsdf, LABEL_GLOSSY)
-	CLOSURE_FLOAT3_PARAM(HairTransmissionClosure, params.N),
-	CLOSURE_FLOAT_PARAM(HairTransmissionClosure, params.roughness1),
-	CLOSURE_FLOAT_PARAM(HairTransmissionClosure, params.roughness2),
-	CLOSURE_FLOAT3_PARAM(HairReflectionClosure, params.T),
-	CLOSURE_FLOAT_PARAM(HairReflectionClosure, params.offset),
-BSDF_CLOSURE_CLASS_END(HairTransmission, hair_transmission)
-
-BSDF_CLOSURE_CLASS_BEGIN(PrincipledDiffuse, principled_diffuse, PrincipledDiffuseBsdf, LABEL_DIFFUSE)
-	CLOSURE_FLOAT3_PARAM(PrincipledDiffuseClosure, params.N),
-	CLOSURE_FLOAT_PARAM(PrincipledDiffuseClosure, params.roughness),
-BSDF_CLOSURE_CLASS_END(PrincipledDiffuse, principled_diffuse)
-
-BSDF_CLOSURE_CLASS_BEGIN(PrincipledSheen, principled_sheen, PrincipledSheenBsdf, LABEL_DIFFUSE)
-	CLOSURE_FLOAT3_PARAM(PrincipledSheenClosure, params.N),
-BSDF_CLOSURE_CLASS_END(PrincipledSheen, principled_sheen)
-
-/* PRINCIPLED HAIR BSDF */
-class PrincipledHairClosure : public CBSDFClosure {
-public:
-	PrincipledHairBSDF params;
-
-	PrincipledHairBSDF *alloc(ShaderData *sd, int path_flag, float3 weight)
-	{
-		PrincipledHairBSDF *bsdf = (PrincipledHairBSDF*)bsdf_alloc_osl(sd, sizeof(PrincipledHairBSDF), weight, &params);
-		if(!bsdf) {
-			return NULL;
-		}
-
-		PrincipledHairExtra *extra = (PrincipledHairExtra*)closure_alloc_extra(sd, sizeof(PrincipledHairExtra));
-		if(!extra) {
-			return NULL;
-		}
-
-		bsdf->extra = extra;
-		return bsdf;
-	}
-
-	void setup(ShaderData *sd, int path_flag, float3 weight)
-	{
-		if(!skip(sd, path_flag, LABEL_GLOSSY)) {
-			PrincipledHairBSDF *bsdf = (PrincipledHairBSDF*)alloc(sd, path_flag, weight);
-			if(!bsdf) {
-				return;
-			}
-
-			sd->flag |= (bsdf) ? bsdf_principled_hair_setup(sd, bsdf) : 0;
-		}
-	}
+CLOSURE_FLOAT3_PARAM(DiffuseClosure, params.N),
+    BSDF_CLOSURE_CLASS_END(Diffuse, diffuse)
+
+        BSDF_CLOSURE_CLASS_BEGIN(Translucent, translucent, DiffuseBsdf, LABEL_DIFFUSE)
+            CLOSURE_FLOAT3_PARAM(TranslucentClosure, params.N),
+    BSDF_CLOSURE_CLASS_END(Translucent, translucent)
+
+        BSDF_CLOSURE_CLASS_BEGIN(OrenNayar, oren_nayar, OrenNayarBsdf, LABEL_DIFFUSE)
+            CLOSURE_FLOAT3_PARAM(OrenNayarClosure, params.N),
+    CLOSURE_FLOAT_PARAM(OrenNayarClosure, params.roughness),
+    BSDF_CLOSURE_CLASS_END(OrenNayar, oren_nayar)
+
+        BSDF_CLOSURE_CLASS_BEGIN(Reflection, reflection, MicrofacetBsdf, LABEL_SINGULAR)
+            CLOSURE_FLOAT3_PARAM(ReflectionClosure, params.N),
+    BSDF_CLOSURE_CLASS_END(Reflection, reflection)
+
+        BSDF_CLOSURE_CLASS_BEGIN(Refraction, refraction, MicrofacetBsdf, LABEL_SINGULAR)
+            CLOSURE_FLOAT3_PARAM(RefractionClosure, params.N),
+    CLOSURE_FLOAT_PARAM(RefractionClosure, params.ior),
+    BSDF_CLOSURE_CLASS_END(Refraction, refraction)
+
+        BSDF_CLOSURE_CLASS_BEGIN(AshikhminVelvet, ashikhmin_velvet, VelvetBsdf, LABEL_DIFFUSE)
+            CLOSURE_FLOAT3_PARAM(AshikhminVelvetClosure, params.N),
+    CLOSURE_FLOAT_PARAM(AshikhminVelvetClosure, params.sigma),
+    BSDF_CLOSURE_CLASS_END(AshikhminVelvet, ashikhmin_velvet)
+
+        BSDF_CLOSURE_CLASS_BEGIN(AshikhminShirley,
+                                 ashikhmin_shirley_aniso,
+                                 MicrofacetBsdf,
+                                 LABEL_GLOSSY | LABEL_REFLECT)
+            CLOSURE_FLOAT3_PARAM(AshikhminShirleyClosure, params.N),
+    CLOSURE_FLOAT3_PARAM(AshikhminShirleyClosure, params.T),
+    CLOSURE_FLOAT_PARAM(AshikhminShirleyClosure, params.alpha_x),
+    CLOSURE_FLOAT_PARAM(AshikhminShirleyClosure, params.alpha_y),
+    BSDF_CLOSURE_CLASS_END(AshikhminShirley, ashikhmin_shirley_aniso)
+
+        BSDF_CLOSURE_CLASS_BEGIN(DiffuseToon, diffuse_toon, ToonBsdf, LABEL_DIFFUSE)
+            CLOSURE_FLOAT3_PARAM(DiffuseToonClosure, params.N),
+    CLOSURE_FLOAT_PARAM(DiffuseToonClosure, params.size),
+    CLOSURE_FLOAT_PARAM(DiffuseToonClosure, params.smooth),
+    BSDF_CLOSURE_CLASS_END(DiffuseToon, diffuse_toon)
+
+        BSDF_CLOSURE_CLASS_BEGIN(GlossyToon, glossy_toon, ToonBsdf, LABEL_GLOSSY)
+            CLOSURE_FLOAT3_PARAM(GlossyToonClosure, params.N),
+    CLOSURE_FLOAT_PARAM(GlossyToonClosure, params.size),
+    CLOSURE_FLOAT_PARAM(GlossyToonClosure, params.smooth),
+    BSDF_CLOSURE_CLASS_END(GlossyToon, glossy_toon)
+
+        BSDF_CLOSURE_CLASS_BEGIN(MicrofacetGGX,
+                                 microfacet_ggx,
+                                 MicrofacetBsdf,
+                                 LABEL_GLOSSY | LABEL_REFLECT)
+            CLOSURE_FLOAT3_PARAM(MicrofacetGGXClosure, params.N),
+    CLOSURE_FLOAT_PARAM(MicrofacetGGXClosure, params.alpha_x),
+    BSDF_CLOSURE_CLASS_END(MicrofacetGGX, microfacet_ggx)
+
+        BSDF_CLOSURE_CLASS_BEGIN(MicrofacetGGXAniso,
+                                 microfacet_ggx_aniso,
+                                 MicrofacetBsdf,
+                                 LABEL_GLOSSY | LABEL_REFLECT)
+            CLOSURE_FLOAT3_PARAM(MicrofacetGGXAnisoClosure, params.N),
+    CLOSURE_FLOAT3_PARAM(MicrofacetGGXAnisoClosure, params.T),
+    CLOSURE_FLOAT_PARAM(MicrofacetGGXAnisoClosure, params.alpha_x),
+    CLOSURE_FLOAT_PARAM(MicrofacetGGXAnisoClosure, params.alpha_y),
+    BSDF_CLOSURE_CLASS_END(MicrofacetGGXAniso, microfacet_ggx_aniso)
+
+        BSDF_CLOSURE_CLASS_BEGIN(MicrofacetBeckmann,
+                                 microfacet_beckmann,
+                                 MicrofacetBsdf,
+                                 LABEL_GLOSSY | LABEL_REFLECT)
+            CLOSURE_FLOAT3_PARAM(MicrofacetBeckmannClosure, params.N),
+    CLOSURE_FLOAT_PARAM(MicrofacetBeckmannClosure, params.alpha_x),
+    BSDF_CLOSURE_CLASS_END(MicrofacetBeckmann, microfacet_beckmann)
+
+        BSDF_CLOSURE_CLASS_BEGIN(MicrofacetBeckmannAniso,
+                                 microfacet_beckmann_aniso,
+                                 MicrofacetBsdf,
+                                 LABEL_GLOSSY | LABEL_REFLECT)
+            CLOSURE_FLOAT3_PARAM(MicrofacetBeckmannAnisoClosure, params.N),
+    CLOSURE_FLOAT3_PARAM(MicrofacetBeckmannAnisoClosure, params.T),
+    CLOSURE_FLOAT_PARAM(MicrofacetBeckmannAnisoClosure, params.alpha_x),
+    CLOSURE_FLOAT_PARAM(MicrofacetBeckmannAnisoClosure, params.alpha_y),
+    BSDF_CLOSURE_CLASS_END(MicrofacetBeckmannAniso, microfacet_beckmann_aniso)
+
+        BSDF_CLOSURE_CLASS_BEGIN(MicrofacetGGXRefraction,
+                                 microfacet_ggx_refraction,
+                                 MicrofacetBsdf,
+                                 LABEL_GLOSSY | LABEL_TRANSMIT)
+            CLOSURE_FLOAT3_PARAM(MicrofacetGGXRefractionClosure, params.N),
+    CLOSURE_FLOAT_PARAM(MicrofacetGGXRefractionClosure, params.alpha_x),
+    CLOSURE_FLOAT_PARAM(MicrofacetGGXRefractionClosure, params.ior),
+    BSDF_CLOSURE_CLASS_END(MicrofacetGGXRefraction, microfacet_ggx_refraction)
+
+        BSDF_CLOSURE_CLASS_BEGIN(MicrofacetBeckmannRefraction,
+                                 microfacet_beckmann_refraction,
+                                 MicrofacetBsdf,
+                                 LABEL_GLOSSY | LABEL_TRANSMIT)
+            CLOSURE_FLOAT3_PARAM(MicrofacetBeckmannRefractionClosure, params.N),
+    CLOSURE_FLOAT_PARAM(MicrofacetBeckmannRefractionClosure, params.alpha_x),
+    CLOSURE_FLOAT_PARAM(MicrofacetBeckmannRefractionClosure, params.ior),
+    BSDF_CLOSURE_CLASS_END(MicrofacetBeckmannRefraction, microfacet_beckmann_refraction)
+
+        BSDF_CLOSURE_CLASS_BEGIN(HairReflection, hair_reflection, HairBsdf, LABEL_GLOSSY)
+            CLOSURE_FLOAT3_PARAM(HairReflectionClosure, params.N),
+    CLOSURE_FLOAT_PARAM(HairReflectionClosure, params.roughness1),
+    CLOSURE_FLOAT_PARAM(HairReflectionClosure, params.roughness2),
+    CLOSURE_FLOAT3_PARAM(HairReflectionClosure, params.T),
+    CLOSURE_FLOAT_PARAM(HairReflectionClosure, params.offset),
+    BSDF_CLOSURE_CLASS_END(HairReflection, hair_reflection)
+
+        BSDF_CLOSURE_CLASS_BEGIN(HairTransmission, hair_transmission, HairBsdf, LABEL_GLOSSY) /* Every entry below names this group's own closure class, as the other groups do. */
+            CLOSURE_FLOAT3_PARAM(HairTransmissionClosure, params.N),
+    CLOSURE_FLOAT_PARAM(HairTransmissionClosure, params.roughness1),
+    CLOSURE_FLOAT_PARAM(HairTransmissionClosure, params.roughness2),
+    CLOSURE_FLOAT3_PARAM(HairTransmissionClosure, params.T), /* Same HairBsdf params type as the reflection group, so T and offset exist here too. */
+    CLOSURE_FLOAT_PARAM(HairTransmissionClosure, params.offset),
+    BSDF_CLOSURE_CLASS_END(HairTransmission, hair_transmission)
+
+        BSDF_CLOSURE_CLASS_BEGIN(PrincipledDiffuse,
+                                 principled_diffuse,
+                                 PrincipledDiffuseBsdf,
+                                 LABEL_DIFFUSE)
+            CLOSURE_FLOAT3_PARAM(PrincipledDiffuseClosure, params.N),
+    CLOSURE_FLOAT_PARAM(PrincipledDiffuseClosure, params.roughness),
+    BSDF_CLOSURE_CLASS_END(PrincipledDiffuse, principled_diffuse)
+
+        BSDF_CLOSURE_CLASS_BEGIN(PrincipledSheen,
+                                 principled_sheen,
+                                 PrincipledSheenBsdf,
+                                 LABEL_DIFFUSE)
+            CLOSURE_FLOAT3_PARAM(PrincipledSheenClosure, params.N),
+    BSDF_CLOSURE_CLASS_END(PrincipledSheen, principled_sheen)
+
+    /* PRINCIPLED HAIR BSDF */
+    class PrincipledHairClosure : public CBSDFClosure {
+ public:
+  PrincipledHairBSDF params;
+  /* Allocates the BSDF and its PrincipledHairExtra block; returns NULL if either allocation fails. path_flag is not used here. */
+  PrincipledHairBSDF *alloc(ShaderData *sd, int path_flag, float3 weight)
+  {
+    PrincipledHairBSDF *bsdf = (PrincipledHairBSDF *)bsdf_alloc_osl(
+        sd, sizeof(PrincipledHairBSDF), weight, &params);
+    if (!bsdf) {
+      return NULL;
+    }
+
+    PrincipledHairExtra *extra = (PrincipledHairExtra *)closure_alloc_extra(
+        sd, sizeof(PrincipledHairExtra));
+    if (!extra) {
+      return NULL;
+    }
+
+    bsdf->extra = extra;
+    return bsdf;
+  }
+  /* Unless skip() says otherwise, allocates the BSDF and ORs the flags from bsdf_principled_hair_setup() into sd->flag. */
+  void setup(ShaderData *sd, int path_flag, float3 weight)
+  {
+    if (!skip(sd, path_flag, LABEL_GLOSSY)) {
+      PrincipledHairBSDF *bsdf = alloc(sd, path_flag, weight); /* alloc() already returns this pointer type. */
+      if (!bsdf) {
+        return;
+      }
+
+      sd->flag |= bsdf_principled_hair_setup(sd, bsdf); /* bsdf is non-NULL here because of the early return above. */
+    }
+  }
 };
 
 static ClosureParam *closure_bsdf_principled_hair_params()
 {
-	static ClosureParam params[] = {
-		CLOSURE_FLOAT3_PARAM(PrincipledHairClosure, params.N),
-		CLOSURE_FLOAT3_PARAM(PrincipledHairClosure, params.sigma),
-		CLOSURE_FLOAT_PARAM(PrincipledHairClosure, params.v),
-		CLOSURE_FLOAT_PARAM(PrincipledHairClosure, params.s),
-		CLOSURE_FLOAT_PARAM(PrincipledHairClosure, params.m0_roughness),
-		CLOSURE_FLOAT_PARAM(PrincipledHairClosure, params.alpha),
-		CLOSURE_FLOAT_PARAM(PrincipledHairClosure, params.eta),
-		CLOSURE_STRING_KEYPARAM(PrincipledHairClosure, label, "label"),
-		CLOSURE_FINISH_PARAM(PrincipledHairClosure)
-	};
-
-	return params;
+  static ClosureParam params[] = {CLOSURE_FLOAT3_PARAM(PrincipledHairClosure, params.N),
+                                  CLOSURE_FLOAT3_PARAM(PrincipledHairClosure, params.sigma),
+                                  CLOSURE_FLOAT_PARAM(PrincipledHairClosure, params.v),
+                                  CLOSURE_FLOAT_PARAM(PrincipledHairClosure, params.s),
+                                  CLOSURE_FLOAT_PARAM(PrincipledHairClosure, params.m0_roughness),
+                                  CLOSURE_FLOAT_PARAM(PrincipledHairClosure, params.alpha),
+                                  CLOSURE_FLOAT_PARAM(PrincipledHairClosure, params.eta),
+                                  CLOSURE_STRING_KEYPARAM(PrincipledHairClosure, label, "label"),
+                                  CLOSURE_FINISH_PARAM(PrincipledHairClosure)};
+
+  return params; /* Function-local static table of PrincipledHairClosure fields; passed to register_closure() for "principled_hair". */
 }
 
 CCLOSURE_PREPARE(closure_bsdf_principled_hair_prepare, PrincipledHairClosure)
 
 /* DISNEY PRINCIPLED CLEARCOAT */
 class PrincipledClearcoatClosure : public CBSDFClosure {
-public:
-	MicrofacetBsdf params;
-	float clearcoat, clearcoat_roughness;
-
-	MicrofacetBsdf *alloc(ShaderData *sd, int path_flag, float3 weight)
-	{
-		MicrofacetBsdf *bsdf = (MicrofacetBsdf*)bsdf_alloc_osl(sd, sizeof(MicrofacetBsdf), weight, &params);
-		if(!bsdf) {
-			return NULL;
-		}
-
-		MicrofacetExtra *extra = (MicrofacetExtra*)closure_alloc_extra(sd, sizeof(MicrofacetExtra));
-		if(!extra) {
-			return NULL;
-		}
-
-		bsdf->T = make_float3(0.0f, 0.0f, 0.0f);
-		bsdf->extra = extra;
-		bsdf->ior = 1.5f;
-		bsdf->alpha_x = clearcoat_roughness;
-		bsdf->alpha_y = clearcoat_roughness;
-		bsdf->extra->color = make_float3(0.0f, 0.0f, 0.0f);
-		bsdf->extra->cspec0 = make_float3(0.04f, 0.04f, 0.04f);
-		bsdf->extra->clearcoat = clearcoat;
-		return bsdf;
-	}
-
-	void setup(ShaderData *sd, int path_flag, float3 weight)
-	{
-		MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight);
-		if(!bsdf) {
-			return;
-		}
-
-		sd->flag |= bsdf_microfacet_ggx_clearcoat_setup(bsdf, sd);
-	}
+ public:
+  MicrofacetBsdf params;
+  float clearcoat, clearcoat_roughness;
+
+  MicrofacetBsdf *alloc(ShaderData *sd, int path_flag, float3 weight)
+  { /* Allocates the BSDF and its extra block, then fills in the clearcoat values; NULL on allocation failure. */
+    MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc_osl(
+        sd, sizeof(MicrofacetBsdf), weight, &params);
+    if (!bsdf) {
+      return NULL;
+    }
+
+    MicrofacetExtra *extra = (MicrofacetExtra *)closure_alloc_extra(sd, sizeof(MicrofacetExtra));
+    if (!extra) {
+      return NULL;
+    }
+
+    bsdf->T = make_float3(0.0f, 0.0f, 0.0f);
+    bsdf->extra = extra;
+    bsdf->ior = 1.5f; /* Hard-coded; ior is not in this closure's parameter table. */
+    bsdf->alpha_x = clearcoat_roughness; /* The same roughness is written to both alpha_x and alpha_y. */
+    bsdf->alpha_y = clearcoat_roughness;
+    bsdf->extra->color = make_float3(0.0f, 0.0f, 0.0f);
+    bsdf->extra->cspec0 = make_float3(0.04f, 0.04f, 0.04f);
+    bsdf->extra->clearcoat = clearcoat;
+    return bsdf; /* path_flag is not read in this method. */
+  }
+
+  void setup(ShaderData *sd, int path_flag, float3 weight)
+  { /* ORs the flags returned by bsdf_microfacet_ggx_clearcoat_setup() into sd->flag. */
+    MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight); /* No skip() check is made on this path. */
+    if (!bsdf) {
+      return;
+    }
+
+    sd->flag |= bsdf_microfacet_ggx_clearcoat_setup(bsdf, sd);
+  }
 };
 
 ClosureParam *closure_bsdf_principled_clearcoat_params()
 {
-	static ClosureParam params[] = {
-		CLOSURE_FLOAT3_PARAM(PrincipledClearcoatClosure, params.N),
-		CLOSURE_FLOAT_PARAM(PrincipledClearcoatClosure, clearcoat),
-		CLOSURE_FLOAT_PARAM(PrincipledClearcoatClosure, clearcoat_roughness),
-		CLOSURE_STRING_KEYPARAM(PrincipledClearcoatClosure, label, "label"),
-		CLOSURE_FINISH_PARAM(PrincipledClearcoatClosure)
-	};
-	return params;
+  static ClosureParam params[] = { /* Fields of PrincipledClearcoatClosure: N, clearcoat, clearcoat_roughness, plus the "label" key. */
+      CLOSURE_FLOAT3_PARAM(PrincipledClearcoatClosure, params.N),
+      CLOSURE_FLOAT_PARAM(PrincipledClearcoatClosure, clearcoat),
+      CLOSURE_FLOAT_PARAM(PrincipledClearcoatClosure, clearcoat_roughness),
+      CLOSURE_STRING_KEYPARAM(PrincipledClearcoatClosure, label, "label"),
+      CLOSURE_FINISH_PARAM(PrincipledClearcoatClosure)};
+  return params; /* Function-local static, so the returned pointer stays valid after the call. */
 }
 CCLOSURE_PREPARE(closure_bsdf_principled_clearcoat_prepare, PrincipledClearcoatClosure)
 
-
 /* Registration */
 
-static void register_closure(OSL::ShadingSystem *ss, const char *name, int id, OSL::ClosureParam *params, OSL::PrepareClosureFunc prepare)
+static void register_closure(OSL::ShadingSystem *ss,
+                             const char *name,
+                             int id,
+                             OSL::ClosureParam *params,
+                             OSL::PrepareClosureFunc prepare)
 {
-	/* optimization: it's possible to not use a prepare function at all and
-	 * only initialize the actual class when accessing the closure component
-	 * data, but then we need to map the id to the class somehow */
+  /* Optimization note: the prepare function could be dropped entirely, initializing the
+   * closure class only when its component data is accessed, but that needs an id -> class
+   * mapping. The call below passes one extra trailing argument when the OSL version check fails. */
 #if OSL_LIBRARY_VERSION_CODE >= 10900
-	ss->register_closure(name, id, params, prepare, NULL);
+  ss->register_closure(name, id, params, prepare, NULL);
 #else
-	ss->register_closure(name, id, params, prepare, NULL, 16);
+  ss->register_closure(name, id, params, prepare, NULL, 16);
 #endif
 }
 
 void OSLShader::register_closures(OSLShadingSystem *ss_)
 {
-	OSL::ShadingSystem *ss = (OSL::ShadingSystem*)ss_;
-	int id = 0;
-
-	register_closure(ss, "diffuse", id++,
-		bsdf_diffuse_params(), bsdf_diffuse_prepare);
-	register_closure(ss, "oren_nayar", id++,
-		bsdf_oren_nayar_params(), bsdf_oren_nayar_prepare);
-	register_closure(ss, "translucent", id++,
-		bsdf_translucent_params(), bsdf_translucent_prepare);
-	register_closure(ss, "reflection", id++,
-		bsdf_reflection_params(), bsdf_reflection_prepare);
-	register_closure(ss, "refraction", id++,
-		bsdf_refraction_params(), bsdf_refraction_prepare);
-	register_closure(ss, "transparent", id++,
-		closure_bsdf_transparent_params(), closure_bsdf_transparent_prepare);
-	register_closure(ss, "microfacet_ggx", id++,
-		bsdf_microfacet_ggx_params(), bsdf_microfacet_ggx_prepare);
-	register_closure(ss, "microfacet_ggx_aniso", id++,
-		bsdf_microfacet_ggx_aniso_params(), bsdf_microfacet_ggx_aniso_prepare);
-	register_closure(ss, "microfacet_ggx_refraction", id++,
-		bsdf_microfacet_ggx_refraction_params(), bsdf_microfacet_ggx_refraction_prepare);
-	register_closure(ss, "microfacet_multi_ggx", id++,
-		closure_bsdf_microfacet_multi_ggx_params(), closure_bsdf_microfacet_multi_ggx_prepare);
-	register_closure(ss, "microfacet_multi_ggx_glass", id++,
-		closure_bsdf_microfacet_multi_ggx_glass_params(), closure_bsdf_microfacet_multi_ggx_glass_prepare);
-	register_closure(ss, "microfacet_multi_ggx_aniso", id++,
-		closure_bsdf_microfacet_multi_ggx_aniso_params(), closure_bsdf_microfacet_multi_ggx_aniso_prepare);
-	register_closure(ss, "microfacet_ggx_fresnel", id++,
-		closure_bsdf_microfacet_ggx_fresnel_params(), closure_bsdf_microfacet_ggx_fresnel_prepare);
-	register_closure(ss, "microfacet_ggx_aniso_fresnel", id++,
-		closure_bsdf_microfacet_ggx_aniso_fresnel_params(), closure_bsdf_microfacet_ggx_aniso_fresnel_prepare);
-	register_closure(ss, "microfacet_multi_ggx_fresnel", id++,
-		closure_bsdf_microfacet_multi_ggx_fresnel_params(), closure_bsdf_microfacet_multi_ggx_fresnel_prepare);
-	register_closure(ss, "microfacet_multi_ggx_glass_fresnel", id++,
-		closure_bsdf_microfacet_multi_ggx_glass_fresnel_params(), closure_bsdf_microfacet_multi_ggx_glass_fresnel_prepare);
-	register_closure(ss, "microfacet_multi_ggx_aniso_fresnel", id++,
-		closure_bsdf_microfacet_multi_ggx_aniso_fresnel_params(), closure_bsdf_microfacet_multi_ggx_aniso_fresnel_prepare);
-	register_closure(ss, "microfacet_beckmann", id++,
-		bsdf_microfacet_beckmann_params(), bsdf_microfacet_beckmann_prepare);
-	register_closure(ss, "microfacet_beckmann_aniso", id++,
-		bsdf_microfacet_beckmann_aniso_params(), bsdf_microfacet_beckmann_aniso_prepare);
-	register_closure(ss, "microfacet_beckmann_refraction", id++,
-		bsdf_microfacet_beckmann_refraction_params(), bsdf_microfacet_beckmann_refraction_prepare);
-	register_closure(ss, "ashikhmin_shirley", id++,
-		bsdf_ashikhmin_shirley_aniso_params(), bsdf_ashikhmin_shirley_aniso_prepare);
-	register_closure(ss, "ashikhmin_velvet", id++,
-		bsdf_ashikhmin_velvet_params(), bsdf_ashikhmin_velvet_prepare);
-	register_closure(ss, "diffuse_toon", id++,
-		bsdf_diffuse_toon_params(), bsdf_diffuse_toon_prepare);
-	register_closure(ss, "glossy_toon", id++,
-		bsdf_glossy_toon_params(), bsdf_glossy_toon_prepare);
-	register_closure(ss, "principled_diffuse", id++,
-		bsdf_principled_diffuse_params(), bsdf_principled_diffuse_prepare);
-	register_closure(ss, "principled_sheen", id++,
-		bsdf_principled_sheen_params(), bsdf_principled_sheen_prepare);
-	register_closure(ss, "principled_clearcoat", id++,
-		closure_bsdf_principled_clearcoat_params(), closure_bsdf_principled_clearcoat_prepare);
-
-	register_closure(ss, "emission", id++,
-		closure_emission_params(), closure_emission_prepare);
-	register_closure(ss, "background", id++,
-		closure_background_params(), closure_background_prepare);
-	register_closure(ss, "holdout", id++,
-		closure_holdout_params(), closure_holdout_prepare);
-	register_closure(ss, "diffuse_ramp", id++,
-		closure_bsdf_diffuse_ramp_params(), closure_bsdf_diffuse_ramp_prepare);
-	register_closure(ss, "phong_ramp", id++,
-		closure_bsdf_phong_ramp_params(), closure_bsdf_phong_ramp_prepare);
-	register_closure(ss, "bssrdf", id++,
-		closure_bssrdf_params(), closure_bssrdf_prepare);
-
-	register_closure(ss, "hair_reflection", id++,
-		bsdf_hair_reflection_params(), bsdf_hair_reflection_prepare);
-	register_closure(ss, "hair_transmission", id++,
-		bsdf_hair_transmission_params(), bsdf_hair_transmission_prepare);
-
-	register_closure(ss, "principled_hair", id++,
-		closure_bsdf_principled_hair_params(), closure_bsdf_principled_hair_prepare);
-
-	register_closure(ss, "henyey_greenstein", id++,
-		closure_henyey_greenstein_params(), closure_henyey_greenstein_prepare);
-	register_closure(ss, "absorption", id++,
-		closure_absorption_params(), closure_absorption_prepare);
+  OSL::ShadingSystem *ss = (OSL::ShadingSystem *)ss_; /* ss_ is cast to the OSL shading system type used by register_closure(). */
+  int id = 0; /* Post-incremented on every call, so ids follow the registration order below. NOTE(review): presumably this order must match closure ids used elsewhere - confirm before reordering. */
+
+  register_closure(ss, "diffuse", id++, bsdf_diffuse_params(), bsdf_diffuse_prepare);
+  register_closure(ss, "oren_nayar", id++, bsdf_oren_nayar_params(), bsdf_oren_nayar_prepare);
+  register_closure(ss, "translucent", id++, bsdf_translucent_params(), bsdf_translucent_prepare);
+  register_closure(ss, "reflection", id++, bsdf_reflection_params(), bsdf_reflection_prepare);
+  register_closure(ss, "refraction", id++, bsdf_refraction_params(), bsdf_refraction_prepare);
+  register_closure(ss,
+                   "transparent",
+                   id++,
+                   closure_bsdf_transparent_params(),
+                   closure_bsdf_transparent_prepare);
+  register_closure(
+      ss, "microfacet_ggx", id++, bsdf_microfacet_ggx_params(), bsdf_microfacet_ggx_prepare);
+  register_closure(ss,
+                   "microfacet_ggx_aniso",
+                   id++,
+                   bsdf_microfacet_ggx_aniso_params(),
+                   bsdf_microfacet_ggx_aniso_prepare);
+  register_closure(ss,
+                   "microfacet_ggx_refraction",
+                   id++,
+                   bsdf_microfacet_ggx_refraction_params(),
+                   bsdf_microfacet_ggx_refraction_prepare);
+  register_closure(ss,
+                   "microfacet_multi_ggx",
+                   id++,
+                   closure_bsdf_microfacet_multi_ggx_params(),
+                   closure_bsdf_microfacet_multi_ggx_prepare);
+  register_closure(ss,
+                   "microfacet_multi_ggx_glass",
+                   id++,
+                   closure_bsdf_microfacet_multi_ggx_glass_params(),
+                   closure_bsdf_microfacet_multi_ggx_glass_prepare);
+  register_closure(ss,
+                   "microfacet_multi_ggx_aniso",
+                   id++,
+                   closure_bsdf_microfacet_multi_ggx_aniso_params(),
+                   closure_bsdf_microfacet_multi_ggx_aniso_prepare);
+  register_closure(ss,
+                   "microfacet_ggx_fresnel",
+                   id++,
+                   closure_bsdf_microfacet_ggx_fresnel_params(),
+                   closure_bsdf_microfacet_ggx_fresnel_prepare);
+  register_closure(ss,
+                   "microfacet_ggx_aniso_fresnel",
+                   id++,
+                   closure_bsdf_microfacet_ggx_aniso_fresnel_params(),
+                   closure_bsdf_microfacet_ggx_aniso_fresnel_prepare);
+  register_closure(ss,
+                   "microfacet_multi_ggx_fresnel",
+                   id++,
+                   closure_bsdf_microfacet_multi_ggx_fresnel_params(),
+                   closure_bsdf_microfacet_multi_ggx_fresnel_prepare);
+  register_closure(ss,
+                   "microfacet_multi_ggx_glass_fresnel",
+                   id++,
+                   closure_bsdf_microfacet_multi_ggx_glass_fresnel_params(),
+                   closure_bsdf_microfacet_multi_ggx_glass_fresnel_prepare);
+  register_closure(ss,
+                   "microfacet_multi_ggx_aniso_fresnel",
+                   id++,
+                   closure_bsdf_microfacet_multi_ggx_aniso_fresnel_params(),
+                   closure_bsdf_microfacet_multi_ggx_aniso_fresnel_prepare);
+  register_closure(ss,
+                   "microfacet_beckmann",
+                   id++,
+                   bsdf_microfacet_beckmann_params(),
+                   bsdf_microfacet_beckmann_prepare);
+  register_closure(ss,
+                   "microfacet_beckmann_aniso",
+                   id++,
+                   bsdf_microfacet_beckmann_aniso_params(),
+                   bsdf_microfacet_beckmann_aniso_prepare);
+  register_closure(ss,
+                   "microfacet_beckmann_refraction",
+                   id++,
+                   bsdf_microfacet_beckmann_refraction_params(),
+                   bsdf_microfacet_beckmann_refraction_prepare);
+  register_closure(ss,
+                   "ashikhmin_shirley",
+                   id++,
+                   bsdf_ashikhmin_shirley_aniso_params(),
+                   bsdf_ashikhmin_shirley_aniso_prepare);
+  register_closure(
+      ss, "ashikhmin_velvet", id++, bsdf_ashikhmin_velvet_params(), bsdf_ashikhmin_velvet_prepare);
+  register_closure(
+      ss, "diffuse_toon", id++, bsdf_diffuse_toon_params(), bsdf_diffuse_toon_prepare);
+  register_closure(ss, "glossy_toon", id++, bsdf_glossy_toon_params(), bsdf_glossy_toon_prepare);
+  register_closure(ss,
+                   "principled_diffuse",
+                   id++,
+                   bsdf_principled_diffuse_params(),
+                   bsdf_principled_diffuse_prepare);
+  register_closure(
+      ss, "principled_sheen", id++, bsdf_principled_sheen_params(), bsdf_principled_sheen_prepare);
+  register_closure(ss,
+                   "principled_clearcoat",
+                   id++,
+                   closure_bsdf_principled_clearcoat_params(),
+                   closure_bsdf_principled_clearcoat_prepare);
+
+  register_closure(ss, "emission", id++, closure_emission_params(), closure_emission_prepare);
+  register_closure(
+      ss, "background", id++, closure_background_params(), closure_background_prepare);
+  register_closure(ss, "holdout", id++, closure_holdout_params(), closure_holdout_prepare);
+  register_closure(ss,
+                   "diffuse_ramp",
+                   id++,
+                   closure_bsdf_diffuse_ramp_params(),
+                   closure_bsdf_diffuse_ramp_prepare);
+  register_closure(
+      ss, "phong_ramp", id++, closure_bsdf_phong_ramp_params(), closure_bsdf_phong_ramp_prepare);
+  register_closure(ss, "bssrdf", id++, closure_bssrdf_params(), closure_bssrdf_prepare);
+
+  register_closure(
+      ss, "hair_reflection", id++, bsdf_hair_reflection_params(), bsdf_hair_reflection_prepare);
+  register_closure(ss,
+                   "hair_transmission",
+                   id++,
+                   bsdf_hair_transmission_params(),
+                   bsdf_hair_transmission_prepare);
+
+  register_closure(ss,
+                   "principled_hair",
+                   id++,
+                   closure_bsdf_principled_hair_params(),
+                   closure_bsdf_principled_hair_prepare);
+
+  register_closure(ss,
+                   "henyey_greenstein",
+                   id++,
+                   closure_henyey_greenstein_params(),
+                   closure_henyey_greenstein_prepare);
+  register_closure(
+      ss, "absorption", id++, closure_absorption_params(), closure_absorption_prepare);
 }
 
 /* BSDF Closure */
 
 bool CBSDFClosure::skip(const ShaderData *sd, int path_flag, int scattering)
 {
-	/* caustic options */
-	if((scattering & LABEL_GLOSSY) && (path_flag & PATH_RAY_DIFFUSE)) {
-		KernelGlobals *kg = sd->osl_globals;
-
-		if((!kernel_data.integrator.caustics_reflective && (scattering & LABEL_REFLECT)) ||
-		   (!kernel_data.integrator.caustics_refractive && (scattering & LABEL_TRANSMIT)))
-		{
-			return true;
-		}
-	}
-
-	return false;
-}
+  /* Caustic options: only glossy scattering on a path flagged PATH_RAY_DIFFUSE can be skipped. */
+  if ((scattering & LABEL_GLOSSY) && (path_flag & PATH_RAY_DIFFUSE)) {
+    KernelGlobals *kg = sd->osl_globals; /* Not named again in this function; presumably used by kernel_data - confirm. */
 
+    if ((!kernel_data.integrator.caustics_reflective && (scattering & LABEL_REFLECT)) ||
+        (!kernel_data.integrator.caustics_refractive && (scattering & LABEL_TRANSMIT))) {
+      return true; /* The caustic kind matching this scattering label is switched off. */
+    }
+  }
+
+  return false; /* Default: the closure is kept. */
+}
 
 /* GGX closures with Fresnel */
 
 class MicrofacetFresnelClosure : public CBSDFClosure {
-public:
-	MicrofacetBsdf params;
-	float3 color;
-	float3 cspec0;
-
-	MicrofacetBsdf *alloc(ShaderData *sd, int path_flag, float3 weight)
-	{
-		/* Technically, the MultiGGX Glass closure may also transmit. However,
-		* since this is set statically and only used for caustic flags, this
-		* is probably as good as it gets. */
-		if(skip(sd, path_flag, LABEL_GLOSSY | LABEL_REFLECT)) {
-			return NULL;
-		}
-
-		MicrofacetBsdf *bsdf = (MicrofacetBsdf*)bsdf_alloc_osl(sd, sizeof(MicrofacetBsdf), weight, &params);
-		if(!bsdf) {
-			return NULL;
-		}
-
-		MicrofacetExtra *extra = (MicrofacetExtra*)closure_alloc_extra(sd, sizeof(MicrofacetExtra));
-		if(!extra) {
-			return NULL;
-		}
-
-		bsdf->extra = extra;
-		bsdf->extra->color = color;
-		bsdf->extra->cspec0 = cspec0;
-		bsdf->extra->clearcoat = 0.0f;
-		return bsdf;
-	}
+ public:
+  MicrofacetBsdf params;
+  float3 color;
+  float3 cspec0;
+
+  MicrofacetBsdf *alloc(ShaderData *sd, int path_flag, float3 weight)
+  { /* Called by the subclasses' setup(); returns NULL when skipped or when an allocation fails. */
+    /* Technically, the MultiGGX Glass closure may also transmit. However,
+     * since this is set statically and only used for caustic flags, this
+     * is probably as good as it gets. */
+    if (skip(sd, path_flag, LABEL_GLOSSY | LABEL_REFLECT)) {
+      return NULL;
+    }
+
+    MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc_osl(
+        sd, sizeof(MicrofacetBsdf), weight, &params);
+    if (!bsdf) {
+      return NULL;
+    }
+
+    MicrofacetExtra *extra = (MicrofacetExtra *)closure_alloc_extra(sd, sizeof(MicrofacetExtra));
+    if (!extra) {
+      return NULL;
+    }
+
+    bsdf->extra = extra;
+    bsdf->extra->color = color; /* color and cspec0 are copied from this closure's members. */
+    bsdf->extra->cspec0 = cspec0;
+    bsdf->extra->clearcoat = 0.0f;
+    return bsdf;
+  }
 };
 
 class MicrofacetGGXFresnelClosure : public MicrofacetFresnelClosure {
-public:
-	void setup(ShaderData *sd, int path_flag, float3 weight)
-	{
-		MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight);
-		if(!bsdf) {
-			return;
-		}
-
-		bsdf->T = make_float3(0.0f, 0.0f, 0.0f);
-		bsdf->alpha_y = bsdf->alpha_x;
-		sd->flag |= bsdf_microfacet_ggx_fresnel_setup(bsdf, sd);
-	}
+ public:
+  void setup(ShaderData *sd, int path_flag, float3 weight)
+  { /* Single-roughness variant: T is zeroed and alpha_y copied from alpha_x before the setup call. */
+    MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight);
+    if (!bsdf) {
+      return; /* Skipped or allocation failed; sd->flag is left untouched. */
+    }
+
+    bsdf->T = make_float3(0.0f, 0.0f, 0.0f);
+    bsdf->alpha_y = bsdf->alpha_x;
+    sd->flag |= bsdf_microfacet_ggx_fresnel_setup(bsdf, sd);
+  }
 };
 
 ClosureParam *closure_bsdf_microfacet_ggx_fresnel_params()
 {
-	static ClosureParam params[] = {
-		CLOSURE_FLOAT3_PARAM(MicrofacetGGXFresnelClosure, params.N),
-		CLOSURE_FLOAT_PARAM(MicrofacetGGXFresnelClosure, params.alpha_x),
-		CLOSURE_FLOAT_PARAM(MicrofacetGGXFresnelClosure, params.ior),
-		CLOSURE_FLOAT3_PARAM(MicrofacetGGXFresnelClosure, color),
-		CLOSURE_FLOAT3_PARAM(MicrofacetGGXFresnelClosure, cspec0),
-		CLOSURE_STRING_KEYPARAM(MicrofacetGGXFresnelClosure, label, "label"),
-		CLOSURE_FINISH_PARAM(MicrofacetGGXFresnelClosure)
-	};
-	return params;
+  static ClosureParam params[] = { /* Fields of MicrofacetGGXFresnelClosure: N, alpha_x, ior, color, cspec0, plus the "label" key. */
+      CLOSURE_FLOAT3_PARAM(MicrofacetGGXFresnelClosure, params.N),
+      CLOSURE_FLOAT_PARAM(MicrofacetGGXFresnelClosure, params.alpha_x),
+      CLOSURE_FLOAT_PARAM(MicrofacetGGXFresnelClosure, params.ior),
+      CLOSURE_FLOAT3_PARAM(MicrofacetGGXFresnelClosure, color),
+      CLOSURE_FLOAT3_PARAM(MicrofacetGGXFresnelClosure, cspec0),
+      CLOSURE_STRING_KEYPARAM(MicrofacetGGXFresnelClosure, label, "label"),
+      CLOSURE_FINISH_PARAM(MicrofacetGGXFresnelClosure)};
+  return params; /* Function-local static, so the returned pointer stays valid after the call. */
 }
 CCLOSURE_PREPARE(closure_bsdf_microfacet_ggx_fresnel_prepare, MicrofacetGGXFresnelClosure);
 
 class MicrofacetGGXAnisoFresnelClosure : public MicrofacetFresnelClosure {
-public:
-	void setup(ShaderData *sd, int path_flag, float3 weight)
-	{
-		MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight);
-		if(!bsdf) {
-			return;
-		}
-
-		sd->flag |= bsdf_microfacet_ggx_aniso_fresnel_setup(bsdf, sd);
-	}
+ public:
+  void setup(ShaderData *sd, int path_flag, float3 weight)
+  { /* Leaves T, alpha_x and alpha_y as allocated; only the setup call's flags are ORed into sd->flag. */
+    MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight);
+    if (!bsdf) {
+      return; /* Skipped or allocation failed; sd->flag is left untouched. */
+    }
+
+    sd->flag |= bsdf_microfacet_ggx_aniso_fresnel_setup(bsdf, sd);
+  }
 };
 
 ClosureParam *closure_bsdf_microfacet_ggx_aniso_fresnel_params()
 {
-	static ClosureParam params[] = {
-		CLOSURE_FLOAT3_PARAM(MicrofacetGGXFresnelClosure, params.N),
-		CLOSURE_FLOAT3_PARAM(MicrofacetGGXFresnelClosure, params.T),
-		CLOSURE_FLOAT_PARAM(MicrofacetGGXFresnelClosure, params.alpha_x),
-		CLOSURE_FLOAT_PARAM(MicrofacetGGXFresnelClosure, params.alpha_y),
-		CLOSURE_FLOAT_PARAM(MicrofacetGGXFresnelClosure, params.ior),
-		CLOSURE_FLOAT3_PARAM(MicrofacetGGXFresnelClosure, color),
-		CLOSURE_FLOAT3_PARAM(MicrofacetGGXFresnelClosure, cspec0),
-		CLOSURE_STRING_KEYPARAM(MicrofacetGGXFresnelClosure, label, "label"),
-		CLOSURE_FINISH_PARAM(MicrofacetGGXFresnelClosure)
-	};
-	return params;
+  static ClosureParam params[] = { /* Fields: N, T, alpha_x, alpha_y, ior, color, cspec0, plus the "label" key. */
+      CLOSURE_FLOAT3_PARAM(MicrofacetGGXFresnelClosure, params.N),
+      CLOSURE_FLOAT3_PARAM(MicrofacetGGXFresnelClosure, params.T),
+      CLOSURE_FLOAT_PARAM(MicrofacetGGXFresnelClosure, params.alpha_x),
+      CLOSURE_FLOAT_PARAM(MicrofacetGGXFresnelClosure, params.alpha_y),
+      CLOSURE_FLOAT_PARAM(MicrofacetGGXFresnelClosure, params.ior),
+      CLOSURE_FLOAT3_PARAM(MicrofacetGGXFresnelClosure, color),
+      CLOSURE_FLOAT3_PARAM(MicrofacetGGXFresnelClosure, cspec0),
+      CLOSURE_STRING_KEYPARAM(MicrofacetGGXFresnelClosure, label, "label"),
+      CLOSURE_FINISH_PARAM(MicrofacetGGXFresnelClosure)};
+  return params; /* NOTE(review): entries name MicrofacetGGXFresnelClosure while the prepare macro names MicrofacetGGXAnisoFresnelClosure; neither adds data members here, so layouts presumably match - confirm. */
 }
-CCLOSURE_PREPARE(closure_bsdf_microfacet_ggx_aniso_fresnel_prepare, MicrofacetGGXAnisoFresnelClosure);
-
+CCLOSURE_PREPARE(closure_bsdf_microfacet_ggx_aniso_fresnel_prepare,
+                 MicrofacetGGXAnisoFresnelClosure);
 
 /* Multiscattering GGX closures */
 
 class MicrofacetMultiClosure : public CBSDFClosure {
-public:
-	MicrofacetBsdf params;
-	float3 color;
-
-	MicrofacetBsdf *alloc(ShaderData *sd, int path_flag, float3 weight)
-	{
-		/* Technically, the MultiGGX closure may also transmit. However,
-		 * since this is set statically and only used for caustic flags, this
-		 * is probably as good as it gets. */
-	    if(skip(sd, path_flag, LABEL_GLOSSY|LABEL_REFLECT)) {
-			return NULL;
-		}
-
-		MicrofacetBsdf *bsdf = (MicrofacetBsdf*)bsdf_alloc_osl(sd, sizeof(MicrofacetBsdf), weight, &params);
-		if(!bsdf) {
-			return NULL;
-		}
-
-		MicrofacetExtra *extra = (MicrofacetExtra*)closure_alloc_extra(sd, sizeof(MicrofacetExtra));
-		if(!extra) {
-			return NULL;
-		}
-
-		bsdf->extra = extra;
-		bsdf->extra->color = color;
-		bsdf->extra->cspec0 = make_float3(0.0f, 0.0f, 0.0f);
-		bsdf->extra->clearcoat = 0.0f;
-		return bsdf;
-	}
+ public:
+  MicrofacetBsdf params;
+  float3 color;
+
+  MicrofacetBsdf *alloc(ShaderData *sd, int path_flag, float3 weight)
+  { /* Called by the subclasses' setup(); returns NULL when skipped or when an allocation fails. */
+    /* Technically, the MultiGGX closure may also transmit. However,
+     * since this is set statically and only used for caustic flags, this
+     * is probably as good as it gets. */
+    if (skip(sd, path_flag, LABEL_GLOSSY | LABEL_REFLECT)) {
+      return NULL;
+    }
+
+    MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc_osl(
+        sd, sizeof(MicrofacetBsdf), weight, &params);
+    if (!bsdf) {
+      return NULL;
+    }
+
+    MicrofacetExtra *extra = (MicrofacetExtra *)closure_alloc_extra(sd, sizeof(MicrofacetExtra));
+    if (!extra) {
+      return NULL;
+    }
+
+    bsdf->extra = extra;
+    bsdf->extra->color = color;
+    bsdf->extra->cspec0 = make_float3(0.0f, 0.0f, 0.0f); /* This class has no cspec0 member, so zero is stored. */
+    bsdf->extra->clearcoat = 0.0f;
+    return bsdf;
+  }
 };
 
 class MicrofacetMultiGGXClosure : public MicrofacetMultiClosure {
-public:
-	void setup(ShaderData *sd, int path_flag, float3 weight)
-	{
-		MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight);
-		if(!bsdf) {
-			return;
-		}
-
-		bsdf->ior = 0.0f;
-		bsdf->T = make_float3(0.0f, 0.0f, 0.0f);
-		bsdf->alpha_y = bsdf->alpha_x;
-		sd->flag |= bsdf_microfacet_multi_ggx_setup(bsdf);
-	}
+ public:
+  void setup(ShaderData *sd, int path_flag, float3 weight)
+  { /* Zeroes ior and T and copies alpha_x to alpha_y before calling bsdf_microfacet_multi_ggx_setup(). */
+    MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight);
+    if (!bsdf) {
+      return; /* Skipped or allocation failed; sd->flag is left untouched. */
+    }
+
+    bsdf->ior = 0.0f; /* ior is not in this closure's parameter table. */
+    bsdf->T = make_float3(0.0f, 0.0f, 0.0f);
+    bsdf->alpha_y = bsdf->alpha_x;
+    sd->flag |= bsdf_microfacet_multi_ggx_setup(bsdf);
+  }
 };
 
 ClosureParam *closure_bsdf_microfacet_multi_ggx_params()
 {
-	static ClosureParam params[] = {
-		CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXClosure, params.N),
-		CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXClosure, params.alpha_x),
-		CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXClosure, color),
-		CLOSURE_STRING_KEYPARAM(MicrofacetMultiGGXClosure, label, "label"),
-		CLOSURE_FINISH_PARAM(MicrofacetMultiGGXClosure)
-	};
-	return params;
+  static ClosureParam params[] = { /* Fields of MicrofacetMultiGGXClosure: N, alpha_x, color, plus the "label" key. */
+      CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXClosure, params.N),
+      CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXClosure, params.alpha_x),
+      CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXClosure, color),
+      CLOSURE_STRING_KEYPARAM(MicrofacetMultiGGXClosure, label, "label"),
+      CLOSURE_FINISH_PARAM(MicrofacetMultiGGXClosure)};
+  return params; /* Function-local static, so the returned pointer stays valid after the call. */
 }
 CCLOSURE_PREPARE(closure_bsdf_microfacet_multi_ggx_prepare, MicrofacetMultiGGXClosure);
 
 class MicrofacetMultiGGXAnisoClosure : public MicrofacetMultiClosure {
-public:
-	void setup(ShaderData *sd, int path_flag, float3 weight)
-	{
-		MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight);
-		if(!bsdf) {
-			return;
-		}
-
-		bsdf->ior = 0.0f;
-		sd->flag |= bsdf_microfacet_multi_ggx_aniso_setup(bsdf);
-	}
+ public:
+  void setup(ShaderData *sd, int path_flag, float3 weight)
+  { /* Zeroes ior only; T, alpha_x and alpha_y are left as allocated. */
+    MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight);
+    if (!bsdf) {
+      return; /* Skipped or allocation failed; sd->flag is left untouched. */
+    }
+
+    bsdf->ior = 0.0f;
+    sd->flag |= bsdf_microfacet_multi_ggx_aniso_setup(bsdf);
+  }
 };
 
 ClosureParam *closure_bsdf_microfacet_multi_ggx_aniso_params()
 {
-	static ClosureParam params[] = {
-		CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXClosure, params.N),
-		CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXClosure, params.T),
-		CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXClosure, params.alpha_x),
-		CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXClosure, params.alpha_y),
-		CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXClosure, color),
-		CLOSURE_STRING_KEYPARAM(MicrofacetMultiGGXClosure, label, "label"),
-		CLOSURE_FINISH_PARAM(MicrofacetMultiGGXClosure)
-	};
-	return params;
+  static ClosureParam params[] = { /* Fields: N, T, alpha_x, alpha_y, color, plus the "label" key. */
+      CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXClosure, params.N),
+      CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXClosure, params.T),
+      CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXClosure, params.alpha_x),
+      CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXClosure, params.alpha_y),
+      CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXClosure, color),
+      CLOSURE_STRING_KEYPARAM(MicrofacetMultiGGXClosure, label, "label"),
+      CLOSURE_FINISH_PARAM(MicrofacetMultiGGXClosure)};
+  return params; /* NOTE(review): entries name MicrofacetMultiGGXClosure while the prepare macro names MicrofacetMultiGGXAnisoClosure; neither adds data members here, so layouts presumably match - confirm. */
 }
 CCLOSURE_PREPARE(closure_bsdf_microfacet_multi_ggx_aniso_prepare, MicrofacetMultiGGXAnisoClosure);
 
 class MicrofacetMultiGGXGlassClosure : public MicrofacetMultiClosure {
-public:
-	MicrofacetMultiGGXGlassClosure() : MicrofacetMultiClosure() {}
-
-	void setup(ShaderData *sd, int path_flag, float3 weight)
-	{
-		MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight);
-		if(!bsdf) {
-			return;
-		}
-
-		bsdf->T = make_float3(0.0f, 0.0f, 0.0f);
-		bsdf->alpha_y = bsdf->alpha_x;
-		sd->flag |= bsdf_microfacet_multi_ggx_glass_setup(bsdf);
-	}
+ public:
+  MicrofacetMultiGGXGlassClosure() : MicrofacetMultiClosure()
+  {
+  }
+
+  void setup(ShaderData *sd, int path_flag, float3 weight)
+  { /* Zeroes T and copies alpha_x to alpha_y; ior is not overwritten here. */
+    MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight);
+    if (!bsdf) {
+      return; /* Skipped or allocation failed; sd->flag is left untouched. */
+    }
+
+    bsdf->T = make_float3(0.0f, 0.0f, 0.0f);
+    bsdf->alpha_y = bsdf->alpha_x;
+    sd->flag |= bsdf_microfacet_multi_ggx_glass_setup(bsdf);
+  }
 };
 
 ClosureParam *closure_bsdf_microfacet_multi_ggx_glass_params()
 {
-	static ClosureParam params[] = {
-		CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXClosure, params.N),
-		CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXClosure, params.alpha_x),
-		CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXClosure, params.ior),
-		CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXClosure, color),
-		CLOSURE_STRING_KEYPARAM(MicrofacetMultiGGXClosure, label, "label"),
-		CLOSURE_FINISH_PARAM(MicrofacetMultiGGXClosure)
-	};
-	return params;
+  static ClosureParam params[] = { /* Fields: N, alpha_x, ior, color, plus the "label" key. */
+      CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXClosure, params.N),
+      CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXClosure, params.alpha_x),
+      CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXClosure, params.ior),
+      CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXClosure, color),
+      CLOSURE_STRING_KEYPARAM(MicrofacetMultiGGXClosure, label, "label"),
+      CLOSURE_FINISH_PARAM(MicrofacetMultiGGXClosure)};
+  return params; /* NOTE(review): entries name MicrofacetMultiGGXClosure while the prepare macro names MicrofacetMultiGGXGlassClosure; neither adds data members here, so layouts presumably match - confirm. */
 }
 CCLOSURE_PREPARE(closure_bsdf_microfacet_multi_ggx_glass_prepare, MicrofacetMultiGGXGlassClosure);
 
-
 /* Multiscattering GGX closures with Fresnel */
 
 class MicrofacetMultiFresnelClosure : public CBSDFClosure {
-public:
-	MicrofacetBsdf params;
-	float3 color;
-	float3 cspec0;
-
-	MicrofacetBsdf *alloc(ShaderData *sd, int path_flag, float3 weight)
-	{
-		/* Technically, the MultiGGX closure may also transmit. However,
-		* since this is set statically and only used for caustic flags, this
-		* is probably as good as it gets. */
-		if(skip(sd, path_flag, LABEL_GLOSSY | LABEL_REFLECT)) {
-			return NULL;
-		}
-
-		MicrofacetBsdf *bsdf = (MicrofacetBsdf*)bsdf_alloc_osl(sd, sizeof(MicrofacetBsdf), weight, &params);
-		if(!bsdf) {
-			return NULL;
-		}
-
-		MicrofacetExtra *extra = (MicrofacetExtra*)closure_alloc_extra(sd, sizeof(MicrofacetExtra));
-		if(!extra) {
-			return NULL;
-		}
-
-		bsdf->extra = extra;
-		bsdf->extra->color = color;
-		bsdf->extra->cspec0 = cspec0;
-		bsdf->extra->clearcoat = 0.0f;
-		return bsdf;
-	}
+ public:
+  MicrofacetBsdf params;
+  float3 color;
+  float3 cspec0;
+
+  MicrofacetBsdf *alloc(ShaderData *sd, int path_flag, float3 weight)
+  { /* Called by the subclasses' setup(); returns NULL when skipped or when an allocation fails. */
+    /* Technically, the MultiGGX closure may also transmit. However,
+     * since this is set statically and only used for caustic flags, this
+     * is probably as good as it gets. */
+    if (skip(sd, path_flag, LABEL_GLOSSY | LABEL_REFLECT)) {
+      return NULL;
+    }
+
+    MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc_osl(
+        sd, sizeof(MicrofacetBsdf), weight, &params);
+    if (!bsdf) {
+      return NULL;
+    }
+
+    MicrofacetExtra *extra = (MicrofacetExtra *)closure_alloc_extra(sd, sizeof(MicrofacetExtra));
+    if (!extra) {
+      return NULL;
+    }
+
+    bsdf->extra = extra;
+    bsdf->extra->color = color; /* color and cspec0 are copied from this closure's members. */
+    bsdf->extra->cspec0 = cspec0;
+    bsdf->extra->clearcoat = 0.0f;
+    return bsdf;
+  }
 };
 
 class MicrofacetMultiGGXFresnelClosure : public MicrofacetMultiFresnelClosure {
-public:
-	void setup(ShaderData *sd, int path_flag, float3 weight)
-	{
-		MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight);
-		if(!bsdf) {
-			return;
-		}
-
-		bsdf->T = make_float3(0.0f, 0.0f, 0.0f);
-		bsdf->alpha_y = bsdf->alpha_x;
-		sd->flag |= bsdf_microfacet_multi_ggx_fresnel_setup(bsdf, sd);
-	}
+ public:
+  void setup(ShaderData *sd, int path_flag, float3 weight)
+  {
+    MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight);
+    if (!bsdf) {
+      return;
+    }
+
+    bsdf->T = make_float3(0.0f, 0.0f, 0.0f);
+    bsdf->alpha_y = bsdf->alpha_x;
+    sd->flag |= bsdf_microfacet_multi_ggx_fresnel_setup(bsdf, sd);
+  }
 };
 
 ClosureParam *closure_bsdf_microfacet_multi_ggx_fresnel_params()
 {
-	static ClosureParam params[] = {
-		CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, params.N),
-		CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXFresnelClosure, params.alpha_x),
-		CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXFresnelClosure, params.ior),
-		CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, color),
-		CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, cspec0),
-		CLOSURE_STRING_KEYPARAM(MicrofacetMultiGGXFresnelClosure, label, "label"),
-		CLOSURE_FINISH_PARAM(MicrofacetMultiGGXFresnelClosure)
-	};
-	return params;
+  static ClosureParam params[] = {
+      CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, params.N),
+      CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXFresnelClosure, params.alpha_x),
+      CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXFresnelClosure, params.ior),
+      CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, color),
+      CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, cspec0),
+      CLOSURE_STRING_KEYPARAM(MicrofacetMultiGGXFresnelClosure, label, "label"),
+      CLOSURE_FINISH_PARAM(MicrofacetMultiGGXFresnelClosure)};
+  return params;
 }
-CCLOSURE_PREPARE(closure_bsdf_microfacet_multi_ggx_fresnel_prepare, MicrofacetMultiGGXFresnelClosure);
+CCLOSURE_PREPARE(closure_bsdf_microfacet_multi_ggx_fresnel_prepare,
+                 MicrofacetMultiGGXFresnelClosure);
 
 class MicrofacetMultiGGXAnisoFresnelClosure : public MicrofacetMultiFresnelClosure {
-public:
-	void setup(ShaderData *sd, int path_flag, float3 weight)
-	{
-		MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight);
-		if(!bsdf) {
-			return;
-		}
-
-		sd->flag |= bsdf_microfacet_multi_ggx_aniso_fresnel_setup(bsdf, sd);
-	}
+ public:
+  void setup(ShaderData *sd, int path_flag, float3 weight)
+  {
+    MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight);
+    if (!bsdf) {
+      return;
+    }
+
+    sd->flag |= bsdf_microfacet_multi_ggx_aniso_fresnel_setup(bsdf, sd);
+  }
 };
 
 ClosureParam *closure_bsdf_microfacet_multi_ggx_aniso_fresnel_params()
 {
-	static ClosureParam params[] = {
-		CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, params.N),
-		CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, params.T),
-		CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXFresnelClosure, params.alpha_x),
-		CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXFresnelClosure, params.alpha_y),
-		CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXFresnelClosure, params.ior),
-		CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, color),
-		CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, cspec0),
-		CLOSURE_STRING_KEYPARAM(MicrofacetMultiGGXFresnelClosure, label, "label"),
-		CLOSURE_FINISH_PARAM(MicrofacetMultiGGXFresnelClosure)
-	};
-	return params;
+  static ClosureParam params[] = {
+      CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, params.N),
+      CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, params.T),
+      CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXFresnelClosure, params.alpha_x),
+      CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXFresnelClosure, params.alpha_y),
+      CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXFresnelClosure, params.ior),
+      CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, color),
+      CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, cspec0),
+      CLOSURE_STRING_KEYPARAM(MicrofacetMultiGGXFresnelClosure, label, "label"),
+      CLOSURE_FINISH_PARAM(MicrofacetMultiGGXFresnelClosure)};
+  return params;
 }
-CCLOSURE_PREPARE(closure_bsdf_microfacet_multi_ggx_aniso_fresnel_prepare, MicrofacetMultiGGXAnisoFresnelClosure);
+CCLOSURE_PREPARE(closure_bsdf_microfacet_multi_ggx_aniso_fresnel_prepare,
+                 MicrofacetMultiGGXAnisoFresnelClosure);
 
 class MicrofacetMultiGGXGlassFresnelClosure : public MicrofacetMultiFresnelClosure {
-public:
-	MicrofacetMultiGGXGlassFresnelClosure() : MicrofacetMultiFresnelClosure() {}
-
-	void setup(ShaderData *sd, int path_flag, float3 weight)
-	{
-		MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight);
-		if(!bsdf) {
-			return;
-		}
-
-		bsdf->T = make_float3(0.0f, 0.0f, 0.0f);
-		bsdf->alpha_y = bsdf->alpha_x;
-		sd->flag |= bsdf_microfacet_multi_ggx_glass_fresnel_setup(bsdf, sd);
-	}
+ public:
+  MicrofacetMultiGGXGlassFresnelClosure() : MicrofacetMultiFresnelClosure()
+  {
+  }
+
+  void setup(ShaderData *sd, int path_flag, float3 weight)
+  {
+    MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight);
+    if (!bsdf) {
+      return;
+    }
+
+    bsdf->T = make_float3(0.0f, 0.0f, 0.0f);
+    bsdf->alpha_y = bsdf->alpha_x;
+    sd->flag |= bsdf_microfacet_multi_ggx_glass_fresnel_setup(bsdf, sd);
+  }
 };
 
 ClosureParam *closure_bsdf_microfacet_multi_ggx_glass_fresnel_params()
 {
-	static ClosureParam params[] = {
-		CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, params.N),
-		CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXFresnelClosure, params.alpha_x),
-		CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXFresnelClosure, params.ior),
-		CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, color),
-		CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, cspec0),
-		CLOSURE_STRING_KEYPARAM(MicrofacetMultiGGXFresnelClosure, label, "label"),
-		CLOSURE_FINISH_PARAM(MicrofacetMultiGGXFresnelClosure)
-	};
-	return params;
+  static ClosureParam params[] = {
+      CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, params.N),
+      CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXFresnelClosure, params.alpha_x),
+      CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXFresnelClosure, params.ior),
+      CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, color),
+      CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, cspec0),
+      CLOSURE_STRING_KEYPARAM(MicrofacetMultiGGXFresnelClosure, label, "label"),
+      CLOSURE_FINISH_PARAM(MicrofacetMultiGGXFresnelClosure)};
+  return params;
 }
-CCLOSURE_PREPARE(closure_bsdf_microfacet_multi_ggx_glass_fresnel_prepare, MicrofacetMultiGGXGlassFresnelClosure);
+CCLOSURE_PREPARE(closure_bsdf_microfacet_multi_ggx_glass_fresnel_prepare,
+                 MicrofacetMultiGGXGlassFresnelClosure);
 
 /* Transparent */
 
 class TransparentClosure : public CBSDFClosure {
-public:
-	ShaderClosure params;
-	float3 unused;
-
-	void setup(ShaderData *sd, int path_flag, float3 weight)
-	{
-		bsdf_transparent_setup(sd, weight, path_flag);
-	}
+ public:
+  ShaderClosure params;
+  float3 unused;
+
+  void setup(ShaderData *sd, int path_flag, float3 weight)
+  {
+    bsdf_transparent_setup(sd, weight, path_flag);
+  }
 };
 
 ClosureParam *closure_bsdf_transparent_params()
 {
-	static ClosureParam params[] = {
-		CLOSURE_STRING_KEYPARAM(TransparentClosure, label, "label"),
-		CLOSURE_FINISH_PARAM(TransparentClosure)
-	};
-	return params;
+  static ClosureParam params[] = {CLOSURE_STRING_KEYPARAM(TransparentClosure, label, "label"),
+                                  CLOSURE_FINISH_PARAM(TransparentClosure)};
+  return params;
 }
 
 CCLOSURE_PREPARE(closure_bsdf_transparent_prepare, TransparentClosure)
@@ -778,52 +859,49 @@ CCLOSURE_PREPARE(closure_bsdf_transparent_prepare, TransparentClosure)
 /* Volume */
 
 class VolumeAbsorptionClosure : public CBSDFClosure {
-public:
-	void setup(ShaderData *sd, int path_flag, float3 weight)
-	{
-		volume_extinction_setup(sd, weight);
-	}
+ public:
+  void setup(ShaderData *sd, int path_flag, float3 weight)
+  {
+    volume_extinction_setup(sd, weight);
+  }
 };
 
 ClosureParam *closure_absorption_params()
 {
-	static ClosureParam params[] = {
-		CLOSURE_STRING_KEYPARAM(VolumeAbsorptionClosure, label, "label"),
-		CLOSURE_FINISH_PARAM(VolumeAbsorptionClosure)
-	};
-	return params;
+  static ClosureParam params[] = {CLOSURE_STRING_KEYPARAM(VolumeAbsorptionClosure, label, "label"),
+                                  CLOSURE_FINISH_PARAM(VolumeAbsorptionClosure)};
+  return params;
 }
 
 CCLOSURE_PREPARE(closure_absorption_prepare, VolumeAbsorptionClosure)
 
 class VolumeHenyeyGreensteinClosure : public CBSDFClosure {
-public:
-	HenyeyGreensteinVolume params;
+ public:
+  HenyeyGreensteinVolume params;
 
-	void setup(ShaderData *sd, int path_flag, float3 weight)
-	{
-		volume_extinction_setup(sd, weight);
+  void setup(ShaderData *sd, int path_flag, float3 weight)
+  {
+    volume_extinction_setup(sd, weight);
 
-	    HenyeyGreensteinVolume *volume = (HenyeyGreensteinVolume*)bsdf_alloc_osl(sd, sizeof(HenyeyGreensteinVolume), weight, &params);
-		if(!volume) {
-			return;
-		}
+    HenyeyGreensteinVolume *volume = (HenyeyGreensteinVolume *)bsdf_alloc_osl(
+        sd, sizeof(HenyeyGreensteinVolume), weight, &params);
+    if (!volume) {
+      return;
+    }
 
-		sd->flag |= volume_henyey_greenstein_setup(volume);
-	}
+    sd->flag |= volume_henyey_greenstein_setup(volume);
+  }
 };
 
 ClosureParam *closure_henyey_greenstein_params()
 {
-	static ClosureParam params[] = {
-		CLOSURE_FLOAT_PARAM(VolumeHenyeyGreensteinClosure, params.g),
-		CLOSURE_STRING_KEYPARAM(VolumeHenyeyGreensteinClosure, label, "label"),
-		CLOSURE_FINISH_PARAM(VolumeHenyeyGreensteinClosure)
-	};
-	return params;
+  static ClosureParam params[] = {
+      CLOSURE_FLOAT_PARAM(VolumeHenyeyGreensteinClosure, params.g),
+      CLOSURE_STRING_KEYPARAM(VolumeHenyeyGreensteinClosure, label, "label"),
+      CLOSURE_FINISH_PARAM(VolumeHenyeyGreensteinClosure)};
+  return params;
 }
 
 CCLOSURE_PREPARE(closure_henyey_greenstein_prepare, VolumeHenyeyGreensteinClosure)
 
-
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/osl/osl_closures.h b/intern/cycles/kernel/osl/osl_closures.h
index 2a50704b569..d3db6b71f5c 100644
--- a/intern/cycles/kernel/osl/osl_closures.h
+++ b/intern/cycles/kernel/osl/osl_closures.h
@@ -74,24 +74,34 @@ void closure_bsdf_microfacet_multi_ggx_prepare(OSL::RendererServices *, int id,
 void closure_bsdf_microfacet_multi_ggx_glass_prepare(OSL::RendererServices *, int id, void *data);
 void closure_bsdf_microfacet_multi_ggx_aniso_prepare(OSL::RendererServices *, int id, void *data);
 void closure_bsdf_microfacet_ggx_fresnel_prepare(OSL::RendererServices *, int id, void *data);
-void closure_bsdf_microfacet_ggx_aniso_fresnel_prepare(OSL::RendererServices *, int id, void *data);
-void closure_bsdf_microfacet_multi_ggx_fresnel_prepare(OSL::RendererServices *, int id, void *data);
-void closure_bsdf_microfacet_multi_ggx_glass_fresnel_prepare(OSL::RendererServices *, int id, void *data);
-void closure_bsdf_microfacet_multi_ggx_aniso_fresnel_prepare(OSL::RendererServices *, int id, void *data);
+void closure_bsdf_microfacet_ggx_aniso_fresnel_prepare(OSL::RendererServices *,
+                                                       int id,
+                                                       void *data);
+void closure_bsdf_microfacet_multi_ggx_fresnel_prepare(OSL::RendererServices *,
+                                                       int id,
+                                                       void *data);
+void closure_bsdf_microfacet_multi_ggx_glass_fresnel_prepare(OSL::RendererServices *,
+                                                             int id,
+                                                             void *data);
+void closure_bsdf_microfacet_multi_ggx_aniso_fresnel_prepare(OSL::RendererServices *,
+                                                             int id,
+                                                             void *data);
 void closure_bsdf_principled_clearcoat_prepare(OSL::RendererServices *, int id, void *data);
 void closure_bsdf_principled_hair_prepare(OSL::RendererServices *, int id, void *data);
 
-#define CCLOSURE_PREPARE(name, classname)          \
-void name(RendererServices *, int id, void *data) \
-{                                                 \
-	memset(data, 0, sizeof(classname));           \
-	new (data) classname();                       \
-}
+#define CCLOSURE_PREPARE(name, classname) \
+  void name(RendererServices *, int id, void *data) \
+  { \
+    memset(data, 0, sizeof(classname)); \
+    new (data) classname(); \
+  }
 
 #define CCLOSURE_PREPARE_STATIC(name, classname) static CCLOSURE_PREPARE(name, classname)
 
 #define CLOSURE_FLOAT3_PARAM(st, fld) \
-	{ TypeDesc::TypeVector, (int)reckless_offsetof(st, fld), NULL, sizeof(OSL::Vec3) }
+  { \
+    TypeDesc::TypeVector, (int)reckless_offsetof(st, fld), NULL, sizeof(OSL::Vec3) \
+  }
 
 #define TO_VEC3(v) OSL::Vec3(v.x, v.y, v.z)
 #define TO_COLOR3(v) OSL::Color3(v.x, v.y, v.z)
@@ -100,50 +110,50 @@ void name(RendererServices *, int id, void *data) \
 /* Closure */
 
 class CClosurePrimitive {
-public:
-	virtual void setup(ShaderData *sd, int path_flag, float3 weight) = 0;
+ public:
+  virtual void setup(ShaderData *sd, int path_flag, float3 weight) = 0;
 
-	OSL::ustring label;
+  OSL::ustring label;
 };
 
 /* BSDF */
 
 class CBSDFClosure : public CClosurePrimitive {
-public:
-	bool skip(const ShaderData *sd, int path_flag, int scattering);
+ public:
+  bool skip(const ShaderData *sd, int path_flag, int scattering);
 };
 
 #define BSDF_CLOSURE_CLASS_BEGIN(Upper, lower, structname, TYPE) \
 \
-class Upper##Closure : public CBSDFClosure { \
-public: \
-	structname params; \
-	float3 unused; \
+  class Upper##Closure : public CBSDFClosure { \
+   public: \
+    structname params; \
+    float3 unused; \
 \
-	void setup(ShaderData *sd, int path_flag, float3 weight) \
-	{ \
-	    if(!skip(sd, path_flag, TYPE)) { \
-			structname *bsdf = (structname*)bsdf_alloc_osl(sd, sizeof(structname), weight, &params); \
-			sd->flag |= (bsdf) ? bsdf_##lower##_setup(bsdf) : 0; \
-		} \
-	} \
-}; \
+    void setup(ShaderData *sd, int path_flag, float3 weight) \
+    { \
+      if (!skip(sd, path_flag, TYPE)) { \
+        structname *bsdf = (structname *)bsdf_alloc_osl(sd, sizeof(structname), weight, &params); \
+        sd->flag |= (bsdf) ? bsdf_##lower##_setup(bsdf) : 0; \
+      } \
+    } \
+  }; \
 \
-static ClosureParam *bsdf_##lower##_params() \
-{ \
-	static ClosureParam params[] = {
+  static ClosureParam *bsdf_##lower##_params() \
+  { \
+    static ClosureParam params[] = {
 
 /* parameters */
 
 #define BSDF_CLOSURE_CLASS_END(Upper, lower) \
-		CLOSURE_STRING_KEYPARAM(Upper##Closure, label, "label"), \
-		CLOSURE_FINISH_PARAM(Upper##Closure) \
-	}; \
-	return params; \
-} \
+  CLOSURE_STRING_KEYPARAM(Upper##Closure, label, "label"), CLOSURE_FINISH_PARAM(Upper##Closure) \
+  } \
+  ; \
+  return params; \
+  } \
 \
-CCLOSURE_PREPARE_STATIC(bsdf_##lower##_prepare, Upper##Closure)
+  CCLOSURE_PREPARE_STATIC(bsdf_##lower##_prepare, Upper##Closure)
 
 CCL_NAMESPACE_END
 
-#endif  /* __OSL_CLOSURES_H__ */
+#endif /* __OSL_CLOSURES_H__ */
diff --git a/intern/cycles/kernel/osl/osl_globals.h b/intern/cycles/kernel/osl/osl_globals.h
index 88192fbcccb..641c9967586 100644
--- a/intern/cycles/kernel/osl/osl_globals.h
+++ b/intern/cycles/kernel/osl/osl_globals.h
@@ -19,79 +19,79 @@
 
 #ifdef WITH_OSL
 
-#include <OSL/oslexec.h>
+#  include <OSL/oslexec.h>
 
-#include "util/util_map.h"
-#include "util/util_param.h"
-#include "util/util_thread.h"
-#include "util/util_vector.h"
+#  include "util/util_map.h"
+#  include "util/util_param.h"
+#  include "util/util_thread.h"
+#  include "util/util_vector.h"
 
-#ifndef WIN32
+#  ifndef WIN32
 using std::isfinite;
-#endif
+#  endif
 
 CCL_NAMESPACE_BEGIN
 
 class OSLRenderServices;
 
 struct OSLGlobals {
-	OSLGlobals()
-	{
-		ss = NULL;
-		ts = NULL;
-		services = NULL;
-		use = false;
-	}
-
-	bool use;
-
-	/* shading system */
-	OSL::ShadingSystem *ss;
-	OSL::TextureSystem *ts;
-	OSLRenderServices *services;
-
-	/* shader states */
-	vector<OSL::ShaderGroupRef> surface_state;
-	vector<OSL::ShaderGroupRef> volume_state;
-	vector<OSL::ShaderGroupRef> displacement_state;
-	vector<OSL::ShaderGroupRef> bump_state;
-	OSL::ShaderGroupRef background_state;
-
-	/* attributes */
-	struct Attribute {
-		TypeDesc type;
-		AttributeDescriptor desc;
-		ParamValue value;
-	};
-
-	typedef unordered_map<ustring, Attribute, ustringHash> AttributeMap;
-	typedef unordered_map<ustring, int, ustringHash> ObjectNameMap;
-
-	vector<AttributeMap> attribute_map;
-	ObjectNameMap object_name_map;
-	vector<ustring> object_names;
+  OSLGlobals()
+  {
+    ss = NULL;
+    ts = NULL;
+    services = NULL;
+    use = false;
+  }
+
+  bool use;
+
+  /* shading system */
+  OSL::ShadingSystem *ss;
+  OSL::TextureSystem *ts;
+  OSLRenderServices *services;
+
+  /* shader states */
+  vector<OSL::ShaderGroupRef> surface_state;
+  vector<OSL::ShaderGroupRef> volume_state;
+  vector<OSL::ShaderGroupRef> displacement_state;
+  vector<OSL::ShaderGroupRef> bump_state;
+  OSL::ShaderGroupRef background_state;
+
+  /* attributes */
+  struct Attribute {
+    TypeDesc type;
+    AttributeDescriptor desc;
+    ParamValue value;
+  };
+
+  typedef unordered_map<ustring, Attribute, ustringHash> AttributeMap;
+  typedef unordered_map<ustring, int, ustringHash> ObjectNameMap;
+
+  vector<AttributeMap> attribute_map;
+  ObjectNameMap object_name_map;
+  vector<ustring> object_names;
 };
 
 /* trace() call result */
 struct OSLTraceData {
-	Ray ray;
-	Intersection isect;
-	ShaderData sd;
-	bool setup;
-	bool init;
+  Ray ray;
+  Intersection isect;
+  ShaderData sd;
+  bool setup;
+  bool init;
 };
 
 /* thread key for thread specific data lookup */
 struct OSLThreadData {
-	OSL::ShaderGlobals globals;
-	OSL::PerThreadInfo *osl_thread_info;
-	OSLTraceData tracedata;
-	OSL::ShadingContext *context;
-	OIIO::TextureSystem::Perthread *oiio_thread_info;
+  OSL::ShaderGlobals globals;
+  OSL::PerThreadInfo *osl_thread_info;
+  OSLTraceData tracedata;
+  OSL::ShadingContext *context;
+  OIIO::TextureSystem::Perthread *oiio_thread_info;
 };
 
 CCL_NAMESPACE_END
 
 #endif
 
-#endif  /* __OSL_GLOBALS_H__ */
+#endif /* __OSL_GLOBALS_H__ */
diff --git a/intern/cycles/kernel/osl/osl_services.cpp b/intern/cycles/kernel/osl/osl_services.cpp
index 6464d382634..eb9f672fd8a 100644
--- a/intern/cycles/kernel/osl/osl_services.cpp
+++ b/intern/cycles/kernel/osl/osl_services.cpp
@@ -63,16 +63,16 @@ CCL_NAMESPACE_BEGIN
 
 /* RenderServices implementation */
 
-static void copy_matrix(OSL::Matrix44& m, const Transform& tfm)
+static void copy_matrix(OSL::Matrix44 &m, const Transform &tfm)
 {
-	ProjectionTransform t = projection_transpose(ProjectionTransform(tfm));
-	memcpy((void *)&m, &t, sizeof(m));
+  ProjectionTransform t = projection_transpose(ProjectionTransform(tfm));
+  memcpy((void *)&m, &t, sizeof(m));
 }
 
-static void copy_matrix(OSL::Matrix44& m, const ProjectionTransform& tfm)
+static void copy_matrix(OSL::Matrix44 &m, const ProjectionTransform &tfm)
 {
-	ProjectionTransform t = projection_transpose(tfm);
-	memcpy((void *)&m, &t, sizeof(m));
+  ProjectionTransform t = projection_transpose(tfm);
+  memcpy((void *)&m, &t, sizeof(m));
 }
 
 /* static ustrings */
@@ -129,815 +129,846 @@ ustring OSLRenderServices::u_at_ao("@ao");
 
 OSLRenderServices::OSLRenderServices()
 {
-	kernel_globals = NULL;
-	osl_ts = NULL;
+  kernel_globals = NULL;
+  osl_ts = NULL;
 
 #ifdef WITH_PTEX
-	size_t maxmem = 16384 * 1024;
-	ptex_cache = PtexCache::create(0, maxmem);
+  size_t maxmem = 16384 * 1024;
+  ptex_cache = PtexCache::create(0, maxmem);
 #endif
 }
 
 OSLRenderServices::~OSLRenderServices()
 {
-	if(osl_ts) {
-		VLOG(2) << "OSL texture system stats:\n"
-		        << osl_ts->getstats();
-	}
+  if (osl_ts) {
+    VLOG(2) << "OSL texture system stats:\n" << osl_ts->getstats();
+  }
 #ifdef WITH_PTEX
-	ptex_cache->release();
+  ptex_cache->release();
 #endif
 }
 
 void OSLRenderServices::thread_init(KernelGlobals *kernel_globals_, OSL::TextureSystem *osl_ts_)
 {
-	kernel_globals = kernel_globals_;
-	osl_ts = osl_ts_;
+  kernel_globals = kernel_globals_;
+  osl_ts = osl_ts_;
 }
 
-bool OSLRenderServices::get_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, OSL::TransformationPtr xform, float time)
+bool OSLRenderServices::get_matrix(OSL::ShaderGlobals *sg,
+                                   OSL::Matrix44 &result,
+                                   OSL::TransformationPtr xform,
+                                   float time)
 {
-	/* this is only used for shader and object space, we don't really have
-	 * a concept of shader space, so we just use object space for both. */
-	if(xform) {
-		const ShaderData *sd = (const ShaderData *)xform;
-		KernelGlobals *kg = sd->osl_globals;
-		int object = sd->object;
-
-		if(object != OBJECT_NONE) {
+  /* this is only used for shader and object space, we don't really have
+   * a concept of shader space, so we just use object space for both. */
+  if (xform) {
+    const ShaderData *sd = (const ShaderData *)xform;
+    KernelGlobals *kg = sd->osl_globals;
+    int object = sd->object;
+
+    if (object != OBJECT_NONE) {
 #ifdef __OBJECT_MOTION__
-			Transform tfm;
+      Transform tfm;
 
-			if(time == sd->time)
-				tfm = sd->ob_tfm;
-			else
-				tfm = object_fetch_transform_motion_test(kg, object, time, NULL);
+      if (time == sd->time)
+        tfm = sd->ob_tfm;
+      else
+        tfm = object_fetch_transform_motion_test(kg, object, time, NULL);
 #else
-			Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM);
+      Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM);
 #endif
-			copy_matrix(result, tfm);
+      copy_matrix(result, tfm);
 
-			return true;
-		}
-		else if(sd->type == PRIMITIVE_LAMP) {
-			copy_matrix(result, sd->ob_tfm);
+      return true;
+    }
+    else if (sd->type == PRIMITIVE_LAMP) {
+      copy_matrix(result, sd->ob_tfm);
 
-			return true;
-		}
-	}
+      return true;
+    }
+  }
 
-	return false;
+  return false;
 }
 
-bool OSLRenderServices::get_inverse_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, OSL::TransformationPtr xform, float time)
+bool OSLRenderServices::get_inverse_matrix(OSL::ShaderGlobals *sg,
+                                           OSL::Matrix44 &result,
+                                           OSL::TransformationPtr xform,
+                                           float time)
 {
-	/* this is only used for shader and object space, we don't really have
-	 * a concept of shader space, so we just use object space for both. */
-	if(xform) {
-		const ShaderData *sd = (const ShaderData *)xform;
-		KernelGlobals *kg = sd->osl_globals;
-		int object = sd->object;
-
-		if(object != OBJECT_NONE) {
+  /* this is only used for shader and object space, we don't really have
+   * a concept of shader space, so we just use object space for both. */
+  if (xform) {
+    const ShaderData *sd = (const ShaderData *)xform;
+    KernelGlobals *kg = sd->osl_globals;
+    int object = sd->object;
+
+    if (object != OBJECT_NONE) {
 #ifdef __OBJECT_MOTION__
-			Transform itfm;
+      Transform itfm;
 
-			if(time == sd->time)
-				itfm = sd->ob_itfm;
-			else
-				object_fetch_transform_motion_test(kg, object, time, &itfm);
+      if (time == sd->time)
+        itfm = sd->ob_itfm;
+      else
+        object_fetch_transform_motion_test(kg, object, time, &itfm);
 #else
-			Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
+      Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
 #endif
-			copy_matrix(result, itfm);
+      copy_matrix(result, itfm);
 
-			return true;
-		}
-		else if(sd->type == PRIMITIVE_LAMP) {
-			copy_matrix(result, sd->ob_itfm);
+      return true;
+    }
+    else if (sd->type == PRIMITIVE_LAMP) {
+      copy_matrix(result, sd->ob_itfm);
 
-			return true;
-		}
-	}
+      return true;
+    }
+  }
 
-	return false;
+  return false;
 }
 
-bool OSLRenderServices::get_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, ustring from, float time)
+bool OSLRenderServices::get_matrix(OSL::ShaderGlobals *sg,
+                                   OSL::Matrix44 &result,
+                                   ustring from,
+                                   float time)
 {
-	KernelGlobals *kg = kernel_globals;
-
-	if(from == u_ndc) {
-		copy_matrix(result, kernel_data.cam.ndctoworld);
-		return true;
-	}
-	else if(from == u_raster) {
-		copy_matrix(result, kernel_data.cam.rastertoworld);
-		return true;
-	}
-	else if(from == u_screen) {
-		copy_matrix(result, kernel_data.cam.screentoworld);
-		return true;
-	}
-	else if(from == u_camera) {
-		copy_matrix(result, kernel_data.cam.cameratoworld);
-		return true;
-	}
-	else if(from == u_world) {
-		result.makeIdentity();
-		return true;
-	}
-
-	return false;
+  KernelGlobals *kg = kernel_globals;
+
+  if (from == u_ndc) {
+    copy_matrix(result, kernel_data.cam.ndctoworld);
+    return true;
+  }
+  else if (from == u_raster) {
+    copy_matrix(result, kernel_data.cam.rastertoworld);
+    return true;
+  }
+  else if (from == u_screen) {
+    copy_matrix(result, kernel_data.cam.screentoworld);
+    return true;
+  }
+  else if (from == u_camera) {
+    copy_matrix(result, kernel_data.cam.cameratoworld);
+    return true;
+  }
+  else if (from == u_world) {
+    result.makeIdentity();
+    return true;
+  }
+
+  return false;
 }
 
-bool OSLRenderServices::get_inverse_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, ustring to, float time)
+bool OSLRenderServices::get_inverse_matrix(OSL::ShaderGlobals *sg,
+                                           OSL::Matrix44 &result,
+                                           ustring to,
+                                           float time)
 {
-	KernelGlobals *kg = kernel_globals;
-
-	if(to == u_ndc) {
-		copy_matrix(result, kernel_data.cam.worldtondc);
-		return true;
-	}
-	else if(to == u_raster) {
-		copy_matrix(result, kernel_data.cam.worldtoraster);
-		return true;
-	}
-	else if(to == u_screen) {
-		copy_matrix(result, kernel_data.cam.worldtoscreen);
-		return true;
-	}
-	else if(to == u_camera) {
-		copy_matrix(result, kernel_data.cam.worldtocamera);
-		return true;
-	}
-	else if(to == u_world) {
-		result.makeIdentity();
-		return true;
-	}
-
-	return false;
+  KernelGlobals *kg = kernel_globals;
+
+  if (to == u_ndc) {
+    copy_matrix(result, kernel_data.cam.worldtondc);
+    return true;
+  }
+  else if (to == u_raster) {
+    copy_matrix(result, kernel_data.cam.worldtoraster);
+    return true;
+  }
+  else if (to == u_screen) {
+    copy_matrix(result, kernel_data.cam.worldtoscreen);
+    return true;
+  }
+  else if (to == u_camera) {
+    copy_matrix(result, kernel_data.cam.worldtocamera);
+    return true;
+  }
+  else if (to == u_world) {
+    result.makeIdentity();
+    return true;
+  }
+
+  return false;
 }
 
-bool OSLRenderServices::get_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, OSL::TransformationPtr xform)
+bool OSLRenderServices::get_matrix(OSL::ShaderGlobals *sg,
+                                   OSL::Matrix44 &result,
+                                   OSL::TransformationPtr xform)
 {
-	/* this is only used for shader and object space, we don't really have
-	 * a concept of shader space, so we just use object space for both. */
-	if(xform) {
-		const ShaderData *sd = (const ShaderData *)xform;
-		int object = sd->object;
+  /* this is only used for shader and object space, we don't really have
+   * a concept of shader space, so we just use object space for both. */
+  if (xform) {
+    const ShaderData *sd = (const ShaderData *)xform;
+    int object = sd->object;
 
-		if(object != OBJECT_NONE) {
+    if (object != OBJECT_NONE) {
 #ifdef __OBJECT_MOTION__
-			Transform tfm = sd->ob_tfm;
+      Transform tfm = sd->ob_tfm;
 #else
-			KernelGlobals *kg = sd->osl_globals;
-			Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM);
+      KernelGlobals *kg = sd->osl_globals;
+      Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM);
 #endif
-			copy_matrix(result, tfm);
+      copy_matrix(result, tfm);
 
-			return true;
-		}
-		else if(sd->type == PRIMITIVE_LAMP) {
-			copy_matrix(result, sd->ob_tfm);
+      return true;
+    }
+    else if (sd->type == PRIMITIVE_LAMP) {
+      copy_matrix(result, sd->ob_tfm);
 
-			return true;
-		}
-	}
+      return true;
+    }
+  }
 
-	return false;
+  return false;
 }
 
-bool OSLRenderServices::get_inverse_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, OSL::TransformationPtr xform)
+bool OSLRenderServices::get_inverse_matrix(OSL::ShaderGlobals *sg,
+                                           OSL::Matrix44 &result,
+                                           OSL::TransformationPtr xform)
 {
-	/* this is only used for shader and object space, we don't really have
-	 * a concept of shader space, so we just use object space for both. */
-	if(xform) {
-		const ShaderData *sd = (const ShaderData *)xform;
-		int object = sd->object;
+  /* this is only used for shader and object space, we don't really have
+   * a concept of shader space, so we just use object space for both. */
+  if (xform) {
+    const ShaderData *sd = (const ShaderData *)xform;
+    int object = sd->object;
 
-		if(object != OBJECT_NONE) {
+    if (object != OBJECT_NONE) {
 #ifdef __OBJECT_MOTION__
-			Transform tfm = sd->ob_itfm;
+      Transform tfm = sd->ob_itfm;
 #else
-			KernelGlobals *kg = sd->osl_globals;
-			Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
+      KernelGlobals *kg = sd->osl_globals;
+      Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
 #endif
-			copy_matrix(result, tfm);
+      copy_matrix(result, tfm);
 
-			return true;
-		}
-		else if(sd->type == PRIMITIVE_LAMP) {
-			copy_matrix(result, sd->ob_itfm);
+      return true;
+    }
+    else if (sd->type == PRIMITIVE_LAMP) {
+      copy_matrix(result, sd->ob_itfm);
 
-			return true;
-		}
-	}
+      return true;
+    }
+  }
 
-	return false;
+  return false;
 }
 
 bool OSLRenderServices::get_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, ustring from)
 {
-	KernelGlobals *kg = kernel_globals;
-
-	if(from == u_ndc) {
-		copy_matrix(result, kernel_data.cam.ndctoworld);
-		return true;
-	}
-	else if(from == u_raster) {
-		copy_matrix(result, kernel_data.cam.rastertoworld);
-		return true;
-	}
-	else if(from == u_screen) {
-		copy_matrix(result, kernel_data.cam.screentoworld);
-		return true;
-	}
-	else if(from == u_camera) {
-		copy_matrix(result, kernel_data.cam.cameratoworld);
-		return true;
-	}
-
-	return false;
+  KernelGlobals *kg = kernel_globals;
+
+  if (from == u_ndc) {
+    copy_matrix(result, kernel_data.cam.ndctoworld);
+    return true;
+  }
+  else if (from == u_raster) {
+    copy_matrix(result, kernel_data.cam.rastertoworld);
+    return true;
+  }
+  else if (from == u_screen) {
+    copy_matrix(result, kernel_data.cam.screentoworld);
+    return true;
+  }
+  else if (from == u_camera) {
+    copy_matrix(result, kernel_data.cam.cameratoworld);
+    return true;
+  }
+
+  return false;
 }
 
-bool OSLRenderServices::get_inverse_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, ustring to)
+bool OSLRenderServices::get_inverse_matrix(OSL::ShaderGlobals *sg,
+                                           OSL::Matrix44 &result,
+                                           ustring to)
 {
-	KernelGlobals *kg = kernel_globals;
-
-	if(to == u_ndc) {
-		copy_matrix(result, kernel_data.cam.worldtondc);
-		return true;
-	}
-	else if(to == u_raster) {
-		copy_matrix(result, kernel_data.cam.worldtoraster);
-		return true;
-	}
-	else if(to == u_screen) {
-		copy_matrix(result, kernel_data.cam.worldtoscreen);
-		return true;
-	}
-	else if(to == u_camera) {
-		copy_matrix(result, kernel_data.cam.worldtocamera);
-		return true;
-	}
-
-	return false;
+  KernelGlobals *kg = kernel_globals;
+
+  if (to == u_ndc) {
+    copy_matrix(result, kernel_data.cam.worldtondc);
+    return true;
+  }
+  else if (to == u_raster) {
+    copy_matrix(result, kernel_data.cam.worldtoraster);
+    return true;
+  }
+  else if (to == u_screen) {
+    copy_matrix(result, kernel_data.cam.worldtoscreen);
+    return true;
+  }
+  else if (to == u_camera) {
+    copy_matrix(result, kernel_data.cam.worldtocamera);
+    return true;
+  }
+
+  return false;
 }
 
-bool OSLRenderServices::get_array_attribute(OSL::ShaderGlobals *sg, bool derivatives,
-                                            ustring object, TypeDesc type, ustring name,
-                                            int index, void *val)
+bool OSLRenderServices::get_array_attribute(OSL::ShaderGlobals *sg,
+                                            bool derivatives,
+                                            ustring object,
+                                            TypeDesc type,
+                                            ustring name,
+                                            int index,
+                                            void *val)
 {
-	return false;
+  return false;
 }
 
 static bool set_attribute_float2(float2 f[3], TypeDesc type, bool derivatives, void *val)
 {
-	if(type == TypeDesc::TypePoint || type == TypeDesc::TypeVector ||
-	   type == TypeDesc::TypeNormal || type == TypeDesc::TypeColor)
-	{
-		float *fval = (float *)val;
-
-		fval[0] = f[0].x;
-		fval[1] = f[0].y;
-		fval[2] = 0.0f;
-
-		if(derivatives) {
-			fval[3] = f[1].x;
-			fval[4] = f[1].y;
-			fval[5] = 0.0f;
-
-			fval[6] = f[2].x;
-			fval[7] = f[2].y;
-			fval[8] = 0.0f;
-		}
-
-		return true;
-	}
-	else if(type == TypeDesc::TypeFloat) {
-		float *fval = (float *)val;
-		fval[0] = average(f[0]);
-
-		if(derivatives) {
-			fval[1] = average(f[1]);
-			fval[2] = average(f[2]);
-		}
-
-		return true;
-	}
-
-	return false;
+  if (type == TypeDesc::TypePoint || type == TypeDesc::TypeVector ||
+      type == TypeDesc::TypeNormal || type == TypeDesc::TypeColor) {
+    float *fval = (float *)val;
+
+    fval[0] = f[0].x;
+    fval[1] = f[0].y;
+    fval[2] = 0.0f;
+
+    if (derivatives) {
+      fval[3] = f[1].x;
+      fval[4] = f[1].y;
+      fval[5] = 0.0f;
+
+      fval[6] = f[2].x;
+      fval[7] = f[2].y;
+      fval[8] = 0.0f;
+    }
+
+    return true;
+  }
+  else if (type == TypeDesc::TypeFloat) {
+    float *fval = (float *)val;
+    fval[0] = average(f[0]);
+
+    if (derivatives) {
+      fval[1] = average(f[1]);
+      fval[2] = average(f[2]);
+    }
+
+    return true;
+  }
+
+  return false;
 }
 
 static bool set_attribute_float3(float3 f[3], TypeDesc type, bool derivatives, void *val)
 {
-	if(type == TypeDesc::TypePoint || type == TypeDesc::TypeVector ||
-	   type == TypeDesc::TypeNormal || type == TypeDesc::TypeColor)
-	{
-		float *fval = (float *)val;
-
-		fval[0] = f[0].x;
-		fval[1] = f[0].y;
-		fval[2] = f[0].z;
-
-		if(derivatives) {
-			fval[3] = f[1].x;
-			fval[4] = f[1].y;
-			fval[5] = f[1].z;
-
-			fval[6] = f[2].x;
-			fval[7] = f[2].y;
-			fval[8] = f[2].z;
-		}
-
-		return true;
-	}
-	else if(type == TypeDesc::TypeFloat) {
-		float *fval = (float *)val;
-		fval[0] = average(f[0]);
-
-		if(derivatives) {
-			fval[1] = average(f[1]);
-			fval[2] = average(f[2]);
-		}
-
-		return true;
-	}
-
-	return false;
+  if (type == TypeDesc::TypePoint || type == TypeDesc::TypeVector ||
+      type == TypeDesc::TypeNormal || type == TypeDesc::TypeColor) {
+    float *fval = (float *)val;
+
+    fval[0] = f[0].x;
+    fval[1] = f[0].y;
+    fval[2] = f[0].z;
+
+    if (derivatives) {
+      fval[3] = f[1].x;
+      fval[4] = f[1].y;
+      fval[5] = f[1].z;
+
+      fval[6] = f[2].x;
+      fval[7] = f[2].y;
+      fval[8] = f[2].z;
+    }
+
+    return true;
+  }
+  else if (type == TypeDesc::TypeFloat) {
+    float *fval = (float *)val;
+    fval[0] = average(f[0]);
+
+    if (derivatives) {
+      fval[1] = average(f[1]);
+      fval[2] = average(f[2]);
+    }
+
+    return true;
+  }
+
+  return false;
 }
 
 static bool set_attribute_float3(float3 f, TypeDesc type, bool derivatives, void *val)
 {
-	float3 fv[3];
+  float3 fv[3];
 
-	fv[0] = f;
-	fv[1] = make_float3(0.0f, 0.0f, 0.0f);
-	fv[2] = make_float3(0.0f, 0.0f, 0.0f);
+  fv[0] = f;
+  fv[1] = make_float3(0.0f, 0.0f, 0.0f);
+  fv[2] = make_float3(0.0f, 0.0f, 0.0f);
 
-	return set_attribute_float3(fv, type, derivatives, val);
+  return set_attribute_float3(fv, type, derivatives, val);
 }
 
 static bool set_attribute_float(float f[3], TypeDesc type, bool derivatives, void *val)
 {
-	if(type == TypeDesc::TypePoint || type == TypeDesc::TypeVector ||
-	   type == TypeDesc::TypeNormal || type == TypeDesc::TypeColor)
-	{
-		float *fval = (float *)val;
-		fval[0] = f[0];
-		fval[1] = f[1];
-		fval[2] = f[2];
-
-		if(derivatives) {
-			fval[3] = f[1];
-			fval[4] = f[1];
-			fval[5] = f[1];
-
-			fval[6] = f[2];
-			fval[7] = f[2];
-			fval[8] = f[2];
-		}
-
-		return true;
-	}
-	else if(type == TypeDesc::TypeFloat) {
-		float *fval = (float *)val;
-		fval[0] = f[0];
-
-		if(derivatives) {
-			fval[1] = f[1];
-			fval[2] = f[2];
-		}
-
-		return true;
-	}
-
-	return false;
+  if (type == TypeDesc::TypePoint || type == TypeDesc::TypeVector ||
+      type == TypeDesc::TypeNormal || type == TypeDesc::TypeColor) {
+    float *fval = (float *)val;
+    fval[0] = f[0];
+    fval[1] = f[1];
+    fval[2] = f[2];
+
+    if (derivatives) {
+      fval[3] = f[1];
+      fval[4] = f[1];
+      fval[5] = f[1];
+
+      fval[6] = f[2];
+      fval[7] = f[2];
+      fval[8] = f[2];
+    }
+
+    return true;
+  }
+  else if (type == TypeDesc::TypeFloat) {
+    float *fval = (float *)val;
+    fval[0] = f[0];
+
+    if (derivatives) {
+      fval[1] = f[1];
+      fval[2] = f[2];
+    }
+
+    return true;
+  }
+
+  return false;
 }
 
 static bool set_attribute_float(float f, TypeDesc type, bool derivatives, void *val)
 {
-	float fv[3];
+  float fv[3];
 
-	fv[0] = f;
-	fv[1] = 0.0f;
-	fv[2] = 0.0f;
+  fv[0] = f;
+  fv[1] = 0.0f;
+  fv[2] = 0.0f;
 
-	return set_attribute_float(fv, type, derivatives, val);
+  return set_attribute_float(fv, type, derivatives, val);
 }
 
 static bool set_attribute_int(int i, TypeDesc type, bool derivatives, void *val)
 {
-	if(type.basetype == TypeDesc::INT && type.aggregate == TypeDesc::SCALAR && type.arraylen == 0) {
-		int *ival = (int *)val;
-		ival[0] = i;
+  if (type.basetype == TypeDesc::INT && type.aggregate == TypeDesc::SCALAR && type.arraylen == 0) {
+    int *ival = (int *)val;
+    ival[0] = i;
 
-		if(derivatives) {
-			ival[1] = 0;
-			ival[2] = 0;
-		}
+    if (derivatives) {
+      ival[1] = 0;
+      ival[2] = 0;
+    }
 
-		return true;
-	}
+    return true;
+  }
 
-	return false;
+  return false;
 }
 
 static bool set_attribute_string(ustring str, TypeDesc type, bool derivatives, void *val)
 {
-	if(type.basetype == TypeDesc::STRING && type.aggregate == TypeDesc::SCALAR && type.arraylen == 0) {
-		ustring *sval = (ustring *)val;
-		sval[0] = str;
+  if (type.basetype == TypeDesc::STRING && type.aggregate == TypeDesc::SCALAR &&
+      type.arraylen == 0) {
+    ustring *sval = (ustring *)val;
+    sval[0] = str;
 
-		if(derivatives) {
-			sval[1] = OSLRenderServices::u_empty;
-			sval[2] = OSLRenderServices::u_empty;
-		}
+    if (derivatives) {
+      sval[1] = OSLRenderServices::u_empty;
+      sval[2] = OSLRenderServices::u_empty;
+    }
 
-		return true;
-	}
+    return true;
+  }
 
-	return false;
+  return false;
 }
 
 static bool set_attribute_float3_3(float3 P[3], TypeDesc type, bool derivatives, void *val)
 {
-	if(type.vecsemantics == TypeDesc::POINT && type.arraylen >= 3) {
-		float *fval = (float *)val;
+  if (type.vecsemantics == TypeDesc::POINT && type.arraylen >= 3) {
+    float *fval = (float *)val;
 
-		fval[0] = P[0].x;
-		fval[1] = P[0].y;
-		fval[2] = P[0].z;
+    fval[0] = P[0].x;
+    fval[1] = P[0].y;
+    fval[2] = P[0].z;
 
-		fval[3] = P[1].x;
-		fval[4] = P[1].y;
-		fval[5] = P[1].z;
+    fval[3] = P[1].x;
+    fval[4] = P[1].y;
+    fval[5] = P[1].z;
 
-		fval[6] = P[2].x;
-		fval[7] = P[2].y;
-		fval[8] = P[2].z;
+    fval[6] = P[2].x;
+    fval[7] = P[2].y;
+    fval[8] = P[2].z;
 
-		if(type.arraylen > 3)
-			memset(fval + 3*3, 0, sizeof(float)*3*(type.arraylen - 3));
-		if(derivatives)
-			memset(fval + type.arraylen*3, 0, sizeof(float)*2*3*type.arraylen);
+    if (type.arraylen > 3)
+      memset(fval + 3 * 3, 0, sizeof(float) * 3 * (type.arraylen - 3));
+    if (derivatives)
+      memset(fval + type.arraylen * 3, 0, sizeof(float) * 2 * 3 * type.arraylen);
 
-		return true;
-	}
+    return true;
+  }
 
-	return false;
+  return false;
 }
 
-static bool set_attribute_matrix(const Transform& tfm, TypeDesc type, void *val)
+static bool set_attribute_matrix(const Transform &tfm, TypeDesc type, void *val)
 {
-	if(type == TypeDesc::TypeMatrix) {
-		copy_matrix(*(OSL::Matrix44*)val, tfm);
-		return true;
-	}
+  if (type == TypeDesc::TypeMatrix) {
+    copy_matrix(*(OSL::Matrix44 *)val, tfm);
+    return true;
+  }
 
-	return false;
+  return false;
 }
 
-static bool get_primitive_attribute(KernelGlobals *kg, const ShaderData *sd, const OSLGlobals::Attribute& attr,
-                               const TypeDesc& type, bool derivatives, void *val)
+static bool get_primitive_attribute(KernelGlobals *kg,
+                                    const ShaderData *sd,
+                                    const OSLGlobals::Attribute &attr,
+                                    const TypeDesc &type,
+                                    bool derivatives,
+                                    void *val)
 {
-	if(attr.type == TypeDesc::TypePoint || attr.type == TypeDesc::TypeVector ||
-	   attr.type == TypeDesc::TypeNormal || attr.type == TypeDesc::TypeColor)
-	{
-		float3 fval[3];
-		fval[0] = primitive_attribute_float3(kg, sd, attr.desc,
-		                                     (derivatives) ? &fval[1] : NULL, (derivatives) ? &fval[2] : NULL);
-		return set_attribute_float3(fval, type, derivatives, val);
-	}
-	else if(attr.type == TypeFloat2) {
-		float2 fval[2];
-		fval[0] = primitive_attribute_float2(kg, sd, attr.desc,
-		                                      (derivatives) ? &fval[1] : NULL, (derivatives) ? &fval[2] : NULL);
-		return set_attribute_float2(fval, type, derivatives, val);
-	}
-	else if(attr.type == TypeDesc::TypeFloat) {
-		float fval[3];
-		fval[0] = primitive_attribute_float(kg, sd, attr.desc,
-		                                    (derivatives) ? &fval[1] : NULL, (derivatives) ? &fval[2] : NULL);
-		return set_attribute_float(fval, type, derivatives, val);
-	}
-	else {
-		return false;
-	}
+  if (attr.type == TypeDesc::TypePoint || attr.type == TypeDesc::TypeVector ||
+      attr.type == TypeDesc::TypeNormal || attr.type == TypeDesc::TypeColor) {
+    float3 fval[3];
+    fval[0] = primitive_attribute_float3(
+        kg, sd, attr.desc, (derivatives) ? &fval[1] : NULL, (derivatives) ? &fval[2] : NULL);
+    return set_attribute_float3(fval, type, derivatives, val);
+  }
+  else if (attr.type == TypeFloat2) {
+    float2 fval[2];
+    fval[0] = primitive_attribute_float2(
+        kg, sd, attr.desc, (derivatives) ? &fval[1] : NULL, (derivatives) ? &fval[2] : NULL);
+    return set_attribute_float2(fval, type, derivatives, val);
+  }
+  else if (attr.type == TypeDesc::TypeFloat) {
+    float fval[3];
+    fval[0] = primitive_attribute_float(
+        kg, sd, attr.desc, (derivatives) ? &fval[1] : NULL, (derivatives) ? &fval[2] : NULL);
+    return set_attribute_float(fval, type, derivatives, val);
+  }
+  else {
+    return false;
+  }
 }
 
-static bool get_mesh_attribute(KernelGlobals *kg, const ShaderData *sd, const OSLGlobals::Attribute& attr,
-                               const TypeDesc& type, bool derivatives, void *val)
+static bool get_mesh_attribute(KernelGlobals *kg,
+                               const ShaderData *sd,
+                               const OSLGlobals::Attribute &attr,
+                               const TypeDesc &type,
+                               bool derivatives,
+                               void *val)
 {
-	if(attr.type == TypeDesc::TypeMatrix) {
-		Transform tfm = primitive_attribute_matrix(kg, sd, attr.desc);
-		return set_attribute_matrix(tfm, type, val);
-	}
-	else {
-		return false;
-	}
+  if (attr.type == TypeDesc::TypeMatrix) {
+    Transform tfm = primitive_attribute_matrix(kg, sd, attr.desc);
+    return set_attribute_matrix(tfm, type, val);
+  }
+  else {
+    return false;
+  }
 }
 
-static void get_object_attribute(const OSLGlobals::Attribute& attr, bool derivatives, void *val)
+static void get_object_attribute(const OSLGlobals::Attribute &attr, bool derivatives, void *val)
 {
-	size_t datasize = attr.value.datasize();
+  size_t datasize = attr.value.datasize();
 
-	memcpy(val, attr.value.data(), datasize);
-	if(derivatives)
-		memset((char *)val + datasize, 0, datasize * 2);
+  memcpy(val, attr.value.data(), datasize);
+  if (derivatives)
+    memset((char *)val + datasize, 0, datasize * 2);
 }
 
-bool OSLRenderServices::get_object_standard_attribute(KernelGlobals *kg, ShaderData *sd, ustring name,
-                                                      TypeDesc type, bool derivatives, void *val)
+bool OSLRenderServices::get_object_standard_attribute(
+    KernelGlobals *kg, ShaderData *sd, ustring name, TypeDesc type, bool derivatives, void *val)
 {
-	/* todo: turn this into hash table? */
-
-	/* Object Attributes */
-	if(name == u_object_location) {
-		float3 f = object_location(kg, sd);
-		return set_attribute_float3(f, type, derivatives, val);
-	}
-	else if(name == u_object_index) {
-		float f = object_pass_id(kg, sd->object);
-		return set_attribute_float(f, type, derivatives, val);
-	}
-	else if(name == u_geom_dupli_generated) {
-		float3 f = object_dupli_generated(kg, sd->object);
-		return set_attribute_float3(f, type, derivatives, val);
-	}
-	else if(name == u_geom_dupli_uv) {
-		float3 f = object_dupli_uv(kg, sd->object);
-		return set_attribute_float3(f, type, derivatives, val);
-	}
-	else if(name == u_material_index) {
-		float f = shader_pass_id(kg, sd);
-		return set_attribute_float(f, type, derivatives, val);
-	}
-	else if(name == u_object_random) {
-		float f = object_random_number(kg, sd->object);
-		return set_attribute_float(f, type, derivatives, val);
-	}
-
-	/* Particle Attributes */
-	else if(name == u_particle_index) {
-		int particle_id = object_particle_id(kg, sd->object);
-		float f = particle_index(kg, particle_id);
-		return set_attribute_float(f, type, derivatives, val);
-	}
-	else if(name == u_particle_random) {
-		int particle_id = object_particle_id(kg, sd->object);
-		float f = hash_int_01(particle_index(kg, particle_id));
-		return set_attribute_float(f, type, derivatives, val);
-	}
-
-	else if(name == u_particle_age) {
-		int particle_id = object_particle_id(kg, sd->object);
-		float f = particle_age(kg, particle_id);
-		return set_attribute_float(f, type, derivatives, val);
-	}
-	else if(name == u_particle_lifetime) {
-		int particle_id = object_particle_id(kg, sd->object);
-		float f = particle_lifetime(kg, particle_id);
-		return set_attribute_float(f, type, derivatives, val);
-	}
-	else if(name == u_particle_location) {
-		int particle_id = object_particle_id(kg, sd->object);
-		float3 f = particle_location(kg, particle_id);
-		return set_attribute_float3(f, type, derivatives, val);
-	}
-#if 0	/* unsupported */
-	else if(name == u_particle_rotation) {
-		int particle_id = object_particle_id(kg, sd->object);
-		float4 f = particle_rotation(kg, particle_id);
-		return set_attribute_float4(f, type, derivatives, val);
-	}
+  /* todo: turn this into hash table? */
+
+  /* Object Attributes */
+  if (name == u_object_location) {
+    float3 f = object_location(kg, sd);
+    return set_attribute_float3(f, type, derivatives, val);
+  }
+  else if (name == u_object_index) {
+    float f = object_pass_id(kg, sd->object);
+    return set_attribute_float(f, type, derivatives, val);
+  }
+  else if (name == u_geom_dupli_generated) {
+    float3 f = object_dupli_generated(kg, sd->object);
+    return set_attribute_float3(f, type, derivatives, val);
+  }
+  else if (name == u_geom_dupli_uv) {
+    float3 f = object_dupli_uv(kg, sd->object);
+    return set_attribute_float3(f, type, derivatives, val);
+  }
+  else if (name == u_material_index) {
+    float f = shader_pass_id(kg, sd);
+    return set_attribute_float(f, type, derivatives, val);
+  }
+  else if (name == u_object_random) {
+    float f = object_random_number(kg, sd->object);
+    return set_attribute_float(f, type, derivatives, val);
+  }
+
+  /* Particle Attributes */
+  else if (name == u_particle_index) {
+    int particle_id = object_particle_id(kg, sd->object);
+    float f = particle_index(kg, particle_id);
+    return set_attribute_float(f, type, derivatives, val);
+  }
+  else if (name == u_particle_random) {
+    int particle_id = object_particle_id(kg, sd->object);
+    float f = hash_int_01(particle_index(kg, particle_id));
+    return set_attribute_float(f, type, derivatives, val);
+  }
+
+  else if (name == u_particle_age) {
+    int particle_id = object_particle_id(kg, sd->object);
+    float f = particle_age(kg, particle_id);
+    return set_attribute_float(f, type, derivatives, val);
+  }
+  else if (name == u_particle_lifetime) {
+    int particle_id = object_particle_id(kg, sd->object);
+    float f = particle_lifetime(kg, particle_id);
+    return set_attribute_float(f, type, derivatives, val);
+  }
+  else if (name == u_particle_location) {
+    int particle_id = object_particle_id(kg, sd->object);
+    float3 f = particle_location(kg, particle_id);
+    return set_attribute_float3(f, type, derivatives, val);
+  }
+#if 0 /* unsupported */
+  else if(name == u_particle_rotation) {
+    int particle_id = object_particle_id(kg, sd->object);
+    float4 f = particle_rotation(kg, particle_id);
+    return set_attribute_float4(f, type, derivatives, val);
+  }
 #endif
-	else if(name == u_particle_size) {
-		int particle_id = object_particle_id(kg, sd->object);
-		float f = particle_size(kg, particle_id);
-		return set_attribute_float(f, type, derivatives, val);
-	}
-	else if(name == u_particle_velocity) {
-		int particle_id = object_particle_id(kg, sd->object);
-		float3 f = particle_velocity(kg, particle_id);
-		return set_attribute_float3(f, type, derivatives, val);
-	}
-	else if(name == u_particle_angular_velocity) {
-		int particle_id = object_particle_id(kg, sd->object);
-		float3 f = particle_angular_velocity(kg, particle_id);
-		return set_attribute_float3(f, type, derivatives, val);
-	}
-
-	/* Geometry Attributes */
-	else if(name == u_geom_numpolyvertices) {
-		return set_attribute_int(3, type, derivatives, val);
-	}
-	else if((name == u_geom_trianglevertices || name == u_geom_polyvertices)
-		     && sd->type & PRIMITIVE_ALL_TRIANGLE)
-	{
-		float3 P[3];
-
-		if(sd->type & PRIMITIVE_TRIANGLE)
-			triangle_vertices(kg, sd->prim, P);
-		else
-			motion_triangle_vertices(kg, sd->object, sd->prim, sd->time, P);
-
-		if(!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
-			object_position_transform(kg, sd, &P[0]);
-			object_position_transform(kg, sd, &P[1]);
-			object_position_transform(kg, sd, &P[2]);
-		}
-
-		return set_attribute_float3_3(P, type, derivatives, val);
-	}
-	else if(name == u_geom_name) {
-		ustring object_name = kg->osl->object_names[sd->object];
-		return set_attribute_string(object_name, type, derivatives, val);
-	}
-	else if(name == u_is_smooth) {
-		float f = ((sd->shader & SHADER_SMOOTH_NORMAL) != 0);
-		return set_attribute_float(f, type, derivatives, val);
-	}
-	/* Hair Attributes */
-	else if(name == u_is_curve) {
-		float f = (sd->type & PRIMITIVE_ALL_CURVE) != 0;
-		return set_attribute_float(f, type, derivatives, val);
-	}
-	else if(name == u_curve_thickness) {
-		float f = curve_thickness(kg, sd);
-		return set_attribute_float(f, type, derivatives, val);
-	}
-	else if(name == u_curve_tangent_normal) {
-		float3 f = curve_tangent_normal(kg, sd);
-		return set_attribute_float3(f, type, derivatives, val);
-	}
-	else
-		return false;
+  else if (name == u_particle_size) {
+    int particle_id = object_particle_id(kg, sd->object);
+    float f = particle_size(kg, particle_id);
+    return set_attribute_float(f, type, derivatives, val);
+  }
+  else if (name == u_particle_velocity) {
+    int particle_id = object_particle_id(kg, sd->object);
+    float3 f = particle_velocity(kg, particle_id);
+    return set_attribute_float3(f, type, derivatives, val);
+  }
+  else if (name == u_particle_angular_velocity) {
+    int particle_id = object_particle_id(kg, sd->object);
+    float3 f = particle_angular_velocity(kg, particle_id);
+    return set_attribute_float3(f, type, derivatives, val);
+  }
+
+  /* Geometry Attributes */
+  else if (name == u_geom_numpolyvertices) {
+    return set_attribute_int(3, type, derivatives, val);
+  }
+  else if ((name == u_geom_trianglevertices || name == u_geom_polyvertices) &&
+           sd->type & PRIMITIVE_ALL_TRIANGLE) {
+    float3 P[3];
+
+    if (sd->type & PRIMITIVE_TRIANGLE)
+      triangle_vertices(kg, sd->prim, P);
+    else
+      motion_triangle_vertices(kg, sd->object, sd->prim, sd->time, P);
+
+    if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
+      object_position_transform(kg, sd, &P[0]);
+      object_position_transform(kg, sd, &P[1]);
+      object_position_transform(kg, sd, &P[2]);
+    }
+
+    return set_attribute_float3_3(P, type, derivatives, val);
+  }
+  else if (name == u_geom_name) {
+    ustring object_name = kg->osl->object_names[sd->object];
+    return set_attribute_string(object_name, type, derivatives, val);
+  }
+  else if (name == u_is_smooth) {
+    float f = ((sd->shader & SHADER_SMOOTH_NORMAL) != 0);
+    return set_attribute_float(f, type, derivatives, val);
+  }
+  /* Hair Attributes */
+  else if (name == u_is_curve) {
+    float f = (sd->type & PRIMITIVE_ALL_CURVE) != 0;
+    return set_attribute_float(f, type, derivatives, val);
+  }
+  else if (name == u_curve_thickness) {
+    float f = curve_thickness(kg, sd);
+    return set_attribute_float(f, type, derivatives, val);
+  }
+  else if (name == u_curve_tangent_normal) {
+    float3 f = curve_tangent_normal(kg, sd);
+    return set_attribute_float3(f, type, derivatives, val);
+  }
+  else
+    return false;
 }
 
-bool OSLRenderServices::get_background_attribute(KernelGlobals *kg, ShaderData *sd, ustring name,
-                                                 TypeDesc type, bool derivatives, void *val)
+bool OSLRenderServices::get_background_attribute(
+    KernelGlobals *kg, ShaderData *sd, ustring name, TypeDesc type, bool derivatives, void *val)
 {
-	if(name == u_path_ray_length) {
-		/* Ray Length */
-		float f = sd->ray_length;
-		return set_attribute_float(f, type, derivatives, val);
-	}
-	else if(name == u_path_ray_depth) {
-		/* Ray Depth */
-		PathState *state = sd->osl_path_state;
-		int f = state->bounce;
-		return set_attribute_int(f, type, derivatives, val);
-	}
-	else if(name == u_path_diffuse_depth) {
-		/* Diffuse Ray Depth */
-		PathState *state = sd->osl_path_state;
-		int f = state->diffuse_bounce;
-		return set_attribute_int(f, type, derivatives, val);
-	}
-	else if(name == u_path_glossy_depth) {
-		/* Glossy Ray Depth */
-		PathState *state = sd->osl_path_state;
-		int f = state->glossy_bounce;
-		return set_attribute_int(f, type, derivatives, val);
-	}
-	else if(name == u_path_transmission_depth) {
-		/* Transmission Ray Depth */
-		PathState *state = sd->osl_path_state;
-		int f = state->transmission_bounce;
-		return set_attribute_int(f, type, derivatives, val);
-	}
-	else if(name == u_path_transparent_depth) {
-		/* Transparent Ray Depth */
-		PathState *state = sd->osl_path_state;
-		int f = state->transparent_bounce;
-		return set_attribute_int(f, type, derivatives, val);
-	}
-	else if(name == u_path_transmission_depth) {
-		/* Transmission Ray Depth */
-		PathState *state = sd->osl_path_state;
-		int f = state->transmission_bounce;
-		return set_attribute_int(f, type, derivatives, val);
-	}
-	else if(name == u_ndc) {
-		/* NDC coordinates with special exception for otho */
-		OSLThreadData *tdata = kg->osl_tdata;
-		OSL::ShaderGlobals *globals = &tdata->globals;
-		float3 ndc[3];
-
-		if((globals->raytype & PATH_RAY_CAMERA) && sd->object == OBJECT_NONE && kernel_data.cam.type == CAMERA_ORTHOGRAPHIC) {
-			ndc[0] = camera_world_to_ndc(kg, sd, sd->ray_P);
-
-			if(derivatives) {
-				ndc[1] = camera_world_to_ndc(kg, sd, sd->ray_P + sd->ray_dP.dx) - ndc[0];
-				ndc[2] = camera_world_to_ndc(kg, sd, sd->ray_P + sd->ray_dP.dy) - ndc[0];
-			}
-		}
-		else {
-			ndc[0] = camera_world_to_ndc(kg, sd, sd->P);
-
-			if(derivatives) {
-				ndc[1] = camera_world_to_ndc(kg, sd, sd->P + sd->dP.dx) - ndc[0];
-				ndc[2] = camera_world_to_ndc(kg, sd, sd->P + sd->dP.dy) - ndc[0];
-			}
-		}
-
-		return set_attribute_float3(ndc, type, derivatives, val);
-	}
-	else
-		return false;
+  if (name == u_path_ray_length) {
+    /* Ray Length */
+    float f = sd->ray_length;
+    return set_attribute_float(f, type, derivatives, val);
+  }
+  else if (name == u_path_ray_depth) {
+    /* Ray Depth */
+    PathState *state = sd->osl_path_state;
+    int f = state->bounce;
+    return set_attribute_int(f, type, derivatives, val);
+  }
+  else if (name == u_path_diffuse_depth) {
+    /* Diffuse Ray Depth */
+    PathState *state = sd->osl_path_state;
+    int f = state->diffuse_bounce;
+    return set_attribute_int(f, type, derivatives, val);
+  }
+  else if (name == u_path_glossy_depth) {
+    /* Glossy Ray Depth */
+    PathState *state = sd->osl_path_state;
+    int f = state->glossy_bounce;
+    return set_attribute_int(f, type, derivatives, val);
+  }
+  else if (name == u_path_transmission_depth) {
+    /* Transmission Ray Depth */
+    PathState *state = sd->osl_path_state;
+    int f = state->transmission_bounce;
+    return set_attribute_int(f, type, derivatives, val);
+  }
+  else if (name == u_path_transparent_depth) {
+    /* Transparent Ray Depth */
+    PathState *state = sd->osl_path_state;
+    int f = state->transparent_bounce;
+    return set_attribute_int(f, type, derivatives, val);
+  }
+  else if (name == u_path_transmission_depth) {
+    /* Transmission Ray Depth */
+    PathState *state = sd->osl_path_state;
+    int f = state->transmission_bounce;
+    return set_attribute_int(f, type, derivatives, val);
+  }
+  else if (name == u_ndc) {
+    /* NDC coordinates with special exception for otho */
+    OSLThreadData *tdata = kg->osl_tdata;
+    OSL::ShaderGlobals *globals = &tdata->globals;
+    float3 ndc[3];
+
+    if ((globals->raytype & PATH_RAY_CAMERA) && sd->object == OBJECT_NONE &&
+        kernel_data.cam.type == CAMERA_ORTHOGRAPHIC) {
+      ndc[0] = camera_world_to_ndc(kg, sd, sd->ray_P);
+
+      if (derivatives) {
+        ndc[1] = camera_world_to_ndc(kg, sd, sd->ray_P + sd->ray_dP.dx) - ndc[0];
+        ndc[2] = camera_world_to_ndc(kg, sd, sd->ray_P + sd->ray_dP.dy) - ndc[0];
+      }
+    }
+    else {
+      ndc[0] = camera_world_to_ndc(kg, sd, sd->P);
+
+      if (derivatives) {
+        ndc[1] = camera_world_to_ndc(kg, sd, sd->P + sd->dP.dx) - ndc[0];
+        ndc[2] = camera_world_to_ndc(kg, sd, sd->P + sd->dP.dy) - ndc[0];
+      }
+    }
+
+    return set_attribute_float3(ndc, type, derivatives, val);
+  }
+  else
+    return false;
 }
 
-bool OSLRenderServices::get_attribute(OSL::ShaderGlobals *sg, bool derivatives, ustring object_name,
-                                      TypeDesc type, ustring name, void *val)
+bool OSLRenderServices::get_attribute(OSL::ShaderGlobals *sg,
+                                      bool derivatives,
+                                      ustring object_name,
+                                      TypeDesc type,
+                                      ustring name,
+                                      void *val)
 {
-	if(sg == NULL || sg->renderstate == NULL)
-		return false;
+  if (sg == NULL || sg->renderstate == NULL)
+    return false;
 
-	ShaderData *sd = (ShaderData *)(sg->renderstate);
-	return get_attribute(sd, derivatives, object_name, type, name, val);
+  ShaderData *sd = (ShaderData *)(sg->renderstate);
+  return get_attribute(sd, derivatives, object_name, type, name, val);
 }
 
-bool OSLRenderServices::get_attribute(ShaderData *sd, bool derivatives, ustring object_name,
-                                      TypeDesc type, ustring name, void *val)
+bool OSLRenderServices::get_attribute(
+    ShaderData *sd, bool derivatives, ustring object_name, TypeDesc type, ustring name, void *val)
 {
-	KernelGlobals *kg = sd->osl_globals;
-	int prim_type = 0;
-	int object;
-
-	/* lookup of attribute on another object */
-	if(object_name != u_empty) {
-		OSLGlobals::ObjectNameMap::iterator it = kg->osl->object_name_map.find(object_name);
-
-		if(it == kg->osl->object_name_map.end())
-			return false;
-
-		object = it->second;
-	}
-	else {
-		object = sd->object;
-		prim_type = attribute_primitive_type(kg, sd);
-
-		if(object == OBJECT_NONE)
-			return get_background_attribute(kg, sd, name, type, derivatives, val);
-	}
-
-	/* find attribute on object */
-	object = object*ATTR_PRIM_TYPES + prim_type;
-	OSLGlobals::AttributeMap& attribute_map = kg->osl->attribute_map[object];
-	OSLGlobals::AttributeMap::iterator it = attribute_map.find(name);
-
-	if(it != attribute_map.end()) {
-		const OSLGlobals::Attribute& attr = it->second;
-
-		if(attr.desc.element != ATTR_ELEMENT_OBJECT) {
-			/* triangle and vertex attributes */
-			if(get_primitive_attribute(kg, sd, attr, type, derivatives, val))
-				return true;
-			else
-				return get_mesh_attribute(kg, sd, attr, type, derivatives, val);
-		}
-		else {
-			/* object attribute */
-			get_object_attribute(attr, derivatives, val);
-			return true;
-		}
-	}
-	else {
-		/* not found in attribute, check standard object info */
-		bool is_std_object_attribute = get_object_standard_attribute(kg, sd, name, type, derivatives, val);
-
-		if(is_std_object_attribute)
-			return true;
-
-		return get_background_attribute(kg, sd, name, type, derivatives, val);
-	}
-
-	return false;
+  KernelGlobals *kg = sd->osl_globals;
+  int prim_type = 0;
+  int object;
+
+  /* lookup of attribute on another object */
+  if (object_name != u_empty) {
+    OSLGlobals::ObjectNameMap::iterator it = kg->osl->object_name_map.find(object_name);
+
+    if (it == kg->osl->object_name_map.end())
+      return false;
+
+    object = it->second;
+  }
+  else {
+    object = sd->object;
+    prim_type = attribute_primitive_type(kg, sd);
+
+    if (object == OBJECT_NONE)
+      return get_background_attribute(kg, sd, name, type, derivatives, val);
+  }
+
+  /* find attribute on object */
+  object = object * ATTR_PRIM_TYPES + prim_type;
+  OSLGlobals::AttributeMap &attribute_map = kg->osl->attribute_map[object];
+  OSLGlobals::AttributeMap::iterator it = attribute_map.find(name);
+
+  if (it != attribute_map.end()) {
+    const OSLGlobals::Attribute &attr = it->second;
+
+    if (attr.desc.element != ATTR_ELEMENT_OBJECT) {
+      /* triangle and vertex attributes */
+      if (get_primitive_attribute(kg, sd, attr, type, derivatives, val))
+        return true;
+      else
+        return get_mesh_attribute(kg, sd, attr, type, derivatives, val);
+    }
+    else {
+      /* object attribute */
+      get_object_attribute(attr, derivatives, val);
+      return true;
+    }
+  }
+  else {
+    /* not found in attribute, check standard object info */
+    bool is_std_object_attribute = get_object_standard_attribute(
+        kg, sd, name, type, derivatives, val);
+
+    if (is_std_object_attribute)
+      return true;
+
+    return get_background_attribute(kg, sd, name, type, derivatives, val);
+  }
+
+  return false;
 }
 
-bool OSLRenderServices::get_userdata(bool derivatives, ustring name, TypeDesc type,
-                                     OSL::ShaderGlobals *sg, void *val)
+bool OSLRenderServices::get_userdata(
+    bool derivatives, ustring name, TypeDesc type, OSL::ShaderGlobals *sg, void *val)
 {
-	return false; /* disabled by lockgeom */
+  return false; /* disabled by lockgeom */
 }
 
 TextureSystem::TextureHandle *OSLRenderServices::get_texture_handle(ustring filename)
 {
-	if(filename.length() && filename[0] == '@') {
-		/* Dummy, we don't use texture handles for builtin textures but need
-		 * to tell the OSL runtime optimizer that this is a valid texture. */
-		return NULL;
-	}
-	else {
-		return texturesys()->get_texture_handle(filename);
-	}
+  if (filename.length() && filename[0] == '@') {
+    /* Dummy, we don't use texture handles for builtin textures but need
+     * to tell the OSL runtime optimizer that this is a valid texture. */
+    return NULL;
+  }
+  else {
+    return texturesys()->get_texture_handle(filename);
+  }
 }
 
 bool OSLRenderServices::good(TextureSystem::TextureHandle *texture_handle)
 {
-	return texturesys()->good(texture_handle);
+  return texturesys()->good(texture_handle);
 }
 
 bool OSLRenderServices::texture(ustring filename,
@@ -945,157 +976,169 @@ bool OSLRenderServices::texture(ustring filename,
                                 TexturePerthread *texture_thread_info,
                                 TextureOpt &options,
                                 OSL::ShaderGlobals *sg,
-                                float s, float t,
-                                float dsdx, float dtdx, float dsdy, float dtdy,
+                                float s,
+                                float t,
+                                float dsdx,
+                                float dtdx,
+                                float dsdy,
+                                float dtdy,
                                 int nchannels,
                                 float *result,
                                 float *dresultds,
                                 float *dresultdt,
                                 ustring *errormessage)
 {
-	OSL::TextureSystem *ts = osl_ts;
-	ShaderData *sd = (ShaderData *)(sg->renderstate);
-	KernelGlobals *kg = sd->osl_globals;
+  OSL::TextureSystem *ts = osl_ts;
+  ShaderData *sd = (ShaderData *)(sg->renderstate);
+  KernelGlobals *kg = sd->osl_globals;
 
-	if(texture_thread_info == NULL) {
-		OSLThreadData *tdata = kg->osl_tdata;
-		texture_thread_info = tdata->oiio_thread_info;
-	}
+  if (texture_thread_info == NULL) {
+    OSLThreadData *tdata = kg->osl_tdata;
+    texture_thread_info = tdata->oiio_thread_info;
+  }
 
 #ifdef WITH_PTEX
-	/* todo: this is just a quick hack, only works with particular files and options */
-	if(string_endswith(filename.string(), ".ptx")) {
-		float2 uv;
-		int faceid;
+  /* todo: this is just a quick hack, only works with particular files and options */
+  if (string_endswith(filename.string(), ".ptx")) {
+    float2 uv;
+    int faceid;
 
-		if(!primitive_ptex(kg, sd, &uv, &faceid))
-			return false;
+    if (!primitive_ptex(kg, sd, &uv, &faceid))
+      return false;
 
-		float u = uv.x;
-		float v = uv.y;
-		float dudx = 0.0f;
-		float dvdx = 0.0f;
-		float dudy = 0.0f;
-		float dvdy = 0.0f;
+    float u = uv.x;
+    float v = uv.y;
+    float dudx = 0.0f;
+    float dvdx = 0.0f;
+    float dudy = 0.0f;
+    float dvdy = 0.0f;
 
-		Ptex::String error;
-		PtexPtr<PtexTexture> r(ptex_cache->get(filename.c_str(), error));
+    Ptex::String error;
+    PtexPtr<PtexTexture> r(ptex_cache->get(filename.c_str(), error));
 
-		if(!r) {
-			//std::cerr << error.c_str() << std::endl;
-			return false;
-		}
+    if (!r) {
+      //std::cerr << error.c_str() << std::endl;
+      return false;
+    }
 
-		bool mipmaplerp = false;
-		float sharpness = 1.0f;
-		PtexFilter::Options opts(PtexFilter::f_bicubic, mipmaplerp, sharpness);
-		PtexPtr<PtexFilter> f(PtexFilter::getFilter(r, opts));
+    bool mipmaplerp = false;
+    float sharpness = 1.0f;
+    PtexFilter::Options opts(PtexFilter::f_bicubic, mipmaplerp, sharpness);
+    PtexPtr<PtexFilter> f(PtexFilter::getFilter(r, opts));
 
-		f->eval(result, options.firstchannel, nchannels, faceid, u, v, dudx, dvdx, dudy, dvdy);
+    f->eval(result, options.firstchannel, nchannels, faceid, u, v, dudx, dvdx, dudy, dvdy);
 
-		for(int c = r->numChannels(); c < nchannels; c++)
-			result[c] = result[0];
+    for (int c = r->numChannels(); c < nchannels; c++)
+      result[c] = result[0];
 
-		return true;
-	}
+    return true;
+  }
 #endif
-	bool status = false;
-
-	if(filename.length() && filename[0] == '@') {
-		if(filename == u_at_bevel) {
-			/* Bevel shader hack. */
-			if(nchannels >= 3) {
-				PathState *state = sd->osl_path_state;
-				int num_samples = (int)s;
-				float radius = t;
-				float3 N = svm_bevel(kg, sd, state, radius, num_samples);
-				result[0] = N.x;
-				result[1] = N.y;
-				result[2] = N.z;
-				status = true;
-			}
-		}
-		else if(filename == u_at_ao) {
-			/* AO shader hack. */
-			PathState *state = sd->osl_path_state;
-			int num_samples = (int)s;
-			float radius = t;
-			float3 N = make_float3(dsdx, dtdx, dsdy);
-			int flags = 0;
-			if((int)dtdy) {
-				flags |= NODE_AO_INSIDE;
-			}
-			if((int)options.sblur) {
-				flags |= NODE_AO_ONLY_LOCAL;
-			}
-			if((int)options.tblur) {
-				flags |= NODE_AO_GLOBAL_RADIUS;
-			}
-			result[0] = svm_ao(kg, sd, N, state, radius, num_samples, flags);
-			status = true;
-		}
-		else if(filename[1] == 'l') {
-			/* IES light. */
-			int slot = atoi(filename.c_str() + 2);
-			result[0] = kernel_ies_interp(kg, slot, s, t);
-			status = true;
-		}
-		else {
-			/* Packed texture. */
-			int slot = atoi(filename.c_str() + 2);
-			float4 rgba = kernel_tex_image_interp(kg, slot, s, 1.0f - t);
-
-			result[0] = rgba[0];
-			if(nchannels > 1)
-				result[1] = rgba[1];
-			if(nchannels > 2)
-				result[2] = rgba[2];
-			if(nchannels > 3)
-				result[3] = rgba[3];
-			status = true;
-		}
-	}
-	else {
-		if(texture_handle != NULL) {
-			status = ts->texture(texture_handle,
-			                     texture_thread_info,
-			                     options,
-			                     s, t,
-			                     dsdx, dtdx,
-			                     dsdy, dtdy,
-			                     nchannels,
-			                     result,
-			                     dresultds, dresultdt);
-		}
-		else {
-			status = ts->texture(filename,
-			                     options,
-			                     s, t,
-			                     dsdx, dtdx,
-			                     dsdy, dtdy,
-			                     nchannels,
-			                     result,
-			                     dresultds, dresultdt);
-		}
-	}
-
-	if(!status) {
-		if(nchannels == 3 || nchannels == 4) {
-			result[0] = 1.0f;
-			result[1] = 0.0f;
-			result[2] = 1.0f;
-
-			if(nchannels == 4)
-				result[3] = 1.0f;
-		}
-		/* This might be slow, but prevents error messages leak and
-		 * other nasty stuff happening.
-		 */
-		string err = ts->geterror();
-		(void) err;
-	}
-
-	return status;
+  bool status = false;
+
+  if (filename.length() && filename[0] == '@') {
+    if (filename == u_at_bevel) {
+      /* Bevel shader hack. */
+      if (nchannels >= 3) {
+        PathState *state = sd->osl_path_state;
+        int num_samples = (int)s;
+        float radius = t;
+        float3 N = svm_bevel(kg, sd, state, radius, num_samples);
+        result[0] = N.x;
+        result[1] = N.y;
+        result[2] = N.z;
+        status = true;
+      }
+    }
+    else if (filename == u_at_ao) {
+      /* AO shader hack. */
+      PathState *state = sd->osl_path_state;
+      int num_samples = (int)s;
+      float radius = t;
+      float3 N = make_float3(dsdx, dtdx, dsdy);
+      int flags = 0;
+      if ((int)dtdy) {
+        flags |= NODE_AO_INSIDE;
+      }
+      if ((int)options.sblur) {
+        flags |= NODE_AO_ONLY_LOCAL;
+      }
+      if ((int)options.tblur) {
+        flags |= NODE_AO_GLOBAL_RADIUS;
+      }
+      result[0] = svm_ao(kg, sd, N, state, radius, num_samples, flags);
+      status = true;
+    }
+    else if (filename[1] == 'l') {
+      /* IES light. */
+      int slot = atoi(filename.c_str() + 2);
+      result[0] = kernel_ies_interp(kg, slot, s, t);
+      status = true;
+    }
+    else {
+      /* Packed texture. */
+      int slot = atoi(filename.c_str() + 2);
+      float4 rgba = kernel_tex_image_interp(kg, slot, s, 1.0f - t);
+
+      result[0] = rgba[0];
+      if (nchannels > 1)
+        result[1] = rgba[1];
+      if (nchannels > 2)
+        result[2] = rgba[2];
+      if (nchannels > 3)
+        result[3] = rgba[3];
+      status = true;
+    }
+  }
+  else {
+    if (texture_handle != NULL) {
+      status = ts->texture(texture_handle,
+                           texture_thread_info,
+                           options,
+                           s,
+                           t,
+                           dsdx,
+                           dtdx,
+                           dsdy,
+                           dtdy,
+                           nchannels,
+                           result,
+                           dresultds,
+                           dresultdt);
+    }
+    else {
+      status = ts->texture(filename,
+                           options,
+                           s,
+                           t,
+                           dsdx,
+                           dtdx,
+                           dsdy,
+                           dtdy,
+                           nchannels,
+                           result,
+                           dresultds,
+                           dresultdt);
+    }
+  }
+
+  if (!status) {
+    if (nchannels == 3 || nchannels == 4) {
+      result[0] = 1.0f;
+      result[1] = 0.0f;
+      result[2] = 1.0f;
+
+      if (nchannels == 4)
+        result[3] = 1.0f;
+    }
+    /* This might be slow, but prevents error messages leak and
+     * other nasty stuff happening.
+     */
+    string err = ts->geterror();
+    (void)err;
+  }
+
+  return status;
 }
 
 bool OSLRenderServices::texture3d(ustring filename,
@@ -1114,68 +1157,76 @@ bool OSLRenderServices::texture3d(ustring filename,
                                   float *dresultdr,
                                   ustring *errormessage)
 {
-	OSL::TextureSystem *ts = osl_ts;
-	ShaderData *sd = (ShaderData *)(sg->renderstate);
-	KernelGlobals *kg = sd->osl_globals;
-
-	if(texture_thread_info == NULL) {
-		OSLThreadData *tdata = kg->osl_tdata;
-		texture_thread_info = tdata->oiio_thread_info;
-	}
-
-	bool status;
-	if(filename.length() && filename[0] == '@') {
-		int slot = atoi(filename.c_str() + 1);
-		float4 rgba = kernel_tex_image_interp_3d(kg, slot, P.x, P.y, P.z, INTERPOLATION_NONE);
-
-		result[0] = rgba[0];
-		if(nchannels > 1)
-			result[1] = rgba[1];
-		if(nchannels > 2)
-			result[2] = rgba[2];
-		if(nchannels > 3)
-			result[3] = rgba[3];
-		status = true;
-	}
-	else {
-		if(texture_handle != NULL) {
-			status = ts->texture3d(texture_handle,
-			                       texture_thread_info,
-			                       options,
-			                       P,
-			                       dPdx, dPdy, dPdz,
-			                       nchannels,
-			                       result,
-			                       dresultds, dresultdt, dresultdr);
-		}
-		else {
-			status = ts->texture3d(filename,
-			                       options,
-			                       P,
-			                       dPdx, dPdy, dPdz,
-			                       nchannels,
-			                       result,
-			                       dresultds, dresultdt, dresultdr);
-		}
-	}
-
-	if(!status) {
-		if(nchannels == 3 || nchannels == 4) {
-			result[0] = 1.0f;
-			result[1] = 0.0f;
-			result[2] = 1.0f;
-
-			if(nchannels == 4)
-				result[3] = 1.0f;
-		}
-		/* This might be slow, but prevents error messages leak and
-		 * other nasty stuff happening.
-		 */
-		string err = ts->geterror();
-		(void) err;
-	}
-
-	return status;
+  OSL::TextureSystem *ts = osl_ts;
+  ShaderData *sd = (ShaderData *)(sg->renderstate);
+  KernelGlobals *kg = sd->osl_globals;
+
+  if (texture_thread_info == NULL) {
+    OSLThreadData *tdata = kg->osl_tdata;
+    texture_thread_info = tdata->oiio_thread_info;
+  }
+
+  bool status;
+  if (filename.length() && filename[0] == '@') {
+    int slot = atoi(filename.c_str() + 1);
+    float4 rgba = kernel_tex_image_interp_3d(kg, slot, P.x, P.y, P.z, INTERPOLATION_NONE);
+
+    result[0] = rgba[0];
+    if (nchannels > 1)
+      result[1] = rgba[1];
+    if (nchannels > 2)
+      result[2] = rgba[2];
+    if (nchannels > 3)
+      result[3] = rgba[3];
+    status = true;
+  }
+  else {
+    if (texture_handle != NULL) {
+      status = ts->texture3d(texture_handle,
+                             texture_thread_info,
+                             options,
+                             P,
+                             dPdx,
+                             dPdy,
+                             dPdz,
+                             nchannels,
+                             result,
+                             dresultds,
+                             dresultdt,
+                             dresultdr);
+    }
+    else {
+      status = ts->texture3d(filename,
+                             options,
+                             P,
+                             dPdx,
+                             dPdy,
+                             dPdz,
+                             nchannels,
+                             result,
+                             dresultds,
+                             dresultdt,
+                             dresultdr);
+    }
+  }
+
+  if (!status) {
+    if (nchannels == 3 || nchannels == 4) {
+      result[0] = 1.0f;
+      result[1] = 0.0f;
+      result[2] = 1.0f;
+
+      if (nchannels == 4)
+        result[3] = 1.0f;
+    }
+    /* This might be slow, but prevents error messages leak and
+     * other nasty stuff happening.
+     */
+    string err = ts->geterror();
+    (void)err;
+  }
+
+  return status;
 }
 
 bool OSLRenderServices::environment(ustring filename,
@@ -1192,35 +1243,34 @@ bool OSLRenderServices::environment(ustring filename,
                                     float *dresultdt,
                                     ustring *errormessage)
 {
-	OSL::TextureSystem *ts = osl_ts;
-
-	if (thread_info == NULL) {
-		ShaderData *sd = (ShaderData *)(sg->renderstate);
-		KernelGlobals *kg = sd->osl_globals;
-		OSLThreadData *tdata = kg->osl_tdata;
-		thread_info = tdata->oiio_thread_info;
-	}
-
-	if (th == NULL) {
-		th = ts->get_texture_handle(filename, thread_info);
-	}
-
-	bool status = ts->environment(th, thread_info,
-	                              options, R, dRdx, dRdy,
-	                              nchannels, result, dresultds, dresultdt);
-
-	if(!status) {
-		if(nchannels == 3 || nchannels == 4) {
-			result[0] = 1.0f;
-			result[1] = 0.0f;
-			result[2] = 1.0f;
-
-			if(nchannels == 4)
-				result[3] = 1.0f;
-		}
-	}
-
-	return status;
+  OSL::TextureSystem *ts = osl_ts;
+
+  if (thread_info == NULL) {
+    ShaderData *sd = (ShaderData *)(sg->renderstate);
+    KernelGlobals *kg = sd->osl_globals;
+    OSLThreadData *tdata = kg->osl_tdata;
+    thread_info = tdata->oiio_thread_info;
+  }
+
+  if (th == NULL) {
+    th = ts->get_texture_handle(filename, thread_info);
+  }
+
+  bool status = ts->environment(
+      th, thread_info, options, R, dRdx, dRdy, nchannels, result, dresultds, dresultdt);
+
+  if (!status) {
+    if (nchannels == 3 || nchannels == 4) {
+      result[0] = 1.0f;
+      result[1] = 0.0f;
+      result[2] = 1.0f;
+
+      if (nchannels == 4)
+        result[3] = 1.0f;
+    }
+  }
+
+  return status;
 }
 
 bool OSLRenderServices::get_texture_info(OSL::ShaderGlobals *sg,
@@ -1231,138 +1281,158 @@ bool OSLRenderServices::get_texture_info(OSL::ShaderGlobals *sg,
                                          TypeDesc datatype,
                                          void *data)
 {
-	OSL::TextureSystem *ts = osl_ts;
-	if(filename.length() && filename[0] == '@') {
-		/* Special builtin textures. */
-		return false;
-	}
-	else {
-		return ts->get_texture_info(filename, subimage, dataname, datatype, data);
-	}
+  OSL::TextureSystem *ts = osl_ts;
+  if (filename.length() && filename[0] == '@') {
+    /* Special builtin textures. */
+    return false;
+  }
+  else {
+    return ts->get_texture_info(filename, subimage, dataname, datatype, data);
+  }
 }
 
-int OSLRenderServices::pointcloud_search(OSL::ShaderGlobals *sg, ustring filename, const OSL::Vec3 &center,
-                                         float radius, int max_points, bool sort,
-                                         size_t *out_indices, float *out_distances, int derivs_offset)
+int OSLRenderServices::pointcloud_search(OSL::ShaderGlobals *sg,
+                                         ustring filename,
+                                         const OSL::Vec3 &center,
+                                         float radius,
+                                         int max_points,
+                                         bool sort,
+                                         size_t *out_indices,
+                                         float *out_distances,
+                                         int derivs_offset)
 {
-	return 0;
+  return 0;
 }
 
-int OSLRenderServices::pointcloud_get(OSL::ShaderGlobals *sg, ustring filename, size_t *indices, int count,
-                                      ustring attr_name, TypeDesc attr_type, void *out_data)
+int OSLRenderServices::pointcloud_get(OSL::ShaderGlobals *sg,
+                                      ustring filename,
+                                      size_t *indices,
+                                      int count,
+                                      ustring attr_name,
+                                      TypeDesc attr_type,
+                                      void *out_data)
 {
-	return 0;
+  return 0;
 }
 
 bool OSLRenderServices::pointcloud_write(OSL::ShaderGlobals *sg,
-                                         ustring filename, const OSL::Vec3 &pos,
-                                         int nattribs, const ustring *names,
+                                         ustring filename,
+                                         const OSL::Vec3 &pos,
+                                         int nattribs,
+                                         const ustring *names,
                                          const TypeDesc *types,
                                          const void **data)
 {
-	return false;
+  return false;
 }
 
-bool OSLRenderServices::trace(TraceOpt &options, OSL::ShaderGlobals *sg,
-	const OSL::Vec3 &P, const OSL::Vec3 &dPdx,
-	const OSL::Vec3 &dPdy, const OSL::Vec3 &R,
-	const OSL::Vec3 &dRdx, const OSL::Vec3 &dRdy)
+bool OSLRenderServices::trace(TraceOpt &options,
+                              OSL::ShaderGlobals *sg,
+                              const OSL::Vec3 &P,
+                              const OSL::Vec3 &dPdx,
+                              const OSL::Vec3 &dPdy,
+                              const OSL::Vec3 &R,
+                              const OSL::Vec3 &dRdx,
+                              const OSL::Vec3 &dRdy)
 {
-	/* todo: options.shader support, maybe options.traceset */
-	ShaderData *sd = (ShaderData *)(sg->renderstate);
-
-	/* setup ray */
-	Ray ray;
-
-	ray.P = TO_FLOAT3(P);
-	ray.D = TO_FLOAT3(R);
-	ray.t = (options.maxdist == 1.0e30f)? FLT_MAX: options.maxdist - options.mindist;
-	ray.time = sd->time;
-
-	if(options.mindist == 0.0f) {
-		/* avoid self-intersections */
-		if(ray.P == sd->P) {
-			bool transmit = (dot(sd->Ng, ray.D) < 0.0f);
-			ray.P = ray_offset(sd->P, (transmit)? -sd->Ng: sd->Ng);
-		}
-	}
-	else {
-		/* offset for minimum distance */
-		ray.P += options.mindist*ray.D;
-	}
-
-	/* ray differentials */
-	ray.dP.dx = TO_FLOAT3(dPdx);
-	ray.dP.dy = TO_FLOAT3(dPdy);
-	ray.dD.dx = TO_FLOAT3(dRdx);
-	ray.dD.dy = TO_FLOAT3(dRdy);
-
-	/* allocate trace data */
-	OSLTraceData *tracedata = (OSLTraceData*)sg->tracedata;
-	tracedata->ray = ray;
-	tracedata->setup = false;
-	tracedata->init = true;
-	tracedata->sd.osl_globals = sd->osl_globals;
-
-	/* Raytrace, leaving out shadow opaque to avoid early exit. */
-	uint visibility = PATH_RAY_ALL_VISIBILITY - PATH_RAY_SHADOW_OPAQUE;
-	return scene_intersect(sd->osl_globals, ray, visibility, &tracedata->isect, NULL, 0.0f, 0.0f);
+  /* todo: options.shader support, maybe options.traceset */
+  ShaderData *sd = (ShaderData *)(sg->renderstate);
+
+  /* setup ray */
+  Ray ray;
+
+  ray.P = TO_FLOAT3(P);
+  ray.D = TO_FLOAT3(R);
+  ray.t = (options.maxdist == 1.0e30f) ? FLT_MAX : options.maxdist - options.mindist;
+  ray.time = sd->time;
+
+  if (options.mindist == 0.0f) {
+    /* avoid self-intersections */
+    if (ray.P == sd->P) {
+      bool transmit = (dot(sd->Ng, ray.D) < 0.0f);
+      ray.P = ray_offset(sd->P, (transmit) ? -sd->Ng : sd->Ng);
+    }
+  }
+  else {
+    /* offset for minimum distance */
+    ray.P += options.mindist * ray.D;
+  }
+
+  /* ray differentials */
+  ray.dP.dx = TO_FLOAT3(dPdx);
+  ray.dP.dy = TO_FLOAT3(dPdy);
+  ray.dD.dx = TO_FLOAT3(dRdx);
+  ray.dD.dy = TO_FLOAT3(dRdy);
+
+  /* allocate trace data */
+  OSLTraceData *tracedata = (OSLTraceData *)sg->tracedata;
+  tracedata->ray = ray;
+  tracedata->setup = false;
+  tracedata->init = true;
+  tracedata->sd.osl_globals = sd->osl_globals;
+
+  /* Raytrace, leaving out shadow opaque to avoid early exit. */
+  uint visibility = PATH_RAY_ALL_VISIBILITY - PATH_RAY_SHADOW_OPAQUE;
+  return scene_intersect(sd->osl_globals, ray, visibility, &tracedata->isect, NULL, 0.0f, 0.0f);
 }
 
-
-bool OSLRenderServices::getmessage(OSL::ShaderGlobals *sg, ustring source, ustring name,
-	TypeDesc type, void *val, bool derivatives)
+bool OSLRenderServices::getmessage(OSL::ShaderGlobals *sg,
+                                   ustring source,
+                                   ustring name,
+                                   TypeDesc type,
+                                   void *val,
+                                   bool derivatives)
 {
-	OSLTraceData *tracedata = (OSLTraceData*)sg->tracedata;
-
-	if(source == u_trace && tracedata->init) {
-		if(name == u_hit) {
-			return set_attribute_int((tracedata->isect.prim != PRIM_NONE), type, derivatives, val);
-		}
-		else if(tracedata->isect.prim != PRIM_NONE) {
-			if(name == u_hitdist) {
-				float f[3] = {tracedata->isect.t, 0.0f, 0.0f};
-				return set_attribute_float(f, type, derivatives, val);
-			}
-			else {
-				ShaderData *sd = &tracedata->sd;
-				KernelGlobals *kg = sd->osl_globals;
-
-				if(!tracedata->setup) {
-					/* lazy shader data setup */
-					shader_setup_from_ray(kg, sd, &tracedata->isect, &tracedata->ray);
-					tracedata->setup = true;
-				}
-
-				if(name == u_N) {
-					return set_attribute_float3(sd->N, type, derivatives, val);
-				}
-				else if(name == u_Ng) {
-					return set_attribute_float3(sd->Ng, type, derivatives, val);
-				}
-				else if(name == u_P) {
-					float3 f[3] = {sd->P, sd->dP.dx, sd->dP.dy};
-					return set_attribute_float3(f, type, derivatives, val);
-				}
-				else if(name == u_I) {
-					float3 f[3] = {sd->I, sd->dI.dx, sd->dI.dy};
-					return set_attribute_float3(f, type, derivatives, val);
-				}
-				else if(name == u_u) {
-					float f[3] = {sd->u, sd->du.dx, sd->du.dy};
-					return set_attribute_float(f, type, derivatives, val);
-				}
-				else if(name == u_v) {
-					float f[3] = {sd->v, sd->dv.dx, sd->dv.dy};
-					return set_attribute_float(f, type, derivatives, val);
-				}
-
-				return get_attribute(sd, derivatives, u_empty, type, name, val);
-			}
-		}
-	}
-
-	return false;
+  OSLTraceData *tracedata = (OSLTraceData *)sg->tracedata;
+
+  if (source == u_trace && tracedata->init) {
+    if (name == u_hit) {
+      return set_attribute_int((tracedata->isect.prim != PRIM_NONE), type, derivatives, val);
+    }
+    else if (tracedata->isect.prim != PRIM_NONE) {
+      if (name == u_hitdist) {
+        float f[3] = {tracedata->isect.t, 0.0f, 0.0f};
+        return set_attribute_float(f, type, derivatives, val);
+      }
+      else {
+        ShaderData *sd = &tracedata->sd;
+        KernelGlobals *kg = sd->osl_globals;
+
+        if (!tracedata->setup) {
+          /* lazy shader data setup */
+          shader_setup_from_ray(kg, sd, &tracedata->isect, &tracedata->ray);
+          tracedata->setup = true;
+        }
+
+        if (name == u_N) {
+          return set_attribute_float3(sd->N, type, derivatives, val);
+        }
+        else if (name == u_Ng) {
+          return set_attribute_float3(sd->Ng, type, derivatives, val);
+        }
+        else if (name == u_P) {
+          float3 f[3] = {sd->P, sd->dP.dx, sd->dP.dy};
+          return set_attribute_float3(f, type, derivatives, val);
+        }
+        else if (name == u_I) {
+          float3 f[3] = {sd->I, sd->dI.dx, sd->dI.dy};
+          return set_attribute_float3(f, type, derivatives, val);
+        }
+        else if (name == u_u) {
+          float f[3] = {sd->u, sd->du.dx, sd->du.dy};
+          return set_attribute_float(f, type, derivatives, val);
+        }
+        else if (name == u_v) {
+          float f[3] = {sd->v, sd->dv.dx, sd->dv.dy};
+          return set_attribute_float(f, type, derivatives, val);
+        }
+
+        return get_attribute(sd, derivatives, u_empty, type, name, val);
+      }
+    }
+  }
+
+  return false;
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/osl/osl_services.h b/intern/cycles/kernel/osl/osl_services.h
index 3990a22aefd..2fad5833fc9 100644
--- a/intern/cycles/kernel/osl/osl_services.h
+++ b/intern/cycles/kernel/osl/osl_services.h
@@ -40,177 +40,229 @@ class Shader;
 struct ShaderData;
 struct float3;
 struct KernelGlobals;
-class OSLRenderServices : public OSL::RendererServices
-{
-public:
-	OSLRenderServices();
-	~OSLRenderServices();
-
-	void thread_init(KernelGlobals *kernel_globals, OSL::TextureSystem *ts);
-
-	bool get_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, OSL::TransformationPtr xform, float time) override;
-	bool get_inverse_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, OSL::TransformationPtr xform, float time) override;
-
-	bool get_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, ustring from, float time) override;
-	bool get_inverse_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, ustring to, float time) override;
-
-	bool get_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, OSL::TransformationPtr xform) override;
-	bool get_inverse_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, OSL::TransformationPtr xform) override;
-
-	bool get_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, ustring from) override;
-	bool get_inverse_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, ustring from) override;
-
-	bool get_array_attribute(OSL::ShaderGlobals *sg, bool derivatives,
-	                         ustring object, TypeDesc type, ustring name,
-	                         int index, void *val) override;
-	bool get_attribute(OSL::ShaderGlobals *sg, bool derivatives, ustring object,
-	                   TypeDesc type, ustring name, void *val) override;
-	bool get_attribute(ShaderData *sd, bool derivatives, ustring object_name,
-	                   TypeDesc type, ustring name, void *val);
-
-	bool get_userdata(bool derivatives, ustring name, TypeDesc type,
-	                  OSL::ShaderGlobals *sg, void *val) override;
-
-	int pointcloud_search(OSL::ShaderGlobals *sg, ustring filename, const OSL::Vec3 &center,
-	                      float radius, int max_points, bool sort, size_t *out_indices,
-	                      float *out_distances, int derivs_offset) override;
-
-	int pointcloud_get(OSL::ShaderGlobals *sg, ustring filename, size_t *indices, int count,
-	                   ustring attr_name, TypeDesc attr_type, void *out_data) override;
-
-	bool pointcloud_write(OSL::ShaderGlobals *sg,
-	                      ustring filename, const OSL::Vec3 &pos,
-	                      int nattribs, const ustring *names,
-	                      const TypeDesc *types,
-	                      const void **data) override;
-
-	bool trace(TraceOpt &options, OSL::ShaderGlobals *sg,
-	           const OSL::Vec3 &P, const OSL::Vec3 &dPdx,
-	           const OSL::Vec3 &dPdy, const OSL::Vec3 &R,
-	           const OSL::Vec3 &dRdx, const OSL::Vec3 &dRdy) override;
-
-	bool getmessage(OSL::ShaderGlobals *sg, ustring source, ustring name,
-	                TypeDesc type, void *val, bool derivatives) override;
-
-	TextureSystem::TextureHandle *get_texture_handle(ustring filename) override;
-
-	bool good(TextureSystem::TextureHandle *texture_handle) override;
-
-	bool texture(ustring filename,
-	             TextureSystem::TextureHandle *texture_handle,
-	             TexturePerthread *texture_thread_info,
-	             TextureOpt &options,
-	             OSL::ShaderGlobals *sg,
-	             float s, float t,
-	             float dsdx, float dtdx, float dsdy, float dtdy,
-	             int nchannels,
-	             float *result,
-	             float *dresultds,
-	             float *dresultdt,
-	             ustring *errormessage) override;
-
-	bool texture3d(ustring filename,
-	               TextureHandle *texture_handle,
-	               TexturePerthread *texture_thread_info,
-	               TextureOpt &options,
-	               OSL::ShaderGlobals *sg,
-	               const OSL::Vec3 &P,
-	               const OSL::Vec3 &dPdx,
-	               const OSL::Vec3 &dPdy,
-	               const OSL::Vec3 &dPdz,
-	               int nchannels,
-	               float *result,
-	               float *dresultds,
-	               float *dresultdt,
-	               float *dresultdr,
-	               ustring *errormessage) override;
-
-	bool environment(ustring filename,
-	                 TextureHandle *texture_handle,
-	                 TexturePerthread *texture_thread_info,
-	                 TextureOpt &options,
-	                 OSL::ShaderGlobals *sg,
-	                 const OSL::Vec3 &R,
-	                 const OSL::Vec3 &dRdx,
-	                 const OSL::Vec3 &dRdy,
-	                 int nchannels,
-	                 float *result,
-	                 float *dresultds,
-	                 float *dresultdt,
-	                 ustring *errormessage) override;
-
-	bool get_texture_info(OSL::ShaderGlobals *sg,
-	                      ustring filename,
-	                      TextureHandle *texture_handle,
-	                      int subimage,
-	                      ustring dataname,
-	                      TypeDesc datatype,
-	                      void *data) override;
-
-	static bool get_background_attribute(KernelGlobals *kg, ShaderData *sd, ustring name,
-	                                     TypeDesc type, bool derivatives, void *val);
-	static bool get_object_standard_attribute(KernelGlobals *kg, ShaderData *sd, ustring name,
-	                                          TypeDesc type, bool derivatives, void *val);
-
-	static ustring u_distance;
-	static ustring u_index;
-	static ustring u_world;
-	static ustring u_camera;
-	static ustring u_screen;
-	static ustring u_raster;
-	static ustring u_ndc;
-	static ustring u_object_location;
-	static ustring u_object_index;
-	static ustring u_geom_dupli_generated;
-	static ustring u_geom_dupli_uv;
-	static ustring u_material_index;
-	static ustring u_object_random;
-	static ustring u_particle_index;
-	static ustring u_particle_random;
-	static ustring u_particle_age;
-	static ustring u_particle_lifetime;
-	static ustring u_particle_location;
-	static ustring u_particle_rotation;
-	static ustring u_particle_size;
-	static ustring u_particle_velocity;
-	static ustring u_particle_angular_velocity;
-	static ustring u_geom_numpolyvertices;
-	static ustring u_geom_trianglevertices;
-	static ustring u_geom_polyvertices;
-	static ustring u_geom_name;
-	static ustring u_geom_undisplaced;
-	static ustring u_is_smooth;
-	static ustring u_is_curve;
-	static ustring u_curve_thickness;
-	static ustring u_curve_tangent_normal;
-	static ustring u_curve_random;
-	static ustring u_path_ray_length;
-	static ustring u_path_ray_depth;
-	static ustring u_path_diffuse_depth;
-	static ustring u_path_glossy_depth;
-	static ustring u_path_transparent_depth;
-	static ustring u_path_transmission_depth;
-	static ustring u_trace;
-	static ustring u_hit;
-	static ustring u_hitdist;
-	static ustring u_N;
-	static ustring u_Ng;
-	static ustring u_P;
-	static ustring u_I;
-	static ustring u_u;
-	static ustring u_v;
-	static ustring u_empty;
-	static ustring u_at_bevel;
-	static ustring u_at_ao;
-
-private:
-	KernelGlobals *kernel_globals;
-	OSL::TextureSystem *osl_ts;
+class OSLRenderServices : public OSL::RendererServices {
+ public:
+  OSLRenderServices();
+  ~OSLRenderServices();
+
+  void thread_init(KernelGlobals *kernel_globals, OSL::TextureSystem *ts);
+
+  bool get_matrix(OSL::ShaderGlobals *sg,
+                  OSL::Matrix44 &result,
+                  OSL::TransformationPtr xform,
+                  float time) override;
+  bool get_inverse_matrix(OSL::ShaderGlobals *sg,
+                          OSL::Matrix44 &result,
+                          OSL::TransformationPtr xform,
+                          float time) override;
+
+  bool get_matrix(OSL::ShaderGlobals *sg,
+                  OSL::Matrix44 &result,
+                  ustring from,
+                  float time) override;
+  bool get_inverse_matrix(OSL::ShaderGlobals *sg,
+                          OSL::Matrix44 &result,
+                          ustring to,
+                          float time) override;
+
+  bool get_matrix(OSL::ShaderGlobals *sg,
+                  OSL::Matrix44 &result,
+                  OSL::TransformationPtr xform) override;
+  bool get_inverse_matrix(OSL::ShaderGlobals *sg,
+                          OSL::Matrix44 &result,
+                          OSL::TransformationPtr xform) override;
+
+  bool get_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, ustring from) override;
+  bool get_inverse_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, ustring from) override;
+
+  bool get_array_attribute(OSL::ShaderGlobals *sg,
+                           bool derivatives,
+                           ustring object,
+                           TypeDesc type,
+                           ustring name,
+                           int index,
+                           void *val) override;
+  bool get_attribute(OSL::ShaderGlobals *sg,
+                     bool derivatives,
+                     ustring object,
+                     TypeDesc type,
+                     ustring name,
+                     void *val) override;
+  bool get_attribute(ShaderData *sd,
+                     bool derivatives,
+                     ustring object_name,
+                     TypeDesc type,
+                     ustring name,
+                     void *val);
+
+  bool get_userdata(
+      bool derivatives, ustring name, TypeDesc type, OSL::ShaderGlobals *sg, void *val) override;
+
+  int pointcloud_search(OSL::ShaderGlobals *sg,
+                        ustring filename,
+                        const OSL::Vec3 &center,
+                        float radius,
+                        int max_points,
+                        bool sort,
+                        size_t *out_indices,
+                        float *out_distances,
+                        int derivs_offset) override;
+
+  int pointcloud_get(OSL::ShaderGlobals *sg,
+                     ustring filename,
+                     size_t *indices,
+                     int count,
+                     ustring attr_name,
+                     TypeDesc attr_type,
+                     void *out_data) override;
+
+  bool pointcloud_write(OSL::ShaderGlobals *sg,
+                        ustring filename,
+                        const OSL::Vec3 &pos,
+                        int nattribs,
+                        const ustring *names,
+                        const TypeDesc *types,
+                        const void **data) override;
+
+  bool trace(TraceOpt &options,
+             OSL::ShaderGlobals *sg,
+             const OSL::Vec3 &P,
+             const OSL::Vec3 &dPdx,
+             const OSL::Vec3 &dPdy,
+             const OSL::Vec3 &R,
+             const OSL::Vec3 &dRdx,
+             const OSL::Vec3 &dRdy) override;
+
+  bool getmessage(OSL::ShaderGlobals *sg,
+                  ustring source,
+                  ustring name,
+                  TypeDesc type,
+                  void *val,
+                  bool derivatives) override;
+
+  TextureSystem::TextureHandle *get_texture_handle(ustring filename) override;
+
+  bool good(TextureSystem::TextureHandle *texture_handle) override;
+
+  bool texture(ustring filename,
+               TextureSystem::TextureHandle *texture_handle,
+               TexturePerthread *texture_thread_info,
+               TextureOpt &options,
+               OSL::ShaderGlobals *sg,
+               float s,
+               float t,
+               float dsdx,
+               float dtdx,
+               float dsdy,
+               float dtdy,
+               int nchannels,
+               float *result,
+               float *dresultds,
+               float *dresultdt,
+               ustring *errormessage) override;
+
+  bool texture3d(ustring filename,
+                 TextureHandle *texture_handle,
+                 TexturePerthread *texture_thread_info,
+                 TextureOpt &options,
+                 OSL::ShaderGlobals *sg,
+                 const OSL::Vec3 &P,
+                 const OSL::Vec3 &dPdx,
+                 const OSL::Vec3 &dPdy,
+                 const OSL::Vec3 &dPdz,
+                 int nchannels,
+                 float *result,
+                 float *dresultds,
+                 float *dresultdt,
+                 float *dresultdr,
+                 ustring *errormessage) override;
+
+  bool environment(ustring filename,
+                   TextureHandle *texture_handle,
+                   TexturePerthread *texture_thread_info,
+                   TextureOpt &options,
+                   OSL::ShaderGlobals *sg,
+                   const OSL::Vec3 &R,
+                   const OSL::Vec3 &dRdx,
+                   const OSL::Vec3 &dRdy,
+                   int nchannels,
+                   float *result,
+                   float *dresultds,
+                   float *dresultdt,
+                   ustring *errormessage) override;
+
+  bool get_texture_info(OSL::ShaderGlobals *sg,
+                        ustring filename,
+                        TextureHandle *texture_handle,
+                        int subimage,
+                        ustring dataname,
+                        TypeDesc datatype,
+                        void *data) override;
+
+  static bool get_background_attribute(
+      KernelGlobals *kg, ShaderData *sd, ustring name, TypeDesc type, bool derivatives, void *val);
+  static bool get_object_standard_attribute(
+      KernelGlobals *kg, ShaderData *sd, ustring name, TypeDesc type, bool derivatives, void *val);
+
+  static ustring u_distance;
+  static ustring u_index;
+  static ustring u_world;
+  static ustring u_camera;
+  static ustring u_screen;
+  static ustring u_raster;
+  static ustring u_ndc;
+  static ustring u_object_location;
+  static ustring u_object_index;
+  static ustring u_geom_dupli_generated;
+  static ustring u_geom_dupli_uv;
+  static ustring u_material_index;
+  static ustring u_object_random;
+  static ustring u_particle_index;
+  static ustring u_particle_random;
+  static ustring u_particle_age;
+  static ustring u_particle_lifetime;
+  static ustring u_particle_location;
+  static ustring u_particle_rotation;
+  static ustring u_particle_size;
+  static ustring u_particle_velocity;
+  static ustring u_particle_angular_velocity;
+  static ustring u_geom_numpolyvertices;
+  static ustring u_geom_trianglevertices;
+  static ustring u_geom_polyvertices;
+  static ustring u_geom_name;
+  static ustring u_geom_undisplaced;
+  static ustring u_is_smooth;
+  static ustring u_is_curve;
+  static ustring u_curve_thickness;
+  static ustring u_curve_tangent_normal;
+  static ustring u_curve_random;
+  static ustring u_path_ray_length;
+  static ustring u_path_ray_depth;
+  static ustring u_path_diffuse_depth;
+  static ustring u_path_glossy_depth;
+  static ustring u_path_transparent_depth;
+  static ustring u_path_transmission_depth;
+  static ustring u_trace;
+  static ustring u_hit;
+  static ustring u_hitdist;
+  static ustring u_N;
+  static ustring u_Ng;
+  static ustring u_P;
+  static ustring u_I;
+  static ustring u_u;
+  static ustring u_v;
+  static ustring u_empty;
+  static ustring u_at_bevel;
+  static ustring u_at_ao;
+
+ private:
+  KernelGlobals *kernel_globals;
+  OSL::TextureSystem *osl_ts;
 #ifdef WITH_PTEX
-	PtexCache *ptex_cache;
+  PtexCache *ptex_cache;
 #endif
 };
 
 CCL_NAMESPACE_END
 
-#endif  /* __OSL_SERVICES_H__  */
+#endif /* __OSL_SERVICES_H__  */
diff --git a/intern/cycles/kernel/osl/osl_shader.cpp b/intern/cycles/kernel/osl/osl_shader.cpp
index a89bb3fd1a3..3d9c579c9ff 100644
--- a/intern/cycles/kernel/osl/osl_shader.cpp
+++ b/intern/cycles/kernel/osl/osl_shader.cpp
@@ -33,103 +33,104 @@
 
 #include "render/attribute.h"
 
-
 CCL_NAMESPACE_BEGIN
 
 /* Threads */
 
-void OSLShader::thread_init(KernelGlobals *kg, KernelGlobals *kernel_globals, OSLGlobals *osl_globals)
+void OSLShader::thread_init(KernelGlobals *kg,
+                            KernelGlobals *kernel_globals,
+                            OSLGlobals *osl_globals)
 {
-	/* no osl used? */
-	if(!osl_globals->use) {
-		kg->osl = NULL;
-		return;
-	}
+  /* no osl used? */
+  if (!osl_globals->use) {
+    kg->osl = NULL;
+    return;
+  }
 
-	/* per thread kernel data init*/
-	kg->osl = osl_globals;
-	kg->osl->services->thread_init(kernel_globals, osl_globals->ts);
+  /* per thread kernel data init*/
+  kg->osl = osl_globals;
+  kg->osl->services->thread_init(kernel_globals, osl_globals->ts);
 
-	OSL::ShadingSystem *ss = kg->osl->ss;
-	OSLThreadData *tdata = new OSLThreadData();
+  OSL::ShadingSystem *ss = kg->osl->ss;
+  OSLThreadData *tdata = new OSLThreadData();
 
-	memset((void *)&tdata->globals, 0, sizeof(OSL::ShaderGlobals));
-	tdata->globals.tracedata = &tdata->tracedata;
-	tdata->globals.flipHandedness = false;
-	tdata->osl_thread_info = ss->create_thread_info();
-	tdata->context = ss->get_context(tdata->osl_thread_info);
+  memset((void *)&tdata->globals, 0, sizeof(OSL::ShaderGlobals));
+  tdata->globals.tracedata = &tdata->tracedata;
+  tdata->globals.flipHandedness = false;
+  tdata->osl_thread_info = ss->create_thread_info();
+  tdata->context = ss->get_context(tdata->osl_thread_info);
 
-	tdata->oiio_thread_info = osl_globals->ts->get_perthread_info();
+  tdata->oiio_thread_info = osl_globals->ts->get_perthread_info();
 
-	kg->osl_ss = (OSLShadingSystem*)ss;
-	kg->osl_tdata = tdata;
+  kg->osl_ss = (OSLShadingSystem *)ss;
+  kg->osl_tdata = tdata;
 }
 
 void OSLShader::thread_free(KernelGlobals *kg)
 {
-	if(!kg->osl)
-		return;
+  if (!kg->osl)
+    return;
 
-	OSL::ShadingSystem *ss = (OSL::ShadingSystem*)kg->osl_ss;
-	OSLThreadData *tdata = kg->osl_tdata;
-	ss->release_context(tdata->context);
+  OSL::ShadingSystem *ss = (OSL::ShadingSystem *)kg->osl_ss;
+  OSLThreadData *tdata = kg->osl_tdata;
+  ss->release_context(tdata->context);
 
-	ss->destroy_thread_info(tdata->osl_thread_info);
+  ss->destroy_thread_info(tdata->osl_thread_info);
 
-	delete tdata;
+  delete tdata;
 
-	kg->osl = NULL;
-	kg->osl_ss = NULL;
-	kg->osl_tdata = NULL;
+  kg->osl = NULL;
+  kg->osl_ss = NULL;
+  kg->osl_tdata = NULL;
 }
 
 /* Globals */
 
-static void shaderdata_to_shaderglobals(KernelGlobals *kg, ShaderData *sd, PathState *state,
-                                        int path_flag, OSLThreadData *tdata)
+static void shaderdata_to_shaderglobals(
+    KernelGlobals *kg, ShaderData *sd, PathState *state, int path_flag, OSLThreadData *tdata)
 {
-	OSL::ShaderGlobals *globals = &tdata->globals;
-
-	/* copy from shader data to shader globals */
-	globals->P = TO_VEC3(sd->P);
-	globals->dPdx = TO_VEC3(sd->dP.dx);
-	globals->dPdy = TO_VEC3(sd->dP.dy);
-	globals->I = TO_VEC3(sd->I);
-	globals->dIdx = TO_VEC3(sd->dI.dx);
-	globals->dIdy = TO_VEC3(sd->dI.dy);
-	globals->N = TO_VEC3(sd->N);
-	globals->Ng = TO_VEC3(sd->Ng);
-	globals->u = sd->u;
-	globals->dudx = sd->du.dx;
-	globals->dudy = sd->du.dy;
-	globals->v = sd->v;
-	globals->dvdx = sd->dv.dx;
-	globals->dvdy = sd->dv.dy;
-	globals->dPdu = TO_VEC3(sd->dPdu);
-	globals->dPdv = TO_VEC3(sd->dPdv);
-	globals->surfacearea = (sd->object == OBJECT_NONE) ? 1.0f : object_surface_area(kg, sd->object);
-	globals->time = sd->time;
-
-	/* booleans */
-	globals->raytype = path_flag;
-	globals->backfacing = (sd->flag & SD_BACKFACING);
-
-	/* shader data to be used in services callbacks */
-	globals->renderstate = sd;
-
-	/* hacky, we leave it to services to fetch actual object matrix */
-	globals->shader2common = sd;
-	globals->object2common = sd;
-
-	/* must be set to NULL before execute */
-	globals->Ci = NULL;
-
-	/* clear trace data */
-	tdata->tracedata.init = false;
-
-	/* used by renderservices */
-	sd->osl_globals = kg;
-	sd->osl_path_state = state;
+  OSL::ShaderGlobals *globals = &tdata->globals;
+
+  /* copy from shader data to shader globals */
+  globals->P = TO_VEC3(sd->P);
+  globals->dPdx = TO_VEC3(sd->dP.dx);
+  globals->dPdy = TO_VEC3(sd->dP.dy);
+  globals->I = TO_VEC3(sd->I);
+  globals->dIdx = TO_VEC3(sd->dI.dx);
+  globals->dIdy = TO_VEC3(sd->dI.dy);
+  globals->N = TO_VEC3(sd->N);
+  globals->Ng = TO_VEC3(sd->Ng);
+  globals->u = sd->u;
+  globals->dudx = sd->du.dx;
+  globals->dudy = sd->du.dy;
+  globals->v = sd->v;
+  globals->dvdx = sd->dv.dx;
+  globals->dvdy = sd->dv.dy;
+  globals->dPdu = TO_VEC3(sd->dPdu);
+  globals->dPdv = TO_VEC3(sd->dPdv);
+  globals->surfacearea = (sd->object == OBJECT_NONE) ? 1.0f : object_surface_area(kg, sd->object);
+  globals->time = sd->time;
+
+  /* booleans */
+  globals->raytype = path_flag;
+  globals->backfacing = (sd->flag & SD_BACKFACING);
+
+  /* shader data to be used in services callbacks */
+  globals->renderstate = sd;
+
+  /* hacky, we leave it to services to fetch actual object matrix */
+  globals->shader2common = sd;
+  globals->object2common = sd;
+
+  /* must be set to NULL before execute */
+  globals->Ci = NULL;
+
+  /* clear trace data */
+  tdata->tracedata.init = false;
+
+  /* used by renderservices */
+  sd->osl_globals = kg;
+  sd->osl_path_state = state;
 }
 
 /* Surface */
@@ -139,97 +140,101 @@ static void flatten_surface_closure_tree(ShaderData *sd,
                                          const OSL::ClosureColor *closure,
                                          float3 weight = make_float3(1.0f, 1.0f, 1.0f))
 {
-	/* OSL gives us a closure tree, we flatten it into arrays per
-	 * closure type, for evaluation, sampling, etc later on. */
-
-	switch(closure->id) {
-		case OSL::ClosureColor::MUL: {
-			OSL::ClosureMul *mul = (OSL::ClosureMul *)closure;
-			flatten_surface_closure_tree(sd, path_flag, mul->closure, TO_FLOAT3(mul->weight) * weight);
-			break;
-		}
-		case OSL::ClosureColor::ADD: {
-			OSL::ClosureAdd *add = (OSL::ClosureAdd *)closure;
-			flatten_surface_closure_tree(sd, path_flag, add->closureA, weight);
-			flatten_surface_closure_tree(sd, path_flag, add->closureB, weight);
-			break;
-		}
-		default: {
-			OSL::ClosureComponent *comp = (OSL::ClosureComponent *)closure;
-			CClosurePrimitive *prim = (CClosurePrimitive *)comp->data();
-
-			if(prim) {
+  /* OSL gives us a closure tree, we flatten it into arrays per
+   * closure type, for evaluation, sampling, etc later on. */
+
+  switch (closure->id) {
+    case OSL::ClosureColor::MUL: {
+      OSL::ClosureMul *mul = (OSL::ClosureMul *)closure;
+      flatten_surface_closure_tree(sd, path_flag, mul->closure, TO_FLOAT3(mul->weight) * weight);
+      break;
+    }
+    case OSL::ClosureColor::ADD: {
+      OSL::ClosureAdd *add = (OSL::ClosureAdd *)closure;
+      flatten_surface_closure_tree(sd, path_flag, add->closureA, weight);
+      flatten_surface_closure_tree(sd, path_flag, add->closureB, weight);
+      break;
+    }
+    default: {
+      OSL::ClosureComponent *comp = (OSL::ClosureComponent *)closure;
+      CClosurePrimitive *prim = (CClosurePrimitive *)comp->data();
+
+      if (prim) {
 #ifdef OSL_SUPPORTS_WEIGHTED_CLOSURE_COMPONENTS
-				weight = weight*TO_FLOAT3(comp->w);
+        weight = weight * TO_FLOAT3(comp->w);
 #endif
-				prim->setup(sd, path_flag, weight);
-			}
-			break;
-		}
-	}
+        prim->setup(sd, path_flag, weight);
+      }
+      break;
+    }
+  }
 }
 
 void OSLShader::eval_surface(KernelGlobals *kg, ShaderData *sd, PathState *state, int path_flag)
 {
-	/* setup shader globals from shader data */
-	OSLThreadData *tdata = kg->osl_tdata;
-	shaderdata_to_shaderglobals(kg, sd, state, path_flag, tdata);
-
-	/* execute shader for this point */
-	OSL::ShadingSystem *ss = (OSL::ShadingSystem*)kg->osl_ss;
-	OSL::ShaderGlobals *globals = &tdata->globals;
-	OSL::ShadingContext *octx = tdata->context;
-	int shader = sd->shader & SHADER_MASK;
-
-	/* automatic bump shader */
-	if(kg->osl->bump_state[shader]) {
-		/* save state */
-		float3 P = sd->P;
-		float3 dPdx = sd->dP.dx;
-		float3 dPdy = sd->dP.dy;
-
-		/* set state as if undisplaced */
-		if(sd->flag & SD_HAS_DISPLACEMENT) {
-			float data[9];
-			bool found = kg->osl->services->get_attribute(sd, true, OSLRenderServices::u_empty, TypeDesc::TypeVector,
-			                                              OSLRenderServices::u_geom_undisplaced, data);
-			(void) found;
-			assert(found);
-
-			memcpy(&sd->P, data, sizeof(float)*3);
-			memcpy(&sd->dP.dx, data+3, sizeof(float)*3);
-			memcpy(&sd->dP.dy, data+6, sizeof(float)*3);
-
-			object_position_transform(kg, sd, &sd->P);
-			object_dir_transform(kg, sd, &sd->dP.dx);
-			object_dir_transform(kg, sd, &sd->dP.dy);
-
-			globals->P = TO_VEC3(sd->P);
-			globals->dPdx = TO_VEC3(sd->dP.dx);
-			globals->dPdy = TO_VEC3(sd->dP.dy);
-		}
-
-		/* execute bump shader */
-		ss->execute(octx, *(kg->osl->bump_state[shader]), *globals);
-
-		/* reset state */
-		sd->P = P;
-		sd->dP.dx = dPdx;
-		sd->dP.dy = dPdy;
-
-		globals->P = TO_VEC3(P);
-		globals->dPdx = TO_VEC3(dPdx);
-		globals->dPdy = TO_VEC3(dPdy);
-	}
-
-	/* surface shader */
-	if(kg->osl->surface_state[shader]) {
-		ss->execute(octx, *(kg->osl->surface_state[shader]), *globals);
-	}
-
-	/* flatten closure tree */
-	if(globals->Ci)
-		flatten_surface_closure_tree(sd, path_flag, globals->Ci);
+  /* setup shader globals from shader data */
+  OSLThreadData *tdata = kg->osl_tdata;
+  shaderdata_to_shaderglobals(kg, sd, state, path_flag, tdata);
+
+  /* execute shader for this point */
+  OSL::ShadingSystem *ss = (OSL::ShadingSystem *)kg->osl_ss;
+  OSL::ShaderGlobals *globals = &tdata->globals;
+  OSL::ShadingContext *octx = tdata->context;
+  int shader = sd->shader & SHADER_MASK;
+
+  /* automatic bump shader */
+  if (kg->osl->bump_state[shader]) {
+    /* save state */
+    float3 P = sd->P;
+    float3 dPdx = sd->dP.dx;
+    float3 dPdy = sd->dP.dy;
+
+    /* set state as if undisplaced */
+    if (sd->flag & SD_HAS_DISPLACEMENT) {
+      float data[9];
+      bool found = kg->osl->services->get_attribute(sd,
+                                                    true,
+                                                    OSLRenderServices::u_empty,
+                                                    TypeDesc::TypeVector,
+                                                    OSLRenderServices::u_geom_undisplaced,
+                                                    data);
+      (void)found;
+      assert(found);
+
+      memcpy(&sd->P, data, sizeof(float) * 3);
+      memcpy(&sd->dP.dx, data + 3, sizeof(float) * 3);
+      memcpy(&sd->dP.dy, data + 6, sizeof(float) * 3);
+
+      object_position_transform(kg, sd, &sd->P);
+      object_dir_transform(kg, sd, &sd->dP.dx);
+      object_dir_transform(kg, sd, &sd->dP.dy);
+
+      globals->P = TO_VEC3(sd->P);
+      globals->dPdx = TO_VEC3(sd->dP.dx);
+      globals->dPdy = TO_VEC3(sd->dP.dy);
+    }
+
+    /* execute bump shader */
+    ss->execute(octx, *(kg->osl->bump_state[shader]), *globals);
+
+    /* reset state */
+    sd->P = P;
+    sd->dP.dx = dPdx;
+    sd->dP.dy = dPdy;
+
+    globals->P = TO_VEC3(P);
+    globals->dPdx = TO_VEC3(dPdx);
+    globals->dPdy = TO_VEC3(dPdy);
+  }
+
+  /* surface shader */
+  if (kg->osl->surface_state[shader]) {
+    ss->execute(octx, *(kg->osl->surface_state[shader]), *globals);
+  }
+
+  /* flatten closure tree */
+  if (globals->Ci)
+    flatten_surface_closure_tree(sd, path_flag, globals->Ci);
 }
 
 /* Background */
@@ -238,56 +243,56 @@ static void flatten_background_closure_tree(ShaderData *sd,
                                             const OSL::ClosureColor *closure,
                                             float3 weight = make_float3(1.0f, 1.0f, 1.0f))
 {
-	/* OSL gives us a closure tree, if we are shading for background there
-	 * is only one supported closure type at the moment, which has no evaluation
-	 * functions, so we just sum the weights */
-
-	switch(closure->id) {
-		case OSL::ClosureColor::MUL: {
-			OSL::ClosureMul *mul = (OSL::ClosureMul *)closure;
-			flatten_background_closure_tree(sd, mul->closure, weight * TO_FLOAT3(mul->weight));
-			break;
-		}
-		case OSL::ClosureColor::ADD: {
-			OSL::ClosureAdd *add = (OSL::ClosureAdd *)closure;
-
-			flatten_background_closure_tree(sd, add->closureA, weight);
-			flatten_background_closure_tree(sd, add->closureB, weight);
-			break;
-		}
-		default: {
-			OSL::ClosureComponent *comp = (OSL::ClosureComponent *)closure;
-			CClosurePrimitive *prim = (CClosurePrimitive *)comp->data();
-
-			if(prim) {
+  /* OSL gives us a closure tree, if we are shading for background there
+   * is only one supported closure type at the moment, which has no evaluation
+   * functions, so we just sum the weights */
+
+  switch (closure->id) {
+    case OSL::ClosureColor::MUL: {
+      OSL::ClosureMul *mul = (OSL::ClosureMul *)closure;
+      flatten_background_closure_tree(sd, mul->closure, weight * TO_FLOAT3(mul->weight));
+      break;
+    }
+    case OSL::ClosureColor::ADD: {
+      OSL::ClosureAdd *add = (OSL::ClosureAdd *)closure;
+
+      flatten_background_closure_tree(sd, add->closureA, weight);
+      flatten_background_closure_tree(sd, add->closureB, weight);
+      break;
+    }
+    default: {
+      OSL::ClosureComponent *comp = (OSL::ClosureComponent *)closure;
+      CClosurePrimitive *prim = (CClosurePrimitive *)comp->data();
+
+      if (prim) {
 #ifdef OSL_SUPPORTS_WEIGHTED_CLOSURE_COMPONENTS
-				weight = weight*TO_FLOAT3(comp->w);
+        weight = weight * TO_FLOAT3(comp->w);
 #endif
-				prim->setup(sd, 0, weight);
-			}
-			break;
-		}
-	}
+        prim->setup(sd, 0, weight);
+      }
+      break;
+    }
+  }
 }
 
 void OSLShader::eval_background(KernelGlobals *kg, ShaderData *sd, PathState *state, int path_flag)
 {
-	/* setup shader globals from shader data */
-	OSLThreadData *tdata = kg->osl_tdata;
-	shaderdata_to_shaderglobals(kg, sd, state, path_flag, tdata);
-
-	/* execute shader for this point */
-	OSL::ShadingSystem *ss = (OSL::ShadingSystem*)kg->osl_ss;
-	OSL::ShaderGlobals *globals = &tdata->globals;
-	OSL::ShadingContext *octx = tdata->context;
-
-	if(kg->osl->background_state) {
-		ss->execute(octx, *(kg->osl->background_state), *globals);
-	}
-
-	/* return background color immediately */
-	if(globals->Ci)
-		flatten_background_closure_tree(sd, globals->Ci);
+  /* setup shader globals from shader data */
+  OSLThreadData *tdata = kg->osl_tdata;
+  shaderdata_to_shaderglobals(kg, sd, state, path_flag, tdata);
+
+  /* execute shader for this point */
+  OSL::ShadingSystem *ss = (OSL::ShadingSystem *)kg->osl_ss;
+  OSL::ShaderGlobals *globals = &tdata->globals;
+  OSL::ShadingContext *octx = tdata->context;
+
+  if (kg->osl->background_state) {
+    ss->execute(octx, *(kg->osl->background_state), *globals);
+  }
+
+  /* return background color immediately */
+  if (globals->Ci)
+    flatten_background_closure_tree(sd, globals->Ci);
 }
 
 /* Volume */
@@ -296,112 +301,117 @@ static void flatten_volume_closure_tree(ShaderData *sd,
                                         const OSL::ClosureColor *closure,
                                         float3 weight = make_float3(1.0f, 1.0f, 1.0f))
 {
-	/* OSL gives us a closure tree, we flatten it into arrays per
-	 * closure type, for evaluation, sampling, etc later on. */
-
-	switch(closure->id) {
-		case OSL::ClosureColor::MUL: {
-			OSL::ClosureMul *mul = (OSL::ClosureMul *)closure;
-			flatten_volume_closure_tree(sd, mul->closure, TO_FLOAT3(mul->weight) * weight);
-			break;
-		}
-		case OSL::ClosureColor::ADD: {
-			OSL::ClosureAdd *add = (OSL::ClosureAdd *)closure;
-			flatten_volume_closure_tree(sd, add->closureA, weight);
-			flatten_volume_closure_tree(sd, add->closureB, weight);
-			break;
-		}
-		default: {
-			OSL::ClosureComponent *comp = (OSL::ClosureComponent *)closure;
-			CClosurePrimitive *prim = (CClosurePrimitive *)comp->data();
-
-			if(prim) {
+  /* OSL gives us a closure tree, we flatten it into arrays per
+   * closure type, for evaluation, sampling, etc later on. */
+
+  switch (closure->id) {
+    case OSL::ClosureColor::MUL: {
+      OSL::ClosureMul *mul = (OSL::ClosureMul *)closure;
+      flatten_volume_closure_tree(sd, mul->closure, TO_FLOAT3(mul->weight) * weight);
+      break;
+    }
+    case OSL::ClosureColor::ADD: {
+      OSL::ClosureAdd *add = (OSL::ClosureAdd *)closure;
+      flatten_volume_closure_tree(sd, add->closureA, weight);
+      flatten_volume_closure_tree(sd, add->closureB, weight);
+      break;
+    }
+    default: {
+      OSL::ClosureComponent *comp = (OSL::ClosureComponent *)closure;
+      CClosurePrimitive *prim = (CClosurePrimitive *)comp->data();
+
+      if (prim) {
 #ifdef OSL_SUPPORTS_WEIGHTED_CLOSURE_COMPONENTS
-				weight = weight*TO_FLOAT3(comp->w);
+        weight = weight * TO_FLOAT3(comp->w);
 #endif
-				prim->setup(sd, 0, weight);
-			}
-		}
-	}
+        prim->setup(sd, 0, weight);
+      }
+    }
+  }
 }
 
 void OSLShader::eval_volume(KernelGlobals *kg, ShaderData *sd, PathState *state, int path_flag)
 {
-	/* setup shader globals from shader data */
-	OSLThreadData *tdata = kg->osl_tdata;
-	shaderdata_to_shaderglobals(kg, sd, state, path_flag, tdata);
-
-	/* execute shader */
-	OSL::ShadingSystem *ss = (OSL::ShadingSystem*)kg->osl_ss;
-	OSL::ShaderGlobals *globals = &tdata->globals;
-	OSL::ShadingContext *octx = tdata->context;
-	int shader = sd->shader & SHADER_MASK;
-
-	if(kg->osl->volume_state[shader]) {
-		ss->execute(octx, *(kg->osl->volume_state[shader]), *globals);
-	}
-
-	/* flatten closure tree */
-	if(globals->Ci)
-		flatten_volume_closure_tree(sd, globals->Ci);
+  /* setup shader globals from shader data */
+  OSLThreadData *tdata = kg->osl_tdata;
+  shaderdata_to_shaderglobals(kg, sd, state, path_flag, tdata);
+
+  /* execute shader */
+  OSL::ShadingSystem *ss = (OSL::ShadingSystem *)kg->osl_ss;
+  OSL::ShaderGlobals *globals = &tdata->globals;
+  OSL::ShadingContext *octx = tdata->context;
+  int shader = sd->shader & SHADER_MASK;
+
+  if (kg->osl->volume_state[shader]) {
+    ss->execute(octx, *(kg->osl->volume_state[shader]), *globals);
+  }
+
+  /* flatten closure tree */
+  if (globals->Ci)
+    flatten_volume_closure_tree(sd, globals->Ci);
 }
 
 /* Displacement */
 
 void OSLShader::eval_displacement(KernelGlobals *kg, ShaderData *sd, PathState *state)
 {
-	/* setup shader globals from shader data */
-	OSLThreadData *tdata = kg->osl_tdata;
+  /* setup shader globals from shader data */
+  OSLThreadData *tdata = kg->osl_tdata;
 
-	shaderdata_to_shaderglobals(kg, sd, state, 0, tdata);
+  shaderdata_to_shaderglobals(kg, sd, state, 0, tdata);
 
-	/* execute shader */
-	OSL::ShadingSystem *ss = (OSL::ShadingSystem*)kg->osl_ss;
-	OSL::ShaderGlobals *globals = &tdata->globals;
-	OSL::ShadingContext *octx = tdata->context;
-	int shader = sd->shader & SHADER_MASK;
+  /* execute shader */
+  OSL::ShadingSystem *ss = (OSL::ShadingSystem *)kg->osl_ss;
+  OSL::ShaderGlobals *globals = &tdata->globals;
+  OSL::ShadingContext *octx = tdata->context;
+  int shader = sd->shader & SHADER_MASK;
 
-	if(kg->osl->displacement_state[shader]) {
-		ss->execute(octx, *(kg->osl->displacement_state[shader]), *globals);
-	}
+  if (kg->osl->displacement_state[shader]) {
+    ss->execute(octx, *(kg->osl->displacement_state[shader]), *globals);
+  }
 
-	/* get back position */
-	sd->P = TO_FLOAT3(globals->P);
+  /* get back position */
+  sd->P = TO_FLOAT3(globals->P);
 }
 
 /* Attributes */
 
-int OSLShader::find_attribute(KernelGlobals *kg, const ShaderData *sd, uint id, AttributeDescriptor *desc)
+int OSLShader::find_attribute(KernelGlobals *kg,
+                              const ShaderData *sd,
+                              uint id,
+                              AttributeDescriptor *desc)
 {
-	/* for OSL, a hash map is used to lookup the attribute by name. */
-	int object = sd->object*ATTR_PRIM_TYPES;
+  /* for OSL, a hash map is used to lookup the attribute by name. */
+  int object = sd->object * ATTR_PRIM_TYPES;
 #ifdef __HAIR__
-	if(sd->type & PRIMITIVE_ALL_CURVE) object += ATTR_PRIM_CURVE;
+  if (sd->type & PRIMITIVE_ALL_CURVE)
+    object += ATTR_PRIM_CURVE;
 #endif
 
-	OSLGlobals::AttributeMap &attr_map = kg->osl->attribute_map[object];
-	ustring stdname(std::string("geom:") + std::string(Attribute::standard_name((AttributeStandard)id)));
-	OSLGlobals::AttributeMap::const_iterator it = attr_map.find(stdname);
-
-	if(it != attr_map.end()) {
-		const OSLGlobals::Attribute &osl_attr = it->second;
-		*desc = osl_attr.desc;
-
-		if(sd->prim == PRIM_NONE && (AttributeElement)osl_attr.desc.element != ATTR_ELEMENT_MESH) {
-			desc->offset = ATTR_STD_NOT_FOUND;
-			return ATTR_STD_NOT_FOUND;
-		}
-
-		/* return result */
-		if(osl_attr.desc.element == ATTR_ELEMENT_NONE) {
-			desc->offset = ATTR_STD_NOT_FOUND;
-		}
-		return desc->offset;
-	}
-	else {
-		desc->offset = ATTR_STD_NOT_FOUND;
-		return (int)ATTR_STD_NOT_FOUND;
-	}
+  OSLGlobals::AttributeMap &attr_map = kg->osl->attribute_map[object];
+  ustring stdname(std::string("geom:") +
+                  std::string(Attribute::standard_name((AttributeStandard)id)));
+  OSLGlobals::AttributeMap::const_iterator it = attr_map.find(stdname);
+
+  if (it != attr_map.end()) {
+    const OSLGlobals::Attribute &osl_attr = it->second;
+    *desc = osl_attr.desc;
+
+    if (sd->prim == PRIM_NONE && (AttributeElement)osl_attr.desc.element != ATTR_ELEMENT_MESH) {
+      desc->offset = ATTR_STD_NOT_FOUND;
+      return ATTR_STD_NOT_FOUND;
+    }
+
+    /* return result */
+    if (osl_attr.desc.element == ATTR_ELEMENT_NONE) {
+      desc->offset = ATTR_STD_NOT_FOUND;
+    }
+    return desc->offset;
+  }
+  else {
+    desc->offset = ATTR_STD_NOT_FOUND;
+    return (int)ATTR_STD_NOT_FOUND;
+  }
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/osl/osl_shader.h b/intern/cycles/kernel/osl/osl_shader.h
index 9824f966a44..a4fa24d0a90 100644
--- a/intern/cycles/kernel/osl/osl_shader.h
+++ b/intern/cycles/kernel/osl/osl_shader.h
@@ -29,7 +29,7 @@
  * This means no thread state must be passed along in the kernel itself.
  */
 
-#include "kernel/kernel_types.h"
+#  include "kernel/kernel_types.h"
 
 CCL_NAMESPACE_BEGIN
 
@@ -44,26 +44,31 @@ struct OSLGlobals;
 struct OSLShadingSystem;
 
 class OSLShader {
-public:
-	/* init */
-	static void register_closures(OSLShadingSystem *ss);
+ public:
+  /* init */
+  static void register_closures(OSLShadingSystem *ss);
 
-	/* per thread data */
-	static void thread_init(KernelGlobals *kg, KernelGlobals *kernel_globals, OSLGlobals *osl_globals);
-	static void thread_free(KernelGlobals *kg);
+  /* per thread data */
+  static void thread_init(KernelGlobals *kg,
+                          KernelGlobals *kernel_globals,
+                          OSLGlobals *osl_globals);
+  static void thread_free(KernelGlobals *kg);
 
-	/* eval */
-	static void eval_surface(KernelGlobals *kg, ShaderData *sd, PathState *state, int path_flag);
-	static void eval_background(KernelGlobals *kg, ShaderData *sd, PathState *state, int path_flag);
-	static void eval_volume(KernelGlobals *kg, ShaderData *sd, PathState *state, int path_flag);
-	static void eval_displacement(KernelGlobals *kg, ShaderData *sd, PathState *state);
+  /* eval */
+  static void eval_surface(KernelGlobals *kg, ShaderData *sd, PathState *state, int path_flag);
+  static void eval_background(KernelGlobals *kg, ShaderData *sd, PathState *state, int path_flag);
+  static void eval_volume(KernelGlobals *kg, ShaderData *sd, PathState *state, int path_flag);
+  static void eval_displacement(KernelGlobals *kg, ShaderData *sd, PathState *state);
 
-	/* attributes */
-	static int find_attribute(KernelGlobals *kg, const ShaderData *sd, uint id, AttributeDescriptor *desc);
+  /* attributes */
+  static int find_attribute(KernelGlobals *kg,
+                            const ShaderData *sd,
+                            uint id,
+                            AttributeDescriptor *desc);
 };
 
 CCL_NAMESPACE_END
 
 #endif
 
-#endif  /* __OSL_SHADER_H__ */
+#endif /* __OSL_SHADER_H__ */
diff --git a/intern/cycles/kernel/shaders/CMakeLists.txt b/intern/cycles/kernel/shaders/CMakeLists.txt
index 4740db27d4e..b42b9b2fe64 100644
--- a/intern/cycles/kernel/shaders/CMakeLists.txt
+++ b/intern/cycles/kernel/shaders/CMakeLists.txt
@@ -2,102 +2,102 @@
 # OSL node shaders
 
 set(SRC_OSL
-	node_add_closure.osl
-	node_ambient_occlusion.osl
-	node_anisotropic_bsdf.osl
-	node_attribute.osl
-	node_background.osl
-	node_bevel.osl
-	node_brick_texture.osl
-	node_brightness.osl
-	node_bump.osl
-	node_camera.osl
-	node_checker_texture.osl
-	node_combine_rgb.osl
-	node_combine_hsv.osl
-	node_combine_xyz.osl
-	node_convert_from_color.osl
-	node_convert_from_float.osl
-	node_convert_from_int.osl
-	node_convert_from_normal.osl
-	node_convert_from_point.osl
-	node_convert_from_vector.osl
-	node_diffuse_bsdf.osl
-	node_displacement.osl
-	node_vector_displacement.osl
-	node_emission.osl
-	node_environment_texture.osl
-	node_fresnel.osl
-	node_gamma.osl
-	node_geometry.osl
-	node_glass_bsdf.osl
-	node_glossy_bsdf.osl
-	node_gradient_texture.osl
-	node_hair_info.osl
-	node_scatter_volume.osl
-	node_absorption_volume.osl
-	node_principled_volume.osl
-	node_holdout.osl
-	node_hsv.osl
-	node_ies_light.osl
-	node_image_texture.osl
-	node_invert.osl
-	node_layer_weight.osl
-	node_light_falloff.osl
-	node_light_path.osl
-	node_magic_texture.osl
-	node_mapping.osl
-	node_math.osl
-	node_mix.osl
-	node_mix_closure.osl
-	node_musgrave_texture.osl
-	node_noise_texture.osl
-	node_normal.osl
-	node_normal_map.osl
-	node_object_info.osl
-	node_output_displacement.osl
-	node_output_surface.osl
-	node_output_volume.osl
-	node_particle_info.osl
-	node_refraction_bsdf.osl
-	node_rgb_curves.osl
-	node_rgb_ramp.osl
-	node_separate_rgb.osl
-	node_separate_hsv.osl
-	node_separate_xyz.osl
-	node_set_normal.osl
-	node_sky_texture.osl
-	node_subsurface_scattering.osl
-	node_tangent.osl
-	node_texture_coordinate.osl
-	node_toon_bsdf.osl
-	node_translucent_bsdf.osl
-	node_transparent_bsdf.osl
-	node_value.osl
-	node_vector_curves.osl
-	node_vector_math.osl
-	node_vector_transform.osl
-	node_velvet_bsdf.osl
-	node_voronoi_texture.osl
-	node_voxel_texture.osl
-	node_wavelength.osl
-	node_blackbody.osl
-	node_wave_texture.osl
-	node_wireframe.osl
-	node_hair_bsdf.osl
-	node_principled_hair_bsdf.osl
-	node_uv_map.osl
-	node_principled_bsdf.osl
-	node_rgb_to_bw.osl
+  node_add_closure.osl
+  node_ambient_occlusion.osl
+  node_anisotropic_bsdf.osl
+  node_attribute.osl
+  node_background.osl
+  node_bevel.osl
+  node_brick_texture.osl
+  node_brightness.osl
+  node_bump.osl
+  node_camera.osl
+  node_checker_texture.osl
+  node_combine_rgb.osl
+  node_combine_hsv.osl
+  node_combine_xyz.osl
+  node_convert_from_color.osl
+  node_convert_from_float.osl
+  node_convert_from_int.osl
+  node_convert_from_normal.osl
+  node_convert_from_point.osl
+  node_convert_from_vector.osl
+  node_diffuse_bsdf.osl
+  node_displacement.osl
+  node_vector_displacement.osl
+  node_emission.osl
+  node_environment_texture.osl
+  node_fresnel.osl
+  node_gamma.osl
+  node_geometry.osl
+  node_glass_bsdf.osl
+  node_glossy_bsdf.osl
+  node_gradient_texture.osl
+  node_hair_info.osl
+  node_scatter_volume.osl
+  node_absorption_volume.osl
+  node_principled_volume.osl
+  node_holdout.osl
+  node_hsv.osl
+  node_ies_light.osl
+  node_image_texture.osl
+  node_invert.osl
+  node_layer_weight.osl
+  node_light_falloff.osl
+  node_light_path.osl
+  node_magic_texture.osl
+  node_mapping.osl
+  node_math.osl
+  node_mix.osl
+  node_mix_closure.osl
+  node_musgrave_texture.osl
+  node_noise_texture.osl
+  node_normal.osl
+  node_normal_map.osl
+  node_object_info.osl
+  node_output_displacement.osl
+  node_output_surface.osl
+  node_output_volume.osl
+  node_particle_info.osl
+  node_refraction_bsdf.osl
+  node_rgb_curves.osl
+  node_rgb_ramp.osl
+  node_separate_rgb.osl
+  node_separate_hsv.osl
+  node_separate_xyz.osl
+  node_set_normal.osl
+  node_sky_texture.osl
+  node_subsurface_scattering.osl
+  node_tangent.osl
+  node_texture_coordinate.osl
+  node_toon_bsdf.osl
+  node_translucent_bsdf.osl
+  node_transparent_bsdf.osl
+  node_value.osl
+  node_vector_curves.osl
+  node_vector_math.osl
+  node_vector_transform.osl
+  node_velvet_bsdf.osl
+  node_voronoi_texture.osl
+  node_voxel_texture.osl
+  node_wavelength.osl
+  node_blackbody.osl
+  node_wave_texture.osl
+  node_wireframe.osl
+  node_hair_bsdf.osl
+  node_principled_hair_bsdf.osl
+  node_uv_map.osl
+  node_principled_bsdf.osl
+  node_rgb_to_bw.osl
 )
 
 set(SRC_OSL_HEADERS
-	node_color.h
-	node_fresnel.h
-	node_ramp_util.h
-	node_texture.h
-	stdosl.h
-	oslutil.h
+  node_color.h
+  node_fresnel.h
+  node_ramp_util.h
+  node_texture.h
+  stdosl.h
+  oslutil.h
 )
 
 set(SRC_OSO
@@ -106,20 +106,20 @@ set(SRC_OSO
 
 # TODO, add a module to compile OSL
 foreach(_file ${SRC_OSL})
-	set(_OSL_FILE ${CMAKE_CURRENT_SOURCE_DIR}/${_file})
-	set_source_files_properties(${_file} PROPERTIES HEADER_FILE_ONLY TRUE)
-	string(REPLACE ".osl" ".oso" _OSO_FILE ${_OSL_FILE})
-	string(REPLACE ${CMAKE_SOURCE_DIR} ${CMAKE_BINARY_DIR} _OSO_FILE ${_OSO_FILE})
-	add_custom_command(
-		OUTPUT ${_OSO_FILE}
-		COMMAND ${OSL_COMPILER} -q -O2  -I"${CMAKE_CURRENT_SOURCE_DIR}" -o ${_OSO_FILE} ${_OSL_FILE}
-		DEPENDS ${_OSL_FILE} ${SRC_OSL_HEADERS} ${OSL_COMPILER})
-	list(APPEND SRC_OSO
-		${_OSO_FILE}
-	)
+  set(_OSL_FILE ${CMAKE_CURRENT_SOURCE_DIR}/${_file})
+  set_source_files_properties(${_file} PROPERTIES HEADER_FILE_ONLY TRUE)
+  string(REPLACE ".osl" ".oso" _OSO_FILE ${_OSL_FILE})
+  string(REPLACE ${CMAKE_SOURCE_DIR} ${CMAKE_BINARY_DIR} _OSO_FILE ${_OSO_FILE})
+  add_custom_command(
+    OUTPUT ${_OSO_FILE}
+    COMMAND ${OSL_COMPILER} -q -O2  -I"${CMAKE_CURRENT_SOURCE_DIR}" -o ${_OSO_FILE} ${_OSL_FILE}
+    DEPENDS ${_OSL_FILE} ${SRC_OSL_HEADERS} ${OSL_COMPILER})
+  list(APPEND SRC_OSO
+    ${_OSO_FILE}
+  )
 
-	unset(_OSL_FILE)
-	unset(_OSO_FILE)
+  unset(_OSL_FILE)
+  unset(_OSO_FILE)
 endforeach()
 
 add_custom_target(cycles_osl_shaders ALL DEPENDS ${SRC_OSO} ${SRC_OSL_HEADERS} ${OSL_COMPILER} SOURCES ${SRC_OSL})
diff --git a/intern/cycles/kernel/shaders/node_absorption_volume.osl b/intern/cycles/kernel/shaders/node_absorption_volume.osl
index 18f662ebbbd..e99bd254666 100644
--- a/intern/cycles/kernel/shaders/node_absorption_volume.osl
+++ b/intern/cycles/kernel/shaders/node_absorption_volume.osl
@@ -16,11 +16,9 @@
 
 #include "stdosl.h"
 
-shader node_absorption_volume(
-	color Color = color(0.8, 0.8, 0.8),
-	float Density = 1.0,
-	output closure color Volume = 0)
+shader node_absorption_volume(color Color = color(0.8, 0.8, 0.8),
+                              float Density = 1.0,
+                              output closure color Volume = 0)
 {
-	Volume = ((color(1.0, 1.0, 1.0) - Color) * max(Density, 0.0)) * absorption();
+  Volume = ((color(1.0, 1.0, 1.0) - Color) * max(Density, 0.0)) * absorption();
 }
-
diff --git a/intern/cycles/kernel/shaders/node_add_closure.osl b/intern/cycles/kernel/shaders/node_add_closure.osl
index b6596e0b6bd..077e2735e61 100644
--- a/intern/cycles/kernel/shaders/node_add_closure.osl
+++ b/intern/cycles/kernel/shaders/node_add_closure.osl
@@ -16,11 +16,9 @@
 
 #include "stdosl.h"
 
-shader node_add_closure(
-	closure color Closure1 = 0,
-	closure color Closure2 = 0,
-	output closure color Closure = 0)
+shader node_add_closure(closure color Closure1 = 0,
+                        closure color Closure2 = 0,
+                        output closure color Closure = 0)
 {
-	Closure = Closure1 + Closure2;
+  Closure = Closure1 + Closure2;
 }
-
diff --git a/intern/cycles/kernel/shaders/node_ambient_occlusion.osl b/intern/cycles/kernel/shaders/node_ambient_occlusion.osl
index 825cccd59ce..7bf28719e78 100644
--- a/intern/cycles/kernel/shaders/node_ambient_occlusion.osl
+++ b/intern/cycles/kernel/shaders/node_ambient_occlusion.osl
@@ -16,20 +16,28 @@
 
 #include "stdosl.h"
 
-shader node_ambient_occlusion(
-	color ColorIn = color(1.0, 1.0, 1.0),
-	int samples = 16,
-	float Distance = 1.0,
-	normal Normal = N,
-	int inside = 0,
-	int only_local = 0,
-	output color ColorOut = color(1.0, 1.0, 1.0),
-	output float AO = 1.0)
+shader node_ambient_occlusion(color ColorIn = color(1.0, 1.0, 1.0),
+                              int samples = 16,
+                              float Distance = 1.0,
+                              normal Normal = N,
+                              int inside = 0,
+                              int only_local = 0,
+                              output color ColorOut = color(1.0, 1.0, 1.0),
+                              output float AO = 1.0)
 {
-	int global_radius = (Distance == 0.0 && !isconnected(Distance));
+  int global_radius = (Distance == 0.0 && !isconnected(Distance));
 
-	/* Abuse texture call with special @ao token. */
-	AO = texture("@ao", samples, Distance, Normal[0], Normal[1], Normal[2], inside, "sblur", only_local, "tblur", global_radius);
-	ColorOut = ColorIn * AO;
+  /* Abuse texture call with special @ao token. */
+  AO = texture("@ao",
+               samples,
+               Distance,
+               Normal[0],
+               Normal[1],
+               Normal[2],
+               inside,
+               "sblur",
+               only_local,
+               "tblur",
+               global_radius);
+  ColorOut = ColorIn * AO;
 }
-
diff --git a/intern/cycles/kernel/shaders/node_anisotropic_bsdf.osl b/intern/cycles/kernel/shaders/node_anisotropic_bsdf.osl
index 21e28ece65d..165c09eb8e0 100644
--- a/intern/cycles/kernel/shaders/node_anisotropic_bsdf.osl
+++ b/intern/cycles/kernel/shaders/node_anisotropic_bsdf.osl
@@ -16,45 +16,43 @@
 
 #include "stdosl.h"
 
-shader node_anisotropic_bsdf(
-	color Color = 0.0,
-	string distribution = "GGX",
-	float Roughness = 0.0,
-	float Anisotropy = 0.0,
-	float Rotation = 0.0,
-	normal Normal = N,
-	normal Tangent = normalize(dPdu),
-	output closure color BSDF = 0)
+shader node_anisotropic_bsdf(color Color = 0.0,
+                             string distribution = "GGX",
+                             float Roughness = 0.0,
+                             float Anisotropy = 0.0,
+                             float Rotation = 0.0,
+                             normal Normal = N,
+                             normal Tangent = normalize(dPdu),
+                             output closure color BSDF = 0)
 {
-	/* rotate tangent around normal */
-	vector T = Tangent;
+  /* rotate tangent around normal */
+  vector T = Tangent;
 
-	if (Rotation != 0.0)
-		T = rotate(T, Rotation * M_2PI, point(0.0, 0.0, 0.0), Normal);
+  if (Rotation != 0.0)
+    T = rotate(T, Rotation * M_2PI, point(0.0, 0.0, 0.0), Normal);
 
-	/* compute roughness */
-	float roughness = Roughness * Roughness;
-	float roughness_u, roughness_v;
-	float aniso = clamp(Anisotropy, -0.99, 0.99);
+  /* compute roughness */
+  float roughness = Roughness * Roughness;
+  float roughness_u, roughness_v;
+  float aniso = clamp(Anisotropy, -0.99, 0.99);
 
-	if (aniso < 0.0) {
-		roughness_u = roughness / (1.0 + aniso);
-		roughness_v = roughness * (1.0 + aniso);
-	}
-	else {
-		roughness_u = roughness * (1.0 - aniso);
-		roughness_v = roughness / (1.0 - aniso);
-	}
+  if (aniso < 0.0) {
+    roughness_u = roughness / (1.0 + aniso);
+    roughness_v = roughness * (1.0 + aniso);
+  }
+  else {
+    roughness_u = roughness * (1.0 - aniso);
+    roughness_v = roughness / (1.0 - aniso);
+  }
 
-	if (distribution == "sharp")
-		BSDF = Color * reflection(Normal);
-	else if (distribution == "beckmann")
-		BSDF = Color * microfacet_beckmann_aniso(Normal, T, roughness_u, roughness_v);
-	else if (distribution == "GGX")
-		BSDF = Color * microfacet_ggx_aniso(Normal, T, roughness_u, roughness_v);
-	else if (distribution == "Multiscatter GGX")
-		BSDF = Color * microfacet_multi_ggx_aniso(Normal, T, roughness_u, roughness_v, Color);
-	else
-		BSDF = Color * ashikhmin_shirley(Normal, T, roughness_u, roughness_v);
+  if (distribution == "sharp")
+    BSDF = Color * reflection(Normal);
+  else if (distribution == "beckmann")
+    BSDF = Color * microfacet_beckmann_aniso(Normal, T, roughness_u, roughness_v);
+  else if (distribution == "GGX")
+    BSDF = Color * microfacet_ggx_aniso(Normal, T, roughness_u, roughness_v);
+  else if (distribution == "Multiscatter GGX")
+    BSDF = Color * microfacet_multi_ggx_aniso(Normal, T, roughness_u, roughness_v, Color);
+  else
+    BSDF = Color * ashikhmin_shirley(Normal, T, roughness_u, roughness_v);
 }
-
diff --git a/intern/cycles/kernel/shaders/node_attribute.osl b/intern/cycles/kernel/shaders/node_attribute.osl
index 67183e9ffe0..336543cc130 100644
--- a/intern/cycles/kernel/shaders/node_attribute.osl
+++ b/intern/cycles/kernel/shaders/node_attribute.osl
@@ -16,26 +16,24 @@
 
 #include "stdosl.h"
 
-shader node_attribute(
-	string bump_offset = "center",
-	string name = "",
-	output point Vector = point(0.0, 0.0, 0.0),
-	output color Color = 0.0,
-	output float Fac = 0.0)
+shader node_attribute(string bump_offset = "center",
+                      string name = "",
+                      output point Vector = point(0.0, 0.0, 0.0),
+                      output color Color = 0.0,
+                      output float Fac = 0.0)
 {
-	getattribute(name, Color);
-	Vector = point(Color);
-	getattribute(name, Fac);
+  getattribute(name, Color);
+  Vector = point(Color);
+  getattribute(name, Fac);
 
-	if (bump_offset == "dx") {
-		Color += Dx(Color);
-		Vector += Dx(Vector);
-		Fac += Dx(Fac);
-	}
-	else if (bump_offset == "dy") {
-		Color += Dy(Color);
-		Vector += Dy(Vector);
-		Fac += Dy(Fac);
-	}
+  if (bump_offset == "dx") {
+    Color += Dx(Color);
+    Vector += Dx(Vector);
+    Fac += Dx(Fac);
+  }
+  else if (bump_offset == "dy") {
+    Color += Dy(Color);
+    Vector += Dy(Vector);
+    Fac += Dy(Fac);
+  }
 }
-
diff --git a/intern/cycles/kernel/shaders/node_background.osl b/intern/cycles/kernel/shaders/node_background.osl
index 613d4e360fa..6ded0d2c65c 100644
--- a/intern/cycles/kernel/shaders/node_background.osl
+++ b/intern/cycles/kernel/shaders/node_background.osl
@@ -16,11 +16,9 @@
 
 #include "stdosl.h"
 
-shader node_background(
-	color Color = 0.8,
-	float Strength = 1.0,
-	output closure color Background = 0)
+shader node_background(color Color = 0.8,
+                       float Strength = 1.0,
+                       output closure color Background = 0)
 {
-	Background = Color * Strength * background();
+  Background = Color * Strength * background();
 }
-
diff --git a/intern/cycles/kernel/shaders/node_bevel.osl b/intern/cycles/kernel/shaders/node_bevel.osl
index 9c4ca15be17..189c20c52e7 100644
--- a/intern/cycles/kernel/shaders/node_bevel.osl
+++ b/intern/cycles/kernel/shaders/node_bevel.osl
@@ -16,16 +16,14 @@
 
 #include "stdosl.h"
 
-shader node_bevel(
-	int samples = 4,
-	float Radius = 0.05,
-	normal NormalIn = N,
-	output normal NormalOut = N)
+shader node_bevel(int samples = 4,
+                  float Radius = 0.05,
+                  normal NormalIn = N,
+                  output normal NormalOut = N)
 {
-	/* Abuse texture call with special @bevel token. */
-	vector bevel_N = (normal)(color)texture("@bevel", samples, Radius);
+  /* Abuse texture call with special @bevel token. */
+  vector bevel_N = (normal)(color)texture("@bevel", samples, Radius);
 
-	/* Preserve input normal. */
-	NormalOut = normalize(NormalIn + (bevel_N - N));
+  /* Preserve input normal. */
+  NormalOut = normalize(NormalIn + (bevel_N - N));
 }
-
diff --git a/intern/cycles/kernel/shaders/node_blackbody.osl b/intern/cycles/kernel/shaders/node_blackbody.osl
index 1da6894d0f0..8a24bf1e28b 100644
--- a/intern/cycles/kernel/shaders/node_blackbody.osl
+++ b/intern/cycles/kernel/shaders/node_blackbody.osl
@@ -16,16 +16,13 @@
 
 #include "stdosl.h"
 
-shader node_blackbody(
-	float Temperature = 1200.0,
-	output color Color = 0.0)
+shader node_blackbody(float Temperature = 1200.0, output color Color = 0.0)
 {
-	color rgb = blackbody(Temperature);
-	
-	/* Scale by luminance */
-	float l = luminance(rgb);
-	if (l != 0.0)
-		rgb /= l;
-	Color = rgb;
-}
+  color rgb = blackbody(Temperature);
 
+  /* Scale by luminance */
+  float l = luminance(rgb);
+  if (l != 0.0)
+    rgb /= l;
+  Color = rgb;
+}
diff --git a/intern/cycles/kernel/shaders/node_brick_texture.osl b/intern/cycles/kernel/shaders/node_brick_texture.osl
index 9d2e5b74ce6..0abc3574c48 100644
--- a/intern/cycles/kernel/shaders/node_brick_texture.osl
+++ b/intern/cycles/kernel/shaders/node_brick_texture.osl
@@ -21,85 +21,100 @@
 
 float brick_noise(int ns) /* fast integer noise */
 {
-	int nn;
-	int n = (ns + 1013) & 2147483647;
-	n = (n >> 13) ^ n;
-	nn = (n * (n * n * 60493 + 19990303) + 1376312589) & 2147483647;
-	return 0.5 * ((float)nn / 1073741824.0);
+  int nn;
+  int n = (ns + 1013) & 2147483647;
+  n = (n >> 13) ^ n;
+  nn = (n * (n * n * 60493 + 19990303) + 1376312589) & 2147483647;
+  return 0.5 * ((float)nn / 1073741824.0);
 }
 
-float brick(point p, float mortar_size, float mortar_smooth, float bias,
-	float BrickWidth, float row_height, float offset_amount, int offset_frequency,
-	float squash_amount, int squash_frequency, output float tint)
+float brick(point p,
+            float mortar_size,
+            float mortar_smooth,
+            float bias,
+            float BrickWidth,
+            float row_height,
+            float offset_amount,
+            int offset_frequency,
+            float squash_amount,
+            int squash_frequency,
+            output float tint)
 {
-	int bricknum, rownum;
-	float offset = 0.0;
-	float brick_width = BrickWidth;
-	float x, y;
-
-	rownum = (int)floor(p[1] / row_height);
-	
-	if (offset_frequency && squash_frequency) {
-		brick_width *= (rownum % squash_frequency) ? 1.0 : squash_amount;                /* squash */
-		offset       = (rownum % offset_frequency) ? 0.0 : (brick_width * offset_amount);  /* offset */
-	}
-
-	bricknum = (int)floor((p[0] + offset) / brick_width);
-
-	x = (p[0] + offset) - brick_width * bricknum;
-	y = p[1] - row_height * rownum;
-
-	tint = clamp((brick_noise((rownum << 16) + (bricknum & 65535)) + bias), 0.0, 1.0);
-
-	float min_dist = min(min(x, y), min(brick_width - x, row_height - y));
-	if(min_dist >= mortar_size) {
-		return 0.0;
-	}
-	else if(mortar_smooth == 0.0) {
-		return 1.0;
-	}
-	else {
-		min_dist = 1.0 - min_dist/mortar_size;
-		return smoothstep(0.0, mortar_smooth, min_dist);
-	}
+  int bricknum, rownum;
+  float offset = 0.0;
+  float brick_width = BrickWidth;
+  float x, y;
+
+  rownum = (int)floor(p[1] / row_height);
+
+  if (offset_frequency && squash_frequency) {
+    brick_width *= (rownum % squash_frequency) ? 1.0 : squash_amount;           /* squash */
+    offset = (rownum % offset_frequency) ? 0.0 : (brick_width * offset_amount); /* offset */
+  }
+
+  bricknum = (int)floor((p[0] + offset) / brick_width);
+
+  x = (p[0] + offset) - brick_width * bricknum;
+  y = p[1] - row_height * rownum;
+
+  tint = clamp((brick_noise((rownum << 16) + (bricknum & 65535)) + bias), 0.0, 1.0);
+
+  float min_dist = min(min(x, y), min(brick_width - x, row_height - y));
+  if (min_dist >= mortar_size) {
+    return 0.0;
+  }
+  else if (mortar_smooth == 0.0) {
+    return 1.0;
+  }
+  else {
+    min_dist = 1.0 - min_dist / mortar_size;
+    return smoothstep(0.0, mortar_smooth, min_dist);
+  }
 }
 
-shader node_brick_texture(
-	int use_mapping = 0,
-	matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
-	float offset = 0.5,
-	int offset_frequency = 2,
-	float squash = 1.0,
-	int squash_frequency = 1,
-	point Vector = P,
-	color Color1 = 0.2,
-	color Color2 = 0.8,
-	color Mortar = 0.0,
-	float Scale = 5.0,
-	float MortarSize = 0.02,
-	float MortarSmooth = 0.0,
-	float Bias = 0.0,
-	float BrickWidth = 0.5,
-	float RowHeight = 0.25,
-	output float Fac = 0.0,
-	output color Color = 0.2)
+shader node_brick_texture(int use_mapping = 0,
+                          matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
+                          float offset = 0.5,
+                          int offset_frequency = 2,
+                          float squash = 1.0,
+                          int squash_frequency = 1,
+                          point Vector = P,
+                          color Color1 = 0.2,
+                          color Color2 = 0.8,
+                          color Mortar = 0.0,
+                          float Scale = 5.0,
+                          float MortarSize = 0.02,
+                          float MortarSmooth = 0.0,
+                          float Bias = 0.0,
+                          float BrickWidth = 0.5,
+                          float RowHeight = 0.25,
+                          output float Fac = 0.0,
+                          output color Color = 0.2)
 {
-	point p = Vector;
-
-	if (use_mapping)
-		p = transform(mapping, p);
-
-	float tint = 0.0;
-	color Col = Color1;
-	
-	Fac = brick(p * Scale, MortarSize, MortarSmooth, Bias, BrickWidth, RowHeight,
-		offset, offset_frequency, squash, squash_frequency, tint);
-		
-	if (Fac != 1.0) {
-		float facm = 1.0 - tint;
-		Col = facm * Color1 + tint * Color2;
-	}
-	
-	Color = mix(Col, Mortar, Fac);
-}
+  point p = Vector;
+
+  if (use_mapping)
+    p = transform(mapping, p);
+
+  float tint = 0.0;
+  color Col = Color1;
 
+  Fac = brick(p * Scale,
+              MortarSize,
+              MortarSmooth,
+              Bias,
+              BrickWidth,
+              RowHeight,
+              offset,
+              offset_frequency,
+              squash,
+              squash_frequency,
+              tint);
+
+  if (Fac != 1.0) {
+    float facm = 1.0 - tint;
+    Col = facm * Color1 + tint * Color2;
+  }
+
+  Color = mix(Col, Mortar, Fac);
+}
diff --git a/intern/cycles/kernel/shaders/node_brightness.osl b/intern/cycles/kernel/shaders/node_brightness.osl
index 00cfb167885..2defbc4b1db 100644
--- a/intern/cycles/kernel/shaders/node_brightness.osl
+++ b/intern/cycles/kernel/shaders/node_brightness.osl
@@ -16,17 +16,15 @@
 
 #include "stdosl.h"
 
-shader node_brightness(
-	color ColorIn = 0.8,
-	float Bright = 0.0,
-	float Contrast = 0.0,
-	output color ColorOut = 0.8)
+shader node_brightness(color ColorIn = 0.8,
+                       float Bright = 0.0,
+                       float Contrast = 0.0,
+                       output color ColorOut = 0.8)
 {
-	float a = 1.0 + Contrast;
-	float b = Bright - Contrast * 0.5;
+  float a = 1.0 + Contrast;
+  float b = Bright - Contrast * 0.5;
 
-	ColorOut[0] = max(a * ColorIn[0] + b, 0.0);
-	ColorOut[1] = max(a * ColorIn[1] + b, 0.0);
-	ColorOut[2] = max(a * ColorIn[2] + b, 0.0);
+  ColorOut[0] = max(a * ColorIn[0] + b, 0.0);
+  ColorOut[1] = max(a * ColorIn[1] + b, 0.0);
+  ColorOut[2] = max(a * ColorIn[2] + b, 0.0);
 }
-
diff --git a/intern/cycles/kernel/shaders/node_bump.osl b/intern/cycles/kernel/shaders/node_bump.osl
index a2a4468d5f3..3697bb37fd9 100644
--- a/intern/cycles/kernel/shaders/node_bump.osl
+++ b/intern/cycles/kernel/shaders/node_bump.osl
@@ -19,52 +19,50 @@
 /* "Bump Mapping Unparameterized Surfaces on the GPU"
  * Morten S. Mikkelsen, 2010 */
 
-surface node_bump(
-	int invert = 0,
-	int use_object_space = 0,
-	normal NormalIn = N,
-	float Strength = 0.1,
-	float Distance = 1.0,
-	float SampleCenter = 0.0,
-	float SampleX = 0.0,
-	float SampleY = 0.0,
-	output normal NormalOut = N)
+surface node_bump(int invert = 0,
+                  int use_object_space = 0,
+                  normal NormalIn = N,
+                  float Strength = 0.1,
+                  float Distance = 1.0,
+                  float SampleCenter = 0.0,
+                  float SampleX = 0.0,
+                  float SampleY = 0.0,
+                  output normal NormalOut = N)
 {
-	point Ptmp = P;
-	normal Normal = NormalIn;
+  point Ptmp = P;
+  normal Normal = NormalIn;
 
-	if (use_object_space) {
-		Ptmp = transform("object", Ptmp);
-		Normal = normalize(transform("object", Normal));
-	}
+  if (use_object_space) {
+    Ptmp = transform("object", Ptmp);
+    Normal = normalize(transform("object", Normal));
+  }
 
-	/* get surface tangents from normal */
-	vector dPdx = Dx(Ptmp);
-	vector dPdy = Dy(Ptmp);
+  /* get surface tangents from normal */
+  vector dPdx = Dx(Ptmp);
+  vector dPdy = Dy(Ptmp);
 
-	vector Rx = cross(dPdy, Normal);
-	vector Ry = cross(Normal, dPdx);
+  vector Rx = cross(dPdy, Normal);
+  vector Ry = cross(Normal, dPdx);
 
-	/* compute surface gradient and determinant */
-	float det = dot(dPdx, Rx);
-	vector surfgrad = (SampleX - SampleCenter) * Rx + (SampleY - SampleCenter) * Ry;
+  /* compute surface gradient and determinant */
+  float det = dot(dPdx, Rx);
+  vector surfgrad = (SampleX - SampleCenter) * Rx + (SampleY - SampleCenter) * Ry;
 
-	float absdet = fabs(det);
+  float absdet = fabs(det);
 
-	float strength = max(Strength, 0.0);
-	float dist = Distance;
+  float strength = max(Strength, 0.0);
+  float dist = Distance;
 
-	if (invert)
-		dist *= -1.0;
-	
-	/* compute and output perturbed normal */
-	NormalOut = normalize(absdet * Normal - dist * sign(det) * surfgrad);
-	NormalOut = normalize(strength * NormalOut + (1.0 - strength) * Normal);
+  if (invert)
+    dist *= -1.0;
 
-	if (use_object_space) {
-		NormalOut = normalize(transform("object", "world", NormalOut));
-	}
+  /* compute and output perturbed normal */
+  NormalOut = normalize(absdet * Normal - dist * sign(det) * surfgrad);
+  NormalOut = normalize(strength * NormalOut + (1.0 - strength) * Normal);
 
-	NormalOut = ensure_valid_reflection(Ng, I, NormalOut);
-}
+  if (use_object_space) {
+    NormalOut = normalize(transform("object", "world", NormalOut));
+  }
 
+  NormalOut = ensure_valid_reflection(Ng, I, NormalOut);
+}
diff --git a/intern/cycles/kernel/shaders/node_camera.osl b/intern/cycles/kernel/shaders/node_camera.osl
index 5e90cb8b8ee..833e9e775fe 100644
--- a/intern/cycles/kernel/shaders/node_camera.osl
+++ b/intern/cycles/kernel/shaders/node_camera.osl
@@ -16,16 +16,14 @@
 
 #include "stdosl.h"
 
-shader node_camera(
-	output vector ViewVector = vector(0.0, 0.0, 0.0),
-	output float ViewZDepth = 0.0,
-	output float ViewDistance = 0.0)
+shader node_camera(output vector ViewVector = vector(0.0, 0.0, 0.0),
+                   output float ViewZDepth = 0.0,
+                   output float ViewDistance = 0.0)
 {
-	ViewVector = (vector)transform("world", "camera", P);
+  ViewVector = (vector)transform("world", "camera", P);
 
-	ViewZDepth = fabs(ViewVector[2]);
-	ViewDistance = length(ViewVector);
+  ViewZDepth = fabs(ViewVector[2]);
+  ViewDistance = length(ViewVector);
 
-	ViewVector = normalize(ViewVector);
+  ViewVector = normalize(ViewVector);
 }
-
diff --git a/intern/cycles/kernel/shaders/node_checker_texture.osl b/intern/cycles/kernel/shaders/node_checker_texture.osl
index e745cfaee06..e068f7952ed 100644
--- a/intern/cycles/kernel/shaders/node_checker_texture.osl
+++ b/intern/cycles/kernel/shaders/node_checker_texture.osl
@@ -21,44 +21,43 @@
 
 float checker(point ip)
 {
-	point p;
-	p[0] = (ip[0] + 0.000001) * 0.999999;
-	p[1] = (ip[1] + 0.000001) * 0.999999;
-	p[2] = (ip[2] + 0.000001) * 0.999999;
-	
-	int xi = (int)fabs(floor(p[0]));
-	int yi = (int)fabs(floor(p[1]));
-	int zi = (int)fabs(floor(p[2]));
-
-	if ((xi % 2 == yi % 2) == (zi % 2)) {
-		return 1.0;
-	}
-	else {
-		return 0.0;
-	}
+  point p;
+  p[0] = (ip[0] + 0.000001) * 0.999999;
+  p[1] = (ip[1] + 0.000001) * 0.999999;
+  p[2] = (ip[2] + 0.000001) * 0.999999;
+
+  int xi = (int)fabs(floor(p[0]));
+  int yi = (int)fabs(floor(p[1]));
+  int zi = (int)fabs(floor(p[2]));
+
+  if ((xi % 2 == yi % 2) == (zi % 2)) {
+    return 1.0;
+  }
+  else {
+    return 0.0;
+  }
 }
 
 shader node_checker_texture(
-	int use_mapping = 0,
-	matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
-	float Scale = 5.0,
-	point Vector = P,
-	color Color1 = 0.8,
-	color Color2 = 0.2,
-	output float Fac = 0.0,
-	output color Color = 0.0)
+    int use_mapping = 0,
+    matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
+    float Scale = 5.0,
+    point Vector = P,
+    color Color1 = 0.8,
+    color Color2 = 0.2,
+    output float Fac = 0.0,
+    output color Color = 0.0)
 {
-	point p = Vector;
-
-	if (use_mapping)
-		p = transform(mapping, p);
-
-	Fac = checker(p * Scale);
-	if (Fac == 1.0) {
-		Color = Color1;
-	}
-	else {
-		Color = Color2;
-	}
+  point p = Vector;
+
+  if (use_mapping)
+    p = transform(mapping, p);
+
+  Fac = checker(p * Scale);
+  if (Fac == 1.0) {
+    Color = Color1;
+  }
+  else {
+    Color = Color2;
+  }
 }
-
diff --git a/intern/cycles/kernel/shaders/node_color.h b/intern/cycles/kernel/shaders/node_color.h
index fc758bef1fa..276c91843e8 100644
--- a/intern/cycles/kernel/shaders/node_color.h
+++ b/intern/cycles/kernel/shaders/node_color.h
@@ -18,135 +18,146 @@
 
 float color_srgb_to_scene_linear(float c)
 {
-	if (c < 0.04045)
-		return (c < 0.0) ? 0.0 : c * (1.0 / 12.92);
-	else
-		return pow((c + 0.055) * (1.0 / 1.055), 2.4);
+  if (c < 0.04045)
+    return (c < 0.0) ? 0.0 : c * (1.0 / 12.92);
+  else
+    return pow((c + 0.055) * (1.0 / 1.055), 2.4);
 }
 
 float color_scene_linear_to_srgb(float c)
 {
-	if (c < 0.0031308)
-		return (c < 0.0) ? 0.0 : c * 12.92;
-	else
-		return 1.055 * pow(c, 1.0 / 2.4) - 0.055;
+  if (c < 0.0031308)
+    return (c < 0.0) ? 0.0 : c * 12.92;
+  else
+    return 1.055 * pow(c, 1.0 / 2.4) - 0.055;
 }
 
 color color_srgb_to_scene_linear(color c)
 {
-	return color(
-		color_srgb_to_scene_linear(c[0]),
-		color_srgb_to_scene_linear(c[1]),
-		color_srgb_to_scene_linear(c[2]));
+  return color(color_srgb_to_scene_linear(c[0]),
+               color_srgb_to_scene_linear(c[1]),
+               color_srgb_to_scene_linear(c[2]));
 }
 
 color color_scene_linear_to_srgb(color c)
 {
-	return color(
-		color_scene_linear_to_srgb(c[0]),
-		color_scene_linear_to_srgb(c[1]),
-		color_scene_linear_to_srgb(c[2]));
+  return color(color_scene_linear_to_srgb(c[0]),
+               color_scene_linear_to_srgb(c[1]),
+               color_scene_linear_to_srgb(c[2]));
 }
 
 color color_unpremultiply(color c, float alpha)
 {
-	if (alpha != 1.0 && alpha != 0.0)
-		return c / alpha;
+  if (alpha != 1.0 && alpha != 0.0)
+    return c / alpha;
 
-	return c;
+  return c;
 }
 
 /* Color Operations */
 
 color xyY_to_xyz(float x, float y, float Y)
 {
-	float X, Z;
+  float X, Z;
 
-	if (y != 0.0) X = (x / y) * Y;
-	else X = 0.0;
+  if (y != 0.0)
+    X = (x / y) * Y;
+  else
+    X = 0.0;
 
-	if (y != 0.0 && Y != 0.0) Z = ((1.0 - x - y) / y) * Y;
-	else Z = 0.0;
+  if (y != 0.0 && Y != 0.0)
+    Z = ((1.0 - x - y) / y) * Y;
+  else
+    Z = 0.0;
 
-	return color(X, Y, Z);
+  return color(X, Y, Z);
 }
 
 color xyz_to_rgb(float x, float y, float z)
 {
-	return color( 3.240479 * x + -1.537150 * y + -0.498535 * z,
-	             -0.969256 * x +  1.875991 * y +  0.041556 * z,
-	              0.055648 * x + -0.204043 * y +  1.057311 * z);
+  return color(3.240479 * x + -1.537150 * y + -0.498535 * z,
+               -0.969256 * x + 1.875991 * y + 0.041556 * z,
+               0.055648 * x + -0.204043 * y + 1.057311 * z);
 }
 
 color rgb_to_hsv(color rgb)
 {
-	float cmax, cmin, h, s, v, cdelta;
-	color c;
-
-	cmax = max(rgb[0], max(rgb[1], rgb[2]));
-	cmin = min(rgb[0], min(rgb[1], rgb[2]));
-	cdelta = cmax - cmin;
-
-	v = cmax;
-
-	if (cmax != 0.0) {
-		s = cdelta / cmax;
-	}
-	else {
-		s = 0.0;
-		h = 0.0;
-	}
-
-	if (s == 0.0) {
-		h = 0.0;
-	}
-	else {
-		c = (color(cmax, cmax, cmax) - rgb) / cdelta;
-
-		if (rgb[0] == cmax) h = c[2] - c[1];
-		else if (rgb[1] == cmax) h = 2.0 + c[0] -  c[2];
-		else h = 4.0 + c[1] - c[0];
-
-		h /= 6.0;
-
-		if (h < 0.0)
-			h += 1.0;
-	}
-
-	return color(h, s, v);
+  float cmax, cmin, h, s, v, cdelta;
+  color c;
+
+  cmax = max(rgb[0], max(rgb[1], rgb[2]));
+  cmin = min(rgb[0], min(rgb[1], rgb[2]));
+  cdelta = cmax - cmin;
+
+  v = cmax;
+
+  if (cmax != 0.0) {
+    s = cdelta / cmax;
+  }
+  else {
+    s = 0.0;
+    h = 0.0;
+  }
+
+  if (s == 0.0) {
+    h = 0.0;
+  }
+  else {
+    c = (color(cmax, cmax, cmax) - rgb) / cdelta;
+
+    if (rgb[0] == cmax)
+      h = c[2] - c[1];
+    else if (rgb[1] == cmax)
+      h = 2.0 + c[0] - c[2];
+    else
+      h = 4.0 + c[1] - c[0];
+
+    h /= 6.0;
+
+    if (h < 0.0)
+      h += 1.0;
+  }
+
+  return color(h, s, v);
 }
 
 color hsv_to_rgb(color hsv)
 {
-	float i, f, p, q, t, h, s, v;
-	color rgb;
-
-	h = hsv[0];
-	s = hsv[1];
-	v = hsv[2];
-
-	if (s == 0.0) {
-		rgb = color(v, v, v);
-	}
-	else {
-		if (h == 1.0)
-			h = 0.0;
-
-		h *= 6.0;
-		i = floor(h);
-		f = h - i;
-		rgb = color(f, f, f);
-		p = v * (1.0 - s);
-		q = v * (1.0 - (s * f));
-		t = v * (1.0 - (s * (1.0 - f)));
-
-		if (i == 0.0) rgb = color(v, t, p);
-		else if (i == 1.0) rgb = color(q, v, p);
-		else if (i == 2.0) rgb = color(p, v, t);
-		else if (i == 3.0) rgb = color(p, q, v);
-		else if (i == 4.0) rgb = color(t, p, v);
-		else rgb = color(v, p, q);
-	}
-
-	return rgb;
+  float i, f, p, q, t, h, s, v;
+  color rgb;
+
+  h = hsv[0];
+  s = hsv[1];
+  v = hsv[2];
+
+  if (s == 0.0) {
+    rgb = color(v, v, v);
+  }
+  else {
+    if (h == 1.0)
+      h = 0.0;
+
+    h *= 6.0;
+    i = floor(h);
+    f = h - i;
+    rgb = color(f, f, f);
+    p = v * (1.0 - s);
+    q = v * (1.0 - (s * f));
+    t = v * (1.0 - (s * (1.0 - f)));
+
+    if (i == 0.0)
+      rgb = color(v, t, p);
+    else if (i == 1.0)
+      rgb = color(q, v, p);
+    else if (i == 2.0)
+      rgb = color(p, v, t);
+    else if (i == 3.0)
+      rgb = color(p, q, v);
+    else if (i == 4.0)
+      rgb = color(t, p, v);
+    else
+      rgb = color(v, p, q);
+  }
+
+  return rgb;
 }
diff --git a/intern/cycles/kernel/shaders/node_combine_hsv.osl b/intern/cycles/kernel/shaders/node_combine_hsv.osl
index 6b922bf4e6b..1658cf3d774 100644
--- a/intern/cycles/kernel/shaders/node_combine_hsv.osl
+++ b/intern/cycles/kernel/shaders/node_combine_hsv.osl
@@ -16,12 +16,7 @@
 
 #include "stdosl.h"
 
-shader node_combine_hsv(
-	float H = 0.0,
-	float S = 0.0,
-	float V = 0.0,
-	output color Color = 0.8)
+shader node_combine_hsv(float H = 0.0, float S = 0.0, float V = 0.0, output color Color = 0.8)
 {
-	Color = color("hsv", H, S, V);	
+  Color = color("hsv", H, S, V);
 }
-
diff --git a/intern/cycles/kernel/shaders/node_combine_rgb.osl b/intern/cycles/kernel/shaders/node_combine_rgb.osl
index f343fdefd84..aaa95e9c5af 100644
--- a/intern/cycles/kernel/shaders/node_combine_rgb.osl
+++ b/intern/cycles/kernel/shaders/node_combine_rgb.osl
@@ -16,12 +16,7 @@
 
 #include "stdosl.h"
 
-shader node_combine_rgb(
-	float R = 0.0,
-	float G = 0.0,
-	float B = 0.0,
-	output color Image = 0.8)
+shader node_combine_rgb(float R = 0.0, float G = 0.0, float B = 0.0, output color Image = 0.8)
 {
-	Image = color(R, G, B);
+  Image = color(R, G, B);
 }
-
diff --git a/intern/cycles/kernel/shaders/node_combine_xyz.osl b/intern/cycles/kernel/shaders/node_combine_xyz.osl
index 86182056b09..4ab49168704 100644
--- a/intern/cycles/kernel/shaders/node_combine_xyz.osl
+++ b/intern/cycles/kernel/shaders/node_combine_xyz.osl
@@ -16,12 +16,7 @@
 
 #include "stdosl.h"
 
-shader node_combine_xyz(
-	float X = 0.0,
-	float Y = 0.0,
-	float Z = 0.0,
-	output vector Vector = 0.8)
+shader node_combine_xyz(float X = 0.0, float Y = 0.0, float Z = 0.0, output vector Vector = 0.8)
 {
-	Vector = vector(X, Y, Z);
+  Vector = vector(X, Y, Z);
 }
-
diff --git a/intern/cycles/kernel/shaders/node_convert_from_color.osl b/intern/cycles/kernel/shaders/node_convert_from_color.osl
index e95a17f6fa1..7ea9a1e4fb3 100644
--- a/intern/cycles/kernel/shaders/node_convert_from_color.osl
+++ b/intern/cycles/kernel/shaders/node_convert_from_color.osl
@@ -16,19 +16,17 @@
 
 #include "stdosl.h"
 
-shader node_convert_from_color(
-	color value_color = 0.0,
-	output string value_string = "",
-	output float value_float = 0.0,
-	output int value_int = 0,
-	output vector value_vector = vector(0.0, 0.0, 0.0),
-	output point value_point = point(0.0, 0.0, 0.0),
-	output normal value_normal = normal(0.0, 0.0, 0.0))
+shader node_convert_from_color(color value_color = 0.0,
+                               output string value_string = "",
+                               output float value_float = 0.0,
+                               output int value_int = 0,
+                               output vector value_vector = vector(0.0, 0.0, 0.0),
+                               output point value_point = point(0.0, 0.0, 0.0),
+                               output normal value_normal = normal(0.0, 0.0, 0.0))
 {
-	value_float = value_color[0] * 0.2126 + value_color[1] * 0.7152 + value_color[2] * 0.0722;
-	value_int = (int)(value_color[0] * 0.2126 + value_color[1] * 0.7152 + value_color[2] * 0.0722);
-	value_vector = vector(value_color[0], value_color[1], value_color[2]);
-	value_point = point(value_color[0], value_color[1], value_color[2]);
-	value_normal = normal(value_color[0], value_color[1], value_color[2]);
+  value_float = value_color[0] * 0.2126 + value_color[1] * 0.7152 + value_color[2] * 0.0722;
+  value_int = (int)(value_color[0] * 0.2126 + value_color[1] * 0.7152 + value_color[2] * 0.0722);
+  value_vector = vector(value_color[0], value_color[1], value_color[2]);
+  value_point = point(value_color[0], value_color[1], value_color[2]);
+  value_normal = normal(value_color[0], value_color[1], value_color[2]);
 }
-
diff --git a/intern/cycles/kernel/shaders/node_convert_from_float.osl b/intern/cycles/kernel/shaders/node_convert_from_float.osl
index a5c2e3b26ad..13b5dea0838 100644
--- a/intern/cycles/kernel/shaders/node_convert_from_float.osl
+++ b/intern/cycles/kernel/shaders/node_convert_from_float.osl
@@ -16,19 +16,17 @@
 
 #include "stdosl.h"
 
-shader node_convert_from_float(
-	float value_float = 0.0,
-	output string value_string = "",
-	output int value_int = 0,
-	output color value_color = 0.0,
-	output vector value_vector = vector(0.0, 0.0, 0.0),
-	output point value_point = point(0.0, 0.0, 0.0),
-	output normal value_normal = normal(0.0, 0.0, 0.0))
+shader node_convert_from_float(float value_float = 0.0,
+                               output string value_string = "",
+                               output int value_int = 0,
+                               output color value_color = 0.0,
+                               output vector value_vector = vector(0.0, 0.0, 0.0),
+                               output point value_point = point(0.0, 0.0, 0.0),
+                               output normal value_normal = normal(0.0, 0.0, 0.0))
 {
-	value_int = (int)value_float;
-	value_color = color(value_float, value_float, value_float);
-	value_vector = vector(value_float, value_float, value_float);
-	value_point = point(value_float, value_float, value_float);
-	value_normal = normal(value_float, value_float, value_float);
+  value_int = (int)value_float;
+  value_color = color(value_float, value_float, value_float);
+  value_vector = vector(value_float, value_float, value_float);
+  value_point = point(value_float, value_float, value_float);
+  value_normal = normal(value_float, value_float, value_float);
 }
-
diff --git a/intern/cycles/kernel/shaders/node_convert_from_int.osl b/intern/cycles/kernel/shaders/node_convert_from_int.osl
index 0e6ae711210..a59e025d822 100644
--- a/intern/cycles/kernel/shaders/node_convert_from_int.osl
+++ b/intern/cycles/kernel/shaders/node_convert_from_int.osl
@@ -16,20 +16,18 @@
 
 #include "stdosl.h"
 
-shader node_convert_from_int(
-	int value_int = 0,
-	output string value_string = "",
-	output float value_float = 0.0,
-	output color value_color = 0.0,
-	output vector value_vector = vector(0.0, 0.0, 0.0),
-	output point value_point = point(0.0, 0.0, 0.0),
-	output normal value_normal = normal(0.0, 0.0, 0.0))
+shader node_convert_from_int(int value_int = 0,
+                             output string value_string = "",
+                             output float value_float = 0.0,
+                             output color value_color = 0.0,
+                             output vector value_vector = vector(0.0, 0.0, 0.0),
+                             output point value_point = point(0.0, 0.0, 0.0),
+                             output normal value_normal = normal(0.0, 0.0, 0.0))
 {
-	float f = (float)value_int;
-	value_float = f;
-	value_color = color(f, f, f);
-	value_vector = vector(f, f, f);
-	value_point = point(f, f, f);
-	value_normal = normal(f, f, f);
+  float f = (float)value_int;
+  value_float = f;
+  value_color = color(f, f, f);
+  value_vector = vector(f, f, f);
+  value_point = point(f, f, f);
+  value_normal = normal(f, f, f);
 }
-
diff --git a/intern/cycles/kernel/shaders/node_convert_from_normal.osl b/intern/cycles/kernel/shaders/node_convert_from_normal.osl
index 7fffa7f6169..7bdd94d1941 100644
--- a/intern/cycles/kernel/shaders/node_convert_from_normal.osl
+++ b/intern/cycles/kernel/shaders/node_convert_from_normal.osl
@@ -16,19 +16,17 @@
 
 #include "stdosl.h"
 
-shader node_convert_from_normal(
-	normal value_normal = normal(0.0, 0.0, 0.0),
-	output string value_string = "",
-	output float value_float = 0.0,
-	output int value_int = 0,
-	output vector value_vector = vector(0.0, 0.0, 0.0),
-	output color value_color = 0.0,
-	output point value_point = point(0.0, 0.0, 0.0))
+shader node_convert_from_normal(normal value_normal = normal(0.0, 0.0, 0.0),
+                                output string value_string = "",
+                                output float value_float = 0.0,
+                                output int value_int = 0,
+                                output vector value_vector = vector(0.0, 0.0, 0.0),
+                                output color value_color = 0.0,
+                                output point value_point = point(0.0, 0.0, 0.0))
 {
-	value_float = (value_normal[0] + value_normal[1] + value_normal[2]) * (1.0 / 3.0);
-	value_int = (int)((value_normal[0] + value_normal[1] + value_normal[2]) * (1.0 / 3.0));
-	value_vector = vector(value_normal[0], value_normal[1], value_normal[2]);
-	value_color = color(value_normal[0], value_normal[1], value_normal[2]);
-	value_point = point(value_normal[0], value_normal[1], value_normal[2]);
+  value_float = (value_normal[0] + value_normal[1] + value_normal[2]) * (1.0 / 3.0);
+  value_int = (int)((value_normal[0] + value_normal[1] + value_normal[2]) * (1.0 / 3.0));
+  value_vector = vector(value_normal[0], value_normal[1], value_normal[2]);
+  value_color = color(value_normal[0], value_normal[1], value_normal[2]);
+  value_point = point(value_normal[0], value_normal[1], value_normal[2]);
 }
-
diff --git a/intern/cycles/kernel/shaders/node_convert_from_point.osl b/intern/cycles/kernel/shaders/node_convert_from_point.osl
index 9e4930296bb..79c1719e7a7 100644
--- a/intern/cycles/kernel/shaders/node_convert_from_point.osl
+++ b/intern/cycles/kernel/shaders/node_convert_from_point.osl
@@ -16,19 +16,17 @@
 
 #include "stdosl.h"
 
-shader node_convert_from_point(
-	point value_point = point(0.0, 0.0, 0.0),
-	output string value_string = "",
-	output float value_float = 0.0,
-	output int value_int = 0,
-	output vector value_vector = vector(0.0, 0.0, 0.0),
-	output color value_color = 0.0,
-	output normal value_normal = normal(0.0, 0.0, 0.0))
+shader node_convert_from_point(point value_point = point(0.0, 0.0, 0.0),
+                               output string value_string = "",
+                               output float value_float = 0.0,
+                               output int value_int = 0,
+                               output vector value_vector = vector(0.0, 0.0, 0.0),
+                               output color value_color = 0.0,
+                               output normal value_normal = normal(0.0, 0.0, 0.0))
 {
-	value_float = (value_point[0] + value_point[1] + value_point[2]) * (1.0 / 3.0);
-	value_int = (int)((value_normal[0] + value_normal[1] + value_normal[2]) * (1.0 / 3.0));
-	value_vector = vector(value_point[0], value_point[1], value_point[2]);
-	value_color = color(value_point[0], value_point[1], value_point[2]);
-	value_normal = normal(value_point[0], value_point[1], value_point[2]);
+  value_float = (value_point[0] + value_point[1] + value_point[2]) * (1.0 / 3.0);
+  value_int = (int)((value_point[0] + value_point[1] + value_point[2]) * (1.0 / 3.0));
+  value_vector = vector(value_point[0], value_point[1], value_point[2]);
+  value_color = color(value_point[0], value_point[1], value_point[2]);
+  value_normal = normal(value_point[0], value_point[1], value_point[2]);
 }
-
diff --git a/intern/cycles/kernel/shaders/node_convert_from_string.osl b/intern/cycles/kernel/shaders/node_convert_from_string.osl
index cbc6653eada..48d894a6b3e 100644
--- a/intern/cycles/kernel/shaders/node_convert_from_string.osl
+++ b/intern/cycles/kernel/shaders/node_convert_from_string.osl
@@ -16,14 +16,12 @@
 
 #include "stdosl.h"
 
-shader node_convert_from_string(
-	string value_string = "",
-	output color value_color = color(0.0, 0.0, 0.0),
-	output float value_float = 0.0,
-	output int value_int = 0,
-	output vector value_vector = vector(0.0, 0.0, 0.0),
-	output point value_point = point(0.0, 0.0, 0.0),
-	output normal value_normal = normal(0.0, 0.0, 0.0))
+shader node_convert_from_string(string value_string = "",
+                                output color value_color = color(0.0, 0.0, 0.0),
+                                output float value_float = 0.0,
+                                output int value_int = 0,
+                                output vector value_vector = vector(0.0, 0.0, 0.0),
+                                output point value_point = point(0.0, 0.0, 0.0),
+                                output normal value_normal = normal(0.0, 0.0, 0.0))
 {
 }
-
diff --git a/intern/cycles/kernel/shaders/node_convert_from_vector.osl b/intern/cycles/kernel/shaders/node_convert_from_vector.osl
index 8bdca469b90..92ab2313bcb 100644
--- a/intern/cycles/kernel/shaders/node_convert_from_vector.osl
+++ b/intern/cycles/kernel/shaders/node_convert_from_vector.osl
@@ -16,19 +16,17 @@
 
 #include "stdosl.h"
 
-shader node_convert_from_vector(
-	vector value_vector = vector(0.0, 0.0, 0.0),
-	output string value_string = "",
-	output float value_float = 0.0,
-	output int value_int = 0,
-	output color value_color = color(0.0, 0.0, 0.0),
-	output point value_point = point(0.0, 0.0, 0.0),
-	output normal value_normal = normal(0.0, 0.0, 0.0))
+shader node_convert_from_vector(vector value_vector = vector(0.0, 0.0, 0.0),
+                                output string value_string = "",
+                                output float value_float = 0.0,
+                                output int value_int = 0,
+                                output color value_color = color(0.0, 0.0, 0.0),
+                                output point value_point = point(0.0, 0.0, 0.0),
+                                output normal value_normal = normal(0.0, 0.0, 0.0))
 {
-	value_float = (value_vector[0] + value_vector[1] + value_vector[2]) * (1.0 / 3.0);
-	value_int = (int)((value_normal[0] + value_normal[1] + value_normal[2]) * (1.0 / 3.0));
-	value_color = color(value_vector[0], value_vector[1], value_vector[2]);
-	value_point = point(value_vector[0], value_vector[1], value_vector[2]);
-	value_normal = normal(value_vector[0], value_vector[1], value_vector[2]);
+  value_float = (value_vector[0] + value_vector[1] + value_vector[2]) * (1.0 / 3.0);
+  value_int = (int)((value_vector[0] + value_vector[1] + value_vector[2]) * (1.0 / 3.0));
+  value_color = color(value_vector[0], value_vector[1], value_vector[2]);
+  value_point = point(value_vector[0], value_vector[1], value_vector[2]);
+  value_normal = normal(value_vector[0], value_vector[1], value_vector[2]);
 }
-
diff --git a/intern/cycles/kernel/shaders/node_diffuse_bsdf.osl b/intern/cycles/kernel/shaders/node_diffuse_bsdf.osl
index 2bef2d65baa..bd5554b838a 100644
--- a/intern/cycles/kernel/shaders/node_diffuse_bsdf.osl
+++ b/intern/cycles/kernel/shaders/node_diffuse_bsdf.osl
@@ -16,15 +16,13 @@
 
 #include "stdosl.h"
 
-shader node_diffuse_bsdf(
-	color Color = 0.8,
-	float Roughness = 0.0,
-	normal Normal = N,
-	output closure color BSDF = 0)
+shader node_diffuse_bsdf(color Color = 0.8,
+                         float Roughness = 0.0,
+                         normal Normal = N,
+                         output closure color BSDF = 0)
 {
-	if (Roughness == 0.0)
-		BSDF = Color * diffuse(Normal);
-	else
-		BSDF = Color * oren_nayar(Normal, Roughness);
+  if (Roughness == 0.0)
+    BSDF = Color * diffuse(Normal);
+  else
+    BSDF = Color * oren_nayar(Normal, Roughness);
 }
-
diff --git a/intern/cycles/kernel/shaders/node_displacement.osl b/intern/cycles/kernel/shaders/node_displacement.osl
index 89f35841527..a1f3b7b7737 100644
--- a/intern/cycles/kernel/shaders/node_displacement.osl
+++ b/intern/cycles/kernel/shaders/node_displacement.osl
@@ -16,23 +16,21 @@
 
 #include "stdosl.h"
 
-shader node_displacement(
-	string space = "object",
-	float Height = 0.0,
-	float Midlevel = 0.5,
-	float Scale = 1.0,
-	normal Normal = N,
-	output vector Displacement = vector(0.0, 0.0, 0.0))
+shader node_displacement(string space = "object",
+                         float Height = 0.0,
+                         float Midlevel = 0.5,
+                         float Scale = 1.0,
+                         normal Normal = N,
+                         output vector Displacement = vector(0.0, 0.0, 0.0))
 {
-	Displacement = Normal;
-	if(space == "object") {
-		Displacement = transform("object", Displacement);
-	}
+  Displacement = Normal;
+  if (space == "object") {
+    Displacement = transform("object", Displacement);
+  }
 
-	Displacement = normalize(Displacement) * (Height - Midlevel) * Scale;
+  Displacement = normalize(Displacement) * (Height - Midlevel) * Scale;
 
-	if(space == "object") {
-		Displacement = transform("object", "world", Displacement);
-	}
+  if (space == "object") {
+    Displacement = transform("object", "world", Displacement);
+  }
 }
-
diff --git a/intern/cycles/kernel/shaders/node_emission.osl b/intern/cycles/kernel/shaders/node_emission.osl
index c36e2a4c0f3..57973f57ac6 100644
--- a/intern/cycles/kernel/shaders/node_emission.osl
+++ b/intern/cycles/kernel/shaders/node_emission.osl
@@ -16,11 +16,7 @@
 
 #include "stdosl.h"
 
-shader node_emission(
-	color Color = 0.8,
-	float Strength = 1.0,
-	output closure color Emission = 0)
+shader node_emission(color Color = 0.8, float Strength = 1.0, output closure color Emission = 0)
 {
-	Emission = (Strength * Color) * emission();
+  Emission = (Strength * Color) * emission();
 }
-
diff --git a/intern/cycles/kernel/shaders/node_environment_texture.osl b/intern/cycles/kernel/shaders/node_environment_texture.osl
index 95d9d813969..eb32dad392f 100644
--- a/intern/cycles/kernel/shaders/node_environment_texture.osl
+++ b/intern/cycles/kernel/shaders/node_environment_texture.osl
@@ -19,63 +19,63 @@
 
 vector environment_texture_direction_to_equirectangular(vector dir)
 {
-	float u = -atan2(dir[1], dir[0]) / (M_2PI) + 0.5;
-	float v = atan2(dir[2], hypot(dir[0], dir[1])) / M_PI + 0.5;
+  float u = -atan2(dir[1], dir[0]) / (M_2PI) + 0.5;
+  float v = atan2(dir[2], hypot(dir[0], dir[1])) / M_PI + 0.5;
 
-	return vector(u, v, 0.0);
+  return vector(u, v, 0.0);
 }
 
 vector environment_texture_direction_to_mirrorball(vector idir)
 {
-	vector dir = idir;
-	dir[1] -= 1.0;
+  vector dir = idir;
+  dir[1] -= 1.0;
 
-	float div = 2.0 * sqrt(max(-0.5 * dir[1], 0.0));
-	if (div > 0.0)
-		dir /= div;
+  float div = 2.0 * sqrt(max(-0.5 * dir[1], 0.0));
+  if (div > 0.0)
+    dir /= div;
 
-	float u = 0.5 * (dir[0] + 1.0);
-	float v = 0.5 * (dir[2] + 1.0);
+  float u = 0.5 * (dir[0] + 1.0);
+  float v = 0.5 * (dir[2] + 1.0);
 
-	return vector(u, v, 0.0);
+  return vector(u, v, 0.0);
 }
 
 shader node_environment_texture(
-	int use_mapping = 0,
-	matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
-	vector Vector = P,
-	string filename = "",
-	string projection = "equirectangular",
-	string interpolation = "linear",
-	string color_space = "sRGB",
-	int is_float = 1,
-	int use_alpha = 1,
-	output color Color = 0.0,
-	output float Alpha = 1.0)
+    int use_mapping = 0,
+    matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
+    vector Vector = P,
+    string filename = "",
+    string projection = "equirectangular",
+    string interpolation = "linear",
+    string color_space = "sRGB",
+    int is_float = 1,
+    int use_alpha = 1,
+    output color Color = 0.0,
+    output float Alpha = 1.0)
 {
-	vector p = Vector;
+  vector p = Vector;
 
-	if (use_mapping)
-		p = transform(mapping, p);
-	
-	p = normalize(p);
+  if (use_mapping)
+    p = transform(mapping, p);
 
-	if (projection == "equirectangular")
-		p = environment_texture_direction_to_equirectangular(p);
-	else
-		p = environment_texture_direction_to_mirrorball(p);
+  p = normalize(p);
 
-	/* todo: use environment for better texture filtering of equirectangular */
-	Color = (color)texture(filename, p[0], 1.0 - p[1], "wrap", "periodic", "interp", interpolation, "alpha", Alpha);
+  if (projection == "equirectangular")
+    p = environment_texture_direction_to_equirectangular(p);
+  else
+    p = environment_texture_direction_to_mirrorball(p);
 
-	if (use_alpha) {
-		Color = color_unpremultiply(Color, Alpha);
+  /* todo: use environment for better texture filtering of equirectangular */
+  Color = (color)texture(
+      filename, p[0], 1.0 - p[1], "wrap", "periodic", "interp", interpolation, "alpha", Alpha);
 
-		if (!is_float)
-			Color = min(Color, 1.0);
-	}
+  if (use_alpha) {
+    Color = color_unpremultiply(Color, Alpha);
 
-	if (color_space == "sRGB")
-		Color = color_srgb_to_scene_linear(Color);
-}
+    if (!is_float)
+      Color = min(Color, 1.0);
+  }
 
+  if (color_space == "sRGB")
+    Color = color_srgb_to_scene_linear(Color);
+}
diff --git a/intern/cycles/kernel/shaders/node_fresnel.h b/intern/cycles/kernel/shaders/node_fresnel.h
index 40793479d8a..ade1d4c6207 100644
--- a/intern/cycles/kernel/shaders/node_fresnel.h
+++ b/intern/cycles/kernel/shaders/node_fresnel.h
@@ -32,33 +32,31 @@
 
 float fresnel_dielectric_cos(float cosi, float eta)
 {
-	/* compute fresnel reflectance without explicitly computing
-	 * the refracted direction */
-	float c = fabs(cosi);
-	float g = eta * eta - 1 + c * c;
-	float result;
+  /* compute fresnel reflectance without explicitly computing
+   * the refracted direction */
+  float c = fabs(cosi);
+  float g = eta * eta - 1 + c * c;
+  float result;
 
-	if (g > 0) {
-		g = sqrt(g);
-		float A = (g - c) / (g + c);
-		float B = (c * (g + c) - 1) / (c * (g - c) + 1);
-		result = 0.5 * A * A * (1 + B * B);
-	}
-	else
-		result = 1.0;  /* TIR (no refracted component) */
+  if (g > 0) {
+    g = sqrt(g);
+    float A = (g - c) / (g + c);
+    float B = (c * (g + c) - 1) / (c * (g - c) + 1);
+    result = 0.5 * A * A * (1 + B * B);
+  }
+  else
+    result = 1.0; /* TIR (no refracted component) */
 
-	return result;
+  return result;
 }
 
 color fresnel_conductor(float cosi, color eta, color k)
 {
-	color cosi2 = color(cosi * cosi);
-	color one = color(1, 1, 1);
-	color tmp_f = eta * eta + k * k;
-	color tmp = tmp_f * cosi2;
-	color Rparl2 = (tmp - (2.0 * eta * cosi) + one) /
-	               (tmp + (2.0 * eta * cosi) + one);
-	color Rperp2 = (tmp_f - (2.0 * eta * cosi) + cosi2) /
-	               (tmp_f + (2.0 * eta * cosi) + cosi2);
-	return (Rparl2 + Rperp2) * 0.5;
+  color cosi2 = color(cosi * cosi);
+  color one = color(1, 1, 1);
+  color tmp_f = eta * eta + k * k;
+  color tmp = tmp_f * cosi2;
+  color Rparl2 = (tmp - (2.0 * eta * cosi) + one) / (tmp + (2.0 * eta * cosi) + one);
+  color Rperp2 = (tmp_f - (2.0 * eta * cosi) + cosi2) / (tmp_f + (2.0 * eta * cosi) + cosi2);
+  return (Rparl2 + Rperp2) * 0.5;
 }
diff --git a/intern/cycles/kernel/shaders/node_fresnel.osl b/intern/cycles/kernel/shaders/node_fresnel.osl
index 8bec7b432f5..89250db40f3 100644
--- a/intern/cycles/kernel/shaders/node_fresnel.osl
+++ b/intern/cycles/kernel/shaders/node_fresnel.osl
@@ -17,14 +17,10 @@
 #include "stdosl.h"
 #include "node_fresnel.h"
 
-shader node_fresnel(
-	float IOR = 1.45,
-	normal Normal = N,
-	output float Fac = 0.0)
+shader node_fresnel(float IOR = 1.45, normal Normal = N, output float Fac = 0.0)
 {
-	float f = max(IOR, 1e-5);
-	float eta = backfacing() ? 1.0 / f : f;
-	float cosi = dot(I, Normal);
-	Fac = fresnel_dielectric_cos(cosi, eta);
+  float f = max(IOR, 1e-5);
+  float eta = backfacing() ? 1.0 / f : f;
+  float cosi = dot(I, Normal);
+  Fac = fresnel_dielectric_cos(cosi, eta);
 }
-
diff --git a/intern/cycles/kernel/shaders/node_gamma.osl b/intern/cycles/kernel/shaders/node_gamma.osl
index bc4c1b34266..9b9c17dc8af 100644
--- a/intern/cycles/kernel/shaders/node_gamma.osl
+++ b/intern/cycles/kernel/shaders/node_gamma.osl
@@ -16,10 +16,7 @@
 
 #include "stdosl.h"
 
-shader node_gamma(
-	color ColorIn = 0.8,
-	float Gamma = 1.0,
-	output color ColorOut = 0.0)
+shader node_gamma(color ColorIn = 0.8, float Gamma = 1.0, output color ColorOut = 0.0)
 {
-	ColorOut = pow(ColorIn, Gamma);
+  ColorOut = pow(ColorIn, Gamma);
 }
diff --git a/intern/cycles/kernel/shaders/node_geometry.osl b/intern/cycles/kernel/shaders/node_geometry.osl
index b0bd7692489..b5c1c6611c1 100644
--- a/intern/cycles/kernel/shaders/node_geometry.osl
+++ b/intern/cycles/kernel/shaders/node_geometry.osl
@@ -16,55 +16,53 @@
 
 #include "stdosl.h"
 
-shader node_geometry(
-	normal NormalIn = N,
-	string bump_offset = "center",
+shader node_geometry(normal NormalIn = N,
+                     string bump_offset = "center",
 
-	output point Position = point(0.0, 0.0, 0.0),
-	output normal Normal = normal(0.0, 0.0, 0.0),
-	output normal Tangent = normal(0.0, 0.0, 0.0),
-	output normal TrueNormal = normal(0.0, 0.0, 0.0),
-	output vector Incoming = vector(0.0, 0.0, 0.0),
-	output point Parametric = point(0.0, 0.0, 0.0),
-	output float Backfacing = 0.0,
-	output float Pointiness = 0.0)
+                     output point Position = point(0.0, 0.0, 0.0),
+                     output normal Normal = normal(0.0, 0.0, 0.0),
+                     output normal Tangent = normal(0.0, 0.0, 0.0),
+                     output normal TrueNormal = normal(0.0, 0.0, 0.0),
+                     output vector Incoming = vector(0.0, 0.0, 0.0),
+                     output point Parametric = point(0.0, 0.0, 0.0),
+                     output float Backfacing = 0.0,
+                     output float Pointiness = 0.0)
 {
-	Position = P;
-	Normal = NormalIn;
-	TrueNormal = Ng;
-	Incoming = I;
-	Parametric = point(u, v, 0.0);
-	Backfacing = backfacing();
+  Position = P;
+  Normal = NormalIn;
+  TrueNormal = Ng;
+  Incoming = I;
+  Parametric = point(u, v, 0.0);
+  Backfacing = backfacing();
 
-	if (bump_offset == "dx") {
-		Position += Dx(Position);
-		Parametric += Dx(Parametric);
-	}
-	else if (bump_offset == "dy") {
-		Position += Dy(Position);
-		Parametric += Dy(Parametric);
-	}
+  if (bump_offset == "dx") {
+    Position += Dx(Position);
+    Parametric += Dx(Parametric);
+  }
+  else if (bump_offset == "dy") {
+    Position += Dy(Position);
+    Parametric += Dy(Parametric);
+  }
 
-	/* first try to get tangent attribute */
-	point generated;
+  /* first try to get tangent attribute */
+  point generated;
 
-	/* try to create spherical tangent from generated coordinates */
-	if (getattribute("geom:generated", generated)) {
-		normal data = normal(-(generated[1] - 0.5), (generated[0] - 0.5), 0.0);
-		vector T = transform("object", "world", data);
-		Tangent = cross(Normal, normalize(cross(T, Normal)));
-	}
-	else {
-		/* otherwise use surface derivatives */
-		Tangent = normalize(dPdu);
-	}
+  /* try to create spherical tangent from generated coordinates */
+  if (getattribute("geom:generated", generated)) {
+    normal data = normal(-(generated[1] - 0.5), (generated[0] - 0.5), 0.0);
+    vector T = transform("object", "world", data);
+    Tangent = cross(Normal, normalize(cross(T, Normal)));
+  }
+  else {
+    /* otherwise use surface derivatives */
+    Tangent = normalize(dPdu);
+  }
 
-	getattribute("geom:pointiness", Pointiness);
-	if (bump_offset == "dx") {
-		Pointiness += Dx(Pointiness);
-	}
-	else if (bump_offset == "dy") {
-		Pointiness += Dy(Pointiness);
-	}
+  getattribute("geom:pointiness", Pointiness);
+  if (bump_offset == "dx") {
+    Pointiness += Dx(Pointiness);
+  }
+  else if (bump_offset == "dy") {
+    Pointiness += Dy(Pointiness);
+  }
 }
-
diff --git a/intern/cycles/kernel/shaders/node_glass_bsdf.osl b/intern/cycles/kernel/shaders/node_glass_bsdf.osl
index 2e713861c58..c0b8a002536 100644
--- a/intern/cycles/kernel/shaders/node_glass_bsdf.osl
+++ b/intern/cycles/kernel/shaders/node_glass_bsdf.osl
@@ -17,29 +17,27 @@
 #include "stdosl.h"
 #include "node_fresnel.h"
 
-shader node_glass_bsdf(
-	color Color = 0.8,
-	string distribution = "sharp",
-	float Roughness = 0.2,
-	float IOR = 1.45,
-	normal Normal = N,
-	output closure color BSDF = 0)
+shader node_glass_bsdf(color Color = 0.8,
+                       string distribution = "sharp",
+                       float Roughness = 0.2,
+                       float IOR = 1.45,
+                       normal Normal = N,
+                       output closure color BSDF = 0)
 {
-	float f = max(IOR, 1e-5);
-	float eta = backfacing() ? 1.0 / f : f;
-	float cosi = dot(I, Normal);
-	float Fr = fresnel_dielectric_cos(cosi, eta);
-	float roughness = Roughness * Roughness;
+  float f = max(IOR, 1e-5);
+  float eta = backfacing() ? 1.0 / f : f;
+  float cosi = dot(I, Normal);
+  float Fr = fresnel_dielectric_cos(cosi, eta);
+  float roughness = Roughness * Roughness;
 
-	if (distribution == "sharp")
-		BSDF = Color * (Fr * reflection(Normal) + (1.0 - Fr) * refraction(Normal, eta));
-	else if (distribution == "beckmann")
-		BSDF = Color * (Fr * microfacet_beckmann(Normal, roughness) +
-		                (1.0 - Fr) * microfacet_beckmann_refraction(Normal, roughness, eta));
-	else if (distribution == "Multiscatter GGX")
-		BSDF = Color * microfacet_multi_ggx_glass(Normal, roughness, eta, Color);
-	else if (distribution == "GGX")
-		BSDF = Color * (Fr * microfacet_ggx(Normal, roughness) +
-		                (1.0 - Fr) * microfacet_ggx_refraction(Normal, roughness, eta));
+  if (distribution == "sharp")
+    BSDF = Color * (Fr * reflection(Normal) + (1.0 - Fr) * refraction(Normal, eta));
+  else if (distribution == "beckmann")
+    BSDF = Color * (Fr * microfacet_beckmann(Normal, roughness) +
+                    (1.0 - Fr) * microfacet_beckmann_refraction(Normal, roughness, eta));
+  else if (distribution == "Multiscatter GGX")
+    BSDF = Color * microfacet_multi_ggx_glass(Normal, roughness, eta, Color);
+  else if (distribution == "GGX")
+    BSDF = Color * (Fr * microfacet_ggx(Normal, roughness) +
+                    (1.0 - Fr) * microfacet_ggx_refraction(Normal, roughness, eta));
 }
-
diff --git a/intern/cycles/kernel/shaders/node_glossy_bsdf.osl b/intern/cycles/kernel/shaders/node_glossy_bsdf.osl
index 7415211b56d..2d40ee8d3f6 100644
--- a/intern/cycles/kernel/shaders/node_glossy_bsdf.osl
+++ b/intern/cycles/kernel/shaders/node_glossy_bsdf.osl
@@ -17,25 +17,22 @@
 #include "stdosl.h"
 #include "node_fresnel.h"
 
-shader node_glossy_bsdf(
-	color Color = 0.8,
-	string distribution = "GGX",
-	float Roughness = 0.2,
-	normal Normal = N,
-	output closure color BSDF = 0)
+shader node_glossy_bsdf(color Color = 0.8,
+                        string distribution = "GGX",
+                        float Roughness = 0.2,
+                        normal Normal = N,
+                        output closure color BSDF = 0)
 {
-	float roughness = Roughness * Roughness;
-
-	if (distribution == "sharp")
-		BSDF = Color * reflection(Normal);
-	else if (distribution == "beckmann")
-		BSDF = Color * microfacet_beckmann(Normal, roughness);
-	else if (distribution == "GGX")
-		BSDF = Color * microfacet_ggx(Normal, roughness);
-	else if (distribution == "Multiscatter GGX")
-		BSDF = Color * microfacet_multi_ggx(Normal, roughness, Color);
-	else
-		BSDF = Color * ashikhmin_shirley(Normal, vector(0, 0, 0), roughness, roughness);
+  float roughness = Roughness * Roughness;
 
+  if (distribution == "sharp")
+    BSDF = Color * reflection(Normal);
+  else if (distribution == "beckmann")
+    BSDF = Color * microfacet_beckmann(Normal, roughness);
+  else if (distribution == "GGX")
+    BSDF = Color * microfacet_ggx(Normal, roughness);
+  else if (distribution == "Multiscatter GGX")
+    BSDF = Color * microfacet_multi_ggx(Normal, roughness, Color);
+  else
+    BSDF = Color * ashikhmin_shirley(Normal, vector(0, 0, 0), roughness, roughness);
 }
-
diff --git a/intern/cycles/kernel/shaders/node_gradient_texture.osl b/intern/cycles/kernel/shaders/node_gradient_texture.osl
index f458937a18f..52bf466673d 100644
--- a/intern/cycles/kernel/shaders/node_gradient_texture.osl
+++ b/intern/cycles/kernel/shaders/node_gradient_texture.osl
@@ -21,59 +21,58 @@
 
 float gradient(point p, string type)
 {
-	float x, y, z;
-	
-	x = p[0];
-	y = p[1];
-	z = p[2];
+  float x, y, z;
 
-	float result = 0.0;
+  x = p[0];
+  y = p[1];
+  z = p[2];
 
-	if (type == "linear") {
-		result = x;
-	}
-	else if (type == "quadratic") {
-		float r = max(x, 0.0);
-		result = r * r;
-	}
-	else if (type == "easing") {
-		float r = min(max(x, 0.0), 1.0);
-		float t = r * r;
-		
-		result = (3.0 * t - 2.0 * t * r);
-	}
-	else if (type == "diagonal") {
-		result = (x + y) * 0.5;
-	}
-	else if (type == "radial") {
-		result = atan2(y, x) / M_2PI + 0.5;
-	}
-	else {
-		float r = max(1.0 - sqrt(x * x + y * y + z * z), 0.0);
+  float result = 0.0;
 
-		if (type == "quadratic_sphere")
-			result = r * r;
-		else if (type == "spherical")
-			result = r;
-	}
+  if (type == "linear") {
+    result = x;
+  }
+  else if (type == "quadratic") {
+    float r = max(x, 0.0);
+    result = r * r;
+  }
+  else if (type == "easing") {
+    float r = min(max(x, 0.0), 1.0);
+    float t = r * r;
 
-	return result;
+    result = (3.0 * t - 2.0 * t * r);
+  }
+  else if (type == "diagonal") {
+    result = (x + y) * 0.5;
+  }
+  else if (type == "radial") {
+    result = atan2(y, x) / M_2PI + 0.5;
+  }
+  else {
+    float r = max(1.0 - sqrt(x * x + y * y + z * z), 0.0);
+
+    if (type == "quadratic_sphere")
+      result = r * r;
+    else if (type == "spherical")
+      result = r;
+  }
+
+  return result;
 }
 
 shader node_gradient_texture(
-	int use_mapping = 0,
-	matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
-	string type = "linear",
-	point Vector = P,
-	output float Fac = 0.0,
-	output color Color = 0.0)
+    int use_mapping = 0,
+    matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
+    string type = "linear",
+    point Vector = P,
+    output float Fac = 0.0,
+    output color Color = 0.0)
 {
-	point p = Vector;
+  point p = Vector;
 
-	if (use_mapping)
-		p = transform(mapping, p);
+  if (use_mapping)
+    p = transform(mapping, p);
 
-	Fac = gradient(p, type);
-	Color = color(Fac, Fac, Fac);
+  Fac = gradient(p, type);
+  Color = color(Fac, Fac, Fac);
 }
-
diff --git a/intern/cycles/kernel/shaders/node_hair_bsdf.osl b/intern/cycles/kernel/shaders/node_hair_bsdf.osl
index ef8f2fae894..bc912087666 100644
--- a/intern/cycles/kernel/shaders/node_hair_bsdf.osl
+++ b/intern/cycles/kernel/shaders/node_hair_bsdf.osl
@@ -18,41 +18,40 @@
 
 #include "stdosl.h"
 
-shader node_hair_bsdf(
-	color Color = 0.8,
-	string component = "reflection",
-	float Offset = 0.0,
-	float RoughnessU = 0.1,
-	float RoughnessV = 1.0,
-	normal Tangent = normal(0, 0, 0),
-	output closure color BSDF = 0)
+shader node_hair_bsdf(color Color = 0.8,
+                      string component = "reflection",
+                      float Offset = 0.0,
+                      float RoughnessU = 0.1,
+                      float RoughnessV = 1.0,
+                      normal Tangent = normal(0, 0, 0),
+                      output closure color BSDF = 0)
 {
-	float roughnessh = clamp(RoughnessU, 0.001, 1.0);
-	float roughnessv = clamp(RoughnessV, 0.001, 1.0);
-	float offset = -Offset;
+  float roughnessh = clamp(RoughnessU, 0.001, 1.0);
+  float roughnessv = clamp(RoughnessV, 0.001, 1.0);
+  float offset = -Offset;
 
-	normal T;
-	float IsCurve = 0;
-	getattribute("geom:is_curve", IsCurve);
+  normal T;
+  float IsCurve = 0;
+  getattribute("geom:is_curve", IsCurve);
 
-	if (isconnected(Tangent)) {
-		T = Tangent;
-	}
-	else if(!IsCurve) {
-		T = normalize(dPdv);
-		offset = 0.0;
-	}
-	else {
-		T = normalize(dPdu);
-	}
+  if (isconnected(Tangent)) {
+    T = Tangent;
+  }
+  else if (!IsCurve) {
+    T = normalize(dPdv);
+    offset = 0.0;
+  }
+  else {
+    T = normalize(dPdu);
+  }
 
-	if (backfacing() && IsCurve) {
-		BSDF = transparent();
-	}
-	else {
-		if (component == "reflection")
-			BSDF = Color * hair_reflection(Ng, roughnessh, roughnessv, T, offset);
-		else
-			BSDF = Color * hair_transmission(Ng, roughnessh, roughnessv, T, offset);
-	}
+  if (backfacing() && IsCurve) {
+    BSDF = transparent();
+  }
+  else {
+    if (component == "reflection")
+      BSDF = Color * hair_reflection(Ng, roughnessh, roughnessv, T, offset);
+    else
+      BSDF = Color * hair_transmission(Ng, roughnessh, roughnessv, T, offset);
+  }
 }
diff --git a/intern/cycles/kernel/shaders/node_hair_info.osl b/intern/cycles/kernel/shaders/node_hair_info.osl
index 19216f67579..991a27c4103 100644
--- a/intern/cycles/kernel/shaders/node_hair_info.osl
+++ b/intern/cycles/kernel/shaders/node_hair_info.osl
@@ -16,17 +16,15 @@
 
 #include "stdosl.h"
 
-shader node_hair_info(
-	output float IsStrand = 0.0,
-	output float Intercept = 0.0,
-	output float Thickness = 0.0,
-	output normal TangentNormal = N,
-	output float Random = 0)
+shader node_hair_info(output float IsStrand = 0.0,
+                      output float Intercept = 0.0,
+                      output float Thickness = 0.0,
+                      output normal TangentNormal = N,
+                      output float Random = 0)
 {
-	getattribute("geom:is_curve", IsStrand);
-	getattribute("geom:curve_intercept", Intercept);
-	getattribute("geom:curve_thickness", Thickness);
-	getattribute("geom:curve_tangent_normal", TangentNormal);
-	getattribute("geom:curve_random", Random);
+  getattribute("geom:is_curve", IsStrand);
+  getattribute("geom:curve_intercept", Intercept);
+  getattribute("geom:curve_thickness", Thickness);
+  getattribute("geom:curve_tangent_normal", TangentNormal);
+  getattribute("geom:curve_random", Random);
 }
-
diff --git a/intern/cycles/kernel/shaders/node_holdout.osl b/intern/cycles/kernel/shaders/node_holdout.osl
index 78a9f46fd15..b51bc0543a5 100644
--- a/intern/cycles/kernel/shaders/node_holdout.osl
+++ b/intern/cycles/kernel/shaders/node_holdout.osl
@@ -16,9 +16,6 @@
 
 #include "stdosl.h"
 
-shader node_holdout(
-	output closure color Holdout = holdout())
+shader node_holdout(output closure color Holdout = holdout())
 {
-
 }
-
diff --git a/intern/cycles/kernel/shaders/node_hsv.osl b/intern/cycles/kernel/shaders/node_hsv.osl
index d72a87a951f..30c56a20a92 100644
--- a/intern/cycles/kernel/shaders/node_hsv.osl
+++ b/intern/cycles/kernel/shaders/node_hsv.osl
@@ -17,28 +17,26 @@
 #include "stdosl.h"
 #include "node_color.h"
 
-shader node_hsv(
-	float Hue = 0.5,
-	float Saturation = 1.0,
-	float Value = 1.0,
-	float Fac = 0.5,
-	color ColorIn = 0.0,
-	output color ColorOut = 0.0)
+shader node_hsv(float Hue = 0.5,
+                float Saturation = 1.0,
+                float Value = 1.0,
+                float Fac = 0.5,
+                color ColorIn = 0.0,
+                output color ColorOut = 0.0)
 {
-	color Color = rgb_to_hsv(ColorIn);
+  color Color = rgb_to_hsv(ColorIn);
 
-	// remember: fmod doesn't work for negative numbers
-	Color[0] = fmod(Color[0] + Hue + 0.5, 1.0);
-	Color[1] = clamp(Color[1] * Saturation, 0.0, 1.0);
-	Color[2] *= Value;
+  // remember: fmod doesn't work for negative numbers
+  Color[0] = fmod(Color[0] + Hue + 0.5, 1.0);
+  Color[1] = clamp(Color[1] * Saturation, 0.0, 1.0);
+  Color[2] *= Value;
 
-	Color = hsv_to_rgb(Color);
+  Color = hsv_to_rgb(Color);
 
-	// Clamp color to prevent negative values cauzed by oversaturation.
-	Color[0] = max(Color[0], 0.0);
-	Color[1] = max(Color[1], 0.0);
-	Color[2] = max(Color[2], 0.0);
+  // Clamp color to prevent negative values cauzed by oversaturation.
+  Color[0] = max(Color[0], 0.0);
+  Color[1] = max(Color[1], 0.0);
+  Color[2] = max(Color[2], 0.0);
 
-	ColorOut = mix(ColorIn, Color, Fac);
+  ColorOut = mix(ColorIn, Color, Fac);
 }
-
diff --git a/intern/cycles/kernel/shaders/node_ies_light.osl b/intern/cycles/kernel/shaders/node_ies_light.osl
index a0954e3a444..ea8c44e09de 100644
--- a/intern/cycles/kernel/shaders/node_ies_light.osl
+++ b/intern/cycles/kernel/shaders/node_ies_light.osl
@@ -19,24 +19,23 @@
 
 /* IES Light */
 
-shader node_ies_light(
-	int use_mapping = 0,
-	matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
-	int slot = 0,
-	float Strength = 1.0,
-	point Vector = I,
-	output float Fac = 0.0)
+shader node_ies_light(int use_mapping = 0,
+                      matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
+                      int slot = 0,
+                      float Strength = 1.0,
+                      point Vector = I,
+                      output float Fac = 0.0)
 {
-	point p = Vector;
+  point p = Vector;
 
-	if (use_mapping) {
-		p = transform(mapping, p);
-	}
+  if (use_mapping) {
+    p = transform(mapping, p);
+  }
 
-	p = normalize(p);
+  p = normalize(p);
 
-	float v_angle = acos(-p[2]);
-	float h_angle = atan2(p[0], p[1]) + M_PI;
+  float v_angle = acos(-p[2]);
+  float h_angle = atan2(p[0], p[1]) + M_PI;
 
-	Fac = Strength * texture(format("@l%d", slot), h_angle, v_angle);
+  Fac = Strength * texture(format("@l%d", slot), h_angle, v_angle);
 }
diff --git a/intern/cycles/kernel/shaders/node_image_texture.osl b/intern/cycles/kernel/shaders/node_image_texture.osl
index 7cd2922dd4f..df5eda39985 100644
--- a/intern/cycles/kernel/shaders/node_image_texture.osl
+++ b/intern/cycles/kernel/shaders/node_image_texture.osl
@@ -19,217 +19,217 @@
 
 point texco_remap_square(point co)
 {
-	return (co - point(0.5, 0.5, 0.5)) * 2.0;
+  return (co - point(0.5, 0.5, 0.5)) * 2.0;
 }
 
 point map_to_tube(vector dir)
 {
-	float u, v;
-	v = (dir[2] + 1.0) * 0.5;
-	float len = sqrt(dir[0] * dir[0] + dir[1] * dir[1]);
-	if (len > 0.0) {
-		u = (1.0 - (atan2(dir[0] / len, dir[1] / len) / M_PI)) * 0.5;
-	}
-	else {
-		v = u = 0.0; /* To avoid un-initialized variables. */
-	}
-	return point(u, v, 0.0);
+  float u, v;
+  v = (dir[2] + 1.0) * 0.5;
+  float len = sqrt(dir[0] * dir[0] + dir[1] * dir[1]);
+  if (len > 0.0) {
+    u = (1.0 - (atan2(dir[0] / len, dir[1] / len) / M_PI)) * 0.5;
+  }
+  else {
+    v = u = 0.0; /* To avoid un-initialized variables. */
+  }
+  return point(u, v, 0.0);
 }
 
 point map_to_sphere(vector dir)
 {
-	float len = length(dir);
-	float v, u;
-	if (len > 0.0) {
-		if (dir[0] == 0.0 && dir[1] == 0.0) {
-			u = 0.0;  /* Othwise domain error. */
-		}
-		else {
-			u = (1.0 - atan2(dir[0], dir[1]) / M_PI) / 2.0;
-		}
-		v = 1.0 - acos(dir[2] / len) / M_PI;
-	}
-	else {
-		v = u = 0.0;  /* To avoid un-initialized variables. */
-	}
-	return point(u, v, 0.0);
+  float len = length(dir);
+  float v, u;
+  if (len > 0.0) {
+    if (dir[0] == 0.0 && dir[1] == 0.0) {
+      u = 0.0; /* Othwise domain error. */
+    }
+    else {
+      u = (1.0 - atan2(dir[0], dir[1]) / M_PI) / 2.0;
+    }
+    v = 1.0 - acos(dir[2] / len) / M_PI;
+  }
+  else {
+    v = u = 0.0; /* To avoid un-initialized variables. */
+  }
+  return point(u, v, 0.0);
 }
 
 color image_texture_lookup(string filename,
                            string color_space,
-                           float u, float v,
+                           float u,
+                           float v,
                            output float Alpha,
                            int use_alpha,
                            int is_float,
                            string interpolation,
                            string extension)
 {
-	color rgb = (color)texture(filename, u, 1.0 - v, "wrap", extension, "interp", interpolation, "alpha", Alpha);
+  color rgb = (color)texture(
+      filename, u, 1.0 - v, "wrap", extension, "interp", interpolation, "alpha", Alpha);
 
-	if (use_alpha) {
-		rgb = color_unpremultiply(rgb, Alpha);
-	
-		if (!is_float)
-			rgb = min(rgb, 1.0);
-	}
+  if (use_alpha) {
+    rgb = color_unpremultiply(rgb, Alpha);
 
-	if (color_space == "sRGB") {
-		rgb = color_srgb_to_scene_linear(rgb);
-	}
+    if (!is_float)
+      rgb = min(rgb, 1.0);
+  }
 
-	return rgb;
+  if (color_space == "sRGB") {
+    rgb = color_srgb_to_scene_linear(rgb);
+  }
+
+  return rgb;
 }
 
-shader node_image_texture(
-	int use_mapping = 0,
-	matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
-	point Vector = P,
-	string filename = "",
-	string color_space = "sRGB",
-	string projection = "flat",
-	string interpolation = "smartcubic",
-	string extension = "periodic",
-	float projection_blend = 0.0,
-	int is_float = 1,
-	int use_alpha = 1,
-	output color Color = 0.0,
-	output float Alpha = 1.0)
+shader node_image_texture(int use_mapping = 0,
+                          matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
+                          point Vector = P,
+                          string filename = "",
+                          string color_space = "sRGB",
+                          string projection = "flat",
+                          string interpolation = "smartcubic",
+                          string extension = "periodic",
+                          float projection_blend = 0.0,
+                          int is_float = 1,
+                          int use_alpha = 1,
+                          output color Color = 0.0,
+                          output float Alpha = 1.0)
 {
-	point p = Vector;
-
-	if (use_mapping)
-		p = transform(mapping, p);
-	
-	if (projection == "flat") {
-		Color = image_texture_lookup(filename,
-		                             color_space,
-		                             p[0], p[1],
-		                             Alpha,
-		                             use_alpha,
-		                             is_float,
-		                             interpolation,
-		                             extension);
-	}
-	else if (projection == "box") {
-		/* object space normal */
-		vector Nob = transform("world", "object", N);
-
-		/* project from direction vector to barycentric coordinates in triangles */
-		Nob = vector(fabs(Nob[0]), fabs(Nob[1]), fabs(Nob[2]));
-		Nob /= (Nob[0] + Nob[1] + Nob[2]);
-
-		/* basic idea is to think of this as a triangle, each corner representing
-		 * one of the 3 faces of the cube. in the corners we have single textures,
-		 * in between we blend between two textures, and in the middle we a blend
-		 * between three textures.
-		 *
-		 * the Nxyz values are the barycentric coordinates in an equilateral
-		 * triangle, which in case of blending, in the middle has a smaller
-		 * equilateral triangle where 3 textures blend. this divides things into
-		 * 7 zones, with an if () test for each zone */
-
-		vector weight = vector(0.0, 0.0, 0.0);
-		float blend = projection_blend;
-		float limit = 0.5 * (1.0 + blend);
-
-		/* first test for corners with single texture */
-		if (Nob[0] > limit * (Nob[0] + Nob[1]) && Nob[0] > limit * (Nob[0] + Nob[2])) {
-			weight[0] = 1.0;
-		}
-		else if (Nob[1] > limit * (Nob[0] + Nob[1]) && Nob[1] > limit * (Nob[1] + Nob[2])) {
-			weight[1] = 1.0;
-		}
-		else if (Nob[2] > limit * (Nob[0] + Nob[2]) && Nob[2] > limit * (Nob[1] + Nob[2])) {
-			weight[2] = 1.0;
-		}
-		else if (blend > 0.0) {
-			/* in case of blending, test for mixes between two textures */
-			if (Nob[2] < (1.0 - limit) * (Nob[1] + Nob[0])) {
-				weight[0] = Nob[0] / (Nob[0] + Nob[1]);
-				weight[0] = clamp((weight[0] - 0.5 * (1.0 - blend)) / blend, 0.0, 1.0);
-				weight[1] = 1.0 - weight[0];
-			}
-			else if (Nob[0] < (1.0 - limit) * (Nob[1] + Nob[2])) {
-				weight[1] = Nob[1] / (Nob[1] + Nob[2]);
-				weight[1] = clamp((weight[1] - 0.5 * (1.0 - blend)) / blend, 0.0, 1.0);
-				weight[2] = 1.0 - weight[1];
-			}
-			else if (Nob[1] < (1.0 - limit) * (Nob[0] + Nob[2])) {
-				weight[0] = Nob[0] / (Nob[0] + Nob[2]);
-				weight[0] = clamp((weight[0] - 0.5 * (1.0 - blend)) / blend, 0.0, 1.0);
-				weight[2] = 1.0 - weight[0];
-			}
-			else {
-				/* last case, we have a mix between three */
-				weight[0] = ((2.0 - limit) * Nob[0] + (limit - 1.0)) / (2.0 * limit - 1.0);
-				weight[1] = ((2.0 - limit) * Nob[1] + (limit - 1.0)) / (2.0 * limit - 1.0);
-				weight[2] = ((2.0 - limit) * Nob[2] + (limit - 1.0)) / (2.0 * limit - 1.0);
-			}
-		}
-		else {
-			/* Desperate mode, no valid choice anyway, fallback to one side.*/
-			weight[0] = 1.0;
-		}
-
-		Color = color(0.0, 0.0, 0.0);
-		Alpha = 0.0;
-
-		float tmp_alpha;
-
-		if (weight[0] > 0.0) {
-			Color += weight[0] * image_texture_lookup(filename,
-			                                          color_space,
-			                                          p[1], p[2],
-			                                          tmp_alpha,
-			                                          use_alpha,
-			                                          is_float,
-			                                          interpolation,
-			                                          extension);
-			Alpha += weight[0] * tmp_alpha;
-		}
-		if (weight[1] > 0.0) {
-			Color += weight[1] * image_texture_lookup(filename,
-			                                          color_space,
-			                                          p[0], p[2],
-			                                          tmp_alpha,
-			                                          use_alpha,
-			                                          is_float,
-			                                          interpolation,
-			                                          extension);
-			Alpha += weight[1] * tmp_alpha;
-		}
-		if (weight[2] > 0.0) {
-			Color += weight[2] * image_texture_lookup(filename,
-			                                          color_space,
-			                                          p[1], p[0],
-			                                          tmp_alpha,
-			                                          use_alpha,
-			                                          is_float,
-			                                          interpolation,
-			                                          extension);
-			Alpha += weight[2] * tmp_alpha;
-		}
-	}
-	else if (projection == "sphere") {
-		point projected = map_to_sphere(texco_remap_square(p));
-		Color = image_texture_lookup(filename,
-		                             color_space,
-		                             projected[0], projected[1],
-		                             Alpha,
-		                             use_alpha,
-		                             is_float,
-		                             interpolation,
-		                             extension);
-	}
-	else if (projection == "tube") {
-		point projected = map_to_tube(texco_remap_square(p));
-		Color = image_texture_lookup(filename,
-		                             color_space,
-		                             projected[0], projected[1],
-		                             Alpha,
-		                             use_alpha,
-		                             is_float,
-		                             interpolation,
-		                             extension);
-	}
+  point p = Vector;
+
+  if (use_mapping)
+    p = transform(mapping, p);
+
+  if (projection == "flat") {
+    Color = image_texture_lookup(
+        filename, color_space, p[0], p[1], Alpha, use_alpha, is_float, interpolation, extension);
+  }
+  else if (projection == "box") {
+    /* object space normal */
+    vector Nob = transform("world", "object", N);
+
+    /* project from direction vector to barycentric coordinates in triangles */
+    Nob = vector(fabs(Nob[0]), fabs(Nob[1]), fabs(Nob[2]));
+    Nob /= (Nob[0] + Nob[1] + Nob[2]);
+
+    /* basic idea is to think of this as a triangle, each corner representing
+     * one of the 3 faces of the cube. in the corners we have single textures,
+     * in between we blend between two textures, and in the middle we a blend
+     * between three textures.
+     *
+     * the Nxyz values are the barycentric coordinates in an equilateral
+     * triangle, which in case of blending, in the middle has a smaller
+     * equilateral triangle where 3 textures blend. this divides things into
+     * 7 zones, with an if () test for each zone */
+
+    vector weight = vector(0.0, 0.0, 0.0);
+    float blend = projection_blend;
+    float limit = 0.5 * (1.0 + blend);
+
+    /* first test for corners with single texture */
+    if (Nob[0] > limit * (Nob[0] + Nob[1]) && Nob[0] > limit * (Nob[0] + Nob[2])) {
+      weight[0] = 1.0;
+    }
+    else if (Nob[1] > limit * (Nob[0] + Nob[1]) && Nob[1] > limit * (Nob[1] + Nob[2])) {
+      weight[1] = 1.0;
+    }
+    else if (Nob[2] > limit * (Nob[0] + Nob[2]) && Nob[2] > limit * (Nob[1] + Nob[2])) {
+      weight[2] = 1.0;
+    }
+    else if (blend > 0.0) {
+      /* in case of blending, test for mixes between two textures */
+      if (Nob[2] < (1.0 - limit) * (Nob[1] + Nob[0])) {
+        weight[0] = Nob[0] / (Nob[0] + Nob[1]);
+        weight[0] = clamp((weight[0] - 0.5 * (1.0 - blend)) / blend, 0.0, 1.0);
+        weight[1] = 1.0 - weight[0];
+      }
+      else if (Nob[0] < (1.0 - limit) * (Nob[1] + Nob[2])) {
+        weight[1] = Nob[1] / (Nob[1] + Nob[2]);
+        weight[1] = clamp((weight[1] - 0.5 * (1.0 - blend)) / blend, 0.0, 1.0);
+        weight[2] = 1.0 - weight[1];
+      }
+      else if (Nob[1] < (1.0 - limit) * (Nob[0] + Nob[2])) {
+        weight[0] = Nob[0] / (Nob[0] + Nob[2]);
+        weight[0] = clamp((weight[0] - 0.5 * (1.0 - blend)) / blend, 0.0, 1.0);
+        weight[2] = 1.0 - weight[0];
+      }
+      else {
+        /* last case, we have a mix between three */
+        weight[0] = ((2.0 - limit) * Nob[0] + (limit - 1.0)) / (2.0 * limit - 1.0);
+        weight[1] = ((2.0 - limit) * Nob[1] + (limit - 1.0)) / (2.0 * limit - 1.0);
+        weight[2] = ((2.0 - limit) * Nob[2] + (limit - 1.0)) / (2.0 * limit - 1.0);
+      }
+    }
+    else {
+      /* Desperate mode, no valid choice anyway, fallback to one side.*/
+      weight[0] = 1.0;
+    }
+
+    Color = color(0.0, 0.0, 0.0);
+    Alpha = 0.0;
+
+    float tmp_alpha;
+
+    if (weight[0] > 0.0) {
+      Color += weight[0] * image_texture_lookup(filename,
+                                                color_space,
+                                                p[1],
+                                                p[2],
+                                                tmp_alpha,
+                                                use_alpha,
+                                                is_float,
+                                                interpolation,
+                                                extension);
+      Alpha += weight[0] * tmp_alpha;
+    }
+    if (weight[1] > 0.0) {
+      Color += weight[1] * image_texture_lookup(filename,
+                                                color_space,
+                                                p[0],
+                                                p[2],
+                                                tmp_alpha,
+                                                use_alpha,
+                                                is_float,
+                                                interpolation,
+                                                extension);
+      Alpha += weight[1] * tmp_alpha;
+    }
+    if (weight[2] > 0.0) {
+      Color += weight[2] * image_texture_lookup(filename,
+                                                color_space,
+                                                p[1],
+                                                p[0],
+                                                tmp_alpha,
+                                                use_alpha,
+                                                is_float,
+                                                interpolation,
+                                                extension);
+      Alpha += weight[2] * tmp_alpha;
+    }
+  }
+  else if (projection == "sphere") {
+    point projected = map_to_sphere(texco_remap_square(p));
+    Color = image_texture_lookup(filename,
+                                 color_space,
+                                 projected[0],
+                                 projected[1],
+                                 Alpha,
+                                 use_alpha,
+                                 is_float,
+                                 interpolation,
+                                 extension);
+  }
+  else if (projection == "tube") {
+    point projected = map_to_tube(texco_remap_square(p));
+    Color = image_texture_lookup(filename,
+                                 color_space,
+                                 projected[0],
+                                 projected[1],
+                                 Alpha,
+                                 use_alpha,
+                                 is_float,
+                                 interpolation,
+                                 extension);
+  }
 }
diff --git a/intern/cycles/kernel/shaders/node_invert.osl b/intern/cycles/kernel/shaders/node_invert.osl
index b33b0a43d63..c7d41e4e129 100644
--- a/intern/cycles/kernel/shaders/node_invert.osl
+++ b/intern/cycles/kernel/shaders/node_invert.osl
@@ -16,12 +16,8 @@
 
 #include "stdosl.h"
 
-shader node_invert(
-	float Fac = 1.0,
-	color ColorIn = 0.8,
-	output color ColorOut = 0.8)
+shader node_invert(float Fac = 1.0, color ColorIn = 0.8, output color ColorOut = 0.8)
 {
-	color ColorInv = color(1.0) - ColorIn;
-	ColorOut = mix(ColorIn, ColorInv, Fac);
+  color ColorInv = color(1.0) - ColorIn;
+  ColorOut = mix(ColorIn, ColorInv, Fac);
 }
-
diff --git a/intern/cycles/kernel/shaders/node_layer_weight.osl b/intern/cycles/kernel/shaders/node_layer_weight.osl
index f583df25773..7c46f28b41b 100644
--- a/intern/cycles/kernel/shaders/node_layer_weight.osl
+++ b/intern/cycles/kernel/shaders/node_layer_weight.osl
@@ -17,29 +17,28 @@
 #include "stdosl.h"
 #include "node_fresnel.h"
 
-shader node_layer_weight(
-	float Blend = 0.5,
-	normal Normal = N,
-	output float Fresnel = 0.0,
-	output float Facing = 0.0)
+shader node_layer_weight(float Blend = 0.5,
+                         normal Normal = N,
+                         output float Fresnel = 0.0,
+                         output float Facing = 0.0)
 {
-	float blend = Blend;
-	float cosi = dot(I, Normal);
+  float blend = Blend;
+  float cosi = dot(I, Normal);
 
-	/* Fresnel */ 
-	float eta = max(1.0 - Blend, 1e-5);
-	eta = backfacing() ? eta : 1.0 / eta;
-	Fresnel = fresnel_dielectric_cos(cosi, eta);
+  /* Fresnel */
+  float eta = max(1.0 - Blend, 1e-5);
+  eta = backfacing() ? eta : 1.0 / eta;
+  Fresnel = fresnel_dielectric_cos(cosi, eta);
 
-	/* Facing */ 
-	Facing = fabs(cosi);
+  /* Facing */
+  Facing = fabs(cosi);
 
-	if (blend != 0.5) {
-		blend = clamp(blend, 0.0, 1.0 - 1e-5);
-		blend = (blend < 0.5) ? 2.0 * blend : 0.5 / (1.0 - blend);
+  if (blend != 0.5) {
+    blend = clamp(blend, 0.0, 1.0 - 1e-5);
+    blend = (blend < 0.5) ? 2.0 * blend : 0.5 / (1.0 - blend);
 
-		Facing = pow(Facing, blend);
-	}
+    Facing = pow(Facing, blend);
+  }
 
-	Facing = 1.0 - Facing;
+  Facing = 1.0 - Facing;
 }
diff --git a/intern/cycles/kernel/shaders/node_light_falloff.osl b/intern/cycles/kernel/shaders/node_light_falloff.osl
index a594e33d643..d0d7dd9c5aa 100644
--- a/intern/cycles/kernel/shaders/node_light_falloff.osl
+++ b/intern/cycles/kernel/shaders/node_light_falloff.osl
@@ -16,29 +16,27 @@
 
 #include "stdosl.h"
 
-shader node_light_falloff(
-	float Strength = 0.0,
-	float Smooth = 0.0,
-	output float Quadratic = 0.0,
-	output float Linear = 0.0,
-	output float Constant = 0.0)
+shader node_light_falloff(float Strength = 0.0,
+                          float Smooth = 0.0,
+                          output float Quadratic = 0.0,
+                          output float Linear = 0.0,
+                          output float Constant = 0.0)
 {
-	float ray_length = 0.0;
-	float strength = Strength;
-	getattribute("path:ray_length", ray_length);
+  float ray_length = 0.0;
+  float strength = Strength;
+  getattribute("path:ray_length", ray_length);
 
-	if (Smooth > 0.0) {
-		float squared = ray_length * ray_length;
-		strength *= squared / (Smooth + squared);
-	}
+  if (Smooth > 0.0) {
+    float squared = ray_length * ray_length;
+    strength *= squared / (Smooth + squared);
+  }
 
-	/* Quadratic */ 
-	Quadratic = strength;
-	
-	/* Linear */
-	Linear = (strength * ray_length);
+  /* Quadratic */
+  Quadratic = strength;
 
-	/* Constant */
-	Constant = (strength * ray_length * ray_length);
-}
+  /* Linear */
+  Linear = (strength * ray_length);
 
+  /* Constant */
+  Constant = (strength * ray_length * ray_length);
+}
diff --git a/intern/cycles/kernel/shaders/node_light_path.osl b/intern/cycles/kernel/shaders/node_light_path.osl
index 64fe4c20132..c4a3624a67f 100644
--- a/intern/cycles/kernel/shaders/node_light_path.osl
+++ b/intern/cycles/kernel/shaders/node_light_path.osl
@@ -16,51 +16,49 @@
 
 #include "stdosl.h"
 
-shader node_light_path(
-	output float IsCameraRay = 0.0,
-	output float IsShadowRay = 0.0,
-	output float IsDiffuseRay = 0.0,
-	output float IsGlossyRay = 0.0,
-	output float IsSingularRay = 0.0,
-	output float IsReflectionRay = 0.0,
-	output float IsTransmissionRay = 0.0,
-	output float IsVolumeScatterRay = 0.0,
-	output float RayLength = 0.0,
-	output float RayDepth = 0.0,
-	output float DiffuseDepth = 0.0,
-	output float GlossyDepth = 0.0,
-	output float TransparentDepth = 0.0,
-	output float TransmissionDepth = 0.0)
+shader node_light_path(output float IsCameraRay = 0.0,
+                       output float IsShadowRay = 0.0,
+                       output float IsDiffuseRay = 0.0,
+                       output float IsGlossyRay = 0.0,
+                       output float IsSingularRay = 0.0,
+                       output float IsReflectionRay = 0.0,
+                       output float IsTransmissionRay = 0.0,
+                       output float IsVolumeScatterRay = 0.0,
+                       output float RayLength = 0.0,
+                       output float RayDepth = 0.0,
+                       output float DiffuseDepth = 0.0,
+                       output float GlossyDepth = 0.0,
+                       output float TransparentDepth = 0.0,
+                       output float TransmissionDepth = 0.0)
 {
-	IsCameraRay = raytype("camera");
-	IsShadowRay = raytype("shadow");
-	IsDiffuseRay = raytype("diffuse");
-	IsGlossyRay = raytype("glossy");
-	IsSingularRay = raytype("singular");
-	IsReflectionRay = raytype("reflection");
-	IsTransmissionRay = raytype("refraction");
-	IsVolumeScatterRay = raytype("volume_scatter");
+  IsCameraRay = raytype("camera");
+  IsShadowRay = raytype("shadow");
+  IsDiffuseRay = raytype("diffuse");
+  IsGlossyRay = raytype("glossy");
+  IsSingularRay = raytype("singular");
+  IsReflectionRay = raytype("reflection");
+  IsTransmissionRay = raytype("refraction");
+  IsVolumeScatterRay = raytype("volume_scatter");
 
-	getattribute("path:ray_length", RayLength);
+  getattribute("path:ray_length", RayLength);
 
-	int ray_depth;
-	getattribute("path:ray_depth", ray_depth);
-	RayDepth = (float)ray_depth;
+  int ray_depth;
+  getattribute("path:ray_depth", ray_depth);
+  RayDepth = (float)ray_depth;
 
-	int diffuse_depth;
-	getattribute("path:diffuse_depth", diffuse_depth);
-	DiffuseDepth = (float)diffuse_depth;
+  int diffuse_depth;
+  getattribute("path:diffuse_depth", diffuse_depth);
+  DiffuseDepth = (float)diffuse_depth;
 
-	int glossy_depth;
-	getattribute("path:glossy_depth", glossy_depth);
-	GlossyDepth = (float)glossy_depth;
+  int glossy_depth;
+  getattribute("path:glossy_depth", glossy_depth);
+  GlossyDepth = (float)glossy_depth;
 
-	int transparent_depth;
-	getattribute("path:transparent_depth", transparent_depth);
-	TransparentDepth = (float)transparent_depth;
+  int transparent_depth;
+  getattribute("path:transparent_depth", transparent_depth);
+  TransparentDepth = (float)transparent_depth;
 
-	int transmission_depth;
-	getattribute("path:transmission_depth", transmission_depth);
-	TransmissionDepth = (float)transmission_depth;
+  int transmission_depth;
+  getattribute("path:transmission_depth", transmission_depth);
+  TransmissionDepth = (float)transmission_depth;
 }
-
diff --git a/intern/cycles/kernel/shaders/node_magic_texture.osl b/intern/cycles/kernel/shaders/node_magic_texture.osl
index 8d6af391e04..aa700e575ef 100644
--- a/intern/cycles/kernel/shaders/node_magic_texture.osl
+++ b/intern/cycles/kernel/shaders/node_magic_texture.osl
@@ -21,91 +21,89 @@
 
 color magic(point p, int n, float distortion)
 {
-	float dist = distortion;
-
-	float x =  sin(( p[0] + p[1] + p[2]) * 5.0);
-	float y =  cos((-p[0] + p[1] - p[2]) * 5.0);
-	float z = -cos((-p[0] - p[1] + p[2]) * 5.0);
-
-	if (n > 0) {
-		x *= dist;
-		y *= dist;
-		z *= dist;
-		y = -cos(x - y + z);
-		y *= dist;
-
-		if (n > 1) {
-			x = cos(x - y - z);
-			x *= dist;
-
-			if (n > 2) {
-				z = sin(-x - y - z);
-				z *= dist;
-
-				if (n > 3) {
-					x = -cos(-x + y - z);
-					x *= dist;
-
-					if (n > 4) {
-						y = -sin(-x + y + z);
-						y *= dist;
-
-						if (n > 5) {
-							y = -cos(-x + y + z);
-							y *= dist;
-
-							if (n > 6) {
-								x = cos(x + y + z);
-								x *= dist;
-
-								if (n > 7) {
-									z = sin(x + y - z);
-									z *= dist;
-
-									if (n > 8) {
-										x = -cos(-x - y + z);
-										x *= dist;
-
-										if (n > 9) {
-											y = -sin(x - y + z);
-											y *= dist;
-										}
-									}
-								}
-							}
-						}
-					}
-				}
-			}
-		}
-	}
-
-	if (dist != 0.0) {
-		dist *= 2.0;
-		x /= dist;
-		y /= dist;
-		z /= dist;
-	}
-
-	return color(0.5 - x, 0.5 - y, 0.5 - z);
+  float dist = distortion;
+
+  float x = sin((p[0] + p[1] + p[2]) * 5.0);
+  float y = cos((-p[0] + p[1] - p[2]) * 5.0);
+  float z = -cos((-p[0] - p[1] + p[2]) * 5.0);
+
+  if (n > 0) {
+    x *= dist;
+    y *= dist;
+    z *= dist;
+    y = -cos(x - y + z);
+    y *= dist;
+
+    if (n > 1) {
+      x = cos(x - y - z);
+      x *= dist;
+
+      if (n > 2) {
+        z = sin(-x - y - z);
+        z *= dist;
+
+        if (n > 3) {
+          x = -cos(-x + y - z);
+          x *= dist;
+
+          if (n > 4) {
+            y = -sin(-x + y + z);
+            y *= dist;
+
+            if (n > 5) {
+              y = -cos(-x + y + z);
+              y *= dist;
+
+              if (n > 6) {
+                x = cos(x + y + z);
+                x *= dist;
+
+                if (n > 7) {
+                  z = sin(x + y - z);
+                  z *= dist;
+
+                  if (n > 8) {
+                    x = -cos(-x - y + z);
+                    x *= dist;
+
+                    if (n > 9) {
+                      y = -sin(x - y + z);
+                      y *= dist;
+                    }
+                  }
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+
+  if (dist != 0.0) {
+    dist *= 2.0;
+    x /= dist;
+    y /= dist;
+    z /= dist;
+  }
+
+  return color(0.5 - x, 0.5 - y, 0.5 - z);
 }
 
-shader node_magic_texture(
-	int use_mapping = 0,
-	matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
-	int depth = 2,
-	float Distortion = 5.0,
-	float Scale = 5.0,
-	point Vector = P,
-	output float Fac = 0.0,
-	output color Color = 0.0)
+shader node_magic_texture(int use_mapping = 0,
+                          matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
+                          int depth = 2,
+                          float Distortion = 5.0,
+                          float Scale = 5.0,
+                          point Vector = P,
+                          output float Fac = 0.0,
+                          output color Color = 0.0)
 {
-	point p = Vector;
+  point p = Vector;
 
-	if (use_mapping)
-		p = transform(mapping, p);
+  if (use_mapping)
+    p = transform(mapping, p);
 
-	Color = magic(p * Scale, depth, Distortion);
-	Fac = (Color[0] + Color[1] + Color[2]) * (1.0 / 3.0);
+  Color = magic(p * Scale, depth, Distortion);
+  Fac = (Color[0] + Color[1] + Color[2]) * (1.0 / 3.0);
 }
-
diff --git a/intern/cycles/kernel/shaders/node_mapping.osl b/intern/cycles/kernel/shaders/node_mapping.osl
index 69106957ee4..f5cc2d1c5dd 100644
--- a/intern/cycles/kernel/shaders/node_mapping.osl
+++ b/intern/cycles/kernel/shaders/node_mapping.osl
@@ -16,18 +16,17 @@
 
 #include "stdosl.h"
 
-shader node_mapping(
-	matrix Matrix = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
-	point mapping_min = point(0.0, 0.0, 0.0),
-	point mapping_max = point(0.0, 0.0, 0.0),
-	int use_minmax = 0,
-	point VectorIn = point(0.0, 0.0, 0.0),
-	output point VectorOut = point(0.0, 0.0, 0.0))
+shader node_mapping(matrix Matrix = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
+                    point mapping_min = point(0.0, 0.0, 0.0),
+                    point mapping_max = point(0.0, 0.0, 0.0),
+                    int use_minmax = 0,
+                    point VectorIn = point(0.0, 0.0, 0.0),
+                    output point VectorOut = point(0.0, 0.0, 0.0))
 {
-	point p = transform(Matrix, VectorIn);
+  point p = transform(Matrix, VectorIn);
 
-	if (use_minmax)
-		p = min(max(mapping_min, p), mapping_max);
-	
-	VectorOut = p;
+  if (use_minmax)
+    p = min(max(mapping_min, p), mapping_max);
+
+  VectorOut = p;
 }
diff --git a/intern/cycles/kernel/shaders/node_math.osl b/intern/cycles/kernel/shaders/node_math.osl
index aa9f6e671c3..8830339e05f 100644
--- a/intern/cycles/kernel/shaders/node_math.osl
+++ b/intern/cycles/kernel/shaders/node_math.osl
@@ -18,107 +18,105 @@
 
 float safe_divide(float a, float b)
 {
-	float result;
+  float result;
 
-	if (b == 0.0)
-		result = 0.0;
-	else
-		result = a / b;
-	
-	return result;
+  if (b == 0.0)
+    result = 0.0;
+  else
+    result = a / b;
+
+  return result;
 }
 
 float safe_modulo(float a, float b)
 {
-	float result;
+  float result;
+
+  if (b == 0.0)
+    result = 0.0;
+  else
+    result = fmod(a, b);
 
-	if (b == 0.0)
-		result = 0.0;
-	else
-		result = fmod(a, b);
-	
-	return result;
+  return result;
 }
 
 float safe_sqrt(float a)
 {
-	float result;
+  float result;
 
-	if (a > 0.0)
-		result = sqrt(a);
-	else
-		result = 0.0;
+  if (a > 0.0)
+    result = sqrt(a);
+  else
+    result = 0.0;
 
-	return result;
+  return result;
 }
 
 float safe_log(float a, float b)
 {
-	if (a < 0.0 || b < 0.0)
-		return 0.0;
-	
-	return log(a) / log(b);
+  if (a < 0.0 || b < 0.0)
+    return 0.0;
+
+  return log(a) / log(b);
 }
 
-shader node_math(
-	string type = "add",
-	int use_clamp = 0,
-	float Value1 = 0.0,
-	float Value2 = 0.0,
-	output float Value = 0.0)
+shader node_math(string type = "add",
+                 int use_clamp = 0,
+                 float Value1 = 0.0,
+                 float Value2 = 0.0,
+                 output float Value = 0.0)
 {
-	/* OSL asin, acos, pow check for values that could give rise to nan */
+  /* OSL asin, acos, pow check for values that could give rise to nan */
 
-	if (type == "add")
-		Value = Value1 + Value2;
-	else if (type == "subtract")
-		Value = Value1 - Value2;
-	else if (type == "multiply")
-		Value = Value1 * Value2;
-	else if (type == "divide")
-		Value = safe_divide(Value1, Value2);
-	else if (type == "sine")
-		Value = sin(Value1);
-	else if (type == "cosine")
-		Value = cos(Value1);
-	else if (type == "tangent")
-		Value = tan(Value1);
-	else if (type == "arcsine")
-		Value = asin(Value1);
-	else if (type == "arccosine")
-		Value = acos(Value1);
-	else if (type == "arctangent")
-		Value = atan(Value1);
-	else if (type == "power")
-		Value = pow(Value1, Value2);
-	else if (type == "logarithm")
-		Value = safe_log(Value1, Value2);
-	else if (type == "minimum")
-		Value = min(Value1, Value2);
-	else if (type == "maximum")
-		Value = max(Value1, Value2);
-	else if (type == "round")
-		Value = floor(Value1 + 0.5);
-	else if (type == "less_than")
-		Value = Value1 < Value2;
-	else if (type == "greater_than")
-		Value = Value1 > Value2;
-	else if (type == "modulo")
-		Value = safe_modulo(Value1, Value2);
-	else if (type == "absolute")
-		Value = fabs(Value1);
-	else if (type == "arctan2")
-		Value = atan2(Value1, Value2);
-	else if (type == "floor")
-		Value = floor(Value1);
-	else if (type == "ceil")
-		Value = ceil(Value1);
-	else if (type == "fract")
-		Value = Value1 - floor(Value1);
-	else if (type == "sqrt")
-		Value = safe_sqrt(Value1);
+  if (type == "add")
+    Value = Value1 + Value2;
+  else if (type == "subtract")
+    Value = Value1 - Value2;
+  else if (type == "multiply")
+    Value = Value1 * Value2;
+  else if (type == "divide")
+    Value = safe_divide(Value1, Value2);
+  else if (type == "sine")
+    Value = sin(Value1);
+  else if (type == "cosine")
+    Value = cos(Value1);
+  else if (type == "tangent")
+    Value = tan(Value1);
+  else if (type == "arcsine")
+    Value = asin(Value1);
+  else if (type == "arccosine")
+    Value = acos(Value1);
+  else if (type == "arctangent")
+    Value = atan(Value1);
+  else if (type == "power")
+    Value = pow(Value1, Value2);
+  else if (type == "logarithm")
+    Value = safe_log(Value1, Value2);
+  else if (type == "minimum")
+    Value = min(Value1, Value2);
+  else if (type == "maximum")
+    Value = max(Value1, Value2);
+  else if (type == "round")
+    Value = floor(Value1 + 0.5);
+  else if (type == "less_than")
+    Value = Value1 < Value2;
+  else if (type == "greater_than")
+    Value = Value1 > Value2;
+  else if (type == "modulo")
+    Value = safe_modulo(Value1, Value2);
+  else if (type == "absolute")
+    Value = fabs(Value1);
+  else if (type == "arctan2")
+    Value = atan2(Value1, Value2);
+  else if (type == "floor")
+    Value = floor(Value1);
+  else if (type == "ceil")
+    Value = ceil(Value1);
+  else if (type == "fract")
+    Value = Value1 - floor(Value1);
+  else if (type == "sqrt")
+    Value = safe_sqrt(Value1);
 
-	if (use_clamp)
-		Value = clamp(Value, 0.0, 1.0);
+  if (use_clamp)
+    Value = clamp(Value, 0.0, 1.0);
 }
-
diff --git a/intern/cycles/kernel/shaders/node_mix.osl b/intern/cycles/kernel/shaders/node_mix.osl
index 0862c34b6e1..8caea6803ed 100644
--- a/intern/cycles/kernel/shaders/node_mix.osl
+++ b/intern/cycles/kernel/shaders/node_mix.osl
@@ -19,311 +19,312 @@
 
 color node_mix_blend(float t, color col1, color col2)
 {
-	return mix(col1, col2, t);
+  return mix(col1, col2, t);
 }
 
 color node_mix_add(float t, color col1, color col2)
 {
-	return mix(col1, col1 + col2, t);
+  return mix(col1, col1 + col2, t);
 }
 
 color node_mix_mul(float t, color col1, color col2)
 {
-	return mix(col1, col1 * col2, t);
+  return mix(col1, col1 * col2, t);
 }
 
 color node_mix_screen(float t, color col1, color col2)
 {
-	float tm = 1.0 - t;
+  float tm = 1.0 - t;
 
-	return color(1.0) - (color(tm) + t * (color(1.0) - col2)) * (color(1.0) - col1);
+  return color(1.0) - (color(tm) + t * (color(1.0) - col2)) * (color(1.0) - col1);
 }
 
 color node_mix_overlay(float t, color col1, color col2)
 {
-	float tm = 1.0 - t;
-
-	color outcol = col1;
-
-	if (outcol[0] < 0.5)
-		outcol[0] *= tm + 2.0 * t * col2[0];
-	else
-		outcol[0] = 1.0 - (tm + 2.0 * t * (1.0 - col2[0])) * (1.0 - outcol[0]);
-
-	if (outcol[1] < 0.5)
-		outcol[1] *= tm + 2.0 * t * col2[1];
-	else
-		outcol[1] = 1.0 - (tm + 2.0 * t * (1.0 - col2[1])) * (1.0 - outcol[1]);
-
-	if (outcol[2] < 0.5)
-		outcol[2] *= tm + 2.0 * t * col2[2];
-	else
-		outcol[2] = 1.0 - (tm + 2.0 * t * (1.0 - col2[2])) * (1.0 - outcol[2]);
-	
-	return outcol;
+  float tm = 1.0 - t;
+
+  color outcol = col1;
+
+  if (outcol[0] < 0.5)
+    outcol[0] *= tm + 2.0 * t * col2[0];
+  else
+    outcol[0] = 1.0 - (tm + 2.0 * t * (1.0 - col2[0])) * (1.0 - outcol[0]);
+
+  if (outcol[1] < 0.5)
+    outcol[1] *= tm + 2.0 * t * col2[1];
+  else
+    outcol[1] = 1.0 - (tm + 2.0 * t * (1.0 - col2[1])) * (1.0 - outcol[1]);
+
+  if (outcol[2] < 0.5)
+    outcol[2] *= tm + 2.0 * t * col2[2];
+  else
+    outcol[2] = 1.0 - (tm + 2.0 * t * (1.0 - col2[2])) * (1.0 - outcol[2]);
+
+  return outcol;
 }
 
 color node_mix_sub(float t, color col1, color col2)
 {
-	return mix(col1, col1 - col2, t);
+  return mix(col1, col1 - col2, t);
 }
 
 color node_mix_div(float t, color col1, color col2)
 {
-	float tm = 1.0 - t;
+  float tm = 1.0 - t;
 
-	color outcol = col1;
+  color outcol = col1;
 
-	if (col2[0] != 0.0) outcol[0] = tm * outcol[0] + t * outcol[0] / col2[0];
-	if (col2[1] != 0.0) outcol[1] = tm * outcol[1] + t * outcol[1] / col2[1];
-	if (col2[2] != 0.0) outcol[2] = tm * outcol[2] + t * outcol[2] / col2[2];
+  if (col2[0] != 0.0)
+    outcol[0] = tm * outcol[0] + t * outcol[0] / col2[0];
+  if (col2[1] != 0.0)
+    outcol[1] = tm * outcol[1] + t * outcol[1] / col2[1];
+  if (col2[2] != 0.0)
+    outcol[2] = tm * outcol[2] + t * outcol[2] / col2[2];
 
-	return outcol;
+  return outcol;
 }
 
 color node_mix_diff(float t, color col1, color col2)
 {
-	return mix(col1, abs(col1 - col2), t);
+  return mix(col1, abs(col1 - col2), t);
 }
 
 color node_mix_dark(float t, color col1, color col2)
 {
-	return min(col1, col2) * t + col1 * (1.0 - t);
+  return min(col1, col2) * t + col1 * (1.0 - t);
 }
 
 color node_mix_light(float t, color col1, color col2)
 {
-	return max(col1, col2 * t);
+  return max(col1, col2 * t);
 }
 
 color node_mix_dodge(float t, color col1, color col2)
 {
-	color outcol = col1;
-
-	if (outcol[0] != 0.0) {
-		float tmp = 1.0 - t * col2[0];
-		if (tmp <= 0.0)
-			outcol[0] = 1.0;
-		else if ((tmp = outcol[0] / tmp) > 1.0)
-			outcol[0] = 1.0;
-		else
-			outcol[0] = tmp;
-	}
-	if (outcol[1] != 0.0) {
-		float tmp = 1.0 - t * col2[1];
-		if (tmp <= 0.0)
-			outcol[1] = 1.0;
-		else if ((tmp = outcol[1] / tmp) > 1.0)
-			outcol[1] = 1.0;
-		else
-			outcol[1] = tmp;
-	}
-	if (outcol[2] != 0.0) {
-		float tmp = 1.0 - t * col2[2];
-		if (tmp <= 0.0)
-			outcol[2] = 1.0;
-		else if ((tmp = outcol[2] / tmp) > 1.0)
-			outcol[2] = 1.0;
-		else
-			outcol[2] = tmp;
-	}
-
-	return outcol;
+  color outcol = col1;
+
+  if (outcol[0] != 0.0) {
+    float tmp = 1.0 - t * col2[0];
+    if (tmp <= 0.0)
+      outcol[0] = 1.0;
+    else if ((tmp = outcol[0] / tmp) > 1.0)
+      outcol[0] = 1.0;
+    else
+      outcol[0] = tmp;
+  }
+  if (outcol[1] != 0.0) {
+    float tmp = 1.0 - t * col2[1];
+    if (tmp <= 0.0)
+      outcol[1] = 1.0;
+    else if ((tmp = outcol[1] / tmp) > 1.0)
+      outcol[1] = 1.0;
+    else
+      outcol[1] = tmp;
+  }
+  if (outcol[2] != 0.0) {
+    float tmp = 1.0 - t * col2[2];
+    if (tmp <= 0.0)
+      outcol[2] = 1.0;
+    else if ((tmp = outcol[2] / tmp) > 1.0)
+      outcol[2] = 1.0;
+    else
+      outcol[2] = tmp;
+  }
+
+  return outcol;
 }
 
 color node_mix_burn(float t, color col1, color col2)
 {
-	float tmp, tm = 1.0 - t;
-
-	color outcol = col1;
-
-	tmp = tm + t * col2[0];
-	if (tmp <= 0.0)
-		outcol[0] = 0.0;
-	else if ((tmp = (1.0 - (1.0 - outcol[0]) / tmp)) < 0.0)
-		outcol[0] = 0.0;
-	else if (tmp > 1.0)
-		outcol[0] = 1.0;
-	else
-		outcol[0] = tmp;
-
-	tmp = tm + t * col2[1];
-	if (tmp <= 0.0)
-		outcol[1] = 0.0;
-	else if ((tmp = (1.0 - (1.0 - outcol[1]) / tmp)) < 0.0)
-		outcol[1] = 0.0;
-	else if (tmp > 1.0)
-		outcol[1] = 1.0;
-	else
-		outcol[1] = tmp;
-
-	tmp = tm + t * col2[2];
-	if (tmp <= 0.0)
-		outcol[2] = 0.0;
-	else if ((tmp = (1.0 - (1.0 - outcol[2]) / tmp)) < 0.0)
-		outcol[2] = 0.0;
-	else if (tmp > 1.0)
-		outcol[2] = 1.0;
-	else
-		outcol[2] = tmp;
-	
-	return outcol;
+  float tmp, tm = 1.0 - t;
+
+  color outcol = col1;
+
+  tmp = tm + t * col2[0];
+  if (tmp <= 0.0)
+    outcol[0] = 0.0;
+  else if ((tmp = (1.0 - (1.0 - outcol[0]) / tmp)) < 0.0)
+    outcol[0] = 0.0;
+  else if (tmp > 1.0)
+    outcol[0] = 1.0;
+  else
+    outcol[0] = tmp;
+
+  tmp = tm + t * col2[1];
+  if (tmp <= 0.0)
+    outcol[1] = 0.0;
+  else if ((tmp = (1.0 - (1.0 - outcol[1]) / tmp)) < 0.0)
+    outcol[1] = 0.0;
+  else if (tmp > 1.0)
+    outcol[1] = 1.0;
+  else
+    outcol[1] = tmp;
+
+  tmp = tm + t * col2[2];
+  if (tmp <= 0.0)
+    outcol[2] = 0.0;
+  else if ((tmp = (1.0 - (1.0 - outcol[2]) / tmp)) < 0.0)
+    outcol[2] = 0.0;
+  else if (tmp > 1.0)
+    outcol[2] = 1.0;
+  else
+    outcol[2] = tmp;
+
+  return outcol;
 }
 
 color node_mix_hue(float t, color col1, color col2)
 {
-	color outcol = col1;
-	color hsv2 = rgb_to_hsv(col2);
+  color outcol = col1;
+  color hsv2 = rgb_to_hsv(col2);
 
-	if (hsv2[1] != 0.0) {
-		color hsv = rgb_to_hsv(outcol);
-		hsv[0] = hsv2[0];
-		color tmp = hsv_to_rgb(hsv); 
+  if (hsv2[1] != 0.0) {
+    color hsv = rgb_to_hsv(outcol);
+    hsv[0] = hsv2[0];
+    color tmp = hsv_to_rgb(hsv);
 
-		outcol = mix(outcol, tmp, t);
-	}
+    outcol = mix(outcol, tmp, t);
+  }
 
-	return outcol;
+  return outcol;
 }
 
 color node_mix_sat(float t, color col1, color col2)
 {
-	float tm = 1.0 - t;
+  float tm = 1.0 - t;
 
-	color outcol = col1;
+  color outcol = col1;
 
-	color hsv = rgb_to_hsv(outcol);
+  color hsv = rgb_to_hsv(outcol);
 
-	if (hsv[1] != 0.0) {
-		color hsv2 = rgb_to_hsv(col2);
+  if (hsv[1] != 0.0) {
+    color hsv2 = rgb_to_hsv(col2);
 
-		hsv[1] = tm * hsv[1] + t * hsv2[1];
-		outcol = hsv_to_rgb(hsv);
-	}
+    hsv[1] = tm * hsv[1] + t * hsv2[1];
+    outcol = hsv_to_rgb(hsv);
+  }
 
-	return outcol;
+  return outcol;
 }
 
 color node_mix_val(float t, color col1, color col2)
 {
-	float tm = 1.0 - t;
+  float tm = 1.0 - t;
 
-	color hsv = rgb_to_hsv(col1);
-	color hsv2 = rgb_to_hsv(col2);
+  color hsv = rgb_to_hsv(col1);
+  color hsv2 = rgb_to_hsv(col2);
 
-	hsv[2] = tm * hsv[2] + t * hsv2[2];
+  hsv[2] = tm * hsv[2] + t * hsv2[2];
 
-	return hsv_to_rgb(hsv);
+  return hsv_to_rgb(hsv);
 }
 
 color node_mix_color(float t, color col1, color col2)
 {
-	color outcol = col1;
-	color hsv2 = rgb_to_hsv(col2);
+  color outcol = col1;
+  color hsv2 = rgb_to_hsv(col2);
 
-	if (hsv2[1] != 0.0) {
-		color hsv = rgb_to_hsv(outcol);
-		hsv[0] = hsv2[0];
-		hsv[1] = hsv2[1];
-		color tmp = hsv_to_rgb(hsv); 
+  if (hsv2[1] != 0.0) {
+    color hsv = rgb_to_hsv(outcol);
+    hsv[0] = hsv2[0];
+    hsv[1] = hsv2[1];
+    color tmp = hsv_to_rgb(hsv);
 
-		outcol = mix(outcol, tmp, t);
-	}
+    outcol = mix(outcol, tmp, t);
+  }
 
-	return outcol;
+  return outcol;
 }
 
 color node_mix_soft(float t, color col1, color col2)
 {
-	float tm = 1.0 - t;
+  float tm = 1.0 - t;
 
-	color one = color(1.0);
-	color scr = one - (one - col2) * (one - col1);
+  color one = color(1.0);
+  color scr = one - (one - col2) * (one - col1);
 
-	return tm * col1 + t * ((one - col1) * col2 * col1 + col1 * scr);
+  return tm * col1 + t * ((one - col1) * col2 * col1 + col1 * scr);
 }
 
 color node_mix_linear(float t, color col1, color col2)
 {
-	color outcol = col1;
-
-	if (col2[0] > 0.5)
-		outcol[0] = col1[0] + t * (2.0 * (col2[0] - 0.5));
-	else
-		outcol[0] = col1[0] + t * (2.0 * (col2[0]) - 1.0);
-
-	if (col2[1] > 0.5)
-		outcol[1] = col1[1] + t * (2.0 * (col2[1] - 0.5));
-	else
-		outcol[1] = col1[1] + t * (2.0 * (col2[1]) - 1.0);
-
-	if (col2[2] > 0.5)
-		outcol[2] = col1[2] + t * (2.0 * (col2[2] - 0.5));
-	else
-		outcol[2] = col1[2] + t * (2.0 * (col2[2]) - 1.0);
-	
-	return outcol;
+  color outcol = col1;
+
+  if (col2[0] > 0.5)
+    outcol[0] = col1[0] + t * (2.0 * (col2[0] - 0.5));
+  else
+    outcol[0] = col1[0] + t * (2.0 * (col2[0]) - 1.0);
+
+  if (col2[1] > 0.5)
+    outcol[1] = col1[1] + t * (2.0 * (col2[1] - 0.5));
+  else
+    outcol[1] = col1[1] + t * (2.0 * (col2[1]) - 1.0);
+
+  if (col2[2] > 0.5)
+    outcol[2] = col1[2] + t * (2.0 * (col2[2] - 0.5));
+  else
+    outcol[2] = col1[2] + t * (2.0 * (col2[2]) - 1.0);
+
+  return outcol;
 }
 
 color node_mix_clamp(color col)
 {
-	color outcol = col;
+  color outcol = col;
 
-	outcol[0] = clamp(col[0], 0.0, 1.0);
-	outcol[1] = clamp(col[1], 0.0, 1.0);
-	outcol[2] = clamp(col[2], 0.0, 1.0);
+  outcol[0] = clamp(col[0], 0.0, 1.0);
+  outcol[1] = clamp(col[1], 0.0, 1.0);
+  outcol[2] = clamp(col[2], 0.0, 1.0);
 
-	return outcol;
+  return outcol;
 }
 
-shader node_mix(
-	string type = "mix",
-	int use_clamp = 0,
-	float Fac = 0.5,
-	color Color1 = 0.0,
-	color Color2 = 0.0,
-	output color Color = 0.0)
+shader node_mix(string type = "mix",
+                int use_clamp = 0,
+                float Fac = 0.5,
+                color Color1 = 0.0,
+                color Color2 = 0.0,
+                output color Color = 0.0)
 {
-	float t = clamp(Fac, 0.0, 1.0);
-
-	if (type == "mix")
-		Color = node_mix_blend(t, Color1, Color2);
-	if (type == "add")
-		Color = node_mix_add(t, Color1, Color2);
-	if (type == "multiply")
-		Color = node_mix_mul(t, Color1, Color2);
-	if (type == "screen")
-		Color = node_mix_screen(t, Color1, Color2);
-	if (type == "overlay")
-		Color = node_mix_overlay(t, Color1, Color2);
-	if (type == "subtract")
-		Color = node_mix_sub(t, Color1, Color2);
-	if (type == "divide")
-		Color = node_mix_div(t, Color1, Color2);
-	if (type == "difference")
-		Color = node_mix_diff(t, Color1, Color2);
-	if (type == "darken")
-		Color = node_mix_dark(t, Color1, Color2);
-	if (type == "lighten")
-		Color = node_mix_light(t, Color1, Color2);
-	if (type == "dodge")
-		Color = node_mix_dodge(t, Color1, Color2);
-	if (type == "burn")
-		Color = node_mix_burn(t, Color1, Color2);
-	if (type == "hue")
-		Color = node_mix_hue(t, Color1, Color2);
-	if (type == "saturation")
-		Color = node_mix_sat(t, Color1, Color2);
-	if (type == "value")
-		Color = node_mix_val (t, Color1, Color2);
-	if (type == "color")
-		Color = node_mix_color(t, Color1, Color2);
-	if (type == "soft_light")
-		Color = node_mix_soft(t, Color1, Color2);
-	if (type == "linear_light")
-		Color = node_mix_linear(t, Color1, Color2);
-
-	if (use_clamp)
-		Color = node_mix_clamp(Color);
+  float t = clamp(Fac, 0.0, 1.0);
+
+  if (type == "mix")
+    Color = node_mix_blend(t, Color1, Color2);
+  if (type == "add")
+    Color = node_mix_add(t, Color1, Color2);
+  if (type == "multiply")
+    Color = node_mix_mul(t, Color1, Color2);
+  if (type == "screen")
+    Color = node_mix_screen(t, Color1, Color2);
+  if (type == "overlay")
+    Color = node_mix_overlay(t, Color1, Color2);
+  if (type == "subtract")
+    Color = node_mix_sub(t, Color1, Color2);
+  if (type == "divide")
+    Color = node_mix_div(t, Color1, Color2);
+  if (type == "difference")
+    Color = node_mix_diff(t, Color1, Color2);
+  if (type == "darken")
+    Color = node_mix_dark(t, Color1, Color2);
+  if (type == "lighten")
+    Color = node_mix_light(t, Color1, Color2);
+  if (type == "dodge")
+    Color = node_mix_dodge(t, Color1, Color2);
+  if (type == "burn")
+    Color = node_mix_burn(t, Color1, Color2);
+  if (type == "hue")
+    Color = node_mix_hue(t, Color1, Color2);
+  if (type == "saturation")
+    Color = node_mix_sat(t, Color1, Color2);
+  if (type == "value")
+    Color = node_mix_val(t, Color1, Color2);
+  if (type == "color")
+    Color = node_mix_color(t, Color1, Color2);
+  if (type == "soft_light")
+    Color = node_mix_soft(t, Color1, Color2);
+  if (type == "linear_light")
+    Color = node_mix_linear(t, Color1, Color2);
+
+  if (use_clamp)
+    Color = node_mix_clamp(Color);
 }
-
diff --git a/intern/cycles/kernel/shaders/node_mix_closure.osl b/intern/cycles/kernel/shaders/node_mix_closure.osl
index 5946dfdaaba..517c59c8786 100644
--- a/intern/cycles/kernel/shaders/node_mix_closure.osl
+++ b/intern/cycles/kernel/shaders/node_mix_closure.osl
@@ -16,13 +16,11 @@
 
 #include "stdosl.h"
 
-shader node_mix_closure(
-	float Fac = 0.5,
-	closure color Closure1 = 0,
-	closure color Closure2 = 0,
-	output closure color Closure = 0)
+shader node_mix_closure(float Fac = 0.5,
+                        closure color Closure1 = 0,
+                        closure color Closure2 = 0,
+                        output closure color Closure = 0)
 {
-	float t = clamp(Fac, 0.0, 1.0);
-	Closure = (1.0 - t) * Closure1 + t * Closure2;
+  float t = clamp(Fac, 0.0, 1.0);
+  Closure = (1.0 - t) * Closure1 + t * Closure2;
 }
-
diff --git a/intern/cycles/kernel/shaders/node_musgrave_texture.osl b/intern/cycles/kernel/shaders/node_musgrave_texture.osl
index 454b3834081..a7877c43d46 100644
--- a/intern/cycles/kernel/shaders/node_musgrave_texture.osl
+++ b/intern/cycles/kernel/shaders/node_musgrave_texture.osl
@@ -28,24 +28,24 @@
 
 float noise_musgrave_fBm(point ip, float H, float lacunarity, float octaves)
 {
-	float rmd;
-	float value = 0.0;
-	float pwr = 1.0;
-	float pwHL = pow(lacunarity, -H);
-	int i;
-	point p = ip;
-
-	for (i = 0; i < (int)octaves; i++) {
-		value += safe_noise(p, "signed") * pwr;
-		pwr *= pwHL;
-		p *= lacunarity;
-	}
-
-	rmd = octaves - floor(octaves);
-	if (rmd != 0.0)
-		value += rmd * safe_noise(p, "signed") * pwr;
-
-	return value;
+  float rmd;
+  float value = 0.0;
+  float pwr = 1.0;
+  float pwHL = pow(lacunarity, -H);
+  int i;
+  point p = ip;
+
+  for (i = 0; i < (int)octaves; i++) {
+    value += safe_noise(p, "signed") * pwr;
+    pwr *= pwHL;
+    p *= lacunarity;
+  }
+
+  rmd = octaves - floor(octaves);
+  if (rmd != 0.0)
+    value += rmd * safe_noise(p, "signed") * pwr;
+
+  return value;
 }
 
 /* Musgrave Multifractal
@@ -57,24 +57,24 @@ float noise_musgrave_fBm(point ip, float H, float lacunarity, float octaves)
 
 float noise_musgrave_multi_fractal(point ip, float H, float lacunarity, float octaves)
 {
-	float rmd;
-	float value = 1.0;
-	float pwr = 1.0;
-	float pwHL = pow(lacunarity, -H);
-	int i;
-	point p = ip;
-
-	for (i = 0; i < (int)octaves; i++) {
-		value *= (pwr * safe_noise(p, "signed") + 1.0);
-		pwr *= pwHL;
-		p *= lacunarity;
-	}
-
-	rmd = octaves - floor(octaves);
-	if (rmd != 0.0)
-		value *= (rmd * pwr * safe_noise(p, "signed") + 1.0); /* correct? */
-
-	return value;
+  float rmd;
+  float value = 1.0;
+  float pwr = 1.0;
+  float pwHL = pow(lacunarity, -H);
+  int i;
+  point p = ip;
+
+  for (i = 0; i < (int)octaves; i++) {
+    value *= (pwr * safe_noise(p, "signed") + 1.0);
+    pwr *= pwHL;
+    p *= lacunarity;
+  }
+
+  rmd = octaves - floor(octaves);
+  if (rmd != 0.0)
+    value *= (rmd * pwr * safe_noise(p, "signed") + 1.0); /* correct? */
+
+  return value;
 }
 
 /* Musgrave Heterogeneous Terrain
@@ -85,32 +85,33 @@ float noise_musgrave_multi_fractal(point ip, float H, float lacunarity, float oc
  * offset: raises the terrain from `sea level'
  */
 
-float noise_musgrave_hetero_terrain(point ip, float H, float lacunarity, float octaves, float offset)
+float noise_musgrave_hetero_terrain(
+    point ip, float H, float lacunarity, float octaves, float offset)
 {
-	float value, increment, rmd;
-	float pwHL = pow(lacunarity, -H);
-	float pwr = pwHL;
-	int i;
-	point p = ip;
-
-	/* first unscaled octave of function; later octaves are scaled */
-	value = offset + safe_noise(p, "signed");
-	p *= lacunarity;
-
-	for (i = 1; i < (int)octaves; i++) {
-		increment = (safe_noise(p, "signed") + offset) * pwr * value;
-		value += increment;
-		pwr *= pwHL;
-		p *= lacunarity;
-	}
-
-	rmd = octaves - floor(octaves);
-	if (rmd != 0.0) {
-		increment = (safe_noise(p, "signed") + offset) * pwr * value;
-		value += rmd * increment;
-	}
-
-	return value;
+  float value, increment, rmd;
+  float pwHL = pow(lacunarity, -H);
+  float pwr = pwHL;
+  int i;
+  point p = ip;
+
+  /* first unscaled octave of function; later octaves are scaled */
+  value = offset + safe_noise(p, "signed");
+  p *= lacunarity;
+
+  for (i = 1; i < (int)octaves; i++) {
+    increment = (safe_noise(p, "signed") + offset) * pwr * value;
+    value += increment;
+    pwr *= pwHL;
+    p *= lacunarity;
+  }
+
+  rmd = octaves - floor(octaves);
+  if (rmd != 0.0) {
+    increment = (safe_noise(p, "signed") + offset) * pwr * value;
+    value += rmd * increment;
+  }
+
+  return value;
 }
 
 /* Hybrid Additive/Multiplicative Multifractal Terrain
@@ -121,35 +122,35 @@ float noise_musgrave_hetero_terrain(point ip, float H, float lacunarity, float o
  * offset: raises the terrain from `sea level'
  */
 
-float noise_musgrave_hybrid_multi_fractal(point ip, float H, float lacunarity,
-                                          float octaves, float offset, float gain)
+float noise_musgrave_hybrid_multi_fractal(
+    point ip, float H, float lacunarity, float octaves, float offset, float gain)
 {
-	float result, signal, weight, rmd;
-	float pwHL = pow(lacunarity, -H);
-	float pwr = pwHL;
-	int i;
-	point p = ip;
-
-	result = safe_noise(p, "signed") + offset;
-	weight = gain * result;
-	p *= lacunarity;
-
-	for (i = 1; (weight > 0.001) && (i < (int)octaves); i++) {
-		if (weight > 1.0)
-			weight = 1.0;
-
-		signal = (safe_noise(p, "signed") + offset) * pwr;
-		pwr *= pwHL;
-		result += weight * signal;
-		weight *= gain * signal;
-		p *= lacunarity;
-	}
-
-	rmd = octaves - floor(octaves);
-	if (rmd != 0.0)
-		result += rmd * ((safe_noise(p, "signed") + offset) * pwr);
-
-	return result;
+  float result, signal, weight, rmd;
+  float pwHL = pow(lacunarity, -H);
+  float pwr = pwHL;
+  int i;
+  point p = ip;
+
+  result = safe_noise(p, "signed") + offset;
+  weight = gain * result;
+  p *= lacunarity;
+
+  for (i = 1; (weight > 0.001) && (i < (int)octaves); i++) {
+    if (weight > 1.0)
+      weight = 1.0;
+
+    signal = (safe_noise(p, "signed") + offset) * pwr;
+    pwr *= pwHL;
+    result += weight * signal;
+    weight *= gain * signal;
+    p *= lacunarity;
+  }
+
+  rmd = octaves - floor(octaves);
+  if (rmd != 0.0)
+    result += rmd * ((safe_noise(p, "signed") + offset) * pwr);
+
+  return result;
 }
 
 /* Ridged Multifractal Terrain
@@ -160,72 +161,73 @@ float noise_musgrave_hybrid_multi_fractal(point ip, float H, float lacunarity,
  * offset: raises the terrain from `sea level'
  */
 
-float noise_musgrave_ridged_multi_fractal(point ip, float H, float lacunarity,
-                                          float octaves, float offset, float gain)
+float noise_musgrave_ridged_multi_fractal(
+    point ip, float H, float lacunarity, float octaves, float offset, float gain)
 {
-	float result, signal, weight;
-	float pwHL = pow(lacunarity, -H);
-	float pwr = pwHL;
-	int i;
-	point p = ip;
-
-	signal = offset - fabs(safe_noise(p, "signed"));
-	signal *= signal;
-	result = signal;
-	weight = 1.0;
-
-	for (i = 1; i < (int)octaves; i++) {
-		p *= lacunarity;
-		weight = clamp(signal * gain, 0.0, 1.0);
-		signal = offset - fabs(safe_noise(p, "signed"));
-		signal *= signal;
-		signal *= weight;
-		result += signal * pwr;
-		pwr *= pwHL;
-	}
-
-	return result;
+  float result, signal, weight;
+  float pwHL = pow(lacunarity, -H);
+  float pwr = pwHL;
+  int i;
+  point p = ip;
+
+  signal = offset - fabs(safe_noise(p, "signed"));
+  signal *= signal;
+  result = signal;
+  weight = 1.0;
+
+  for (i = 1; i < (int)octaves; i++) {
+    p *= lacunarity;
+    weight = clamp(signal * gain, 0.0, 1.0);
+    signal = offset - fabs(safe_noise(p, "signed"));
+    signal *= signal;
+    signal *= weight;
+    result += signal * pwr;
+    pwr *= pwHL;
+  }
+
+  return result;
 }
 
 /* Shader */
 
 shader node_musgrave_texture(
-	int use_mapping = 0,
-	matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
-	string type = "fBM",
-	float Dimension = 2.0,
-	float Lacunarity = 1.0,
-	float Detail = 2.0,
-	float Offset = 0.0,
-	float Gain = 1.0,
-	float Scale = 5.0,
-	point Vector = P,
-	output float Fac = 0.0,
-	output color Color = 0.0)
+    int use_mapping = 0,
+    matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
+    string type = "fBM",
+    float Dimension = 2.0,
+    float Lacunarity = 1.0,
+    float Detail = 2.0,
+    float Offset = 0.0,
+    float Gain = 1.0,
+    float Scale = 5.0,
+    point Vector = P,
+    output float Fac = 0.0,
+    output color Color = 0.0)
 {
-	float dimension = max(Dimension, 1e-5);
-	float octaves = clamp(Detail, 0.0, 16.0);
-	float lacunarity = max(Lacunarity, 1e-5);
-	float intensity = 1.0;
-
-	point p = Vector;
-
-	if (use_mapping)
-		p = transform(mapping, p);
-
-	p = p * Scale;
-
-	if (type == "multifractal")
-		Fac = intensity * noise_musgrave_multi_fractal(p, dimension, lacunarity, octaves);
-	else if (type == "fBM")
-		Fac = intensity * noise_musgrave_fBm(p, dimension, lacunarity, octaves);
-	else if (type == "hybrid_multifractal")
-		Fac = intensity * noise_musgrave_hybrid_multi_fractal(p, dimension, lacunarity, octaves, Offset, Gain);
-	else if (type == "ridged_multifractal")
-		Fac = intensity * noise_musgrave_ridged_multi_fractal(p, dimension, lacunarity, octaves, Offset, Gain);
-	else if (type == "hetero_terrain")
-		Fac = intensity * noise_musgrave_hetero_terrain(p, dimension, lacunarity, octaves, Offset);
-	
-	Color = color(Fac, Fac, Fac);
+  float dimension = max(Dimension, 1e-5);
+  float octaves = clamp(Detail, 0.0, 16.0);
+  float lacunarity = max(Lacunarity, 1e-5);
+  float intensity = 1.0;
+
+  point p = Vector;
+
+  if (use_mapping)
+    p = transform(mapping, p);
+
+  p = p * Scale;
+
+  if (type == "multifractal")
+    Fac = intensity * noise_musgrave_multi_fractal(p, dimension, lacunarity, octaves);
+  else if (type == "fBM")
+    Fac = intensity * noise_musgrave_fBm(p, dimension, lacunarity, octaves);
+  else if (type == "hybrid_multifractal")
+    Fac = intensity *
+          noise_musgrave_hybrid_multi_fractal(p, dimension, lacunarity, octaves, Offset, Gain);
+  else if (type == "ridged_multifractal")
+    Fac = intensity *
+          noise_musgrave_ridged_multi_fractal(p, dimension, lacunarity, octaves, Offset, Gain);
+  else if (type == "hetero_terrain")
+    Fac = intensity * noise_musgrave_hetero_terrain(p, dimension, lacunarity, octaves, Offset);
+
+  Color = color(Fac, Fac, Fac);
 }
-
diff --git a/intern/cycles/kernel/shaders/node_noise_texture.osl b/intern/cycles/kernel/shaders/node_noise_texture.osl
index 42a30897341..2cbd571e206 100644
--- a/intern/cycles/kernel/shaders/node_noise_texture.osl
+++ b/intern/cycles/kernel/shaders/node_noise_texture.osl
@@ -21,41 +21,40 @@
 
 float noise(point ip, float distortion, float detail, output color Color)
 {
-	point r;
-	point p = ip;
-	int hard = 0;
-
-	if (distortion != 0.0) {
-		r[0] = safe_noise(p + point(13.5), "unsigned") * distortion;
-		r[1] = safe_noise(p, "unsigned") * distortion;
-		r[2] = safe_noise(p - point(13.5), "unsigned") * distortion;
-		
-		p += r;
-	}
-
-	float fac = noise_turbulence(p, detail, hard);
-	
-	Color = color(fac, noise_turbulence(point(p[1], p[0], p[2]), detail, hard),
-		noise_turbulence(point(p[1], p[2], p[0]), detail, hard));
-
-	return fac;
+  point r;
+  point p = ip;
+  int hard = 0;
+
+  if (distortion != 0.0) {
+    r[0] = safe_noise(p + point(13.5), "unsigned") * distortion;
+    r[1] = safe_noise(p, "unsigned") * distortion;
+    r[2] = safe_noise(p - point(13.5), "unsigned") * distortion;
+
+    p += r;
+  }
+
+  float fac = noise_turbulence(p, detail, hard);
+
+  Color = color(fac,
+                noise_turbulence(point(p[1], p[0], p[2]), detail, hard),
+                noise_turbulence(point(p[1], p[2], p[0]), detail, hard));
+
+  return fac;
 }
 
-shader node_noise_texture(
-	int use_mapping = 0,
-	matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
-	float Distortion = 0.0,
-	float Scale = 5.0,
-	float Detail = 2.0,
-	point Vector = P,
-	output float Fac = 0.0,
-	output color Color = 0.0)
+shader node_noise_texture(int use_mapping = 0,
+                          matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
+                          float Distortion = 0.0,
+                          float Scale = 5.0,
+                          float Detail = 2.0,
+                          point Vector = P,
+                          output float Fac = 0.0,
+                          output color Color = 0.0)
 {
-	point p = Vector;
+  point p = Vector;
 
-	if (use_mapping)
-		p = transform(mapping, p);
+  if (use_mapping)
+    p = transform(mapping, p);
 
-	Fac = noise(p * Scale, Distortion, Detail, Color);
+  Fac = noise(p * Scale, Distortion, Detail, Color);
 }
-
diff --git a/intern/cycles/kernel/shaders/node_normal.osl b/intern/cycles/kernel/shaders/node_normal.osl
index 7307971eddd..1d20c3e7cac 100644
--- a/intern/cycles/kernel/shaders/node_normal.osl
+++ b/intern/cycles/kernel/shaders/node_normal.osl
@@ -16,13 +16,11 @@
 
 #include "stdosl.h"
 
-shader node_normal(
-	normal direction = normal(0.0, 0.0, 0.0),
-	normal NormalIn = normal(0.0, 0.0, 0.0),
-	output normal NormalOut = normal(0.0, 0.0, 0.0),
-	output float Dot = 1.0)
+shader node_normal(normal direction = normal(0.0, 0.0, 0.0),
+                   normal NormalIn = normal(0.0, 0.0, 0.0),
+                   output normal NormalOut = normal(0.0, 0.0, 0.0),
+                   output float Dot = 1.0)
 {
-	NormalOut = normalize(direction);
-	Dot = dot(NormalOut, normalize(NormalIn));
+  NormalOut = normalize(direction);
+  Dot = dot(NormalOut, normalize(NormalIn));
 }
-
diff --git a/intern/cycles/kernel/shaders/node_normal_map.osl b/intern/cycles/kernel/shaders/node_normal_map.osl
index fda6f12a5da..90b593d00bc 100644
--- a/intern/cycles/kernel/shaders/node_normal_map.osl
+++ b/intern/cycles/kernel/shaders/node_normal_map.osl
@@ -16,79 +16,75 @@
 
 #include "stdosl.h"
 
-shader node_normal_map(
-	normal NormalIn = N,
-	float Strength = 1.0,
-	color Color = color(0.5, 0.5, 1.0),
-	string space = "tangent",
-	string attr_name = "geom:tangent",
-	string attr_sign_name = "geom:tangent_sign",
-	output normal Normal = NormalIn)
+shader node_normal_map(normal NormalIn = N,
+                       float Strength = 1.0,
+                       color Color = color(0.5, 0.5, 1.0),
+                       string space = "tangent",
+                       string attr_name = "geom:tangent",
+                       string attr_sign_name = "geom:tangent_sign",
+                       output normal Normal = NormalIn)
 {
-	color mcolor = 2.0 * color(Color[0] - 0.5, Color[1] - 0.5, Color[2] - 0.5);
-	int is_backfacing = backfacing();
+  color mcolor = 2.0 * color(Color[0] - 0.5, Color[1] - 0.5, Color[2] - 0.5);
+  int is_backfacing = backfacing();
 
-	if (space == "tangent") {
-		vector tangent;
-		vector ninterp;
-		float tangent_sign;
-		float is_smooth;
+  if (space == "tangent") {
+    vector tangent;
+    vector ninterp;
+    float tangent_sign;
+    float is_smooth;
 
-		getattribute("geom:is_smooth", is_smooth);
-		if (!is_smooth) {
-			ninterp = normalize(transform("world", "object", Ng));
+    getattribute("geom:is_smooth", is_smooth);
+    if (!is_smooth) {
+      ninterp = normalize(transform("world", "object", Ng));
 
-			/* the normal is already inverted, which is too soon for the math here */
-			if (is_backfacing) {
-				ninterp = -ninterp;
-			}
-		}
+      /* the normal is already inverted, which is too soon for the math here */
+      if (is_backfacing) {
+        ninterp = -ninterp;
+      }
+    }
 
-		// get _unnormalized_ interpolated normal and tangent
-		if (getattribute(attr_name, tangent) &&
-		    getattribute(attr_sign_name, tangent_sign) &&
-		    (!is_smooth || getattribute("geom:N", ninterp)))
-		{
-			// apply normal map
-			vector B = tangent_sign * cross(ninterp, tangent);
-			Normal = normalize(mcolor[0] * tangent + mcolor[1] * B + mcolor[2] * ninterp);
+    // get _unnormalized_ interpolated normal and tangent
+    if (getattribute(attr_name, tangent) && getattribute(attr_sign_name, tangent_sign) &&
+        (!is_smooth || getattribute("geom:N", ninterp))) {
+      // apply normal map
+      vector B = tangent_sign * cross(ninterp, tangent);
+      Normal = normalize(mcolor[0] * tangent + mcolor[1] * B + mcolor[2] * ninterp);
 
-			// transform to world space
-			Normal = normalize(transform("object", "world", Normal));
-		}
-		else {
-			Normal = normal(0, 0, 0);
-		}
-	}
-	else if (space == "object") {
-		Normal = normalize(transform("object", "world", vector(mcolor)));
-	}
-	else if (space == "world") {
-		Normal = normalize(vector(mcolor));
-	}
-	else if (space == "blender_object") {
-		/* strange blender convention */
-		mcolor[1] = -mcolor[1];
-		mcolor[2] = -mcolor[2];
-	
-		Normal = normalize(transform("object", "world", vector(mcolor)));
-	}
-	else if (space == "blender_world") {
-		/* strange blender convention */
-		mcolor[1] = -mcolor[1];
-		mcolor[2] = -mcolor[2];
-	
-		Normal = normalize(vector(mcolor));
-	}
+      // transform to world space
+      Normal = normalize(transform("object", "world", Normal));
+    }
+    else {
+      Normal = normal(0, 0, 0);
+    }
+  }
+  else if (space == "object") {
+    Normal = normalize(transform("object", "world", vector(mcolor)));
+  }
+  else if (space == "world") {
+    Normal = normalize(vector(mcolor));
+  }
+  else if (space == "blender_object") {
+    /* strange blender convention */
+    mcolor[1] = -mcolor[1];
+    mcolor[2] = -mcolor[2];
 
-	/* invert normal for backfacing polygons */
-	if (is_backfacing) {
-		Normal = -Normal;
-	}
+    Normal = normalize(transform("object", "world", vector(mcolor)));
+  }
+  else if (space == "blender_world") {
+    /* strange blender convention */
+    mcolor[1] = -mcolor[1];
+    mcolor[2] = -mcolor[2];
 
-	if (Strength != 1.0)
-		Normal = normalize(NormalIn + (Normal - NormalIn) * max(Strength, 0.0));
+    Normal = normalize(vector(mcolor));
+  }
 
-	Normal = ensure_valid_reflection(Ng, I, Normal);
-}
+  /* invert normal for backfacing polygons */
+  if (is_backfacing) {
+    Normal = -Normal;
+  }
+
+  if (Strength != 1.0)
+    Normal = normalize(NormalIn + (Normal - NormalIn) * max(Strength, 0.0));
 
+  Normal = ensure_valid_reflection(Ng, I, Normal);
+}
diff --git a/intern/cycles/kernel/shaders/node_object_info.osl b/intern/cycles/kernel/shaders/node_object_info.osl
index dd7c663b8d8..0904a30a53f 100644
--- a/intern/cycles/kernel/shaders/node_object_info.osl
+++ b/intern/cycles/kernel/shaders/node_object_info.osl
@@ -16,15 +16,13 @@
 
 #include "stdosl.h"
 
-shader node_object_info(
-	output point Location = point(0.0, 0.0, 0.0),
-	output float ObjectIndex = 0.0,
-	output float MaterialIndex = 0.0,
-	output float Random = 0.0)
+shader node_object_info(output point Location = point(0.0, 0.0, 0.0),
+                        output float ObjectIndex = 0.0,
+                        output float MaterialIndex = 0.0,
+                        output float Random = 0.0)
 {
-	getattribute("object:location", Location);
-	getattribute("object:index", ObjectIndex);
-	getattribute("material:index", MaterialIndex);
-	getattribute("object:random", Random);
+  getattribute("object:location", Location);
+  getattribute("object:index", ObjectIndex);
+  getattribute("material:index", MaterialIndex);
+  getattribute("object:random", Random);
 }
-
diff --git a/intern/cycles/kernel/shaders/node_output_displacement.osl b/intern/cycles/kernel/shaders/node_output_displacement.osl
index 5dbef0244fe..fa7f603980b 100644
--- a/intern/cycles/kernel/shaders/node_output_displacement.osl
+++ b/intern/cycles/kernel/shaders/node_output_displacement.osl
@@ -18,6 +18,5 @@
 
 displacement node_output_displacement(vector Displacement = 0.0)
 {
-	P += Displacement;
+  P += Displacement;
 }
-
diff --git a/intern/cycles/kernel/shaders/node_output_surface.osl b/intern/cycles/kernel/shaders/node_output_surface.osl
index 2cc4575a8c8..013666145da 100644
--- a/intern/cycles/kernel/shaders/node_output_surface.osl
+++ b/intern/cycles/kernel/shaders/node_output_surface.osl
@@ -18,6 +18,5 @@
 
 surface node_output_surface(closure color Surface = 0)
 {
-	Ci = Surface;
+  Ci = Surface;
 }
-
diff --git a/intern/cycles/kernel/shaders/node_output_volume.osl b/intern/cycles/kernel/shaders/node_output_volume.osl
index f220ba866e3..dd479e751b3 100644
--- a/intern/cycles/kernel/shaders/node_output_volume.osl
+++ b/intern/cycles/kernel/shaders/node_output_volume.osl
@@ -18,6 +18,5 @@
 
 volume node_output_volume(closure color Volume = 0)
 {
-	Ci = Volume;
+  Ci = Volume;
 }
-
diff --git a/intern/cycles/kernel/shaders/node_particle_info.osl b/intern/cycles/kernel/shaders/node_particle_info.osl
index 2a0252d5e45..e286c33a1ff 100644
--- a/intern/cycles/kernel/shaders/node_particle_info.osl
+++ b/intern/cycles/kernel/shaders/node_particle_info.osl
@@ -16,23 +16,21 @@
 
 #include "stdosl.h"
 
-shader node_particle_info(
-    output float Index = 0.0,
-    output float Random = 0.0,
-    output float Age = 0.0,
-    output float Lifetime = 0.0,
-    output point Location = point(0.0, 0.0, 0.0),
-    output float Size = 0.0,
-    output vector Velocity = point(0.0, 0.0, 0.0),
-    output vector AngularVelocity = point(0.0, 0.0, 0.0))
+shader node_particle_info(output float Index = 0.0,
+                          output float Random = 0.0,
+                          output float Age = 0.0,
+                          output float Lifetime = 0.0,
+                          output point Location = point(0.0, 0.0, 0.0),
+                          output float Size = 0.0,
+                          output vector Velocity = point(0.0, 0.0, 0.0),
+                          output vector AngularVelocity = point(0.0, 0.0, 0.0))
 {
-	getattribute("particle:index", Index);
-	getattribute("particle:random", Random);
-	getattribute("particle:age", Age);
-	getattribute("particle:lifetime", Lifetime);
-	getattribute("particle:location", Location);
-	getattribute("particle:size", Size);
-	getattribute("particle:velocity", Velocity);
-	getattribute("particle:angular_velocity", AngularVelocity);
+  getattribute("particle:index", Index);
+  getattribute("particle:random", Random);
+  getattribute("particle:age", Age);
+  getattribute("particle:lifetime", Lifetime);
+  getattribute("particle:location", Location);
+  getattribute("particle:size", Size);
+  getattribute("particle:velocity", Velocity);
+  getattribute("particle:angular_velocity", AngularVelocity);
 }
-
diff --git a/intern/cycles/kernel/shaders/node_principled_bsdf.osl b/intern/cycles/kernel/shaders/node_principled_bsdf.osl
index 6f54ba3a462..657ced9b6e6 100644
--- a/intern/cycles/kernel/shaders/node_principled_bsdf.osl
+++ b/intern/cycles/kernel/shaders/node_principled_bsdf.osl
@@ -17,111 +17,144 @@
 #include "stdosl.h"
 #include "node_fresnel.h"
 
-shader node_principled_bsdf(
-	string distribution = "Multiscatter GGX",
-	string subsurface_method = "burley",
-	color BaseColor = color(0.8, 0.8, 0.8),
-	float Subsurface = 0.0,
-	vector SubsurfaceRadius = vector(1.0, 1.0, 1.0),
-	color SubsurfaceColor = color(0.7, 0.1, 0.1),
-	float Metallic = 0.0,
-	float Specular = 0.5,
-	float SpecularTint = 0.0,
-	float Roughness = 0.5,
-	float Anisotropic = 0.0,
-	float AnisotropicRotation = 0.0,
-	float Sheen = 0.0,
-	float SheenTint = 0.5,
-	float Clearcoat = 0.0,
-	float ClearcoatRoughness = 0.03,
-	float IOR = 1.45,
-	float Transmission = 0.0,
-	float TransmissionRoughness = 0.0,
-	normal Normal = N,
-	normal ClearcoatNormal = N,
-	normal Tangent = normalize(dPdu),
-	output closure color BSDF = 0)
+shader node_principled_bsdf(string distribution = "Multiscatter GGX",
+                            string subsurface_method = "burley",
+                            color BaseColor = color(0.8, 0.8, 0.8),
+                            float Subsurface = 0.0,
+                            vector SubsurfaceRadius = vector(1.0, 1.0, 1.0),
+                            color SubsurfaceColor = color(0.7, 0.1, 0.1),
+                            float Metallic = 0.0,
+                            float Specular = 0.5,
+                            float SpecularTint = 0.0,
+                            float Roughness = 0.5,
+                            float Anisotropic = 0.0,
+                            float AnisotropicRotation = 0.0,
+                            float Sheen = 0.0,
+                            float SheenTint = 0.5,
+                            float Clearcoat = 0.0,
+                            float ClearcoatRoughness = 0.03,
+                            float IOR = 1.45,
+                            float Transmission = 0.0,
+                            float TransmissionRoughness = 0.0,
+                            normal Normal = N,
+                            normal ClearcoatNormal = N,
+                            normal Tangent = normalize(dPdu),
+                            output closure color BSDF = 0)
 {
-	float f = max(IOR, 1e-5);
-	float diffuse_weight = (1.0 - clamp(Metallic, 0.0, 1.0)) * (1.0 - clamp(Transmission, 0.0, 1.0));
-	float final_transmission = clamp(Transmission, 0.0, 1.0) * (1.0 - clamp(Metallic, 0.0, 1.0));
-	float specular_weight = (1.0 - final_transmission);
-
-	vector T = Tangent;
-
-	float m_cdlum = luminance(BaseColor);
-	color m_ctint = m_cdlum > 0.0 ? BaseColor / m_cdlum : color(0.0, 0.0, 0.0); // normalize lum. to isolate hue+sat
-
-	/* rotate tangent */
-	if (AnisotropicRotation != 0.0)
-		T = rotate(T, AnisotropicRotation * M_2PI, point(0.0, 0.0, 0.0), Normal);
-
-	if (diffuse_weight > 1e-5) {
-		if (Subsurface > 1e-5) {
-			color mixed_ss_base_color = SubsurfaceColor * Subsurface + BaseColor * (1.0 - Subsurface);
-			if (subsurface_method == "burley") {
-				BSDF = mixed_ss_base_color * bssrdf("principled", Normal, Subsurface * SubsurfaceRadius, SubsurfaceColor, "roughness", Roughness);
-			}
-			else {
-				BSDF = mixed_ss_base_color * bssrdf("principled_random_walk", Normal, Subsurface * SubsurfaceRadius, mixed_ss_base_color, "roughness", Roughness);
-			}
-		}
-		else {
-			BSDF = BaseColor * principled_diffuse(Normal, Roughness);
-		}
-
-		if (Sheen > 1e-5) {
-			color sheen_color = color(1.0, 1.0, 1.0) * (1.0 - SheenTint) + m_ctint * SheenTint;
-
-			BSDF = BSDF + sheen_color * Sheen * principled_sheen(Normal);
-		}
-
-		BSDF = BSDF * diffuse_weight;
-	}
-
-	if (specular_weight > 1e-5) {
-		float aspect = sqrt(1.0 - Anisotropic * 0.9);
-		float r2 = Roughness * Roughness;
-
-		float alpha_x = r2 / aspect;
-		float alpha_y = r2 * aspect;
-
-		color tmp_col = color(1.0, 1.0, 1.0) * (1.0 - SpecularTint) + m_ctint * SpecularTint;
-
-		color Cspec0 = (Specular * 0.08 * tmp_col) * (1.0 - Metallic) + BaseColor * Metallic;
-
-		if (distribution == "GGX" || Roughness <= 0.075) {
-			BSDF = BSDF  + specular_weight * microfacet_ggx_aniso_fresnel(Normal, T, alpha_x, alpha_y, (2.0 / (1.0 - sqrt(0.08 * Specular))) - 1.0, BaseColor, Cspec0);
-		} else {
-			BSDF = BSDF + specular_weight * microfacet_multi_ggx_aniso_fresnel(Normal, T, alpha_x, alpha_y, (2.0 / (1.0 - sqrt(0.08 * Specular))) - 1.0, BaseColor, Cspec0);
-		}
-	}
-
-	if (final_transmission > 1e-5) {
-		color Cspec0 = BaseColor * SpecularTint + color(1.0, 1.0, 1.0) * (1.0 - SpecularTint);
-		float eta = backfacing() ? 1.0 / f : f;
-
-		if (distribution == "GGX" || Roughness <= 5e-2) {
-			float cosNO = dot(Normal, I);
-			float Fr = fresnel_dielectric_cos(cosNO, eta);
-
-			float refl_roughness = Roughness;
-			if (Roughness <= 1e-2)
-				refl_roughness = 0.0;
-
-			float transmission_roughness = refl_roughness;
-			if (distribution == "GGX")
-				transmission_roughness = 1.0 - (1.0 - refl_roughness) * (1.0 - TransmissionRoughness);
-
-			BSDF = BSDF + final_transmission * (Fr * microfacet_ggx_fresnel(Normal, refl_roughness * refl_roughness, eta, BaseColor, Cspec0) +
-			       (1.0 - Fr) * BaseColor * microfacet_ggx_refraction(Normal, transmission_roughness * transmission_roughness, eta));
-		} else {
-			BSDF = BSDF + final_transmission * microfacet_multi_ggx_glass_fresnel(Normal, Roughness * Roughness, eta, BaseColor, Cspec0);
-		}
-	}
-
-	if (Clearcoat > 1e-5) {
-		BSDF = BSDF + principled_clearcoat(ClearcoatNormal, Clearcoat, ClearcoatRoughness * ClearcoatRoughness);
-	}
+  float f = max(IOR, 1e-5);
+  float diffuse_weight = (1.0 - clamp(Metallic, 0.0, 1.0)) * (1.0 - clamp(Transmission, 0.0, 1.0));
+  float final_transmission = clamp(Transmission, 0.0, 1.0) * (1.0 - clamp(Metallic, 0.0, 1.0));
+  float specular_weight = (1.0 - final_transmission);
+
+  vector T = Tangent;
+
+  float m_cdlum = luminance(BaseColor);
+  color m_ctint = m_cdlum > 0.0 ? BaseColor / m_cdlum :
+                                  color(0.0, 0.0, 0.0);  // normalize lum. to isolate hue+sat
+
+  /* rotate tangent */
+  if (AnisotropicRotation != 0.0)
+    T = rotate(T, AnisotropicRotation * M_2PI, point(0.0, 0.0, 0.0), Normal);
+
+  if (diffuse_weight > 1e-5) {
+    if (Subsurface > 1e-5) {
+      color mixed_ss_base_color = SubsurfaceColor * Subsurface + BaseColor * (1.0 - Subsurface);
+      if (subsurface_method == "burley") {
+        BSDF = mixed_ss_base_color * bssrdf("principled",
+                                            Normal,
+                                            Subsurface * SubsurfaceRadius,
+                                            SubsurfaceColor,
+                                            "roughness",
+                                            Roughness);
+      }
+      else {
+        BSDF = mixed_ss_base_color * bssrdf("principled_random_walk",
+                                            Normal,
+                                            Subsurface * SubsurfaceRadius,
+                                            mixed_ss_base_color,
+                                            "roughness",
+                                            Roughness);
+      }
+    }
+    else {
+      BSDF = BaseColor * principled_diffuse(Normal, Roughness);
+    }
+
+    if (Sheen > 1e-5) {
+      color sheen_color = color(1.0, 1.0, 1.0) * (1.0 - SheenTint) + m_ctint * SheenTint;
+
+      BSDF = BSDF + sheen_color * Sheen * principled_sheen(Normal);
+    }
+
+    BSDF = BSDF * diffuse_weight;
+  }
+
+  if (specular_weight > 1e-5) {
+    float aspect = sqrt(1.0 - Anisotropic * 0.9);
+    float r2 = Roughness * Roughness;
+
+    float alpha_x = r2 / aspect;
+    float alpha_y = r2 * aspect;
+
+    color tmp_col = color(1.0, 1.0, 1.0) * (1.0 - SpecularTint) + m_ctint * SpecularTint;
+
+    color Cspec0 = (Specular * 0.08 * tmp_col) * (1.0 - Metallic) + BaseColor * Metallic;
+
+    if (distribution == "GGX" || Roughness <= 0.075) {
+      BSDF = BSDF + specular_weight *
+                        microfacet_ggx_aniso_fresnel(Normal,
+                                                     T,
+                                                     alpha_x,
+                                                     alpha_y,
+                                                     (2.0 / (1.0 - sqrt(0.08 * Specular))) - 1.0,
+                                                     BaseColor,
+                                                     Cspec0);
+    }
+    else {
+      BSDF = BSDF + specular_weight * microfacet_multi_ggx_aniso_fresnel(
+                                          Normal,
+                                          T,
+                                          alpha_x,
+                                          alpha_y,
+                                          (2.0 / (1.0 - sqrt(0.08 * Specular))) - 1.0,
+                                          BaseColor,
+                                          Cspec0);
+    }
+  }
+
+  if (final_transmission > 1e-5) {
+    color Cspec0 = BaseColor * SpecularTint + color(1.0, 1.0, 1.0) * (1.0 - SpecularTint);
+    float eta = backfacing() ? 1.0 / f : f;
+
+    if (distribution == "GGX" || Roughness <= 5e-2) {
+      float cosNO = dot(Normal, I);
+      float Fr = fresnel_dielectric_cos(cosNO, eta);
+
+      float refl_roughness = Roughness;
+      if (Roughness <= 1e-2)
+        refl_roughness = 0.0;
+
+      float transmission_roughness = refl_roughness;
+      if (distribution == "GGX")
+        transmission_roughness = 1.0 - (1.0 - refl_roughness) * (1.0 - TransmissionRoughness);
+
+      BSDF = BSDF +
+             final_transmission *
+                 (Fr * microfacet_ggx_fresnel(
+                           Normal, refl_roughness * refl_roughness, eta, BaseColor, Cspec0) +
+                  (1.0 - Fr) * BaseColor *
+                      microfacet_ggx_refraction(
+                          Normal, transmission_roughness * transmission_roughness, eta));
+    }
+    else {
+      BSDF = BSDF +
+             final_transmission * microfacet_multi_ggx_glass_fresnel(
+                                      Normal, Roughness * Roughness, eta, BaseColor, Cspec0);
+    }
+  }
+
+  if (Clearcoat > 1e-5) {
+    BSDF = BSDF + principled_clearcoat(
+                      ClearcoatNormal, Clearcoat, ClearcoatRoughness * ClearcoatRoughness);
+  }
 }
-
diff --git a/intern/cycles/kernel/shaders/node_principled_hair_bsdf.osl b/intern/cycles/kernel/shaders/node_principled_hair_bsdf.osl
index 757a88f8ece..bf986438fca 100644
--- a/intern/cycles/kernel/shaders/node_principled_hair_bsdf.osl
+++ b/intern/cycles/kernel/shaders/node_principled_hair_bsdf.osl
@@ -18,88 +18,88 @@
 
 color log3(color a)
 {
-	return color(log(a[0]), log(a[1]), log(a[2]));
+  return color(log(a[0]), log(a[1]), log(a[2]));
 }
 
 color sigma_from_concentration(float eumelanin, float pheomelanin)
 {
-	return eumelanin*color(0.506, 0.841, 1.653) + pheomelanin*color(0.343, 0.733, 1.924);
+  return eumelanin * color(0.506, 0.841, 1.653) + pheomelanin * color(0.343, 0.733, 1.924);
 }
 
 color sigma_from_reflectance(color c, float azimuthal_roughness)
 {
-	float x = azimuthal_roughness;
-	float roughness_fac = (((((0.245*x) + 5.574)*x - 10.73)*x + 2.532)*x - 0.215)*x + 5.969;
-	color sigma = log3(c) / roughness_fac;
-	return sigma * sigma;
+  float x = azimuthal_roughness;
+  float roughness_fac = (((((0.245 * x) + 5.574) * x - 10.73) * x + 2.532) * x - 0.215) * x +
+                        5.969;
+  color sigma = log3(c) / roughness_fac;
+  return sigma * sigma;
 }
 
-shader node_principled_hair_bsdf(
-	color Color = color(0.017513, 0.005763, 0.002059),
-	float Melanin = 0.8,
-	float MelaninRedness = 1.0,
-	float RandomColor = 0.0,
-	color Tint = 1.0,
-	color AbsorptionCoefficient = color(0.245531, 0.52, 1.365),
-	normal Normal = Ng,
-	string parametrization = "Absorption coefficient",
-	float Offset = radians(2),
-	float Roughness = 0.3,
-	float RadialRoughness = 0.3,
-	float RandomRoughness = 0.0,
-	float Coat = 0.0,
-	float IOR = 1.55,
-	string AttrRandom = "geom:curve_random",
-	float Random = 0.0,
+shader node_principled_hair_bsdf(color Color = color(0.017513, 0.005763, 0.002059),
+                                 float Melanin = 0.8,
+                                 float MelaninRedness = 1.0,
+                                 float RandomColor = 0.0,
+                                 color Tint = 1.0,
+                                 color AbsorptionCoefficient = color(0.245531, 0.52, 1.365),
+                                 normal Normal = Ng,
+                                 string parametrization = "Absorption coefficient",
+                                 float Offset = radians(2),
+                                 float Roughness = 0.3,
+                                 float RadialRoughness = 0.3,
+                                 float RandomRoughness = 0.0,
+                                 float Coat = 0.0,
+                                 float IOR = 1.55,
+                                 string AttrRandom = "geom:curve_random",
+                                 float Random = 0.0,
 
-	output closure color BSDF = 0)
+                                 output closure color BSDF = 0)
 {
-	/* Get random value from curve in none is specified. */
-	float random_value = 0.0;
+  /* Get random value from curve in none is specified. */
+  float random_value = 0.0;
 
-	if (isconnected(Random)) {
-		random_value = Random;
-	}
-	else {
-		getattribute(AttrRandom, random_value);
-	}
+  if (isconnected(Random)) {
+    random_value = Random;
+  }
+  else {
+    getattribute(AttrRandom, random_value);
+  }
 
-	/* Compute roughness. */
-	float factor_random_roughness = 1.0 + 2.0*(random_value - 0.5)*RandomRoughness;
-	float m0_roughness = 1.0 - clamp(Coat, 0.0, 1.0);
-	float roughness = Roughness*factor_random_roughness;
-	float radial_roughness = RadialRoughness*factor_random_roughness;
+  /* Compute roughness. */
+  float factor_random_roughness = 1.0 + 2.0 * (random_value - 0.5) * RandomRoughness;
+  float m0_roughness = 1.0 - clamp(Coat, 0.0, 1.0);
+  float roughness = Roughness * factor_random_roughness;
+  float radial_roughness = RadialRoughness * factor_random_roughness;
 
-	/* Compute absorption. */
-	color sigma;
+  /* Compute absorption. */
+  color sigma;
 
-	if (parametrization == "Absorption coefficient") {
-		sigma = AbsorptionCoefficient;
-	}
-	else if (parametrization == "Melanin concentration") {
-		/* Randomize melanin. */
-		float factor_random_color = 1.0 + 2.0*(random_value - 0.5) * RandomColor;
-		float melanin = Melanin * factor_random_color;
+  if (parametrization == "Absorption coefficient") {
+    sigma = AbsorptionCoefficient;
+  }
+  else if (parametrization == "Melanin concentration") {
+    /* Randomize melanin. */
+    float factor_random_color = 1.0 + 2.0 * (random_value - 0.5) * RandomColor;
+    float melanin = Melanin * factor_random_color;
 
-		/* Map melanin 0..inf from more perceptually linear 0..1. */
-		melanin = -log(max(1.0 - melanin, 0.0001));
+    /* Map melanin 0..inf from more perceptually linear 0..1. */
+    melanin = -log(max(1.0 - melanin, 0.0001));
 
-		/* Benedikt Bitterli's melanin ratio remapping. */
-		float eumelanin = melanin * (1.0 - MelaninRedness);
-		float pheomelanin = melanin * MelaninRedness;
-		color melanin_sigma = sigma_from_concentration(eumelanin, pheomelanin);
+    /* Benedikt Bitterli's melanin ratio remapping. */
+    float eumelanin = melanin * (1.0 - MelaninRedness);
+    float pheomelanin = melanin * MelaninRedness;
+    color melanin_sigma = sigma_from_concentration(eumelanin, pheomelanin);
 
-		/* Optional tint. */
-		color tint_sigma = sigma_from_reflectance(Tint, radial_roughness);
-		sigma = melanin_sigma + tint_sigma;
-	}
-	else if (parametrization == "Direct coloring"){
-		sigma = sigma_from_reflectance(Color, radial_roughness);
-	}
-	else {
-		/* Fallback to brownish hair, same as defaults for melanin. */
-		sigma = sigma_from_concentration(0.0, 0.8054375);
-	}
+    /* Optional tint. */
+    color tint_sigma = sigma_from_reflectance(Tint, radial_roughness);
+    sigma = melanin_sigma + tint_sigma;
+  }
+  else if (parametrization == "Direct coloring") {
+    sigma = sigma_from_reflectance(Color, radial_roughness);
+  }
+  else {
+    /* Fallback to brownish hair, same as defaults for melanin. */
+    sigma = sigma_from_concentration(0.0, 0.8054375);
+  }
 
-	BSDF = principled_hair(Normal, sigma, roughness, radial_roughness, m0_roughness, Offset, IOR);
+  BSDF = principled_hair(Normal, sigma, roughness, radial_roughness, m0_roughness, Offset, IOR);
 }
diff --git a/intern/cycles/kernel/shaders/node_principled_volume.osl b/intern/cycles/kernel/shaders/node_principled_volume.osl
index ea8d6ab12c5..39cf6837eb2 100644
--- a/intern/cycles/kernel/shaders/node_principled_volume.osl
+++ b/intern/cycles/kernel/shaders/node_principled_volume.osl
@@ -16,80 +16,78 @@
 
 #include "stdosl.h"
 
-shader node_principled_volume(
-	color Color = color(0.5, 0.5, 0.5),
-	float Density = 1.0,
-	float Anisotropy = 0.0,
-	color AbsorptionColor = color(0.0, 0.0, 0.0),
-	float EmissionStrength = 0.0,
-	color EmissionColor = color(1.0, 1.0, 1.0),
-	float BlackbodyIntensity = 0.0,
-	color BlackbodyTint = color(1.0, 1.0, 1.0),
-	float Temperature = 1500.0,
-	string DensityAttribute = "geom:density",
-	string ColorAttribute = "geom:color",
-	string TemperatureAttribute = "geom:temperature",
-	output closure color Volume = 0)
+shader node_principled_volume(color Color = color(0.5, 0.5, 0.5),
+                              float Density = 1.0,
+                              float Anisotropy = 0.0,
+                              color AbsorptionColor = color(0.0, 0.0, 0.0),
+                              float EmissionStrength = 0.0,
+                              color EmissionColor = color(1.0, 1.0, 1.0),
+                              float BlackbodyIntensity = 0.0,
+                              color BlackbodyTint = color(1.0, 1.0, 1.0),
+                              float Temperature = 1500.0,
+                              string DensityAttribute = "geom:density",
+                              string ColorAttribute = "geom:color",
+                              string TemperatureAttribute = "geom:temperature",
+                              output closure color Volume = 0)
 {
-	/* Compute density. */
-	float primitive_density = 1.0;
-	float density = max(Density, 0.0);
+  /* Compute density. */
+  float primitive_density = 1.0;
+  float density = max(Density, 0.0);
 
-	if(density > 1e-5) {
-		if(getattribute(DensityAttribute, primitive_density)) {
-			density = max(density * primitive_density, 0.0);
-		}
-	}
+  if (density > 1e-5) {
+    if (getattribute(DensityAttribute, primitive_density)) {
+      density = max(density * primitive_density, 0.0);
+    }
+  }
 
-	if(density > 1e-5) {
-		/* Compute scattering color. */
-		color scatter_color = Color;
-		color primitive_color;
-		if(getattribute(ColorAttribute, primitive_color)) {
-			scatter_color *= primitive_color;
-		}
+  if (density > 1e-5) {
+    /* Compute scattering color. */
+    color scatter_color = Color;
+    color primitive_color;
+    if (getattribute(ColorAttribute, primitive_color)) {
+      scatter_color *= primitive_color;
+    }
 
-		/* Add scattering and absorption closures. */
-		color scatter_coeff = scatter_color;
-		color absorption_color = sqrt(max(AbsorptionColor, 0.0));
-		color absorption_coeff = max(1.0 - scatter_color, 0.0) * max(1.0 - absorption_color, 0.0);
-		Volume = scatter_coeff * density * henyey_greenstein(Anisotropy) +
-		         absorption_coeff * density * absorption();
-	}
+    /* Add scattering and absorption closures. */
+    color scatter_coeff = scatter_color;
+    color absorption_color = sqrt(max(AbsorptionColor, 0.0));
+    color absorption_coeff = max(1.0 - scatter_color, 0.0) * max(1.0 - absorption_color, 0.0);
+    Volume = scatter_coeff * density * henyey_greenstein(Anisotropy) +
+             absorption_coeff * density * absorption();
+  }
 
-	/* Compute emission. */
-	float emission_strength = max(EmissionStrength, 0.0);
-	float blackbody_intensity = BlackbodyIntensity;
+  /* Compute emission. */
+  float emission_strength = max(EmissionStrength, 0.0);
+  float blackbody_intensity = BlackbodyIntensity;
 
-	if(emission_strength > 1e-5) {
-		Volume += emission_strength * EmissionColor * emission();
-	}
+  if (emission_strength > 1e-5) {
+    Volume += emission_strength * EmissionColor * emission();
+  }
 
-	if(blackbody_intensity > 1e-3) {
-		float T = Temperature;
+  if (blackbody_intensity > 1e-3) {
+    float T = Temperature;
 
-		/* Add temperature from attribute if available. */
-		float temperature;
-		if(getattribute(TemperatureAttribute, temperature)) {
-			T *= max(temperature, 0.0);
-		}
+    /* Add temperature from attribute if available. */
+    float temperature;
+    if (getattribute(TemperatureAttribute, temperature)) {
+      T *= max(temperature, 0.0);
+    }
 
-		T = max(T, 0.0);
+    T = max(T, 0.0);
 
-		/* Stefan-Boltzman law. */
-		float T4 = (T * T) * (T * T);
-		float sigma = 5.670373e-8 * 1e-6 / M_PI;
-		float intensity = sigma * mix(1.0, T4, blackbody_intensity);
+    /* Stefan-Boltzman law. */
+    float T4 = (T * T) * (T * T);
+    float sigma = 5.670373e-8 * 1e-6 / M_PI;
+    float intensity = sigma * mix(1.0, T4, blackbody_intensity);
 
-		if(intensity > 1e-5) {
-			color bb = blackbody(T);
-			float l = luminance(bb);
+    if (intensity > 1e-5) {
+      color bb = blackbody(T);
+      float l = luminance(bb);
 
-			if(l != 0.0) {
-				bb *= BlackbodyTint * intensity / l;
-				Volume += bb * emission();
-			}
-		}
-	}
+      if (l != 0.0) {
+        bb *= BlackbodyTint * intensity / l;
+        Volume += bb * emission();
+      }
+    }
+  }
 }
-
diff --git a/intern/cycles/kernel/shaders/node_ramp_util.h b/intern/cycles/kernel/shaders/node_ramp_util.h
index d07d5a98316..f7fb07b257d 100644
--- a/intern/cycles/kernel/shaders/node_ramp_util.h
+++ b/intern/cycles/kernel/shaders/node_ramp_util.h
@@ -18,72 +18,76 @@
 
 color rgb_ramp_lookup(color ramp[], float at, int interpolate, int extrapolate)
 {
-	float f = at;
-	int table_size = arraylength(ramp);
+  float f = at;
+  int table_size = arraylength(ramp);
 
-	if ((f < 0.0 || f > 1.0) && extrapolate) {
-		color t0, dy;
-		if (f < 0.0) {
-			t0 = ramp[0];
-			dy = t0 - ramp[1];
-			f = -f;
-		}
-		else {
-			t0 = ramp[table_size - 1];
-			dy = t0 - ramp[table_size - 2];
-			f = f - 1.0;
-		}
-		return t0 + dy * f * (table_size - 1);
-	}
+  if ((f < 0.0 || f > 1.0) && extrapolate) {
+    color t0, dy;
+    if (f < 0.0) {
+      t0 = ramp[0];
+      dy = t0 - ramp[1];
+      f = -f;
+    }
+    else {
+      t0 = ramp[table_size - 1];
+      dy = t0 - ramp[table_size - 2];
+      f = f - 1.0;
+    }
+    return t0 + dy * f * (table_size - 1);
+  }
 
-	f = clamp(at, 0.0, 1.0) * (table_size - 1);
+  f = clamp(at, 0.0, 1.0) * (table_size - 1);
 
-	/* clamp int as well in case of NaN */
-	int i = (int)f;
-	if (i < 0) i = 0;
-	if (i >= table_size) i = table_size - 1;
-	float t = f - (float)i;
+  /* clamp int as well in case of NaN */
+  int i = (int)f;
+  if (i < 0)
+    i = 0;
+  if (i >= table_size)
+    i = table_size - 1;
+  float t = f - (float)i;
 
-	color result = ramp[i];
+  color result = ramp[i];
 
-	if (interpolate && t > 0.0)
-		result = (1.0 - t) * result + t * ramp[i + 1];
+  if (interpolate && t > 0.0)
+    result = (1.0 - t) * result + t * ramp[i + 1];
 
-	return result;
+  return result;
 }
 
 float rgb_ramp_lookup(float ramp[], float at, int interpolate, int extrapolate)
 {
-	float f = at;
-	int table_size = arraylength(ramp);
+  float f = at;
+  int table_size = arraylength(ramp);
 
-	if ((f < 0.0 || f > 1.0) && extrapolate) {
-		float t0, dy;
-		if (f < 0.0) {
-			t0 = ramp[0];
-			dy = t0 - ramp[1];
-			f = -f;
-		}
-		else {
-			t0 = ramp[table_size - 1];
-			dy = t0 - ramp[table_size - 2];
-			f = f - 1.0;
-		}
-		return t0 + dy * f * (table_size - 1);
-	}
+  if ((f < 0.0 || f > 1.0) && extrapolate) {
+    float t0, dy;
+    if (f < 0.0) {
+      t0 = ramp[0];
+      dy = t0 - ramp[1];
+      f = -f;
+    }
+    else {
+      t0 = ramp[table_size - 1];
+      dy = t0 - ramp[table_size - 2];
+      f = f - 1.0;
+    }
+    return t0 + dy * f * (table_size - 1);
+  }
 
-	f = clamp(at, 0.0, 1.0) * (table_size - 1);
+  f = clamp(at, 0.0, 1.0) * (table_size - 1);
 
-	/* clamp int as well in case of NaN */
-	int i = (int)f;
-	if (i < 0) i = 0;
-	if (i >= table_size) i = table_size - 1;
-	float t = f - (float)i;
+  /* clamp int as well in case of NaN */
+  int i = (int)f;
+  if (i < 0)
+    i = 0;
+  if (i >= table_size)
+    i = table_size - 1;
+  float t = f - (float)i;
 
-	float result = ramp[i];
+  float result = ramp[i];
 
-	if (interpolate && t > 0.0)
-		result = (1.0 - t) * result + t * ramp[i + 1];
+  if (interpolate && t > 0.0)
+    result = (1.0 - t) * result + t * ramp[i + 1];
 
-	return result;
+  return result;
 }
diff --git a/intern/cycles/kernel/shaders/node_refraction_bsdf.osl b/intern/cycles/kernel/shaders/node_refraction_bsdf.osl
index eaab7282243..941d99dd44d 100644
--- a/intern/cycles/kernel/shaders/node_refraction_bsdf.osl
+++ b/intern/cycles/kernel/shaders/node_refraction_bsdf.osl
@@ -16,23 +16,21 @@
 
 #include "stdosl.h"
 
-shader node_refraction_bsdf(
-	color Color = 0.8,
-	string distribution = "sharp",
-	float Roughness = 0.2,
-	float IOR = 1.45,
-	normal Normal = N,
-	output closure color BSDF = 0)
+shader node_refraction_bsdf(color Color = 0.8,
+                            string distribution = "sharp",
+                            float Roughness = 0.2,
+                            float IOR = 1.45,
+                            normal Normal = N,
+                            output closure color BSDF = 0)
 {
-	float f = max(IOR, 1e-5);
-	float eta = backfacing() ? 1.0 / f : f;
-	float roughness = Roughness * Roughness;
+  float f = max(IOR, 1e-5);
+  float eta = backfacing() ? 1.0 / f : f;
+  float roughness = Roughness * Roughness;
 
-	if (distribution == "sharp")
-		BSDF = Color * refraction(Normal, eta);
-	else if (distribution == "beckmann")
-		BSDF = Color * microfacet_beckmann_refraction(Normal, roughness, eta);
-	else if (distribution == "GGX")
-		BSDF = Color * microfacet_ggx_refraction(Normal, roughness, eta);
+  if (distribution == "sharp")
+    BSDF = Color * refraction(Normal, eta);
+  else if (distribution == "beckmann")
+    BSDF = Color * microfacet_beckmann_refraction(Normal, roughness, eta);
+  else if (distribution == "GGX")
+    BSDF = Color * microfacet_ggx_refraction(Normal, roughness, eta);
 }
-
diff --git a/intern/cycles/kernel/shaders/node_rgb_curves.osl b/intern/cycles/kernel/shaders/node_rgb_curves.osl
index 0d5eeea5c43..e34eb027cc3 100644
--- a/intern/cycles/kernel/shaders/node_rgb_curves.osl
+++ b/intern/cycles/kernel/shaders/node_rgb_curves.osl
@@ -17,25 +17,23 @@
 #include "stdosl.h"
 #include "node_ramp_util.h"
 
-shader node_rgb_curves(
-	color ramp[] = {0.0},
-	float min_x = 0.0,
-	float max_x = 1.0,
+shader node_rgb_curves(color ramp[] = {0.0},
+                       float min_x = 0.0,
+                       float max_x = 1.0,
 
-	color ColorIn = 0.0,
-	float Fac = 0.0,
-	output color ColorOut = 0.0)
+                       color ColorIn = 0.0,
+                       float Fac = 0.0,
+                       output color ColorOut = 0.0)
 {
-	color c = (ColorIn - color(min_x, min_x, min_x)) / (max_x - min_x);
+  color c = (ColorIn - color(min_x, min_x, min_x)) / (max_x - min_x);
 
-	color r = rgb_ramp_lookup(ramp, c[0], 1, 1);
-	color g = rgb_ramp_lookup(ramp, c[1], 1, 1);
-	color b = rgb_ramp_lookup(ramp, c[2], 1, 1);
+  color r = rgb_ramp_lookup(ramp, c[0], 1, 1);
+  color g = rgb_ramp_lookup(ramp, c[1], 1, 1);
+  color b = rgb_ramp_lookup(ramp, c[2], 1, 1);
 
-	ColorOut[0] = r[0];
-	ColorOut[1] = g[1];
-	ColorOut[2] = b[2];
+  ColorOut[0] = r[0];
+  ColorOut[1] = g[1];
+  ColorOut[2] = b[2];
 
-	ColorOut = mix(ColorIn, ColorOut, Fac);
+  ColorOut = mix(ColorIn, ColorOut, Fac);
 }
-
diff --git a/intern/cycles/kernel/shaders/node_rgb_ramp.osl b/intern/cycles/kernel/shaders/node_rgb_ramp.osl
index 4e7d8fdcf65..c9f9746a4fb 100644
--- a/intern/cycles/kernel/shaders/node_rgb_ramp.osl
+++ b/intern/cycles/kernel/shaders/node_rgb_ramp.osl
@@ -17,16 +17,14 @@
 #include "stdosl.h"
 #include "node_ramp_util.h"
 
-shader node_rgb_ramp(
-	color ramp_color[] = {0.0},
-	float ramp_alpha[] = {0.0},
-	int interpolate = 1,
+shader node_rgb_ramp(color ramp_color[] = {0.0},
+                     float ramp_alpha[] = {0.0},
+                     int interpolate = 1,
 
-	float Fac = 0.0,
-	output color Color = 0.0,
-	output float Alpha = 1.0)
+                     float Fac = 0.0,
+                     output color Color = 0.0,
+                     output float Alpha = 1.0)
 {
-	Color = rgb_ramp_lookup(ramp_color, Fac, interpolate, 0);
-	Alpha = rgb_ramp_lookup(ramp_alpha, Fac, interpolate, 0);
+  Color = rgb_ramp_lookup(ramp_color, Fac, interpolate, 0);
+  Alpha = rgb_ramp_lookup(ramp_alpha, Fac, interpolate, 0);
 }
-
diff --git a/intern/cycles/kernel/shaders/node_rgb_to_bw.osl b/intern/cycles/kernel/shaders/node_rgb_to_bw.osl
index 903dfcdc881..837d6caf5fc 100644
--- a/intern/cycles/kernel/shaders/node_rgb_to_bw.osl
+++ b/intern/cycles/kernel/shaders/node_rgb_to_bw.osl
@@ -16,10 +16,7 @@
 
 #include "stdosl.h"
 
-shader node_rgb_to_bw(
-	color Color = 0.0,
-	output float Val = 0.0)
+shader node_rgb_to_bw(color Color = 0.0, output float Val = 0.0)
 {
-	Val = Color[0] * 0.2126 + Color[1] * 0.7152 + Color[2] * 0.0722;
+  Val = Color[0] * 0.2126 + Color[1] * 0.7152 + Color[2] * 0.0722;
 }
-
diff --git a/intern/cycles/kernel/shaders/node_scatter_volume.osl b/intern/cycles/kernel/shaders/node_scatter_volume.osl
index 002e2750fca..fce5716f372 100644
--- a/intern/cycles/kernel/shaders/node_scatter_volume.osl
+++ b/intern/cycles/kernel/shaders/node_scatter_volume.osl
@@ -16,12 +16,10 @@
 
 #include "stdosl.h"
 
-shader node_scatter_volume(
-	color Color = color(0.8, 0.8, 0.8),
-	float Density = 1.0,
-	float Anisotropy = 0.0,
-	output closure color Volume = 0)
+shader node_scatter_volume(color Color = color(0.8, 0.8, 0.8),
+                           float Density = 1.0,
+                           float Anisotropy = 0.0,
+                           output closure color Volume = 0)
 {
-	Volume = (Color * max(Density, 0.0)) * henyey_greenstein(Anisotropy);
+  Volume = (Color * max(Density, 0.0)) * henyey_greenstein(Anisotropy);
 }
-
diff --git a/intern/cycles/kernel/shaders/node_separate_hsv.osl b/intern/cycles/kernel/shaders/node_separate_hsv.osl
index 2a804040294..c77ed1f3755 100644
--- a/intern/cycles/kernel/shaders/node_separate_hsv.osl
+++ b/intern/cycles/kernel/shaders/node_separate_hsv.osl
@@ -17,15 +17,14 @@
 #include "stdosl.h"
 #include "node_color.h"
 
-shader node_separate_hsv(
-	color Color = 0.8,
-	output float H = 0.0,
-	output float S = 0.0,
-	output float V = 0.0)
+shader node_separate_hsv(color Color = 0.8,
+                         output float H = 0.0,
+                         output float S = 0.0,
+                         output float V = 0.0)
 {
-	color col = rgb_to_hsv(Color);
-	
-	H = col[0];
-	S = col[1];
-	V = col[2];
+  color col = rgb_to_hsv(Color);
+
+  H = col[0];
+  S = col[1];
+  V = col[2];
 }
diff --git a/intern/cycles/kernel/shaders/node_separate_rgb.osl b/intern/cycles/kernel/shaders/node_separate_rgb.osl
index 43d9e3aa4b1..ee64add27e2 100644
--- a/intern/cycles/kernel/shaders/node_separate_rgb.osl
+++ b/intern/cycles/kernel/shaders/node_separate_rgb.osl
@@ -16,13 +16,12 @@
 
 #include "stdosl.h"
 
-shader node_separate_rgb(
-	color Image = 0.8,
-	output float R = 0.0,
-	output float G = 0.0,
-	output float B = 0.0)
+shader node_separate_rgb(color Image = 0.8,
+                         output float R = 0.0,
+                         output float G = 0.0,
+                         output float B = 0.0)
 {
-	R = Image[0];
-	G = Image[1];
-	B = Image[2];
+  R = Image[0];
+  G = Image[1];
+  B = Image[2];
 }
diff --git a/intern/cycles/kernel/shaders/node_separate_xyz.osl b/intern/cycles/kernel/shaders/node_separate_xyz.osl
index e1963a1902f..8a563f5e920 100644
--- a/intern/cycles/kernel/shaders/node_separate_xyz.osl
+++ b/intern/cycles/kernel/shaders/node_separate_xyz.osl
@@ -16,13 +16,12 @@
 
 #include "stdosl.h"
 
-shader node_separate_xyz(
-	vector Vector = 0.8,
-	output float X = 0.0,
-	output float Y = 0.0,
-	output float Z = 0.0)
+shader node_separate_xyz(vector Vector = 0.8,
+                         output float X = 0.0,
+                         output float Y = 0.0,
+                         output float Z = 0.0)
 {
-	X = Vector[0];
-	Y = Vector[1];
-	Z = Vector[2];
+  X = Vector[0];
+  Y = Vector[1];
+  Z = Vector[2];
 }
diff --git a/intern/cycles/kernel/shaders/node_set_normal.osl b/intern/cycles/kernel/shaders/node_set_normal.osl
index 7ca7ac9350c..9541b829ef7 100644
--- a/intern/cycles/kernel/shaders/node_set_normal.osl
+++ b/intern/cycles/kernel/shaders/node_set_normal.osl
@@ -16,11 +16,8 @@
 
 #include "stdosl.h"
 
-surface node_set_normal(
-	normal Direction = N,
-	output normal Normal = N)
+surface node_set_normal(normal Direction = N, output normal Normal = N)
 {
-	N = Direction;
-	Normal = Direction;
+  N = Direction;
+  Normal = Direction;
 }
-
diff --git a/intern/cycles/kernel/shaders/node_sky_texture.osl b/intern/cycles/kernel/shaders/node_sky_texture.osl
index a6c187d15f2..9b29e5489c2 100644
--- a/intern/cycles/kernel/shaders/node_sky_texture.osl
+++ b/intern/cycles/kernel/shaders/node_sky_texture.osl
@@ -19,115 +19,122 @@
 
 float sky_angle_between(float thetav, float phiv, float theta, float phi)
 {
-	float cospsi = sin(thetav) * sin(theta) * cos(phi - phiv) + cos(thetav) * cos(theta);
+  float cospsi = sin(thetav) * sin(theta) * cos(phi - phiv) + cos(thetav) * cos(theta);
 
-	if (cospsi > 1.0)
-		return 0.0;
-	if (cospsi < -1.0)
-		return M_PI;
+  if (cospsi > 1.0)
+    return 0.0;
+  if (cospsi < -1.0)
+    return M_PI;
 
-	return acos(cospsi);
+  return acos(cospsi);
 }
 
 vector sky_spherical_coordinates(vector dir)
 {
-	return vector(acos(dir[2]), atan2(dir[0], dir[1]), 0);
+  return vector(acos(dir[2]), atan2(dir[0], dir[1]), 0);
 }
 
 /* Preetham */
 float sky_perez_function(float lam[9], float theta, float gamma)
 {
-	float ctheta = cos(theta);
-	float cgamma = cos(gamma);
+  float ctheta = cos(theta);
+  float cgamma = cos(gamma);
 
-	return (1.0 + lam[0] * exp(lam[1] / ctheta)) * (1.0 + lam[2] * exp(lam[3] * gamma) + lam[4] * cgamma * cgamma);
+  return (1.0 + lam[0] * exp(lam[1] / ctheta)) *
+         (1.0 + lam[2] * exp(lam[3] * gamma) + lam[4] * cgamma * cgamma);
 }
 
 color sky_radiance_old(normal dir,
-                       float sunphi, float suntheta, color radiance,
-                       float config_x[9], float config_y[9], float config_z[9])
+                       float sunphi,
+                       float suntheta,
+                       color radiance,
+                       float config_x[9],
+                       float config_y[9],
+                       float config_z[9])
 {
-	/* convert vector to spherical coordinates */
-	vector spherical = sky_spherical_coordinates(dir);
-	float theta = spherical[0];
-	float phi = spherical[1];
+  /* convert vector to spherical coordinates */
+  vector spherical = sky_spherical_coordinates(dir);
+  float theta = spherical[0];
+  float phi = spherical[1];
 
-	/* angle between sun direction and dir */
-	float gamma = sky_angle_between(theta, phi, suntheta, sunphi);
+  /* angle between sun direction and dir */
+  float gamma = sky_angle_between(theta, phi, suntheta, sunphi);
 
-	/* clamp theta to horizon */
-	theta = min(theta, M_PI_2 - 0.001);
+  /* clamp theta to horizon */
+  theta = min(theta, M_PI_2 - 0.001);
 
-	/* compute xyY color space values */
-	float x = radiance[1] * sky_perez_function(config_y, theta, gamma);
-	float y = radiance[2] * sky_perez_function(config_z, theta, gamma);
-	float Y = radiance[0] * sky_perez_function(config_x, theta, gamma);
+  /* compute xyY color space values */
+  float x = radiance[1] * sky_perez_function(config_y, theta, gamma);
+  float y = radiance[2] * sky_perez_function(config_z, theta, gamma);
+  float Y = radiance[0] * sky_perez_function(config_x, theta, gamma);
 
-	/* convert to RGB */
-	color xyz = xyY_to_xyz(x, y, Y);
-	return xyz_to_rgb(xyz[0], xyz[1], xyz[2]);
+  /* convert to RGB */
+  color xyz = xyY_to_xyz(x, y, Y);
+  return xyz_to_rgb(xyz[0], xyz[1], xyz[2]);
 }
 
 /* Hosek / Wilkie */
 float sky_radiance_internal(float config[9], float theta, float gamma)
 {
-	float ctheta = cos(theta);
-	float cgamma = cos(gamma);
-	
-	float expM = exp(config[4] * gamma);
-	float rayM = cgamma * cgamma;
-	float mieM = (1.0 + rayM) / pow((1.0 + config[8] * config[8] - 2.0 * config[8] * cgamma), 1.5);
-	float zenith = sqrt(ctheta);
-
-	return (1.0 + config[0] * exp(config[1] / (ctheta + 0.01))) *
-	        (config[2] + config[3] * expM + config[5] * rayM + config[6] * mieM + config[7] * zenith);
+  float ctheta = cos(theta);
+  float cgamma = cos(gamma);
+
+  float expM = exp(config[4] * gamma);
+  float rayM = cgamma * cgamma;
+  float mieM = (1.0 + rayM) / pow((1.0 + config[8] * config[8] - 2.0 * config[8] * cgamma), 1.5);
+  float zenith = sqrt(ctheta);
+
+  return (1.0 + config[0] * exp(config[1] / (ctheta + 0.01))) *
+         (config[2] + config[3] * expM + config[5] * rayM + config[6] * mieM + config[7] * zenith);
 }
 
 color sky_radiance_new(normal dir,
-                       float sunphi, float suntheta, color radiance,
-                       float config_x[9], float config_y[9], float config_z[9])
+                       float sunphi,
+                       float suntheta,
+                       color radiance,
+                       float config_x[9],
+                       float config_y[9],
+                       float config_z[9])
 {
-	/* convert vector to spherical coordinates */
-	vector spherical = sky_spherical_coordinates(dir);
-	float theta = spherical[0];
-	float phi = spherical[1];
+  /* convert vector to spherical coordinates */
+  vector spherical = sky_spherical_coordinates(dir);
+  float theta = spherical[0];
+  float phi = spherical[1];
 
-	/* angle between sun direction and dir */
-	float gamma = sky_angle_between(theta, phi, suntheta, sunphi);
+  /* angle between sun direction and dir */
+  float gamma = sky_angle_between(theta, phi, suntheta, sunphi);
 
-	/* clamp theta to horizon */
-	theta = min(theta, M_PI_2 - 0.001);
+  /* clamp theta to horizon */
+  theta = min(theta, M_PI_2 - 0.001);
 
-	/* compute xyz color space values */
-	float x = sky_radiance_internal(config_x, theta, gamma) * radiance[0];
-	float y = sky_radiance_internal(config_y, theta, gamma) * radiance[1];
-	float z = sky_radiance_internal(config_z, theta, gamma) * radiance[2];
+  /* compute xyz color space values */
+  float x = sky_radiance_internal(config_x, theta, gamma) * radiance[0];
+  float y = sky_radiance_internal(config_y, theta, gamma) * radiance[1];
+  float z = sky_radiance_internal(config_z, theta, gamma) * radiance[2];
 
-	/* convert to RGB and adjust strength */
-	return xyz_to_rgb(x, y, z) * (M_2PI / 683);
+  /* convert to RGB and adjust strength */
+  return xyz_to_rgb(x, y, z) * (M_2PI / 683);
 }
 
-shader node_sky_texture(
-	int use_mapping = 0,
-	matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
-	vector Vector = P,
-	string type = "hosek_wilkie",
-	float theta = 0.0,
-	float phi = 0.0,
-	color radiance = color(0.0, 0.0, 0.0),
-	float config_x[9] = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0},
-	float config_y[9] = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0},
-	float config_z[9] = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0},
-	output color Color = color(0.0, 0.0, 0.0))
+shader node_sky_texture(int use_mapping = 0,
+                        matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
+                        vector Vector = P,
+                        string type = "hosek_wilkie",
+                        float theta = 0.0,
+                        float phi = 0.0,
+                        color radiance = color(0.0, 0.0, 0.0),
+                        float config_x[9] = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0},
+                        float config_y[9] = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0},
+                        float config_z[9] = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0},
+                        output color Color = color(0.0, 0.0, 0.0))
 {
-	vector p = Vector;
-
-	if (use_mapping)
-		p = transform(mapping, p);
-	
-	if (type == "hosek_wilkie")
-		Color = sky_radiance_new(p, phi, theta, radiance, config_x, config_y, config_z);
-	else
-		Color = sky_radiance_old(p, phi, theta, radiance, config_x, config_y, config_z);
-}
+  vector p = Vector;
 
+  if (use_mapping)
+    p = transform(mapping, p);
+
+  if (type == "hosek_wilkie")
+    Color = sky_radiance_new(p, phi, theta, radiance, config_x, config_y, config_z);
+  else
+    Color = sky_radiance_old(p, phi, theta, radiance, config_x, config_y, config_z);
+}
diff --git a/intern/cycles/kernel/shaders/node_subsurface_scattering.osl b/intern/cycles/kernel/shaders/node_subsurface_scattering.osl
index 0df3256e1fd..e12199d8c3d 100644
--- a/intern/cycles/kernel/shaders/node_subsurface_scattering.osl
+++ b/intern/cycles/kernel/shaders/node_subsurface_scattering.osl
@@ -16,23 +16,30 @@
 
 #include "stdosl.h"
 
-shader node_subsurface_scattering(
-	color Color = 0.8,
-	float Scale = 1.0,
-	vector Radius = vector(0.1, 0.1, 0.1),
-	float TextureBlur = 0.0,
-	float Sharpness = 0.0,
-	string falloff = "cubic",
-	normal Normal = N,
-	output closure color BSSRDF = 0)
+shader node_subsurface_scattering(color Color = 0.8,
+                                  float Scale = 1.0,
+                                  vector Radius = vector(0.1, 0.1, 0.1),
+                                  float TextureBlur = 0.0,
+                                  float Sharpness = 0.0,
+                                  string falloff = "cubic",
+                                  normal Normal = N,
+                                  output closure color BSSRDF = 0)
 {
-	if (falloff == "gaussian")
-		BSSRDF = Color * bssrdf("gaussian", Normal, Scale * Radius, Color, "texture_blur", TextureBlur);
-	else if (falloff == "cubic")
-		BSSRDF = Color * bssrdf("cubic", Normal, Scale * Radius, Color, "texture_blur", TextureBlur, "sharpness", Sharpness);
-	else if (falloff == "burley")
-		BSSRDF = Color * bssrdf("burley", Normal, Scale * Radius, Color, "texture_blur", TextureBlur);
-	else
-		BSSRDF = Color * bssrdf("random_walk", Normal, Scale * Radius, Color, "texture_blur", TextureBlur);
+  if (falloff == "gaussian")
+    BSSRDF = Color *
+             bssrdf("gaussian", Normal, Scale * Radius, Color, "texture_blur", TextureBlur);
+  else if (falloff == "cubic")
+    BSSRDF = Color * bssrdf("cubic",
+                            Normal,
+                            Scale * Radius,
+                            Color,
+                            "texture_blur",
+                            TextureBlur,
+                            "sharpness",
+                            Sharpness);
+  else if (falloff == "burley")
+    BSSRDF = Color * bssrdf("burley", Normal, Scale * Radius, Color, "texture_blur", TextureBlur);
+  else
+    BSSRDF = Color *
+             bssrdf("random_walk", Normal, Scale * Radius, Color, "texture_blur", TextureBlur);
 }
-
diff --git a/intern/cycles/kernel/shaders/node_tangent.osl b/intern/cycles/kernel/shaders/node_tangent.osl
index c527070a2c8..44eb9973f3d 100644
--- a/intern/cycles/kernel/shaders/node_tangent.osl
+++ b/intern/cycles/kernel/shaders/node_tangent.osl
@@ -16,33 +16,31 @@
 
 #include "stdosl.h"
 
-shader node_tangent(
-	normal NormalIn = N,
-	string attr_name = "geom:tangent",
-	string direction_type = "radial",
-	string axis = "z",
-	output normal Tangent = normalize(dPdu))
+shader node_tangent(normal NormalIn = N,
+                    string attr_name = "geom:tangent",
+                    string direction_type = "radial",
+                    string axis = "z",
+                    output normal Tangent = normalize(dPdu))
 {
-	vector T;
+  vector T;
 
-	if (direction_type == "uv_map") {
-		getattribute(attr_name, T);
-	}
-	else if (direction_type == "radial") {
-		point generated;
+  if (direction_type == "uv_map") {
+    getattribute(attr_name, T);
+  }
+  else if (direction_type == "radial") {
+    point generated;
 
-		if (!getattribute("geom:generated", generated))
-			generated = P;
+    if (!getattribute("geom:generated", generated))
+      generated = P;
 
-		if (axis == "x")
-			T = vector(0.0, -(generated[2] - 0.5), (generated[1] - 0.5));
-		else if (axis == "y")
-			T = vector(-(generated[2] - 0.5), 0.0, (generated[0] - 0.5));
-		else
-			T = vector(-(generated[1] - 0.5), (generated[0] - 0.5), 0.0);
-	}
+    if (axis == "x")
+      T = vector(0.0, -(generated[2] - 0.5), (generated[1] - 0.5));
+    else if (axis == "y")
+      T = vector(-(generated[2] - 0.5), 0.0, (generated[0] - 0.5));
+    else
+      T = vector(-(generated[1] - 0.5), (generated[0] - 0.5), 0.0);
+  }
 
-	T = transform("object", "world", T);
-	Tangent = cross(NormalIn, normalize(cross(T, NormalIn)));
+  T = transform("object", "world", T);
+  Tangent = cross(NormalIn, normalize(cross(T, NormalIn)));
 }
-
diff --git a/intern/cycles/kernel/shaders/node_texture.h b/intern/cycles/kernel/shaders/node_texture.h
index 88305fb320f..e1f3b900ee5 100644
--- a/intern/cycles/kernel/shaders/node_texture.h
+++ b/intern/cycles/kernel/shaders/node_texture.h
@@ -18,148 +18,148 @@
 
 color cellnoise_color(point p)
 {
-	float r = cellnoise(p);
-	float g = cellnoise(point(p[1], p[0], p[2]));
-	float b = cellnoise(point(p[1], p[2], p[0]));
+  float r = cellnoise(p);
+  float g = cellnoise(point(p[1], p[0], p[2]));
+  float b = cellnoise(point(p[1], p[2], p[0]));
 
-	return color(r, g, b);
+  return color(r, g, b);
 }
 
 void voronoi(point p, float e, float da[4], point pa[4])
 {
-	/* returns distances in da and point coords in pa */
-	int xx, yy, zz, xi, yi, zi;
-
-	xi = (int)floor(p[0]);
-	yi = (int)floor(p[1]);
-	zi = (int)floor(p[2]);
-
-	da[0] = 1e10;
-	da[1] = 1e10;
-	da[2] = 1e10;
-	da[3] = 1e10;
-
-	for (xx = xi - 1; xx <= xi + 1; xx++) {
-		for (yy = yi - 1; yy <= yi + 1; yy++) {
-			for (zz = zi - 1; zz <= zi + 1; zz++) {
-				point ip = point(xx, yy, zz);
-				point vp = (point)cellnoise_color(ip);
-				point pd = p - (vp + ip);
-				float d = dot(pd, pd);
-
-				vp += point(xx, yy, zz);
-
-				if (d < da[0]) {
-					da[3] = da[2];
-					da[2] = da[1];
-					da[1] = da[0];
-					da[0] = d;
-
-					pa[3] = pa[2];
-					pa[2] = pa[1];
-					pa[1] = pa[0];
-					pa[0] = vp;
-				}
-				else if (d < da[1]) {
-					da[3] = da[2];
-					da[2] = da[1];
-					da[1] = d;
-
-					pa[3] = pa[2];
-					pa[2] = pa[1];
-					pa[1] = vp;
-				}
-				else if (d < da[2]) {
-					da[3] = da[2];
-					da[2] = d;
-
-					pa[3] = pa[2];
-					pa[2] = vp;
-				}
-				else if (d < da[3]) {
-					da[3] = d;
-					pa[3] = vp;
-				}
-			}
-		}
-	}
+  /* returns distances in da and point coords in pa */
+  int xx, yy, zz, xi, yi, zi;
+
+  xi = (int)floor(p[0]);
+  yi = (int)floor(p[1]);
+  zi = (int)floor(p[2]);
+
+  da[0] = 1e10;
+  da[1] = 1e10;
+  da[2] = 1e10;
+  da[3] = 1e10;
+
+  for (xx = xi - 1; xx <= xi + 1; xx++) {
+    for (yy = yi - 1; yy <= yi + 1; yy++) {
+      for (zz = zi - 1; zz <= zi + 1; zz++) {
+        point ip = point(xx, yy, zz);
+        point vp = (point)cellnoise_color(ip);
+        point pd = p - (vp + ip);
+        float d = dot(pd, pd);
+
+        vp += point(xx, yy, zz);
+
+        if (d < da[0]) {
+          da[3] = da[2];
+          da[2] = da[1];
+          da[1] = da[0];
+          da[0] = d;
+
+          pa[3] = pa[2];
+          pa[2] = pa[1];
+          pa[1] = pa[0];
+          pa[0] = vp;
+        }
+        else if (d < da[1]) {
+          da[3] = da[2];
+          da[2] = da[1];
+          da[1] = d;
+
+          pa[3] = pa[2];
+          pa[2] = pa[1];
+          pa[1] = vp;
+        }
+        else if (d < da[2]) {
+          da[3] = da[2];
+          da[2] = d;
+
+          pa[3] = pa[2];
+          pa[2] = vp;
+        }
+        else if (d < da[3]) {
+          da[3] = d;
+          pa[3] = vp;
+        }
+      }
+    }
+  }
 }
 
 /* Noise Bases */
 
 float safe_noise(point p, string type)
 {
-	float f = 0.0;
+  float f = 0.0;
 
-	/* Perlin noise in range -1..1 */
-	if (type == "signed")
-		f = noise("perlin", p);
+  /* Perlin noise in range -1..1 */
+  if (type == "signed")
+    f = noise("perlin", p);
 
-	/* Perlin noise in range 0..1 */
-	else
-		f = noise(p);
+  /* Perlin noise in range 0..1 */
+  else
+    f = noise(p);
 
-	/* can happen for big coordinates, things even out to 0.5 then anyway */
-	if (!isfinite(f))
-		return 0.5;
+  /* can happen for big coordinates, things even out to 0.5 then anyway */
+  if (!isfinite(f))
+    return 0.5;
 
-	return f;
+  return f;
 }
 
 /* Turbulence */
 
 float noise_turbulence(point p, float details, int hard)
 {
-	float fscale = 1.0;
-	float amp = 1.0;
-	float sum = 0.0;
-	int i, n;
+  float fscale = 1.0;
+  float amp = 1.0;
+  float sum = 0.0;
+  int i, n;
 
-	float octaves = clamp(details, 0.0, 16.0);
-	n = (int)octaves;
+  float octaves = clamp(details, 0.0, 16.0);
+  n = (int)octaves;
 
-	for (i = 0; i <= n; i++) {
-		float t = safe_noise(fscale * p, "unsigned");
+  for (i = 0; i <= n; i++) {
+    float t = safe_noise(fscale * p, "unsigned");
 
-		if (hard)
-			t = fabs(2.0 * t - 1.0);
+    if (hard)
+      t = fabs(2.0 * t - 1.0);
 
-		sum += t * amp;
-		amp *= 0.5;
-		fscale *= 2.0;
-	}
+    sum += t * amp;
+    amp *= 0.5;
+    fscale *= 2.0;
+  }
 
-	float rmd = octaves - floor(octaves);
+  float rmd = octaves - floor(octaves);
 
-	if (rmd != 0.0) {
-		float t = safe_noise(fscale * p, "unsigned");
+  if (rmd != 0.0) {
+    float t = safe_noise(fscale * p, "unsigned");
 
-		if (hard)
-			t = fabs(2.0 * t - 1.0);
+    if (hard)
+      t = fabs(2.0 * t - 1.0);
 
-		float sum2 = sum + t * amp;
+    float sum2 = sum + t * amp;
 
-		sum *= ((float)(1 << n) / (float)((1 << (n + 1)) - 1));
-		sum2 *= ((float)(1 << (n + 1)) / (float)((1 << (n + 2)) - 1));
+    sum *= ((float)(1 << n) / (float)((1 << (n + 1)) - 1));
+    sum2 *= ((float)(1 << (n + 1)) / (float)((1 << (n + 2)) - 1));
 
-		return (1.0 - rmd) * sum + rmd * sum2;
-	}
-	else {
-		sum *= ((float)(1 << n) / (float)((1 << (n + 1)) - 1));
-		return sum;
-	}
+    return (1.0 - rmd) * sum + rmd * sum2;
+  }
+  else {
+    sum *= ((float)(1 << n) / (float)((1 << (n + 1)) - 1));
+    return sum;
+  }
 }
 
 /* Utility */
 
 float nonzero(float f, float eps)
 {
-	float r;
+  float r;
 
-	if (abs(f) < eps)
-		r = sign(f) * eps;
-	else
-		r = f;
+  if (abs(f) < eps)
+    r = sign(f) * eps;
+  else
+    r = f;
 
-	return r;
+  return r;
 }
diff --git a/intern/cycles/kernel/shaders/node_texture_coordinate.osl b/intern/cycles/kernel/shaders/node_texture_coordinate.osl
index 9e2109fa082..13861653d04 100644
--- a/intern/cycles/kernel/shaders/node_texture_coordinate.osl
+++ b/intern/cycles/kernel/shaders/node_texture_coordinate.osl
@@ -17,82 +17,81 @@
 #include "stdosl.h"
 
 shader node_texture_coordinate(
-	normal NormalIn = N,
-	int is_background = 0,
-	int is_volume = 0,
-	int from_dupli = 0,
-	int use_transform = 0,
-	string bump_offset = "center",
-	matrix object_itfm = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
+    normal NormalIn = N,
+    int is_background = 0,
+    int is_volume = 0,
+    int from_dupli = 0,
+    int use_transform = 0,
+    string bump_offset = "center",
+    matrix object_itfm = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
 
-	output point Generated = point(0.0, 0.0, 0.0),
-	output point UV = point(0.0, 0.0, 0.0),
-	output point Object = point(0.0, 0.0, 0.0),
-	output point Camera = point(0.0, 0.0, 0.0),
-	output point Window = point(0.0, 0.0, 0.0),
-	output normal Normal = normal(0.0, 0.0, 0.0),
-	output point Reflection = point(0.0, 0.0, 0.0))
+    output point Generated = point(0.0, 0.0, 0.0),
+    output point UV = point(0.0, 0.0, 0.0),
+    output point Object = point(0.0, 0.0, 0.0),
+    output point Camera = point(0.0, 0.0, 0.0),
+    output point Window = point(0.0, 0.0, 0.0),
+    output normal Normal = normal(0.0, 0.0, 0.0),
+    output point Reflection = point(0.0, 0.0, 0.0))
 {
-	if (is_background) {
-		Generated = P;
-		UV = point(0.0, 0.0, 0.0);
-		Object = P;
-		point Pcam = transform("camera", "world", point(0, 0, 0));
-		Camera = transform("camera", P + Pcam);
-		getattribute("NDC", Window);
-		Normal = NormalIn;
-		Reflection = I;
-	}
-	else {
-		if (from_dupli) {
-			getattribute("geom:dupli_generated", Generated); 
-			getattribute("geom:dupli_uv", UV);
-		}
-		else if (is_volume) {
-			Generated = transform("object", P);
+  if (is_background) {
+    Generated = P;
+    UV = point(0.0, 0.0, 0.0);
+    Object = P;
+    point Pcam = transform("camera", "world", point(0, 0, 0));
+    Camera = transform("camera", P + Pcam);
+    getattribute("NDC", Window);
+    Normal = NormalIn;
+    Reflection = I;
+  }
+  else {
+    if (from_dupli) {
+      getattribute("geom:dupli_generated", Generated);
+      getattribute("geom:dupli_uv", UV);
+    }
+    else if (is_volume) {
+      Generated = transform("object", P);
 
-			matrix tfm;
-			if (getattribute("geom:generated_transform", tfm))
-				Generated = transform(tfm, Generated);
+      matrix tfm;
+      if (getattribute("geom:generated_transform", tfm))
+        Generated = transform(tfm, Generated);
 
-			getattribute("geom:uv", UV);
-		}
-		else {
-			getattribute("geom:generated", Generated); 
-			getattribute("geom:uv", UV);
-		}
+      getattribute("geom:uv", UV);
+    }
+    else {
+      getattribute("geom:generated", Generated);
+      getattribute("geom:uv", UV);
+    }
 
-		if (use_transform) {
-			Object = transform(object_itfm, P);
-		}
-		else {
-			Object = transform("object", P);
-		}
-		Camera = transform("camera", P);
-		Window = transform("NDC", P);
-		Normal = transform("world", "object", NormalIn);
-		Reflection = -reflect(I, NormalIn);
-	}
+    if (use_transform) {
+      Object = transform(object_itfm, P);
+    }
+    else {
+      Object = transform("object", P);
+    }
+    Camera = transform("camera", P);
+    Window = transform("NDC", P);
+    Normal = transform("world", "object", NormalIn);
+    Reflection = -reflect(I, NormalIn);
+  }
 
-	if (bump_offset == "dx") {
-		if (!from_dupli) {
-			Generated += Dx(Generated);
-			UV += Dx(UV);
-		}
-		Object += Dx(Object);
-		Camera += Dx(Camera);
-		Window += Dx(Window);
-	}
-	else if (bump_offset == "dy") {
-		if (!from_dupli) {
-			Generated += Dy(Generated);
-			UV += Dy(UV);
-		}
-		Object += Dy(Object);
-		Camera += Dy(Camera);
-		Window += Dy(Window);
-	}
+  if (bump_offset == "dx") {
+    if (!from_dupli) {
+      Generated += Dx(Generated);
+      UV += Dx(UV);
+    }
+    Object += Dx(Object);
+    Camera += Dx(Camera);
+    Window += Dx(Window);
+  }
+  else if (bump_offset == "dy") {
+    if (!from_dupli) {
+      Generated += Dy(Generated);
+      UV += Dy(UV);
+    }
+    Object += Dy(Object);
+    Camera += Dy(Camera);
+    Window += Dy(Window);
+  }
 
-	Window[2] = 0.0;
+  Window[2] = 0.0;
 }
-
diff --git a/intern/cycles/kernel/shaders/node_toon_bsdf.osl b/intern/cycles/kernel/shaders/node_toon_bsdf.osl
index ae68a463e46..ed3a0b25c60 100644
--- a/intern/cycles/kernel/shaders/node_toon_bsdf.osl
+++ b/intern/cycles/kernel/shaders/node_toon_bsdf.osl
@@ -16,17 +16,15 @@
 
 #include "stdosl.h"
 
-shader node_toon_bsdf(
-	color Color = 0.8,
-	string component = "diffuse",
-	float Size = 0.5,
-	float Smooth = 0.0,
-	normal Normal = N,
-	output closure color BSDF = 0)
+shader node_toon_bsdf(color Color = 0.8,
+                      string component = "diffuse",
+                      float Size = 0.5,
+                      float Smooth = 0.0,
+                      normal Normal = N,
+                      output closure color BSDF = 0)
 {
-	if (component == "diffuse")
-		BSDF = Color * diffuse_toon(Normal, Size, Smooth);
-	else if (component == "glossy")
-		BSDF = Color * glossy_toon(Normal, Size, Smooth);
+  if (component == "diffuse")
+    BSDF = Color * diffuse_toon(Normal, Size, Smooth);
+  else if (component == "glossy")
+    BSDF = Color * glossy_toon(Normal, Size, Smooth);
 }
-
diff --git a/intern/cycles/kernel/shaders/node_translucent_bsdf.osl b/intern/cycles/kernel/shaders/node_translucent_bsdf.osl
index 94d23d35326..7ce1ab08c59 100644
--- a/intern/cycles/kernel/shaders/node_translucent_bsdf.osl
+++ b/intern/cycles/kernel/shaders/node_translucent_bsdf.osl
@@ -16,11 +16,7 @@
 
 #include "stdosl.h"
 
-shader node_translucent_bsdf(
-	color Color = 0.8,
-	normal Normal = N,
-	output closure color BSDF = 0)
+shader node_translucent_bsdf(color Color = 0.8, normal Normal = N, output closure color BSDF = 0)
 {
-	BSDF = Color * translucent(Normal);
+  BSDF = Color * translucent(Normal);
 }
-
diff --git a/intern/cycles/kernel/shaders/node_transparent_bsdf.osl b/intern/cycles/kernel/shaders/node_transparent_bsdf.osl
index 5d6798f19a6..a735513ba89 100644
--- a/intern/cycles/kernel/shaders/node_transparent_bsdf.osl
+++ b/intern/cycles/kernel/shaders/node_transparent_bsdf.osl
@@ -16,11 +16,7 @@
 
 #include "stdosl.h"
 
-shader node_transparent_bsdf(
-	color Color = 0.8,
-	normal Normal = N,
-	output closure color BSDF = 0)
+shader node_transparent_bsdf(color Color = 0.8, normal Normal = N, output closure color BSDF = 0)
 {
-	BSDF = Color * transparent();
+  BSDF = Color * transparent();
 }
-
diff --git a/intern/cycles/kernel/shaders/node_uv_map.osl b/intern/cycles/kernel/shaders/node_uv_map.osl
index b46b2e73457..6f2887be63c 100644
--- a/intern/cycles/kernel/shaders/node_uv_map.osl
+++ b/intern/cycles/kernel/shaders/node_uv_map.osl
@@ -16,30 +16,29 @@
 
 #include "stdosl.h"
 
-shader node_uv_map(
-	int from_dupli = 0,
-	string attribute = "",
-	string bump_offset = "center",
-	output point UV = point(0.0, 0.0, 0.0))
+shader node_uv_map(int from_dupli = 0,
+                   string attribute = "",
+                   string bump_offset = "center",
+                   output point UV = point(0.0, 0.0, 0.0))
 {
-	if (from_dupli) {
-		getattribute("geom:dupli_uv", UV);
-	}
-	else {
-		if (attribute == "")
-			getattribute("geom:uv", UV);
-		else
-			getattribute(attribute, UV);
-	}
+  if (from_dupli) {
+    getattribute("geom:dupli_uv", UV);
+  }
+  else {
+    if (attribute == "")
+      getattribute("geom:uv", UV);
+    else
+      getattribute(attribute, UV);
+  }
 
-	if (bump_offset == "dx") {
-		if (!from_dupli) {
-			UV += Dx(UV);
-		}
-	}
-	else if (bump_offset == "dy") {
-		if (!from_dupli) {
-			UV += Dy(UV);
-		}
-	}
+  if (bump_offset == "dx") {
+    if (!from_dupli) {
+      UV += Dx(UV);
+    }
+  }
+  else if (bump_offset == "dy") {
+    if (!from_dupli) {
+      UV += Dy(UV);
+    }
+  }
 }
diff --git a/intern/cycles/kernel/shaders/node_value.osl b/intern/cycles/kernel/shaders/node_value.osl
index f75388d1f76..398e2c0e392 100644
--- a/intern/cycles/kernel/shaders/node_value.osl
+++ b/intern/cycles/kernel/shaders/node_value.osl
@@ -16,16 +16,14 @@
 
 #include "stdosl.h"
 
-shader node_value(
-	float value_value = 0.0,
-	vector vector_value = vector(0.0, 0.0, 0.0),
-	color color_value = 0.0,
-	output float Value = 0.0,
-	output vector Vector = vector(0.0, 0.0, 0.0),
-	output color Color = 0.0)
+shader node_value(float value_value = 0.0,
+                  vector vector_value = vector(0.0, 0.0, 0.0),
+                  color color_value = 0.0,
+                  output float Value = 0.0,
+                  output vector Vector = vector(0.0, 0.0, 0.0),
+                  output color Color = 0.0)
 {
-	Value = value_value;
-	Vector = vector_value;
-	Color = color_value;
+  Value = value_value;
+  Vector = vector_value;
+  Color = color_value;
 }
-
diff --git a/intern/cycles/kernel/shaders/node_vector_curves.osl b/intern/cycles/kernel/shaders/node_vector_curves.osl
index ff284c48e0a..e8c8036b550 100644
--- a/intern/cycles/kernel/shaders/node_vector_curves.osl
+++ b/intern/cycles/kernel/shaders/node_vector_curves.osl
@@ -17,25 +17,23 @@
 #include "stdosl.h"
 #include "node_ramp_util.h"
 
-shader node_vector_curves(
-	color ramp[] = {0.0},
-	float min_x = 0.0,
-	float max_x = 1.0,
+shader node_vector_curves(color ramp[] = {0.0},
+                          float min_x = 0.0,
+                          float max_x = 1.0,
 
-	vector VectorIn = vector(0.0, 0.0, 0.0),
-	float Fac = 0.0,
-	output vector VectorOut = vector(0.0, 0.0, 0.0))
+                          vector VectorIn = vector(0.0, 0.0, 0.0),
+                          float Fac = 0.0,
+                          output vector VectorOut = vector(0.0, 0.0, 0.0))
 {
-	vector c = (VectorIn - vector(min_x, min_x, min_x)) / (max_x - min_x);
+  vector c = (VectorIn - vector(min_x, min_x, min_x)) / (max_x - min_x);
 
-	color r = rgb_ramp_lookup(ramp, c[0], 1, 1);
-	color g = rgb_ramp_lookup(ramp, c[0], 1, 1);
-	color b = rgb_ramp_lookup(ramp, c[0], 1, 1);
+  color r = rgb_ramp_lookup(ramp, c[0], 1, 1);
+  color g = rgb_ramp_lookup(ramp, c[0], 1, 1);
+  color b = rgb_ramp_lookup(ramp, c[0], 1, 1);
 
-	VectorOut[0] = r[0];
-	VectorOut[1] = g[1];
-	VectorOut[2] = b[2];
+  VectorOut[0] = r[0];
+  VectorOut[1] = g[1];
+  VectorOut[2] = b[2];
 
-	VectorOut = mix(VectorIn, VectorOut, Fac);
+  VectorOut = mix(VectorIn, VectorOut, Fac);
 }
-
diff --git a/intern/cycles/kernel/shaders/node_vector_displacement.osl b/intern/cycles/kernel/shaders/node_vector_displacement.osl
index b19bc228e37..e9bd336347f 100644
--- a/intern/cycles/kernel/shaders/node_vector_displacement.osl
+++ b/intern/cycles/kernel/shaders/node_vector_displacement.osl
@@ -16,45 +16,43 @@
 
 #include "stdosl.h"
 
-shader node_vector_displacement(
-	color Vector = color(0.0, 0.0, 0.0),
-	float Midlevel = 0.0,
-	float Scale = 1.0,
-	string space = "tangent",
-	string attr_name = "geom:tangent",
-	string attr_sign_name = "geom:tangent_sign",
-	output vector Displacement = vector(0.0, 0.0, 0.0))
+shader node_vector_displacement(color Vector = color(0.0, 0.0, 0.0),
+                                float Midlevel = 0.0,
+                                float Scale = 1.0,
+                                string space = "tangent",
+                                string attr_name = "geom:tangent",
+                                string attr_sign_name = "geom:tangent_sign",
+                                output vector Displacement = vector(0.0, 0.0, 0.0))
 {
-	vector offset = (Vector - vector(Midlevel)) * Scale;
-
-	if(space == "tangent") {
-		/* Tangent space. */
-		vector N_object = normalize(transform("world", "object", N));
-
-		vector T_object;
-		if(getattribute(attr_name, T_object)) {
-			T_object = normalize(T_object);
-		}
-		else {
-			T_object = normalize(dPdu);
-		}
-
-		vector B_object = normalize(cross(N_object, T_object));
-		float tangent_sign;
-		if(getattribute(attr_sign_name, tangent_sign)) {
-			B_object *= tangent_sign;
-		}
-
-		Displacement = T_object*offset[0] + N_object*offset[1] + B_object*offset[2];
-	}
-	else {
-		/* Object or world space. */
-		Displacement = offset;
-	}
-
-	if(space != "world") {
-		/* Tangent or object space. */
-		Displacement = transform("object", "world", Displacement);
-	}
+  vector offset = (Vector - vector(Midlevel)) * Scale;
+
+  if (space == "tangent") {
+    /* Tangent space. */
+    vector N_object = normalize(transform("world", "object", N));
+
+    vector T_object;
+    if (getattribute(attr_name, T_object)) {
+      T_object = normalize(T_object);
+    }
+    else {
+      T_object = normalize(dPdu);
+    }
+
+    vector B_object = normalize(cross(N_object, T_object));
+    float tangent_sign;
+    if (getattribute(attr_sign_name, tangent_sign)) {
+      B_object *= tangent_sign;
+    }
+
+    Displacement = T_object * offset[0] + N_object * offset[1] + B_object * offset[2];
+  }
+  else {
+    /* Object or world space. */
+    Displacement = offset;
+  }
+
+  if (space != "world") {
+    /* Tangent or object space. */
+    Displacement = transform("object", "world", Displacement);
+  }
 }
-
diff --git a/intern/cycles/kernel/shaders/node_vector_math.osl b/intern/cycles/kernel/shaders/node_vector_math.osl
index a7e3637402e..10bb0c7283c 100644
--- a/intern/cycles/kernel/shaders/node_vector_math.osl
+++ b/intern/cycles/kernel/shaders/node_vector_math.osl
@@ -16,36 +16,34 @@
 
 #include "stdosl.h"
 
-shader node_vector_math(
-	string type = "add",
-	vector Vector1 = vector(0.0, 0.0, 0.0),
-	vector Vector2 = vector(0.0, 0.0, 0.0),
-	output float Value = 0.0,
-	output vector Vector = vector(0.0, 0.0, 0.0))
+shader node_vector_math(string type = "add",
+                        vector Vector1 = vector(0.0, 0.0, 0.0),
+                        vector Vector2 = vector(0.0, 0.0, 0.0),
+                        output float Value = 0.0,
+                        output vector Vector = vector(0.0, 0.0, 0.0))
 {
-	if (type == "add") {
-		Vector = Vector1 + Vector2;
-		Value = (abs(Vector[0]) + abs(Vector[1]) + abs(Vector[2])) / 3.0;
-	}
-	else if (type == "subtract") {
-		Vector = Vector1 - Vector2;
-		Value = (abs(Vector[0]) + abs(Vector[1]) + abs(Vector[2])) / 3.0;
-	}
-	else if (type == "average") {
-		Value = length(Vector1 + Vector2);
-		Vector = normalize(Vector1 + Vector2);
-	}
-	else if (type == "dot_product") {
-		Value = dot(Vector1, Vector2);
-	}
-	else if (type == "cross_product") {
-		vector c = cross(Vector1, Vector2);
-		Value = length(c);
-		Vector = normalize(c);
-	}
-	else if (type == "normalize") {
-		Value = length(Vector1);
-		Vector = normalize(Vector1);
-	}
+  if (type == "add") {
+    Vector = Vector1 + Vector2;
+    Value = (abs(Vector[0]) + abs(Vector[1]) + abs(Vector[2])) / 3.0;
+  }
+  else if (type == "subtract") {
+    Vector = Vector1 - Vector2;
+    Value = (abs(Vector[0]) + abs(Vector[1]) + abs(Vector[2])) / 3.0;
+  }
+  else if (type == "average") {
+    Value = length(Vector1 + Vector2);
+    Vector = normalize(Vector1 + Vector2);
+  }
+  else if (type == "dot_product") {
+    Value = dot(Vector1, Vector2);
+  }
+  else if (type == "cross_product") {
+    vector c = cross(Vector1, Vector2);
+    Value = length(c);
+    Vector = normalize(c);
+  }
+  else if (type == "normalize") {
+    Value = length(Vector1);
+    Vector = normalize(Vector1);
+  }
 }
-
diff --git a/intern/cycles/kernel/shaders/node_vector_transform.osl b/intern/cycles/kernel/shaders/node_vector_transform.osl
index afb95b340d1..22939577be0 100644
--- a/intern/cycles/kernel/shaders/node_vector_transform.osl
+++ b/intern/cycles/kernel/shaders/node_vector_transform.osl
@@ -16,21 +16,19 @@
 
 #include "stdosl.h"
 
-shader node_vector_transform(
-	string type = "vector",
-	string convert_from = "world",
-	string convert_to = "object",
-	vector VectorIn = vector(0.0, 0.0, 0.0),
-	output vector VectorOut = vector(0.0, 0.0, 0.0))
+shader node_vector_transform(string type = "vector",
+                             string convert_from = "world",
+                             string convert_to = "object",
+                             vector VectorIn = vector(0.0, 0.0, 0.0),
+                             output vector VectorOut = vector(0.0, 0.0, 0.0))
 {
-	if (type == "vector" || type == "normal") {
-		VectorOut = transform(convert_from, convert_to, VectorIn);
-		if (type == "normal")
-			VectorOut = normalize(VectorOut);
-	}
-	else if (type == "point") {
-		point Point = (point)VectorIn;
-		VectorOut = transform(convert_from, convert_to, Point);
-	}
+  if (type == "vector" || type == "normal") {
+    VectorOut = transform(convert_from, convert_to, VectorIn);
+    if (type == "normal")
+      VectorOut = normalize(VectorOut);
+  }
+  else if (type == "point") {
+    point Point = (point)VectorIn;
+    VectorOut = transform(convert_from, convert_to, Point);
+  }
 }
-
diff --git a/intern/cycles/kernel/shaders/node_velvet_bsdf.osl b/intern/cycles/kernel/shaders/node_velvet_bsdf.osl
index 456c26998c8..9290b845325 100644
--- a/intern/cycles/kernel/shaders/node_velvet_bsdf.osl
+++ b/intern/cycles/kernel/shaders/node_velvet_bsdf.osl
@@ -17,14 +17,12 @@
 #include "stdosl.h"
 #include "node_fresnel.h"
 
-shader node_velvet_bsdf(
-	color Color = 0.8,
-	float Sigma = 0.0,
-	normal Normal = N,
-	output closure color BSDF = 0)
+shader node_velvet_bsdf(color Color = 0.8,
+                        float Sigma = 0.0,
+                        normal Normal = N,
+                        output closure color BSDF = 0)
 {
-	float sigma = clamp(Sigma, 0.0, 1.0);
+  float sigma = clamp(Sigma, 0.0, 1.0);
 
-	BSDF = Color * ashikhmin_velvet(Normal, sigma);
+  BSDF = Color * ashikhmin_velvet(Normal, sigma);
 }
-
diff --git a/intern/cycles/kernel/shaders/node_voronoi_texture.osl b/intern/cycles/kernel/shaders/node_voronoi_texture.osl
index 2e47d74a414..34c86d5b98d 100644
--- a/intern/cycles/kernel/shaders/node_voronoi_texture.osl
+++ b/intern/cycles/kernel/shaders/node_voronoi_texture.osl
@@ -19,147 +19,146 @@
 
 void voronoi_m(point p, string metric, float e, float da[4], point pa[4])
 {
-	/* Compute the distance to and the position of the four closest neighbors to p.
-	 *
-	 * The neighbors are randomly placed, 1 each in a 3x3x3 grid (Worley pattern).
-	 * The distances and points are returned in ascending order, i.e. da[0] and pa[0] will
-	 * contain the distance to the closest point and its coordinates respectively.
-	 */
-	int xx, yy, zz, xi, yi, zi;
-
-	xi = (int)floor(p[0]);
-	yi = (int)floor(p[1]);
-	zi = (int)floor(p[2]);
-
-	da[0] = 1e10;
-	da[1] = 1e10;
-	da[2] = 1e10;
-	da[3] = 1e10;
-
-	for (xx = xi - 1; xx <= xi + 1; xx++) {
-		for (yy = yi - 1; yy <= yi + 1; yy++) {
-			for (zz = zi - 1; zz <= zi + 1; zz++) {
-				point ip = point(xx, yy, zz);
-				point vp = (point)cellnoise_color(ip);
-				point pd = p - (vp + ip);
-
-				float d = 0.0;
-				if (metric == "distance") {
-					d = dot(pd, pd);
-				}
-				else if (metric == "manhattan") {
-					d = fabs(pd[0]) + fabs(pd[1]) + fabs(pd[2]);
-				}
-				else if (metric == "chebychev") {
-					d = max(fabs(pd[0]), max(fabs(pd[1]), fabs(pd[2])));
-				}
-				else if (metric == "minkowski") {
-					d = pow(pow(fabs(pd[0]), e) + pow(fabs(pd[1]), e) + pow(fabs(pd[2]), e), 1.0/e);
-				}
-
-				vp += point(xx, yy, zz);
-
-				if (d < da[0]) {
-					da[3] = da[2];
-					da[2] = da[1];
-					da[1] = da[0];
-					da[0] = d;
-
-					pa[3] = pa[2];
-					pa[2] = pa[1];
-					pa[1] = pa[0];
-					pa[0] = vp;
-				}
-				else if (d < da[1]) {
-					da[3] = da[2];
-					da[2] = da[1];
-					da[1] = d;
-
-					pa[3] = pa[2];
-					pa[2] = pa[1];
-					pa[1] = vp;
-				}
-				else if (d < da[2]) {
-					da[3] = da[2];
-					da[2] = d;
-
-					pa[3] = pa[2];
-					pa[2] = vp;
-				}
-				else if (d < da[3]) {
-					da[3] = d;
-					pa[3] = vp;
-				}
-			}
-		}
-	}
+  /* Compute the distance to and the position of the four closest neighbors to p.
+   *
+   * The neighbors are randomly placed, 1 each in a 3x3x3 grid (Worley pattern).
+   * The distances and points are returned in ascending order, i.e. da[0] and pa[0] will
+   * contain the distance to the closest point and its coordinates respectively.
+   */
+  int xx, yy, zz, xi, yi, zi;
+
+  xi = (int)floor(p[0]);
+  yi = (int)floor(p[1]);
+  zi = (int)floor(p[2]);
+
+  da[0] = 1e10;
+  da[1] = 1e10;
+  da[2] = 1e10;
+  da[3] = 1e10;
+
+  for (xx = xi - 1; xx <= xi + 1; xx++) {
+    for (yy = yi - 1; yy <= yi + 1; yy++) {
+      for (zz = zi - 1; zz <= zi + 1; zz++) {
+        point ip = point(xx, yy, zz);
+        point vp = (point)cellnoise_color(ip);
+        point pd = p - (vp + ip);
+
+        float d = 0.0;
+        if (metric == "distance") {
+          d = dot(pd, pd);
+        }
+        else if (metric == "manhattan") {
+          d = fabs(pd[0]) + fabs(pd[1]) + fabs(pd[2]);
+        }
+        else if (metric == "chebychev") {
+          d = max(fabs(pd[0]), max(fabs(pd[1]), fabs(pd[2])));
+        }
+        else if (metric == "minkowski") {
+          d = pow(pow(fabs(pd[0]), e) + pow(fabs(pd[1]), e) + pow(fabs(pd[2]), e), 1.0 / e);
+        }
+
+        vp += point(xx, yy, zz);
+
+        if (d < da[0]) {
+          da[3] = da[2];
+          da[2] = da[1];
+          da[1] = da[0];
+          da[0] = d;
+
+          pa[3] = pa[2];
+          pa[2] = pa[1];
+          pa[1] = pa[0];
+          pa[0] = vp;
+        }
+        else if (d < da[1]) {
+          da[3] = da[2];
+          da[2] = da[1];
+          da[1] = d;
+
+          pa[3] = pa[2];
+          pa[2] = pa[1];
+          pa[1] = vp;
+        }
+        else if (d < da[2]) {
+          da[3] = da[2];
+          da[2] = d;
+
+          pa[3] = pa[2];
+          pa[2] = vp;
+        }
+        else if (d < da[3]) {
+          da[3] = d;
+          pa[3] = vp;
+        }
+      }
+    }
+  }
 }
 
 /* Voronoi */
 
 shader node_voronoi_texture(
-	int use_mapping = 0,
-	matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
-	string coloring = "intensity",
-	string metric = "distance",
-	string feature = "F1",
-	float Exponent = 1.0,
-	float Scale = 5.0,
-	point Vector = P,
-	output float Fac = 0.0,
-	output color Color = 0.0)
+    int use_mapping = 0,
+    matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
+    string coloring = "intensity",
+    string metric = "distance",
+    string feature = "F1",
+    float Exponent = 1.0,
+    float Scale = 5.0,
+    point Vector = P,
+    output float Fac = 0.0,
+    output color Color = 0.0)
 {
-	point p = Vector;
-
-	if (use_mapping)
-		p = transform(mapping, p);
-
-	/* compute distance and point coordinate of 4 nearest neighbours */
-	float da[4];
-	point pa[4];
-
-	/* compute distance and point coordinate of 4 nearest neighbours */
-	voronoi_m(p * Scale, metric, Exponent, da, pa);
-
-	if (coloring == "intensity") {
-		/* Intensity output */
-		if (feature == "F1") {
-			Fac = fabs(da[0]);
-		}
-		else if (feature == "F2") {
-			Fac = fabs(da[1]);
-		}
-		else if (feature == "F3") {
-			Fac = fabs(da[2]);
-		}
-		else if (feature == "F4") {
-			Fac = fabs(da[3]);
-		}
-		else if (feature == "F2F1") {
-			Fac = fabs(da[1] - da[0]);
-		}
-		Color = color(Fac);
-	}
-	else {
-		/* Color output */
-		if (feature == "F1") {
-			Color = pa[0];
-		}
-		else if (feature == "F2") {
-			Color = pa[1];
-		}
-		else if (feature == "F3") {
-			Color = pa[2];
-		}
-		else if (feature == "F4") {
-			Color = pa[3];
-		}
-		else if (feature == "F2F1") {
-			Color = fabs(pa[1] - pa[0]);
-		}
-
-		Color = cellnoise_color(Color);
-		Fac = (Color[0] + Color[1] + Color[2]) * (1.0 / 3.0);
-	}
+  point p = Vector;
+
+  if (use_mapping)
+    p = transform(mapping, p);
+
+  /* compute distance and point coordinate of 4 nearest neighbours */
+  float da[4];
+  point pa[4];
+
+  /* compute distance and point coordinate of 4 nearest neighbours */
+  voronoi_m(p * Scale, metric, Exponent, da, pa);
+
+  if (coloring == "intensity") {
+    /* Intensity output */
+    if (feature == "F1") {
+      Fac = fabs(da[0]);
+    }
+    else if (feature == "F2") {
+      Fac = fabs(da[1]);
+    }
+    else if (feature == "F3") {
+      Fac = fabs(da[2]);
+    }
+    else if (feature == "F4") {
+      Fac = fabs(da[3]);
+    }
+    else if (feature == "F2F1") {
+      Fac = fabs(da[1] - da[0]);
+    }
+    Color = color(Fac);
+  }
+  else {
+    /* Color output */
+    if (feature == "F1") {
+      Color = pa[0];
+    }
+    else if (feature == "F2") {
+      Color = pa[1];
+    }
+    else if (feature == "F3") {
+      Color = pa[2];
+    }
+    else if (feature == "F4") {
+      Color = pa[3];
+    }
+    else if (feature == "F2F1") {
+      Color = fabs(pa[1] - pa[0]);
+    }
+
+    Color = cellnoise_color(Color);
+    Fac = (Color[0] + Color[1] + Color[2]) * (1.0 / 3.0);
+  }
 }
-
diff --git a/intern/cycles/kernel/shaders/node_voxel_texture.osl b/intern/cycles/kernel/shaders/node_voxel_texture.osl
index 9253febd64a..0e4484561d8 100644
--- a/intern/cycles/kernel/shaders/node_voxel_texture.osl
+++ b/intern/cycles/kernel/shaders/node_voxel_texture.osl
@@ -16,32 +16,30 @@
 
 #include "stdosl.h"
 
-shader node_voxel_texture(
-	string filename = "",
-	string interpolation = "linear",
-	int use_mapping = 0,
-	matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
-	point Vector = P,
-	output float Density = 0,
-	output color Color = 0)
+shader node_voxel_texture(string filename = "",
+                          string interpolation = "linear",
+                          int use_mapping = 0,
+                          matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
+                          point Vector = P,
+                          output float Density = 0,
+                          output color Color = 0)
 {
-	point p = Vector;
-	if (use_mapping) {
-		p = transform(mapping, p);
-	}
-	else {
-		p = transform("object", Vector);
-		matrix tfm;
-		if (getattribute("geom:generated_transform", tfm))
-			p = transform(tfm, p);
-	}
-	if (p[0] < 0.0 || p[1] < 0.0 || p[2] < 0.0 ||
-	    p[0] > 1.0 || p[1] > 1.0 || p[2] > 1.0)
-	{
-		Density = 0;
-		Color = color(0, 0, 0);
-	}
-	else {
-		Color = (color)texture3d(filename, p, "wrap", "periodic", "interp", interpolation, "alpha", Density);
-	}
+  point p = Vector;
+  if (use_mapping) {
+    p = transform(mapping, p);
+  }
+  else {
+    p = transform("object", Vector);
+    matrix tfm;
+    if (getattribute("geom:generated_transform", tfm))
+      p = transform(tfm, p);
+  }
+  if (p[0] < 0.0 || p[1] < 0.0 || p[2] < 0.0 || p[0] > 1.0 || p[1] > 1.0 || p[2] > 1.0) {
+    Density = 0;
+    Color = color(0, 0, 0);
+  }
+  else {
+    Color = (color)texture3d(
+        filename, p, "wrap", "periodic", "interp", interpolation, "alpha", Density);
+  }
 }
diff --git a/intern/cycles/kernel/shaders/node_wave_texture.osl b/intern/cycles/kernel/shaders/node_wave_texture.osl
index 71bc9324705..dfc2dbfb800 100644
--- a/intern/cycles/kernel/shaders/node_wave_texture.osl
+++ b/intern/cycles/kernel/shaders/node_wave_texture.osl
@@ -21,49 +21,47 @@
 
 float wave(point p, string type, string profile, float detail, float distortion, float dscale)
 {
-	float n = 0.0;
+  float n = 0.0;
 
-	if (type == "bands") {
-		n = (p[0] + p[1] + p[2]) * 10.0;
-	}
-	else if (type == "rings") {
-		n = length(p) * 20.0;
-	}
+  if (type == "bands") {
+    n = (p[0] + p[1] + p[2]) * 10.0;
+  }
+  else if (type == "rings") {
+    n = length(p) * 20.0;
+  }
 
-	if (distortion != 0.0) {
-		n = n + (distortion * noise_turbulence(p * dscale, detail, 0));
-	}
+  if (distortion != 0.0) {
+    n = n + (distortion * noise_turbulence(p * dscale, detail, 0));
+  }
 
-	if (profile == "sine") {
-		return 0.5 + 0.5 * sin(n);
-	}
-	else {
-		/* Saw profile */
-		n /= M_2PI;
-		n -= (int) n;
-		return (n < 0.0) ? n + 1.0 : n;
-	}
+  if (profile == "sine") {
+    return 0.5 + 0.5 * sin(n);
+  }
+  else {
+    /* Saw profile */
+    n /= M_2PI;
+    n -= (int)n;
+    return (n < 0.0) ? n + 1.0 : n;
+  }
 }
 
-shader node_wave_texture(
-	int use_mapping = 0,
-	matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
-	string type = "bands",
-	string profile = "sine",
-	float Scale = 5.0,
-	float Distortion = 0.0,
-	float Detail = 2.0,
-	float DetailScale = 1.0,
-	point Vector = P,
-	output float Fac = 0.0,
-	output color Color = 0.0)
+shader node_wave_texture(int use_mapping = 0,
+                         matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
+                         string type = "bands",
+                         string profile = "sine",
+                         float Scale = 5.0,
+                         float Distortion = 0.0,
+                         float Detail = 2.0,
+                         float DetailScale = 1.0,
+                         point Vector = P,
+                         output float Fac = 0.0,
+                         output color Color = 0.0)
 {
-	point p = Vector;
+  point p = Vector;
 
-	if (use_mapping)
-		p = transform(mapping, p);
+  if (use_mapping)
+    p = transform(mapping, p);
 
-	Fac = wave(p * Scale, type, profile, Detail, Distortion, DetailScale);
-	Color = Fac;
+  Fac = wave(p * Scale, type, profile, Detail, Distortion, DetailScale);
+  Color = Fac;
 }
-
diff --git a/intern/cycles/kernel/shaders/node_wavelength.osl b/intern/cycles/kernel/shaders/node_wavelength.osl
index 79e7043d4bf..c8c6eecb171 100644
--- a/intern/cycles/kernel/shaders/node_wavelength.osl
+++ b/intern/cycles/kernel/shaders/node_wavelength.osl
@@ -16,10 +16,7 @@
 
 #include "stdosl.h"
 
-shader node_wavelength(
-	float Wavelength = 500.0,
-	output color Color = 0.0)
+shader node_wavelength(float Wavelength = 500.0, output color Color = 0.0)
 {
-	Color = wavelength_color(Wavelength);
+  Color = wavelength_color(Wavelength);
 }
-
diff --git a/intern/cycles/kernel/shaders/node_wireframe.osl b/intern/cycles/kernel/shaders/node_wireframe.osl
index 5cc214495dd..ea4bd3a4c87 100644
--- a/intern/cycles/kernel/shaders/node_wireframe.osl
+++ b/intern/cycles/kernel/shaders/node_wireframe.osl
@@ -17,25 +17,24 @@
 #include "stdosl.h"
 #include "oslutil.h"
 
-shader node_wireframe(
-	string bump_offset = "center",
-	int use_pixel_size = 0,
-	float Size = 0.01,
-	output float Fac = 0.0)
+shader node_wireframe(string bump_offset = "center",
+                      int use_pixel_size = 0,
+                      float Size = 0.01,
+                      output float Fac = 0.0)
 {
-	Fac = wireframe("triangles", Size, use_pixel_size);
-	/* TODO(sergey): Since we can't use autodiff here we do algebraic
-	 * calculation of derivatives by definition. We could probably
-	 * optimize this a bit by doing some extra calculation in wireframe().
-	 */
-	if (bump_offset == "dx") {
-		point dx = Dx(P);
-		P -= dx;
-		Fac += (Fac - wireframe("triangles", Size, use_pixel_size)) / length(dx);
-	}
-	else if (bump_offset == "dy") {
-		point dy = Dy(P);
-		P -= dy;
-		Fac += (Fac - wireframe("triangles", Size, use_pixel_size)) / length(dy);
-	}
+  Fac = wireframe("triangles", Size, use_pixel_size);
+  /* TODO(sergey): Since we can't use autodiff here we do algebraic
+   * calculation of derivatives by definition. We could probably
+   * optimize this a bit by doing some extra calculation in wireframe().
+   */
+  if (bump_offset == "dx") {
+    point dx = Dx(P);
+    P -= dx;
+    Fac += (Fac - wireframe("triangles", Size, use_pixel_size)) / length(dx);
+  }
+  else if (bump_offset == "dy") {
+    point dy = Dy(P);
+    P -= dy;
+    Fac += (Fac - wireframe("triangles", Size, use_pixel_size)) / length(dy);
+  }
 }
diff --git a/intern/cycles/kernel/shaders/oslutil.h b/intern/cycles/kernel/shaders/oslutil.h
index 592a8ad12d9..d48bfa4a665 100644
--- a/intern/cycles/kernel/shaders/oslutil.h
+++ b/intern/cycles/kernel/shaders/oslutil.h
@@ -39,57 +39,63 @@
 //
 float wireframe(string edge_type, float line_width, int raster)
 {
-   // ray differentials are so big in diffuse context that this function would always return "wire"
-   if (raytype("path:diffuse")) return 0.0;
+  // ray differentials are so big in diffuse context that this function would always return "wire"
+  if (raytype("path:diffuse"))
+    return 0.0;
 
-   int np = 0;
-   point p[64];
-   float pixelWidth = 1;
+  int np = 0;
+  point p[64];
+  float pixelWidth = 1;
 
-   if (edge_type == "triangles")
-   {
-      np = 3;
-      if (!getattribute("geom:trianglevertices", p))
-         return 0.0;
-   }
-   else if (edge_type == "polygons" || edge_type == "patches")
-   {
-      getattribute("geom:numpolyvertices", np);
-      if (np < 3 || !getattribute("geom:polyvertices", p))
-         return 0.0;
-   }
+  if (edge_type == "triangles") {
+    np = 3;
+    if (!getattribute("geom:trianglevertices", p))
+      return 0.0;
+  }
+  else if (edge_type == "polygons" || edge_type == "patches") {
+    getattribute("geom:numpolyvertices", np);
+    if (np < 3 || !getattribute("geom:polyvertices", p))
+      return 0.0;
+  }
 
-   if (raster)
-   {
-      // Project the derivatives of P to the viewing plane defined
-      // by I so we have a measure of how big is a pixel at this point
-      float pixelWidthX = length(Dx(P) - dot(Dx(P), I) * I);
-      float pixelWidthY = length(Dy(P) - dot(Dy(P), I) * I);
-      // Take the average of both axis' length
-      pixelWidth = (pixelWidthX + pixelWidthY) / 2;
-   }
+  if (raster) {
+    // Project the derivatives of P to the viewing plane defined
+    // by I so we have a measure of how big is a pixel at this point
+    float pixelWidthX = length(Dx(P) - dot(Dx(P), I) * I);
+    float pixelWidthY = length(Dy(P) - dot(Dy(P), I) * I);
+    // Take the average of both axis' length
+    pixelWidth = (pixelWidthX + pixelWidthY) / 2;
+  }
 
-   // Use half the width as the neighbor face will render the
-   // other half. And take the square for fast comparison
-   pixelWidth *= 0.5 * line_width;
-   pixelWidth *= pixelWidth;
-   for (int i = 0; i < np; i++)
-   {
-      int i2 = i ? i - 1 : np - 1;
-      vector dir = P - p[i];
-      vector edge = p[i] - p[i2];
-      vector crs = cross(edge, dir);
-      // At this point dot(crs, crs) / dot(edge, edge) is
-      // the square of area / length(edge) == square of the
-      // distance to the edge.
-      if (dot(crs, crs) < (dot(edge, edge) * pixelWidth))
-         return 1;
-   }
-   return 0;
+  // Use half the width as the neighbor face will render the
+  // other half. And take the square for fast comparison
+  pixelWidth *= 0.5 * line_width;
+  pixelWidth *= pixelWidth;
+  for (int i = 0; i < np; i++) {
+    int i2 = i ? i - 1 : np - 1;
+    vector dir = P - p[i];
+    vector edge = p[i] - p[i2];
+    vector crs = cross(edge, dir);
+    // At this point dot(crs, crs) / dot(edge, edge) is
+    // the square of area / length(edge) == square of the
+    // distance to the edge.
+    if (dot(crs, crs) < (dot(edge, edge) * pixelWidth))
+      return 1;
+  }
+  return 0;
 }
 
-float wireframe(string edge_type, float line_width) { return wireframe(edge_type, line_width, 1); }
-float wireframe(string edge_type) { return wireframe(edge_type, 1.0, 1); }
-float wireframe() { return wireframe("polygons", 1.0, 1); }
+float wireframe(string edge_type, float line_width)
+{
+  return wireframe(edge_type, line_width, 1);
+}
+float wireframe(string edge_type)
+{
+  return wireframe(edge_type, 1.0, 1);
+}
+float wireframe()
+{
+  return wireframe("polygons", 1.0, 1);
+}
 
-#endif  /* CCL_OSLUTIL_H */
+#endif /* CCL_OSLUTIL_H */
diff --git a/intern/cycles/kernel/shaders/stdosl.h b/intern/cycles/kernel/shaders/stdosl.h
index 7136c746321..9b9720ffff9 100644
--- a/intern/cycles/kernel/shaders/stdosl.h
+++ b/intern/cycles/kernel/shaders/stdosl.h
@@ -25,124 +25,215 @@
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 /////////////////////////////////////////////////////////////////////////////
 
-
 #ifndef CCL_STDOSL_H
 #define CCL_STDOSL_H
 
-
 #ifndef M_PI
-#define M_PI       3.1415926535897932        /* pi */
-#define M_PI_2     1.5707963267948966        /* pi/2 */
-#define M_PI_4     0.7853981633974483        /* pi/4 */
-#define M_2_PI     0.6366197723675813        /* 2/pi */
-#define M_2PI      6.2831853071795865        /* 2*pi */
-#define M_4PI     12.566370614359173         /* 4*pi */
-#define M_2_SQRTPI 1.1283791670955126        /* 2/sqrt(pi) */
-#define M_E        2.7182818284590452        /* e (Euler's number) */
-#define M_LN2      0.6931471805599453        /* ln(2) */
-#define M_LN10     2.3025850929940457        /* ln(10) */
-#define M_LOG2E    1.4426950408889634        /* log_2(e) */
-#define M_LOG10E   0.4342944819032518        /* log_10(e) */
-#define M_SQRT2    1.4142135623730950        /* sqrt(2) */
-#define M_SQRT1_2  0.7071067811865475        /* 1/sqrt(2) */
+#  define M_PI 3.1415926535897932       /* pi */
+#  define M_PI_2 1.5707963267948966     /* pi/2 */
+#  define M_PI_4 0.7853981633974483     /* pi/4 */
+#  define M_2_PI 0.6366197723675813     /* 2/pi */
+#  define M_2PI 6.2831853071795865      /* 2*pi */
+#  define M_4PI 12.566370614359173      /* 4*pi */
+#  define M_2_SQRTPI 1.1283791670955126 /* 2/sqrt(pi) */
+#  define M_E 2.7182818284590452        /* e (Euler's number) */
+#  define M_LN2 0.6931471805599453      /* ln(2) */
+#  define M_LN10 2.3025850929940457     /* ln(10) */
+#  define M_LOG2E 1.4426950408889634    /* log_2(e) */
+#  define M_LOG10E 0.4342944819032518   /* log_10(e) */
+#  define M_SQRT2 1.4142135623730950    /* sqrt(2) */
+#  define M_SQRT1_2 0.7071067811865475  /* 1/sqrt(2) */
 #endif
 
-
-
 // Declaration of built-in functions and closures
-#define BUILTIN [[ int builtin = 1 ]]
+#define BUILTIN [[int builtin = 1]]
 #define BUILTIN_DERIV [[ int builtin = 1, int deriv = 1 ]]
 
-#define PERCOMP1(name)                          \
-    normal name (normal x) BUILTIN;             \
-    vector name (vector x) BUILTIN;             \
-    point  name (point x) BUILTIN;              \
-    color  name (color x) BUILTIN;              \
-    float  name (float x) BUILTIN;
-
-#define PERCOMP2(name)                          \
-    normal name (normal x, normal y) BUILTIN;   \
-    vector name (vector x, vector y) BUILTIN;   \
-    point  name (point x, point y) BUILTIN;     \
-    color  name (color x, color y) BUILTIN;     \
-    float  name (float x, float y) BUILTIN;
-
-#define PERCOMP2F(name)                         \
-    normal name (normal x, float y) BUILTIN;    \
-    vector name (vector x, float y) BUILTIN;    \
-    point  name (point x, float y) BUILTIN;     \
-    color  name (color x, float y) BUILTIN;     \
-    float  name (float x, float y) BUILTIN;
-
+#define PERCOMP1(name) \
+  normal name(normal x) BUILTIN; \
+  vector name(vector x) BUILTIN; \
+  point name(point x) BUILTIN; \
+  color name(color x) BUILTIN; \
+  float name(float x) BUILTIN;
+
+#define PERCOMP2(name) \
+  normal name(normal x, normal y) BUILTIN; \
+  vector name(vector x, vector y) BUILTIN; \
+  point name(point x, point y) BUILTIN; \
+  color name(color x, color y) BUILTIN; \
+  float name(float x, float y) BUILTIN;
+
+#define PERCOMP2F(name) \
+  normal name(normal x, float y) BUILTIN; \
+  vector name(vector x, float y) BUILTIN; \
+  point name(point x, float y) BUILTIN; \
+  color name(color x, float y) BUILTIN; \
+  float name(float x, float y) BUILTIN;
 
 // Basic math
-normal degrees (normal x) { return x*(180.0/M_PI); }
-vector degrees (vector x) { return x*(180.0/M_PI); }
-point  degrees (point x)  { return x*(180.0/M_PI); }
-color  degrees (color x)  { return x*(180.0/M_PI); }
-float  degrees (float x)  { return x*(180.0/M_PI); }
-normal radians (normal x) { return x*(M_PI/180.0); }
-vector radians (vector x) { return x*(M_PI/180.0); }
-point  radians (point x)  { return x*(M_PI/180.0); }
-color  radians (color x)  { return x*(M_PI/180.0); }
-float  radians (float x)  { return x*(M_PI/180.0); }
-PERCOMP1 (cos)
-PERCOMP1 (sin)
-PERCOMP1 (tan)
-PERCOMP1 (acos)
-PERCOMP1 (asin)
-PERCOMP1 (atan)
-PERCOMP2 (atan2)
-PERCOMP1 (cosh)
-PERCOMP1 (sinh)
-PERCOMP1 (tanh)
-PERCOMP2F (pow)
-PERCOMP1 (exp)
-PERCOMP1 (exp2)
-PERCOMP1 (expm1)
-PERCOMP1 (log)
-point  log (point a,  float b) { return log(a)/log(b); }
-vector log (vector a, float b) { return log(a)/log(b); }
-color  log (color a,  float b) { return log(a)/log(b); }
-float  log (float a,  float b) { return log(a)/log(b); }
-PERCOMP1 (log2)
-PERCOMP1 (log10)
-PERCOMP1 (logb)
-PERCOMP1 (sqrt)
-PERCOMP1 (inversesqrt)
-float hypot (float a, float b) { return sqrt (a*a + b*b); }
-float hypot (float a, float b, float c) { return sqrt (a*a + b*b + c*c); }
-PERCOMP1 (abs)
-int abs (int x) BUILTIN;
-PERCOMP1 (fabs)
-int fabs (int x) BUILTIN;
-PERCOMP1 (sign)
-PERCOMP1 (floor)
-PERCOMP1 (ceil)
-PERCOMP1 (round)
-PERCOMP1 (trunc)
-PERCOMP2 (fmod)
-PERCOMP2F (fmod)
-int    mod (int    a, int    b) { return a - b*(int)floor(a/b); }
-point  mod (point  a, point  b) { return a - b*floor(a/b); }
-vector mod (vector a, vector b) { return a - b*floor(a/b); }
-normal mod (normal a, normal b) { return a - b*floor(a/b); }
-color  mod (color  a, color  b) { return a - b*floor(a/b); }
-point  mod (point  a, float  b) { return a - b*floor(a/b); }
-vector mod (vector a, float  b) { return a - b*floor(a/b); }
-normal mod (normal a, float  b) { return a - b*floor(a/b); }
-color  mod (color  a, float  b) { return a - b*floor(a/b); }
-float  mod (float  a, float  b) { return a - b*floor(a/b); }
-PERCOMP2 (min)
-int min (int a, int b) BUILTIN;
-PERCOMP2 (max)
-int max (int a, int b) BUILTIN;
-normal clamp (normal x, normal minval, normal maxval) { return max(min(x,maxval),minval); }
-vector clamp (vector x, vector minval, vector maxval) { return max(min(x,maxval),minval); }
-point  clamp (point x, point minval, point maxval) { return max(min(x,maxval),minval); }
-color  clamp (color x, color minval, color maxval) { return max(min(x,maxval),minval); }
-float  clamp (float x, float minval, float maxval) { return max(min(x,maxval),minval); }
-int    clamp (int x, int minval, int maxval) { return max(min(x,maxval),minval); }
+normal degrees(normal x)
+{
+  return x * (180.0 / M_PI);
+}
+vector degrees(vector x)
+{
+  return x * (180.0 / M_PI);
+}
+point degrees(point x)
+{
+  return x * (180.0 / M_PI);
+}
+color degrees(color x)
+{
+  return x * (180.0 / M_PI);
+}
+float degrees(float x)
+{
+  return x * (180.0 / M_PI);
+}
+normal radians(normal x)
+{
+  return x * (M_PI / 180.0);
+}
+vector radians(vector x)
+{
+  return x * (M_PI / 180.0);
+}
+point radians(point x)
+{
+  return x * (M_PI / 180.0);
+}
+color radians(color x)
+{
+  return x * (M_PI / 180.0);
+}
+float radians(float x)
+{
+  return x * (M_PI / 180.0);
+}
+PERCOMP1(cos)
+PERCOMP1(sin)
+PERCOMP1(tan)
+PERCOMP1(acos)
+PERCOMP1(asin)
+PERCOMP1(atan)
+PERCOMP2(atan2)
+PERCOMP1(cosh)
+PERCOMP1(sinh)
+PERCOMP1(tanh)
+PERCOMP2F(pow)
+PERCOMP1(exp)
+PERCOMP1(exp2)
+PERCOMP1(expm1)
+PERCOMP1(log)
+point log(point a, float b)
+{
+  return log(a) / log(b);
+}
+vector log(vector a, float b)
+{
+  return log(a) / log(b);
+}
+color log(color a, float b)
+{
+  return log(a) / log(b);
+}
+float log(float a, float b)
+{
+  return log(a) / log(b);
+}
+PERCOMP1(log2)
+PERCOMP1(log10)
+PERCOMP1(logb)
+PERCOMP1(sqrt)
+PERCOMP1(inversesqrt)
+float hypot(float a, float b)
+{
+  return sqrt(a * a + b * b);
+}
+float hypot(float a, float b, float c)
+{
+  return sqrt(a * a + b * b + c * c);
+}
+PERCOMP1(abs)
+int abs(int x) BUILTIN;
+PERCOMP1(fabs)
+int fabs(int x) BUILTIN;
+PERCOMP1(sign)
+PERCOMP1(floor)
+PERCOMP1(ceil)
+PERCOMP1(round)
+PERCOMP1(trunc)
+PERCOMP2(fmod)
+PERCOMP2F(fmod)
+int mod(int a, int b)
+{
+  return a - b * (int)floor(a / b);
+}
+point mod(point a, point b)
+{
+  return a - b * floor(a / b);
+}
+vector mod(vector a, vector b)
+{
+  return a - b * floor(a / b);
+}
+normal mod(normal a, normal b)
+{
+  return a - b * floor(a / b);
+}
+color mod(color a, color b)
+{
+  return a - b * floor(a / b);
+}
+point mod(point a, float b)
+{
+  return a - b * floor(a / b);
+}
+vector mod(vector a, float b)
+{
+  return a - b * floor(a / b);
+}
+normal mod(normal a, float b)
+{
+  return a - b * floor(a / b);
+}
+color mod(color a, float b)
+{
+  return a - b * floor(a / b);
+}
+float mod(float a, float b)
+{
+  return a - b * floor(a / b);
+}
+PERCOMP2(min)
+int min(int a, int b) BUILTIN;
+PERCOMP2(max)
+int max(int a, int b) BUILTIN;
+normal clamp(normal x, normal minval, normal maxval)
+{
+  return max(min(x, maxval), minval);
+}
+vector clamp(vector x, vector minval, vector maxval)
+{
+  return max(min(x, maxval), minval);
+}
+point clamp(point x, point minval, point maxval)
+{
+  return max(min(x, maxval), minval);
+}
+color clamp(color x, color minval, color maxval)
+{
+  return max(min(x, maxval), minval);
+}
+float clamp(float x, float minval, float maxval)
+{
+  return max(min(x, maxval), minval);
+}
+int clamp(int x, int minval, int maxval)
+{
+  return max(min(x, maxval), minval);
+}
 #if 0
 normal mix (normal x, normal y, normal a) { return x*(1-a) + y*a; }
 normal mix (normal x, normal y, float  a) { return x*(1-a) + y*a; }
@@ -154,102 +245,121 @@ color  mix (color  x, color  y, color  a) { return x*(1-a) + y*a; }
 color  mix (color  x, color  y, float  a) { return x*(1-a) + y*a; }
 float  mix (float  x, float  y, float  a) { return x*(1-a) + y*a; }
 #else
-normal mix (normal x, normal y, normal a) BUILTIN;
-normal mix (normal x, normal y, float  a) BUILTIN;
-vector mix (vector x, vector y, vector a) BUILTIN;
-vector mix (vector x, vector y, float  a) BUILTIN;
-point  mix (point  x, point  y, point  a) BUILTIN;
-point  mix (point  x, point  y, float  a) BUILTIN;
-color  mix (color  x, color  y, color  a) BUILTIN;
-color  mix (color  x, color  y, float  a) BUILTIN;
-float  mix (float  x, float  y, float  a) BUILTIN;
+normal mix(normal x, normal y, normal a) BUILTIN;
+normal mix(normal x, normal y, float a) BUILTIN;
+vector mix(vector x, vector y, vector a) BUILTIN;
+vector mix(vector x, vector y, float a) BUILTIN;
+point mix(point x, point y, point a) BUILTIN;
+point mix(point x, point y, float a) BUILTIN;
+color mix(color x, color y, color a) BUILTIN;
+color mix(color x, color y, float a) BUILTIN;
+float mix(float x, float y, float a) BUILTIN;
 #endif
-int isnan (float x) BUILTIN;
-int isinf (float x) BUILTIN;
-int isfinite (float x) BUILTIN;
-float erf (float x) BUILTIN;
-float erfc (float x) BUILTIN;
+int isnan(float x) BUILTIN;
+int isinf(float x) BUILTIN;
+int isfinite(float x) BUILTIN;
+float erf(float x) BUILTIN;
+float erfc(float x) BUILTIN;
 
 // Vector functions
 
-vector cross (vector a, vector b) BUILTIN;
-float dot (vector a, vector b) BUILTIN;
-float length (vector v) BUILTIN;
-float distance (point a, point b) BUILTIN;
-float distance (point a, point b, point q)
-{
-    vector d = b - a;
-    float dd = dot(d, d);
-    if(dd == 0.0)
-        return distance(q, a);
-    float t = dot(q - a, d)/dd;
-    return distance(q, a + clamp(t, 0.0, 1.0)*d);
-}
-normal normalize (normal v) BUILTIN;
-vector normalize (vector v) BUILTIN;
-vector faceforward (vector N, vector I, vector Nref) BUILTIN;
-vector faceforward (vector N, vector I) BUILTIN;
-vector reflect (vector I, vector N) { return I - 2*dot(N,I)*N; }
-vector refract (vector I, vector N, float eta) {
-    float IdotN = dot (I, N);
-    float k = 1 - eta*eta * (1 - IdotN*IdotN);
-    return (k < 0) ? vector(0,0,0) : (eta*I - N * (eta*IdotN + sqrt(k)));
-}
-void fresnel (vector I, normal N, float eta,
-              output float Kr, output float Kt,
-              output vector R, output vector T)
-{
-    float sqr(float x) { return x*x; }
-    float c = dot(I, N);
-    if (c < 0)
-        c = -c;
-    R = reflect(I, N);
-    float g = 1.0 / sqr(eta) - 1.0 + c * c;
-    if (g >= 0.0) {
-        g = sqrt (g);
-        float beta = g - c;
-        float F = (c * (g+c) - 1.0) / (c * beta + 1.0);
-        F = 0.5 * (1.0 + sqr(F));
-        F *= sqr (beta / (g+c));
-        Kr = F;
-        Kt = (1.0 - Kr) * eta*eta;
-        // OPT: the following recomputes some of the above values, but it
-        // gives us the same result as if the shader-writer called refract()
-        T = refract(I, N, eta);
-    } else {
-        // total internal reflection
-        Kr = 1.0;
-        Kt = 0.0;
-        T = vector (0,0,0);
-    }
+vector cross(vector a, vector b) BUILTIN;
+float dot(vector a, vector b) BUILTIN;
+float length(vector v) BUILTIN;
+float distance(point a, point b) BUILTIN;
+float distance(point a, point b, point q)
+{
+  vector d = b - a;
+  float dd = dot(d, d);
+  if (dd == 0.0)
+    return distance(q, a);
+  float t = dot(q - a, d) / dd;
+  return distance(q, a + clamp(t, 0.0, 1.0) * d);
 }
-
-void fresnel (vector I, normal N, float eta,
-              output float Kr, output float Kt)
+normal normalize(normal v) BUILTIN;
+vector normalize(vector v) BUILTIN;
+vector faceforward(vector N, vector I, vector Nref) BUILTIN;
+vector faceforward(vector N, vector I) BUILTIN;
+vector reflect(vector I, vector N)
 {
-    vector R, T;
-    fresnel(I, N, eta, Kr, Kt, R, T);
+  return I - 2 * dot(N, I) * N;
+}
+vector refract(vector I, vector N, float eta)
+{
+  float IdotN = dot(I, N);
+  float k = 1 - eta * eta * (1 - IdotN * IdotN);
+  return (k < 0) ? vector(0, 0, 0) : (eta * I - N * (eta * IdotN + sqrt(k)));
+}
+void fresnel(vector I,
+             normal N,
+             float eta,
+             output float Kr,
+             output float Kt,
+             output vector R,
+             output vector T)
+{
+  float sqr(float x)
+  {
+    return x * x;
+  }
+  float c = dot(I, N);
+  if (c < 0)
+    c = -c;
+  R = reflect(I, N);
+  float g = 1.0 / sqr(eta) - 1.0 + c * c;
+  if (g >= 0.0) {
+    g = sqrt(g);
+    float beta = g - c;
+    float F = (c * (g + c) - 1.0) / (c * beta + 1.0);
+    F = 0.5 * (1.0 + sqr(F));
+    F *= sqr(beta / (g + c));
+    Kr = F;
+    Kt = (1.0 - Kr) * eta * eta;
+    // OPT: the following recomputes some of the above values, but it
+    // gives us the same result as if the shader-writer called refract()
+    T = refract(I, N, eta);
+  }
+  else {
+    // total internal reflection
+    Kr = 1.0;
+    Kt = 0.0;
+    T = vector(0, 0, 0);
+  }
 }
 
+void fresnel(vector I, normal N, float eta, output float Kr, output float Kt)
+{
+  vector R, T;
+  fresnel(I, N, eta, Kr, Kt, R, T);
+}
 
-normal transform (matrix Mto, normal p) BUILTIN;
-vector transform (matrix Mto, vector p) BUILTIN;
-point  transform (matrix Mto, point p) BUILTIN;
-normal transform (string from, string to, normal p) BUILTIN;
-vector transform (string from, string to, vector p) BUILTIN;
-point  transform (string from, string to, point p) BUILTIN;
-normal transform (string to, normal p) { return transform("common",to,p); }
-vector transform (string to, vector p) { return transform("common",to,p); }
-point  transform (string to, point p)  { return transform("common",to,p); }
+normal transform(matrix Mto, normal p) BUILTIN;
+vector transform(matrix Mto, vector p) BUILTIN;
+point transform(matrix Mto, point p) BUILTIN;
+normal transform(string from, string to, normal p) BUILTIN;
+vector transform(string from, string to, vector p) BUILTIN;
+point transform(string from, string to, point p) BUILTIN;
+normal transform(string to, normal p)
+{
+  return transform("common", to, p);
+}
+vector transform(string to, vector p)
+{
+  return transform("common", to, p);
+}
+point transform(string to, point p)
+{
+  return transform("common", to, p);
+}
 
-float transformu (string tounits, float x) BUILTIN;
-float transformu (string fromunits, string tounits, float x) BUILTIN;
+float transformu(string tounits, float x) BUILTIN;
+float transformu(string fromunits, string tounits, float x) BUILTIN;
 
-point rotate (point p, float angle, point a, point b)
+point rotate(point p, float angle, point a, point b)
 {
-    vector axis = normalize (b - a);
-    float cosang, sinang;
-    /* Older OSX has major issues with sincos() function,
+  vector axis = normalize(b - a);
+  float cosang, sinang;
+  /* Older OSX has major issues with sincos() function,
      * it's likely a big in OSL or LLVM. For until we've
      * updated to new versions of this libraries we'll
      * use a workaround to prevent possible crashes on all
@@ -261,317 +371,348 @@ point rotate (point p, float angle, point a, point b)
 #if 0
     sincos (angle, sinang, cosang);
 #else
-    sinang = sin (angle);
-    cosang = cos (angle);
+  sinang = sin(angle);
+  cosang = cos(angle);
 #endif
-    float cosang1 = 1.0 - cosang;
-    float x = axis[0], y = axis[1], z = axis[2];
-    matrix M = matrix (x * x + (1.0 - x * x) * cosang,
-                       x * y * cosang1 + z * sinang,
-                       x * z * cosang1 - y * sinang,
-                       0.0,
-                       x * y * cosang1 - z * sinang,
-                       y * y + (1.0 - y * y) * cosang,
-                       y * z * cosang1 + x * sinang,
-                       0.0,
-                       x * z * cosang1 + y * sinang,
-                       y * z * cosang1 - x * sinang,
-                       z * z + (1.0 - z * z) * cosang,
-                       0.0,
-                       0.0, 0.0, 0.0, 1.0);
-    return transform (M, p-a) + a;
+  float cosang1 = 1.0 - cosang;
+  float x = axis[0], y = axis[1], z = axis[2];
+  matrix M = matrix(x * x + (1.0 - x * x) * cosang,
+                    x * y * cosang1 + z * sinang,
+                    x * z * cosang1 - y * sinang,
+                    0.0,
+                    x * y * cosang1 - z * sinang,
+                    y * y + (1.0 - y * y) * cosang,
+                    y * z * cosang1 + x * sinang,
+                    0.0,
+                    x * z * cosang1 + y * sinang,
+                    y * z * cosang1 - x * sinang,
+                    z * z + (1.0 - z * z) * cosang,
+                    0.0,
+                    0.0,
+                    0.0,
+                    0.0,
+                    1.0);
+  return transform(M, p - a) + a;
 }
 
 normal ensure_valid_reflection(normal Ng, vector I, normal N)
 {
-    /* The implementation here mirrors the one in kernel_montecarlo.h,
+  /* The implementation here mirrors the one in kernel_montecarlo.h,
      * check there for an explanation of the algorithm. */
 
-    float sqr(float x) { return x*x; }
-
-    vector R = 2*dot(N, I)*N - I;
+  float sqr(float x)
+  {
+    return x * x;
+  }
 
-    float threshold = min(0.9*dot(Ng, I), 0.01);
-    if(dot(Ng, R) >= threshold) {
-        return N;
-    }
+  vector R = 2 * dot(N, I) * N - I;
 
-    float NdotNg = dot(N, Ng);
-    vector X = normalize(N - NdotNg*Ng);
+  float threshold = min(0.9 * dot(Ng, I), 0.01);
+  if (dot(Ng, R) >= threshold) {
+    return N;
+  }
 
-    float Ix = dot(I, X), Iz = dot(I, Ng);
-    float Ix2 = sqr(Ix), Iz2 = sqr(Iz);
-    float a = Ix2 + Iz2;
+  float NdotNg = dot(N, Ng);
+  vector X = normalize(N - NdotNg * Ng);
 
-    float b = sqrt(Ix2*(a - sqr(threshold)));
-    float c = Iz*threshold + a;
+  float Ix = dot(I, X), Iz = dot(I, Ng);
+  float Ix2 = sqr(Ix), Iz2 = sqr(Iz);
+  float a = Ix2 + Iz2;
 
-    float fac = 0.5/a;
-    float N1_z2 = fac*(b+c), N2_z2 = fac*(-b+c);
-    int valid1 = (N1_z2 > 1e-5) && (N1_z2 <= (1.0 + 1e-5));
-    int valid2 = (N2_z2 > 1e-5) && (N2_z2 <= (1.0 + 1e-5));
+  float b = sqrt(Ix2 * (a - sqr(threshold)));
+  float c = Iz * threshold + a;
 
-    float N_new_x, N_new_z;
-    if(valid1 && valid2) {
-        float N1_x = sqrt(1.0 - N1_z2), N1_z = sqrt(N1_z2);
-        float N2_x = sqrt(1.0 - N2_z2), N2_z = sqrt(N2_z2);
+  float fac = 0.5 / a;
+  float N1_z2 = fac * (b + c), N2_z2 = fac * (-b + c);
+  int valid1 = (N1_z2 > 1e-5) && (N1_z2 <= (1.0 + 1e-5));
+  int valid2 = (N2_z2 > 1e-5) && (N2_z2 <= (1.0 + 1e-5));
 
-        float R1 = 2*(N1_x*Ix + N1_z*Iz)*N1_z - Iz;
-        float R2 = 2*(N2_x*Ix + N2_z*Iz)*N2_z - Iz;
+  float N_new_x, N_new_z;
+  if (valid1 && valid2) {
+    float N1_x = sqrt(1.0 - N1_z2), N1_z = sqrt(N1_z2);
+    float N2_x = sqrt(1.0 - N2_z2), N2_z = sqrt(N2_z2);
 
-        valid1 = (R1 >= 1e-5);
-        valid2 = (R2 >= 1e-5);
-        if(valid1 && valid2) {
-            N_new_x = (R1 < R2)? N1_x : N2_x;
-            N_new_z = (R1 < R2)? N1_z : N2_z;
-        }
-        else {
-            N_new_x = (R1 > R2)? N1_x : N2_x;
-            N_new_z = (R1 > R2)? N1_z : N2_z;
-        }
+    float R1 = 2 * (N1_x * Ix + N1_z * Iz) * N1_z - Iz;
+    float R2 = 2 * (N2_x * Ix + N2_z * Iz) * N2_z - Iz;
 
-    }
-    else if(valid1 || valid2) {
-        float Nz2 = valid1? N1_z2 : N2_z2;
-        N_new_x = sqrt(1.0 - Nz2);
-        N_new_z = sqrt(Nz2);
+    valid1 = (R1 >= 1e-5);
+    valid2 = (R2 >= 1e-5);
+    if (valid1 && valid2) {
+      N_new_x = (R1 < R2) ? N1_x : N2_x;
+      N_new_z = (R1 < R2) ? N1_z : N2_z;
     }
     else {
-        return Ng;
+      N_new_x = (R1 > R2) ? N1_x : N2_x;
+      N_new_z = (R1 > R2) ? N1_z : N2_z;
     }
-
-    return N_new_x*X + N_new_z*Ng;
+  }
+  else if (valid1 || valid2) {
+    float Nz2 = valid1 ? N1_z2 : N2_z2;
+    N_new_x = sqrt(1.0 - Nz2);
+    N_new_z = sqrt(Nz2);
+  }
+  else {
+    return Ng;
+  }
+
+  return N_new_x * X + N_new_z * Ng;
 }
 
-
 // Color functions
 
-float luminance (color c) BUILTIN;
-color blackbody (float temperatureK) BUILTIN;
-color wavelength_color (float wavelength_nm) BUILTIN;
-
-
-color transformc (string to, color x)
-{
-    color rgb_to_hsv (color rgb) {  // See Foley & van Dam
-        float r = rgb[0], g = rgb[1], b = rgb[2];
-        float mincomp = min (r, min (g, b));
-        float maxcomp = max (r, max (g, b));
-        float delta = maxcomp - mincomp;  // chroma
-        float h, s, v;
-        v = maxcomp;
-        if (maxcomp > 0)
-            s = delta / maxcomp;
-        else s = 0;
-        if (s <= 0)
-            h = 0;
-        else {
-            if      (r >= maxcomp) h = (g-b) / delta;
-            else if (g >= maxcomp) h = 2 + (b-r) / delta;
-            else                   h = 4 + (r-g) / delta;
-            h /= 6;
-            if (h < 0)
-                h += 1;
-        }
-        return color (h, s, v);
-    }
-
-    color rgb_to_hsl (color rgb) {  // See Foley & van Dam
-        // First convert rgb to hsv, then to hsl
-        float minval = min (rgb[0], min (rgb[1], rgb[2]));
-        color hsv = rgb_to_hsv (rgb);
-        float maxval = hsv[2];   // v == maxval
-        float h = hsv[0], s, l = (minval+maxval) / 2;
-        if (minval == maxval)
-            s = 0;  // special 'achromatic' case, hue is 0
-        else if (l <= 0.5)
-            s = (maxval - minval) / (maxval + minval);
-        else
-            s = (maxval - minval) / (2 - maxval - minval);
-        return color (h, s, l);
-    }
+float luminance(color c) BUILTIN;
+color blackbody(float temperatureK) BUILTIN;
+color wavelength_color(float wavelength_nm) BUILTIN;
 
-    color r;
-    if (to == "rgb" || to == "RGB")
-        r = x;
-    else if (to == "hsv")
-        r = rgb_to_hsv (x);
-    else if (to == "hsl")
-        r = rgb_to_hsl (x);
-    else if (to == "YIQ")
-        r = color (dot (vector(0.299,  0.587,  0.114), (vector)x),
-                   dot (vector(0.596, -0.275, -0.321), (vector)x),
-                   dot (vector(0.212, -0.523,  0.311), (vector)x));
-    else if (to == "XYZ")
-        r = color (dot (vector(0.412453, 0.357580, 0.180423), (vector)x),
-                   dot (vector(0.212671, 0.715160, 0.072169), (vector)x),
-                   dot (vector(0.019334, 0.119193, 0.950227), (vector)x));
+color transformc(string to, color x)
+{
+  color rgb_to_hsv(color rgb)
+  {  // See Foley & van Dam
+    float r = rgb[0], g = rgb[1], b = rgb[2];
+    float mincomp = min(r, min(g, b));
+    float maxcomp = max(r, max(g, b));
+    float delta = maxcomp - mincomp;  // chroma
+    float h, s, v;
+    v = maxcomp;
+    if (maxcomp > 0)
+      s = delta / maxcomp;
+    else
+      s = 0;
+    if (s <= 0)
+      h = 0;
     else {
-        error ("Unknown color space \"%s\"", to);
-        r = x;
+      if (r >= maxcomp)
+        h = (g - b) / delta;
+      else if (g >= maxcomp)
+        h = 2 + (b - r) / delta;
+      else
+        h = 4 + (r - g) / delta;
+      h /= 6;
+      if (h < 0)
+        h += 1;
     }
-    return r;
+    return color(h, s, v);
+  }
+
+  color rgb_to_hsl(color rgb)
+  {  // See Foley & van Dam
+    // First convert rgb to hsv, then to hsl
+    float minval = min(rgb[0], min(rgb[1], rgb[2]));
+    color hsv = rgb_to_hsv(rgb);
+    float maxval = hsv[2];  // v == maxval
+    float h = hsv[0], s, l = (minval + maxval) / 2;
+    if (minval == maxval)
+      s = 0;  // special 'achromatic' case, hue is 0
+    else if (l <= 0.5)
+      s = (maxval - minval) / (maxval + minval);
+    else
+      s = (maxval - minval) / (2 - maxval - minval);
+    return color(h, s, l);
+  }
+
+  color r;
+  if (to == "rgb" || to == "RGB")
+    r = x;
+  else if (to == "hsv")
+    r = rgb_to_hsv(x);
+  else if (to == "hsl")
+    r = rgb_to_hsl(x);
+  else if (to == "YIQ")
+    r = color(dot(vector(0.299, 0.587, 0.114), (vector)x),
+              dot(vector(0.596, -0.275, -0.321), (vector)x),
+              dot(vector(0.212, -0.523, 0.311), (vector)x));
+  else if (to == "XYZ")
+    r = color(dot(vector(0.412453, 0.357580, 0.180423), (vector)x),
+              dot(vector(0.212671, 0.715160, 0.072169), (vector)x),
+              dot(vector(0.019334, 0.119193, 0.950227), (vector)x));
+  else {
+    error("Unknown color space \"%s\"", to);
+    r = x;
+  }
+  return r;
 }
 
-
-color transformc (string from, string to, color x)
-{
-    color hsv_to_rgb (color c) { // Reference: Foley & van Dam
-        float h = c[0], s = c[1], v = c[2];
-        color r;
-        if (s < 0.0001) {
-            r = v;
-        } else {
-            h = 6 * (h - floor(h));  // expand to [0..6)
-            int hi = (int)h;
-            float f = h - hi;
-            float p = v * (1-s);
-            float q = v * (1-s*f);
-            float t = v * (1-s*(1-f));
-            if      (hi == 0) r = color (v, t, p);
-            else if (hi == 1) r = color (q, v, p);
-            else if (hi == 2) r = color (p, v, t);
-            else if (hi == 3) r = color (p, q, v);
-            else if (hi == 4) r = color (t, p, v);
-            else              r = color (v, p, q);
-        }
-        return r;
+color transformc(string from, string to, color x)
+{
+  color hsv_to_rgb(color c)
+  {  // Reference: Foley & van Dam
+    float h = c[0], s = c[1], v = c[2];
+    color r;
+    if (s < 0.0001) {
+      r = v;
     }
-
-    color hsl_to_rgb (color c) {
-        float h = c[0], s = c[1], l = c[2];
-        // Easiest to convert hsl -> hsv, then hsv -> RGB (per Foley & van Dam)
-        float v = (l <= 0.5) ? (l * (1 + s)) : (l * (1 - s) + s);
-        color r;
-        if (v <= 0) {
-            r = 0;
-        } else {
-            float min = 2 * l - v;
-            s = (v - min) / v;
-            r = hsv_to_rgb (color (h, s, v));
-        }
-        return r;
+    else {
+      h = 6 * (h - floor(h));  // expand to [0..6)
+      int hi = (int)h;
+      float f = h - hi;
+      float p = v * (1 - s);
+      float q = v * (1 - s * f);
+      float t = v * (1 - s * (1 - f));
+      if (hi == 0)
+        r = color(v, t, p);
+      else if (hi == 1)
+        r = color(q, v, p);
+      else if (hi == 2)
+        r = color(p, v, t);
+      else if (hi == 3)
+        r = color(p, q, v);
+      else if (hi == 4)
+        r = color(t, p, v);
+      else
+        r = color(v, p, q);
     }
+    return r;
+  }
 
+  color hsl_to_rgb(color c)
+  {
+    float h = c[0], s = c[1], l = c[2];
+    // Easiest to convert hsl -> hsv, then hsv -> RGB (per Foley & van Dam)
+    float v = (l <= 0.5) ? (l * (1 + s)) : (l * (1 - s) + s);
     color r;
-    if (from == "rgb" || from == "RGB")
-        r = x;
-    else if (from == "hsv")
-        r = hsv_to_rgb (x);
-    else if (from == "hsl")
-        r = hsl_to_rgb (x);
-    else if (from == "YIQ")
-        r = color (dot (vector(1,  0.9557,  0.6199), (vector)x),
-                   dot (vector(1, -0.2716, -0.6469), (vector)x),
-                   dot (vector(1, -1.1082,  1.7051), (vector)x));
-    else if (from == "XYZ")
-        r = color (dot (vector( 3.240479, -1.537150, -0.498535), (vector)x),
-                   dot (vector(-0.969256,  1.875991,  0.041556), (vector)x),
-                   dot (vector( 0.055648, -0.204043,  1.057311), (vector)x));
+    if (v <= 0) {
+      r = 0;
+    }
     else {
-        error ("Unknown color space \"%s\"", to);
-        r = x;
+      float min = 2 * l - v;
+      s = (v - min) / v;
+      r = hsv_to_rgb(color(h, s, v));
     }
-    return transformc (to, r);
+    return r;
+  }
+
+  color r;
+  if (from == "rgb" || from == "RGB")
+    r = x;
+  else if (from == "hsv")
+    r = hsv_to_rgb(x);
+  else if (from == "hsl")
+    r = hsl_to_rgb(x);
+  else if (from == "YIQ")
+    r = color(dot(vector(1, 0.9557, 0.6199), (vector)x),
+              dot(vector(1, -0.2716, -0.6469), (vector)x),
+              dot(vector(1, -1.1082, 1.7051), (vector)x));
+  else if (from == "XYZ")
+    r = color(dot(vector(3.240479, -1.537150, -0.498535), (vector)x),
+              dot(vector(-0.969256, 1.875991, 0.041556), (vector)x),
+              dot(vector(0.055648, -0.204043, 1.057311), (vector)x));
+  else {
+    error("Unknown color space \"%s\"", from);  // name the unrecognized source space
+    r = x;
+  }
+  return transformc(to, r);
 }
 
-
-
 // Matrix functions
 
-float determinant (matrix m) BUILTIN;
-matrix transpose (matrix m) BUILTIN;
+float determinant(matrix m) BUILTIN;
+matrix transpose(matrix m) BUILTIN;
 
+// Pattern generation
 
+color step(color edge, color x) BUILTIN;
+point step(point edge, point x) BUILTIN;
+vector step(vector edge, vector x) BUILTIN;
+normal step(normal edge, normal x) BUILTIN;
+float step(float edge, float x) BUILTIN;
+float smoothstep(float edge0, float edge1, float x) BUILTIN;
 
-// Pattern generation
+float linearstep(float edge0, float edge1, float x)
+{
+  float result;
+  if (edge0 != edge1) {
+    float xclamped = clamp(x, edge0, edge1);
+    result = (xclamped - edge0) / (edge1 - edge0);
+  }
+  else {  // special case: edges coincide
+    result = step(edge0, x);
+  }
+  return result;
+}
 
-color step (color edge, color x) BUILTIN;
-point step (point edge, point x) BUILTIN;
-vector step (vector edge, vector x) BUILTIN;
-normal step (normal edge, normal x) BUILTIN;
-float step (float edge, float x) BUILTIN;
-float smoothstep (float edge0, float edge1, float x) BUILTIN;
-
-float linearstep (float edge0, float edge1, float x) {
-    float result;
-    if (edge0 != edge1) {
-        float xclamped = clamp (x, edge0, edge1);
-        result = (xclamped - edge0) / (edge1 - edge0);
-    } else {  // special case: edges coincide
-        result = step (edge0, x);
-    }
-    return result;
-}
-
-float smooth_linearstep (float edge0, float edge1, float x_, float eps_) {
-    float result;
-    if (edge0 != edge1) {
-        float rampup (float x, float r) { return 0.5/r * x*x; }
-        float width_inv = 1.0 / (edge1 - edge0);
-        float eps = eps_ * width_inv;
-        float x = (x_ - edge0) * width_inv;
-        if      (x <= -eps)                result = 0;
-        else if (x >= eps && x <= 1.0-eps) result = x;
-        else if (x >= 1.0+eps)             result = 1;
-        else if (x < eps)                  result = rampup (x+eps, 2.0*eps);
-        else  /* if (x < 1.0+eps) */        result = 1.0 - rampup (1.0+eps - x, 2.0*eps);
-    } else {
-        result = step (edge0, x_);
+float smooth_linearstep(float edge0, float edge1, float x_, float eps_)
+{
+  float result;
+  if (edge0 != edge1) {
+    float rampup(float x, float r)
+    {
+      return 0.5 / r * x * x;
     }
-    return result;
+    float width_inv = 1.0 / (edge1 - edge0);
+    float eps = eps_ * width_inv;
+    float x = (x_ - edge0) * width_inv;
+    if (x <= -eps)
+      result = 0;
+    else if (x >= eps && x <= 1.0 - eps)
+      result = x;
+    else if (x >= 1.0 + eps)
+      result = 1;
+    else if (x < eps)
+      result = rampup(x + eps, 2.0 * eps);
+    else /* if (x < 1.0+eps) */
+      result = 1.0 - rampup(1.0 + eps - x, 2.0 * eps);
+  }
+  else {
+    result = step(edge0, x_);
+  }
+  return result;
 }
 
-float aastep (float edge, float s, float dedge, float ds) {
-    // Box filtered AA step
-    float width = fabs(dedge) + fabs(ds);
-    float halfwidth = 0.5*width;
-    float e1 = edge-halfwidth;
-    return (s <= e1) ? 0.0 : ((s >= (edge+halfwidth)) ? 1.0 : (s-e1)/width);
+float aastep(float edge, float s, float dedge, float ds)
+{
+  // Box filtered AA step
+  float width = fabs(dedge) + fabs(ds);
+  float halfwidth = 0.5 * width;
+  float e1 = edge - halfwidth;
+  return (s <= e1) ? 0.0 : ((s >= (edge + halfwidth)) ? 1.0 : (s - e1) / width);
 }
-float aastep (float edge, float s, float ds) {
-    return aastep (edge, s, filterwidth(edge), ds);
+float aastep(float edge, float s, float ds)
+{
+  return aastep(edge, s, filterwidth(edge), ds);
 }
-float aastep (float edge, float s) {
-    return aastep (edge, s, filterwidth(edge), filterwidth(s));
+float aastep(float edge, float s)
+{
+  return aastep(edge, s, filterwidth(edge), filterwidth(s));
 }
 
-
 // Derivatives and area operators
 
-
 // Displacement functions
 
-
 // String functions
-int strlen (string s) BUILTIN;
-int hash (string s) BUILTIN;
-int getchar (string s, int index) BUILTIN;
-int startswith (string s, string prefix) BUILTIN;
-int endswith (string s, string suffix) BUILTIN;
-string substr (string s, int start, int len) BUILTIN;
-string substr (string s, int start) { return substr (s, start, strlen(s)); }
-float stof (string str) BUILTIN;
-int stoi (string str) BUILTIN;
+int strlen(string s) BUILTIN;
+int hash(string s) BUILTIN;
+int getchar(string s, int index) BUILTIN;
+int startswith(string s, string prefix) BUILTIN;
+int endswith(string s, string suffix) BUILTIN;
+string substr(string s, int start, int len) BUILTIN;
+string substr(string s, int start)
+{
+  return substr(s, start, strlen(s));
+}
+float stof(string str) BUILTIN;
+int stoi(string str) BUILTIN;
 
 // Define concat in terms of shorter concat
-string concat (string a, string b, string c) {
-    return concat(concat(a,b), c);
+string concat(string a, string b, string c)
+{
+  return concat(concat(a, b), c);
 }
-string concat (string a, string b, string c, string d) {
-    return concat(concat(a,b,c), d);
+string concat(string a, string b, string c, string d)
+{
+  return concat(concat(a, b, c), d);
 }
-string concat (string a, string b, string c, string d, string e) {
-    return concat(concat(a,b,c,d), e);
+string concat(string a, string b, string c, string d, string e)
+{
+  return concat(concat(a, b, c, d), e);
 }
-string concat (string a, string b, string c, string d, string e, string f) {
-    return concat(concat(a,b,c,d,e), f);
+string concat(string a, string b, string c, string d, string e, string f)
+{
+  return concat(concat(a, b, c, d, e), f);
 }
 
-
 // Texture
 
-
 // Closures
 
 closure color diffuse(normal N) BUILTIN;
@@ -591,14 +732,18 @@ closure color microfacet_multi_ggx(normal N, float ag, color C) BUILTIN;
 closure color microfacet_multi_ggx_aniso(normal N, vector T, float ax, float ay, color C) BUILTIN;
 closure color microfacet_multi_ggx_glass(normal N, float ag, float eta, color C) BUILTIN;
 closure color microfacet_ggx_fresnel(normal N, float ag, float eta, color C, color Cspec0) BUILTIN;
-closure color microfacet_ggx_aniso_fresnel(normal N, vector T, float ax, float ay, float eta, color C, color Cspec0) BUILTIN;
-closure color microfacet_multi_ggx_fresnel(normal N, float ag, float eta, color C, color Cspec0) BUILTIN;
-closure color microfacet_multi_ggx_aniso_fresnel(normal N, vector T, float ax, float ay, float eta, color C, color Cspec0) BUILTIN;
-closure color microfacet_multi_ggx_glass_fresnel(normal N, float ag, float eta, color C, color Cspec0) BUILTIN;
+closure color microfacet_ggx_aniso_fresnel(
+    normal N, vector T, float ax, float ay, float eta, color C, color Cspec0) BUILTIN;
+closure color
+microfacet_multi_ggx_fresnel(normal N, float ag, float eta, color C, color Cspec0) BUILTIN;
+closure color microfacet_multi_ggx_aniso_fresnel(
+    normal N, vector T, float ax, float ay, float eta, color C, color Cspec0) BUILTIN;
+closure color
+microfacet_multi_ggx_glass_fresnel(normal N, float ag, float eta, color C, color Cspec0) BUILTIN;
 closure color microfacet_beckmann(normal N, float ab) BUILTIN;
 closure color microfacet_beckmann_aniso(normal N, vector T, float ax, float ay) BUILTIN;
 closure color microfacet_beckmann_refraction(normal N, float ab, float eta) BUILTIN;
-closure color ashikhmin_shirley(normal N, vector T,float ax, float ay) BUILTIN;
+closure color ashikhmin_shirley(normal N, vector T, float ax, float ay) BUILTIN;
 closure color ashikhmin_velvet(normal N, float sigma) BUILTIN;
 closure color emission() BUILTIN;
 closure color background() BUILTIN;
@@ -612,78 +757,97 @@ closure color principled_clearcoat(normal N, float clearcoat, float clearcoat_ro
 closure color bssrdf(string method, normal N, vector radius, color albedo) BUILTIN;
 
 // Hair
-closure color hair_reflection(normal N, float roughnessu, float roughnessv, vector T, float offset) BUILTIN;
-closure color hair_transmission(normal N, float roughnessu, float roughnessv, vector T, float offset) BUILTIN;
-closure color principled_hair(normal N, color sigma, float roughnessu, float roughnessv, float coat, float alpha, float eta) BUILTIN;
+closure color
+hair_reflection(normal N, float roughnessu, float roughnessv, vector T, float offset) BUILTIN;
+closure color
+hair_transmission(normal N, float roughnessu, float roughnessv, vector T, float offset) BUILTIN;
+closure color principled_hair(normal N,
+                              color sigma,
+                              float roughnessu,
+                              float roughnessv,
+                              float coat,
+                              float alpha,
+                              float eta) BUILTIN;
 
 // Volume
 closure color henyey_greenstein(float g) BUILTIN;
 closure color absorption() BUILTIN;
 
 // OSL 1.5 Microfacet functions
-closure color microfacet(string distribution, normal N, vector U, float xalpha, float yalpha, float eta, int refract) {
-	/* GGX */
-	if (distribution == "ggx" || distribution == "default") {
-		if (!refract) {
-			if (xalpha == yalpha) {
-				/* Isotropic */
-				return microfacet_ggx(N, xalpha);
-			}
-			else {
-				/* Anisotropic */
-				return microfacet_ggx_aniso(N, U, xalpha, yalpha);
-			}
-		}
-		else {
-			return microfacet_ggx_refraction(N, xalpha, eta);
-		}
-	}
-	/* Beckmann */
-	else {
-		if (!refract) {
-			if (xalpha == yalpha) {
-				/* Isotropic */
-				return microfacet_beckmann(N, xalpha);
-			}
-			else {
-				/* Anisotropic */
-				return microfacet_beckmann_aniso(N, U, xalpha, yalpha);
-			}
-		}
-		else {
-			return microfacet_beckmann_refraction(N, xalpha, eta);
-		}
-	}
-}
-
-closure color microfacet (string distribution, normal N, float alpha, float eta, int refract) {
-	return microfacet(distribution, N, vector(0), alpha, alpha, eta, refract);
+closure color microfacet(
+    string distribution, normal N, vector U, float xalpha, float yalpha, float eta, int refract)
+{
+  /* GGX */
+  if (distribution == "ggx" || distribution == "default") {
+    if (!refract) {
+      if (xalpha == yalpha) {
+        /* Isotropic */
+        return microfacet_ggx(N, xalpha);
+      }
+      else {
+        /* Anisotropic */
+        return microfacet_ggx_aniso(N, U, xalpha, yalpha);
+      }
+    }
+    else {
+      return microfacet_ggx_refraction(N, xalpha, eta);
+    }
+  }
+  /* Beckmann */
+  else {
+    if (!refract) {
+      if (xalpha == yalpha) {
+        /* Isotropic */
+        return microfacet_beckmann(N, xalpha);
+      }
+      else {
+        /* Anisotropic */
+        return microfacet_beckmann_aniso(N, U, xalpha, yalpha);
+      }
+    }
+    else {
+      return microfacet_beckmann_refraction(N, xalpha, eta);
+    }
+  }
 }
 
+closure color microfacet(string distribution, normal N, float alpha, float eta, int refract)
+{
+  return microfacet(distribution, N, vector(0), alpha, alpha, eta, refract);
+}
 
 // Renderer state
-int backfacing () BUILTIN;
-int raytype (string typename) BUILTIN;
+int backfacing() BUILTIN;
+int raytype(string typename) BUILTIN;
 // the individual 'isFOOray' functions are deprecated
-int iscameraray () { return raytype("camera"); }
-int isdiffuseray () { return raytype("diffuse"); }
-int isglossyray () { return raytype("glossy"); }
-int isshadowray () { return raytype("shadow"); }
-int getmatrix (string fromspace, string tospace, output matrix M) BUILTIN;
-int getmatrix (string fromspace, output matrix M) {
-    return getmatrix (fromspace, "common", M);
+int iscameraray()
+{
+  return raytype("camera");
+}
+int isdiffuseray()
+{
+  return raytype("diffuse");
+}
+int isglossyray()
+{
+  return raytype("glossy");
+}
+int isshadowray()
+{
+  return raytype("shadow");
+}
+int getmatrix(string fromspace, string tospace, output matrix M) BUILTIN;
+int getmatrix(string fromspace, output matrix M)
+{
+  return getmatrix(fromspace, "common", M);
 }
-
 
 // Miscellaneous
 
-
-
-
 #undef BUILTIN
 #undef BUILTIN_DERIV
 #undef PERCOMP1
 #undef PERCOMP2
 #undef PERCOMP2F
 
-#endif  /* CCL_STDOSL_H */
+#endif /* CCL_STDOSL_H */
diff --git a/intern/cycles/kernel/split/kernel_branched.h b/intern/cycles/kernel/split/kernel_branched.h
index ed0a82067f1..e08d87ab618 100644
--- a/intern/cycles/kernel/split/kernel_branched.h
+++ b/intern/cycles/kernel/split/kernel_branched.h
@@ -19,215 +19,213 @@ CCL_NAMESPACE_BEGIN
 #ifdef __BRANCHED_PATH__
 
 /* sets up the various state needed to do an indirect loop */
-ccl_device_inline void kernel_split_branched_path_indirect_loop_init(KernelGlobals *kg, int ray_index)
+ccl_device_inline void kernel_split_branched_path_indirect_loop_init(KernelGlobals *kg,
+                                                                     int ray_index)
 {
-	SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
+  SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
 
-	/* save a copy of the state to restore later */
-#define BRANCHED_STORE(name) \
-		branched_state->name = kernel_split_state.name[ray_index];
+  /* save a copy of the state to restore later */
+#  define BRANCHED_STORE(name) branched_state->name = kernel_split_state.name[ray_index];
 
-	BRANCHED_STORE(path_state);
-	BRANCHED_STORE(throughput);
-	BRANCHED_STORE(ray);
-	BRANCHED_STORE(isect);
-	BRANCHED_STORE(ray_state);
+  BRANCHED_STORE(path_state);
+  BRANCHED_STORE(throughput);
+  BRANCHED_STORE(ray);
+  BRANCHED_STORE(isect);
+  BRANCHED_STORE(ray_state);
 
-	*kernel_split_sd(branched_state_sd, ray_index) = *kernel_split_sd(sd, ray_index);
-	for(int i = 0; i < kernel_split_sd(branched_state_sd, ray_index)->num_closure; i++) {
-		kernel_split_sd(branched_state_sd, ray_index)->closure[i] = kernel_split_sd(sd, ray_index)->closure[i];
-	}
+  *kernel_split_sd(branched_state_sd, ray_index) = *kernel_split_sd(sd, ray_index);
+  for (int i = 0; i < kernel_split_sd(branched_state_sd, ray_index)->num_closure; i++) {
+    kernel_split_sd(branched_state_sd, ray_index)->closure[i] =
+        kernel_split_sd(sd, ray_index)->closure[i];
+  }
 
-#undef BRANCHED_STORE
+#  undef BRANCHED_STORE
 
-	/* set loop counters to intial position */
-	branched_state->next_closure = 0;
-	branched_state->next_sample = 0;
+  /* set loop counters to initial position */
+  branched_state->next_closure = 0;
+  branched_state->next_sample = 0;
 }
 
 /* ends an indirect loop and restores the previous state */
-ccl_device_inline void kernel_split_branched_path_indirect_loop_end(KernelGlobals *kg, int ray_index)
+ccl_device_inline void kernel_split_branched_path_indirect_loop_end(KernelGlobals *kg,
+                                                                    int ray_index)
 {
-	SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
+  SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
 
-	/* restore state */
-#define BRANCHED_RESTORE(name) \
-		kernel_split_state.name[ray_index] = branched_state->name;
+  /* restore state */
+#  define BRANCHED_RESTORE(name) kernel_split_state.name[ray_index] = branched_state->name;
 
-	BRANCHED_RESTORE(path_state);
-	BRANCHED_RESTORE(throughput);
-	BRANCHED_RESTORE(ray);
-	BRANCHED_RESTORE(isect);
-	BRANCHED_RESTORE(ray_state);
+  BRANCHED_RESTORE(path_state);
+  BRANCHED_RESTORE(throughput);
+  BRANCHED_RESTORE(ray);
+  BRANCHED_RESTORE(isect);
+  BRANCHED_RESTORE(ray_state);
 
-	*kernel_split_sd(sd, ray_index) = *kernel_split_sd(branched_state_sd, ray_index);
-	for(int i = 0; i < kernel_split_sd(branched_state_sd, ray_index)->num_closure; i++) {
-		kernel_split_sd(sd, ray_index)->closure[i] = kernel_split_sd(branched_state_sd, ray_index)->closure[i];
-	}
+  *kernel_split_sd(sd, ray_index) = *kernel_split_sd(branched_state_sd, ray_index);
+  for (int i = 0; i < kernel_split_sd(branched_state_sd, ray_index)->num_closure; i++) {
+    kernel_split_sd(sd, ray_index)->closure[i] =
+        kernel_split_sd(branched_state_sd, ray_index)->closure[i];
+  }
 
-#undef BRANCHED_RESTORE
+#  undef BRANCHED_RESTORE
 
-	/* leave indirect loop */
-	REMOVE_RAY_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_INDIRECT);
+  /* leave indirect loop */
+  REMOVE_RAY_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_INDIRECT);
 }
 
-ccl_device_inline bool kernel_split_branched_indirect_start_shared(KernelGlobals *kg, int ray_index)
+ccl_device_inline bool kernel_split_branched_indirect_start_shared(KernelGlobals *kg,
+                                                                   int ray_index)
 {
-	ccl_global char *ray_state = kernel_split_state.ray_state;
+  ccl_global char *ray_state = kernel_split_state.ray_state;
 
-	int inactive_ray = dequeue_ray_index(QUEUE_INACTIVE_RAYS,
-		kernel_split_state.queue_data, kernel_split_params.queue_size, kernel_split_params.queue_index);
+  int inactive_ray = dequeue_ray_index(QUEUE_INACTIVE_RAYS,
+                                       kernel_split_state.queue_data,
+                                       kernel_split_params.queue_size,
+                                       kernel_split_params.queue_index);
 
-	if(!IS_STATE(ray_state, inactive_ray, RAY_INACTIVE)) {
-		return false;
-	}
+  if (!IS_STATE(ray_state, inactive_ray, RAY_INACTIVE)) {
+    return false;
+  }
 
-#define SPLIT_DATA_ENTRY(type, name, num) \
-		if(num) { \
-			kernel_split_state.name[inactive_ray] = kernel_split_state.name[ray_index]; \
-		}
-	SPLIT_DATA_ENTRIES_BRANCHED_SHARED
-#undef SPLIT_DATA_ENTRY
+#  define SPLIT_DATA_ENTRY(type, name, num) \
+    if (num) { \
+      kernel_split_state.name[inactive_ray] = kernel_split_state.name[ray_index]; \
+    }
+  SPLIT_DATA_ENTRIES_BRANCHED_SHARED
+#  undef SPLIT_DATA_ENTRY
 
-	*kernel_split_sd(sd, inactive_ray) = *kernel_split_sd(sd, ray_index);
-	for(int i = 0; i < kernel_split_sd(sd, ray_index)->num_closure; i++) {
-		kernel_split_sd(sd, inactive_ray)->closure[i] = kernel_split_sd(sd, ray_index)->closure[i];
-	}
+  *kernel_split_sd(sd, inactive_ray) = *kernel_split_sd(sd, ray_index);
+  for (int i = 0; i < kernel_split_sd(sd, ray_index)->num_closure; i++) {
+    kernel_split_sd(sd, inactive_ray)->closure[i] = kernel_split_sd(sd, ray_index)->closure[i];
+  }
 
-	kernel_split_state.branched_state[inactive_ray].shared_sample_count = 0;
-	kernel_split_state.branched_state[inactive_ray].original_ray = ray_index;
-	kernel_split_state.branched_state[inactive_ray].waiting_on_shared_samples = false;
+  kernel_split_state.branched_state[inactive_ray].shared_sample_count = 0;
+  kernel_split_state.branched_state[inactive_ray].original_ray = ray_index;
+  kernel_split_state.branched_state[inactive_ray].waiting_on_shared_samples = false;
 
-	PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
-	PathRadiance *inactive_L = &kernel_split_state.path_radiance[inactive_ray];
+  PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+  PathRadiance *inactive_L = &kernel_split_state.path_radiance[inactive_ray];
 
-	path_radiance_init(inactive_L, kernel_data.film.use_light_pass);
-	path_radiance_copy_indirect(inactive_L, L);
+  path_radiance_init(inactive_L, kernel_data.film.use_light_pass);
+  path_radiance_copy_indirect(inactive_L, L);
 
-	ray_state[inactive_ray] = RAY_REGENERATED;
-	ADD_RAY_FLAG(ray_state, inactive_ray, RAY_BRANCHED_INDIRECT_SHARED);
-	ADD_RAY_FLAG(ray_state, inactive_ray, IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT));
+  ray_state[inactive_ray] = RAY_REGENERATED;
+  ADD_RAY_FLAG(ray_state, inactive_ray, RAY_BRANCHED_INDIRECT_SHARED);
+  ADD_RAY_FLAG(ray_state, inactive_ray, IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT));
 
-	atomic_fetch_and_inc_uint32((ccl_global uint*)&kernel_split_state.branched_state[ray_index].shared_sample_count);
+  atomic_fetch_and_inc_uint32(
+      (ccl_global uint *)&kernel_split_state.branched_state[ray_index].shared_sample_count);
 
-	return true;
+  return true;
 }
 
 /* bounce off surface and integrate indirect light */
-ccl_device_noinline bool kernel_split_branched_path_surface_indirect_light_iter(KernelGlobals *kg,
-                                                                                int ray_index,
-                                                                                float num_samples_adjust,
-                                                                                ShaderData *saved_sd,
-                                                                                bool reset_path_state,
-                                                                                bool wait_for_shared)
+ccl_device_noinline bool kernel_split_branched_path_surface_indirect_light_iter(
+    KernelGlobals *kg,
+    int ray_index,
+    float num_samples_adjust,
+    ShaderData *saved_sd,
+    bool reset_path_state,
+    bool wait_for_shared)
 {
-	SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
-
-	ShaderData *sd = saved_sd;
-	PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
-	float3 throughput = branched_state->throughput;
-	ccl_global PathState *ps = &kernel_split_state.path_state[ray_index];
-
-	float sum_sample_weight = 0.0f;
-#ifdef __DENOISING_FEATURES__
-	if(ps->denoising_feature_weight > 0.0f) {
-		for(int i = 0; i < sd->num_closure; i++) {
-			const ShaderClosure *sc = &sd->closure[i];
-
-			/* transparency is not handled here, but in outer loop */
-			if(!CLOSURE_IS_BSDF(sc->type) || CLOSURE_IS_BSDF_TRANSPARENT(sc->type)) {
-				continue;
-			}
-
-			sum_sample_weight += sc->sample_weight;
-		}
-	}
-	else {
-		sum_sample_weight = 1.0f;
-	}
-#endif  /* __DENOISING_FEATURES__ */
-
-	for(int i = branched_state->next_closure; i < sd->num_closure; i++) {
-		const ShaderClosure *sc = &sd->closure[i];
-
-		if(!CLOSURE_IS_BSDF(sc->type))
-			continue;
-		/* transparency is not handled here, but in outer loop */
-		if(sc->type == CLOSURE_BSDF_TRANSPARENT_ID)
-			continue;
-
-		int num_samples;
-
-		if(CLOSURE_IS_BSDF_DIFFUSE(sc->type))
-			num_samples = kernel_data.integrator.diffuse_samples;
-		else if(CLOSURE_IS_BSDF_BSSRDF(sc->type))
-			num_samples = 1;
-		else if(CLOSURE_IS_BSDF_GLOSSY(sc->type))
-			num_samples = kernel_data.integrator.glossy_samples;
-		else
-			num_samples = kernel_data.integrator.transmission_samples;
-
-		num_samples = ceil_to_int(num_samples_adjust*num_samples);
-
-		float num_samples_inv = num_samples_adjust/num_samples;
-
-		for(int j = branched_state->next_sample; j < num_samples; j++) {
-			if(reset_path_state) {
-				*ps = branched_state->path_state;
-			}
-
-			ps->rng_hash = cmj_hash(branched_state->path_state.rng_hash, i);
-
-			ccl_global float3 *tp = &kernel_split_state.throughput[ray_index];
-			*tp = throughput;
-
-			ccl_global Ray *bsdf_ray = &kernel_split_state.ray[ray_index];
-
-			if(!kernel_branched_path_surface_bounce(kg,
-			                                        sd,
-			                                        sc,
-			                                        j,
-			                                        num_samples,
-			                                        tp,
-			                                        ps,
-			                                        &L->state,
-			                                        bsdf_ray,
-			                                        sum_sample_weight))
-			{
-				continue;
-			}
-
-			ps->rng_hash = branched_state->path_state.rng_hash;
-
-			/* update state for next iteration */
-			branched_state->next_closure = i;
-			branched_state->next_sample = j+1;
-
-			/* start the indirect path */
-			*tp *= num_samples_inv;
-
-			if(kernel_split_branched_indirect_start_shared(kg, ray_index)) {
-				continue;
-			}
-
-			return true;
-		}
-
-		branched_state->next_sample = 0;
-	}
-
-	branched_state->next_closure = sd->num_closure;
-
-	if(wait_for_shared) {
-		branched_state->waiting_on_shared_samples = (branched_state->shared_sample_count > 0);
-		if(branched_state->waiting_on_shared_samples) {
-			return true;
-		}
-	}
-
-	return false;
+  SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
+
+  ShaderData *sd = saved_sd;
+  PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+  float3 throughput = branched_state->throughput;
+  ccl_global PathState *ps = &kernel_split_state.path_state[ray_index];
+
+  float sum_sample_weight = 0.0f;
+#  ifdef __DENOISING_FEATURES__
+  if (ps->denoising_feature_weight > 0.0f) {
+    for (int i = 0; i < sd->num_closure; i++) {
+      const ShaderClosure *sc = &sd->closure[i];
+
+      /* transparency is not handled here, but in outer loop */
+      if (!CLOSURE_IS_BSDF(sc->type) || CLOSURE_IS_BSDF_TRANSPARENT(sc->type)) {
+        continue;
+      }
+
+      sum_sample_weight += sc->sample_weight;
+    }
+  }
+  else {
+    sum_sample_weight = 1.0f;
+  }
+#  endif /* __DENOISING_FEATURES__ */
+
+  for (int i = branched_state->next_closure; i < sd->num_closure; i++) {
+    const ShaderClosure *sc = &sd->closure[i];
+
+    if (!CLOSURE_IS_BSDF(sc->type))
+      continue;
+    /* transparency is not handled here, but in outer loop */
+    if (sc->type == CLOSURE_BSDF_TRANSPARENT_ID)
+      continue;
+
+    int num_samples;
+
+    if (CLOSURE_IS_BSDF_DIFFUSE(sc->type))
+      num_samples = kernel_data.integrator.diffuse_samples;
+    else if (CLOSURE_IS_BSDF_BSSRDF(sc->type))
+      num_samples = 1;
+    else if (CLOSURE_IS_BSDF_GLOSSY(sc->type))
+      num_samples = kernel_data.integrator.glossy_samples;
+    else
+      num_samples = kernel_data.integrator.transmission_samples;
+
+    num_samples = ceil_to_int(num_samples_adjust * num_samples);
+
+    float num_samples_inv = num_samples_adjust / num_samples;
+
+    for (int j = branched_state->next_sample; j < num_samples; j++) {
+      if (reset_path_state) {
+        *ps = branched_state->path_state;
+      }
+
+      ps->rng_hash = cmj_hash(branched_state->path_state.rng_hash, i);
+
+      ccl_global float3 *tp = &kernel_split_state.throughput[ray_index];
+      *tp = throughput;
+
+      ccl_global Ray *bsdf_ray = &kernel_split_state.ray[ray_index];
+
+      if (!kernel_branched_path_surface_bounce(
+              kg, sd, sc, j, num_samples, tp, ps, &L->state, bsdf_ray, sum_sample_weight)) {
+        continue;
+      }
+
+      ps->rng_hash = branched_state->path_state.rng_hash;
+
+      /* update state for next iteration */
+      branched_state->next_closure = i;
+      branched_state->next_sample = j + 1;
+
+      /* start the indirect path */
+      *tp *= num_samples_inv;
+
+      if (kernel_split_branched_indirect_start_shared(kg, ray_index)) {
+        continue;
+      }
+
+      return true;
+    }
+
+    branched_state->next_sample = 0;
+  }
+
+  branched_state->next_closure = sd->num_closure;
+
+  if (wait_for_shared) {
+    branched_state->waiting_on_shared_samples = (branched_state->shared_sample_count > 0);
+    if (branched_state->waiting_on_shared_samples) {
+      return true;
+    }
+  }
+
+  return false;
 }
 
-#endif  /* __BRANCHED_PATH__ */
+#endif /* __BRANCHED_PATH__ */
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_buffer_update.h b/intern/cycles/kernel/split/kernel_buffer_update.h
index 18eec6372f1..e77743350dc 100644
--- a/intern/cycles/kernel/split/kernel_buffer_update.h
+++ b/intern/cycles/kernel/split/kernel_buffer_update.h
@@ -41,132 +41,133 @@ CCL_NAMESPACE_BEGIN
 ccl_device void kernel_buffer_update(KernelGlobals *kg,
                                      ccl_local_param unsigned int *local_queue_atomics)
 {
-	if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
-		*local_queue_atomics = 0;
-	}
-	ccl_barrier(CCL_LOCAL_MEM_FENCE);
-
-	int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
-	if(ray_index == 0) {
-		/* We will empty this queue in this kernel. */
-		kernel_split_params.queue_index[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = 0;
-	}
-	char enqueue_flag = 0;
-	ray_index = get_ray_index(kg, ray_index,
-	                          QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
-	                          kernel_split_state.queue_data,
-	                          kernel_split_params.queue_size,
-	                          1);
+  if (ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
+    *local_queue_atomics = 0;
+  }
+  ccl_barrier(CCL_LOCAL_MEM_FENCE);
+
+  int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+  if (ray_index == 0) {
+    /* We will empty this queue in this kernel. */
+    kernel_split_params.queue_index[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = 0;
+  }
+  char enqueue_flag = 0;
+  ray_index = get_ray_index(kg,
+                            ray_index,
+                            QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
+                            kernel_split_state.queue_data,
+                            kernel_split_params.queue_size,
+                            1);
 
 #ifdef __COMPUTE_DEVICE_GPU__
-	/* If we are executing on a GPU device, we exit all threads that are not
-	 * required.
-	 *
-	 * If we are executing on a CPU device, then we need to keep all threads
-	 * active since we have barrier() calls later in the kernel. CPU devices,
-	 * expect all threads to execute barrier statement.
-	 */
-	if(ray_index == QUEUE_EMPTY_SLOT) {
-		return;
-	}
+  /* If we are executing on a GPU device, we exit all threads that are not
+   * required.
+   *
+   * If we are executing on a CPU device, then we need to keep all threads
+   * active since we have barrier() calls later in the kernel. CPU devices
+   * expect all threads to execute the barrier statement.
+   */
+  if (ray_index == QUEUE_EMPTY_SLOT) {
+    return;
+  }
 #endif
 
 #ifndef __COMPUTE_DEVICE_GPU__
-	if(ray_index != QUEUE_EMPTY_SLOT) {
+  if (ray_index != QUEUE_EMPTY_SLOT) {
 #endif
 
-	ccl_global char *ray_state = kernel_split_state.ray_state;
-	ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
-	PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
-	ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
-	ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
-	bool ray_was_updated = false;
-
-	if(IS_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER)) {
-		ray_was_updated = true;
-		uint sample = state->sample;
-		uint buffer_offset = kernel_split_state.buffer_offset[ray_index];
-		ccl_global float *buffer = kernel_split_params.tile.buffer + buffer_offset;
-
-		/* accumulate result in output buffer */
-		kernel_write_result(kg, buffer, sample, L);
-
-		ASSIGN_RAY_STATE(ray_state, ray_index, RAY_TO_REGENERATE);
-	}
-
-	if(kernel_data.film.cryptomatte_passes) {
-		/* Make sure no thread is writing to the buffers. */
-		ccl_barrier(CCL_LOCAL_MEM_FENCE);
-		if(ray_was_updated && state->sample - 1 == kernel_data.integrator.aa_samples) {
-			uint buffer_offset = kernel_split_state.buffer_offset[ray_index];
-			ccl_global float *buffer = kernel_split_params.tile.buffer + buffer_offset;
-			ccl_global float *cryptomatte_buffer = buffer + kernel_data.film.pass_cryptomatte;
-			kernel_sort_id_slots(cryptomatte_buffer, 2 * kernel_data.film.cryptomatte_depth);
-		}
-	}
-
-	if(IS_STATE(ray_state, ray_index, RAY_TO_REGENERATE)) {
-		/* We have completed current work; So get next work */
-		ccl_global uint *work_pools = kernel_split_params.work_pools;
-		uint total_work_size = kernel_split_params.total_work_size;
-		uint work_index;
-
-		if(!get_next_work(kg, work_pools, total_work_size, ray_index, &work_index)) {
-			/* If work is invalid, this means no more work is available and the thread may exit */
-			ASSIGN_RAY_STATE(ray_state, ray_index, RAY_INACTIVE);
-		}
-
-		if(IS_STATE(ray_state, ray_index, RAY_TO_REGENERATE)) {
-			ccl_global WorkTile *tile = &kernel_split_params.tile;
-			uint x, y, sample;
-			get_work_pixel(tile, work_index, &x, &y, &sample);
-
-			/* Store buffer offset for writing to passes. */
-			uint buffer_offset = (tile->offset + x + y*tile->stride) * kernel_data.film.pass_stride;
-			kernel_split_state.buffer_offset[ray_index] = buffer_offset;
-
-			/* Initialize random numbers and ray. */
-			uint rng_hash;
-			kernel_path_trace_setup(kg, sample, x, y, &rng_hash, ray);
-
-			if(ray->t != 0.0f) {
-				/* Initialize throughput, path radiance, Ray, PathState;
-				 * These rays proceed with path-iteration.
-				 */
-				*throughput = make_float3(1.0f, 1.0f, 1.0f);
-				path_radiance_init(L, kernel_data.film.use_light_pass);
-				path_state_init(kg,
-				                AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]),
-				                state,
-				                rng_hash,
-				                sample,
-				                ray);
+    ccl_global char *ray_state = kernel_split_state.ray_state;
+    ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+    PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+    ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
+    ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
+    bool ray_was_updated = false;
+
+    if (IS_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER)) {
+      ray_was_updated = true;
+      uint sample = state->sample;
+      uint buffer_offset = kernel_split_state.buffer_offset[ray_index];
+      ccl_global float *buffer = kernel_split_params.tile.buffer + buffer_offset;
+
+      /* accumulate result in output buffer */
+      kernel_write_result(kg, buffer, sample, L);
+
+      ASSIGN_RAY_STATE(ray_state, ray_index, RAY_TO_REGENERATE);
+    }
+
+    if (kernel_data.film.cryptomatte_passes) {
+      /* Make sure no thread is writing to the buffers. */
+      ccl_barrier(CCL_LOCAL_MEM_FENCE);
+      if (ray_was_updated && state->sample - 1 == kernel_data.integrator.aa_samples) {
+        uint buffer_offset = kernel_split_state.buffer_offset[ray_index];
+        ccl_global float *buffer = kernel_split_params.tile.buffer + buffer_offset;
+        ccl_global float *cryptomatte_buffer = buffer + kernel_data.film.pass_cryptomatte;
+        kernel_sort_id_slots(cryptomatte_buffer, 2 * kernel_data.film.cryptomatte_depth);
+      }
+    }
+
+    if (IS_STATE(ray_state, ray_index, RAY_TO_REGENERATE)) {
+      /* We have completed current work; So get next work */
+      ccl_global uint *work_pools = kernel_split_params.work_pools;
+      uint total_work_size = kernel_split_params.total_work_size;
+      uint work_index;
+
+      if (!get_next_work(kg, work_pools, total_work_size, ray_index, &work_index)) {
+        /* If work is invalid, this means no more work is available and the thread may exit */
+        ASSIGN_RAY_STATE(ray_state, ray_index, RAY_INACTIVE);
+      }
+
+      if (IS_STATE(ray_state, ray_index, RAY_TO_REGENERATE)) {
+        ccl_global WorkTile *tile = &kernel_split_params.tile;
+        uint x, y, sample;
+        get_work_pixel(tile, work_index, &x, &y, &sample);
+
+        /* Store buffer offset for writing to passes. */
+        uint buffer_offset = (tile->offset + x + y * tile->stride) * kernel_data.film.pass_stride;
+        kernel_split_state.buffer_offset[ray_index] = buffer_offset;
+
+        /* Initialize random numbers and ray. */
+        uint rng_hash;
+        kernel_path_trace_setup(kg, sample, x, y, &rng_hash, ray);
+
+        if (ray->t != 0.0f) {
+          /* Initialize throughput, path radiance, Ray, PathState;
+           * These rays proceed with path-iteration.
+           */
+          *throughput = make_float3(1.0f, 1.0f, 1.0f);
+          path_radiance_init(L, kernel_data.film.use_light_pass);
+          path_state_init(kg,
+                          AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]),
+                          state,
+                          rng_hash,
+                          sample,
+                          ray);
 #ifdef __SUBSURFACE__
-				kernel_path_subsurface_init_indirect(&kernel_split_state.ss_rays[ray_index]);
+          kernel_path_subsurface_init_indirect(&kernel_split_state.ss_rays[ray_index]);
 #endif
-				ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
-				enqueue_flag = 1;
-			}
-			else {
-				ASSIGN_RAY_STATE(ray_state, ray_index, RAY_TO_REGENERATE);
-			}
-		}
-	}
+          ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
+          enqueue_flag = 1;
+        }
+        else {
+          ASSIGN_RAY_STATE(ray_state, ray_index, RAY_TO_REGENERATE);
+        }
+      }
+    }
 
 #ifndef __COMPUTE_DEVICE_GPU__
-	}
+  }
 #endif
 
-	/* Enqueue RAY_REGENERATED rays into QUEUE_ACTIVE_AND_REGENERATED_RAYS;
-	 * These rays will be made active during next SceneIntersectkernel.
-	 */
-	enqueue_ray_index_local(ray_index,
-	                        QUEUE_ACTIVE_AND_REGENERATED_RAYS,
-	                        enqueue_flag,
-	                        kernel_split_params.queue_size,
-	                        local_queue_atomics,
-	                        kernel_split_state.queue_data,
-	                        kernel_split_params.queue_index);
+  /* Enqueue RAY_REGENERATED rays into QUEUE_ACTIVE_AND_REGENERATED_RAYS;
+   * These rays will be made active during next SceneIntersectkernel.
+   */
+  enqueue_ray_index_local(ray_index,
+                          QUEUE_ACTIVE_AND_REGENERATED_RAYS,
+                          enqueue_flag,
+                          kernel_split_params.queue_size,
+                          local_queue_atomics,
+                          kernel_split_state.queue_data,
+                          kernel_split_params.queue_index);
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_data_init.h b/intern/cycles/kernel/split/kernel_data_init.h
index 77fb61b80a8..52930843f56 100644
--- a/intern/cycles/kernel/split/kernel_data_init.h
+++ b/intern/cycles/kernel/split/kernel_data_init.h
@@ -28,82 +28,88 @@ ccl_device void kernel_data_init(
 #else
 void KERNEL_FUNCTION_FULL_NAME(data_init)(
 #endif
-        KernelGlobals *kg,
-        ccl_constant KernelData *data,
-        ccl_global void *split_data_buffer,
-        int num_elements,
-        ccl_global char *ray_state,
+    KernelGlobals *kg,
+    ccl_constant KernelData *data,
+    ccl_global void *split_data_buffer,
+    int num_elements,
+    ccl_global char *ray_state,
 
 #ifdef __KERNEL_OPENCL__
-		KERNEL_BUFFER_PARAMS,
+    KERNEL_BUFFER_PARAMS,
 #endif
 
-        int start_sample,
-        int end_sample,
-        int sx, int sy, int sw, int sh, int offset, int stride,
-        ccl_global int *Queue_index,                 /* Tracks the number of elements in queues */
-        int queuesize,                               /* size (capacity) of the queue */
-        ccl_global char *use_queues_flag,            /* flag to decide if scene-intersect kernel should use queues to fetch ray index */
-        ccl_global unsigned int *work_pools,      /* Work pool for each work group */
-        unsigned int num_samples,
-        ccl_global float *buffer)
+    int start_sample,
+    int end_sample,
+    int sx,
+    int sy,
+    int sw,
+    int sh,
+    int offset,
+    int stride,
+    ccl_global int *Queue_index, /* Tracks the number of elements in queues */
+    int queuesize,               /* size (capacity) of the queue */
+    ccl_global char *
+        use_queues_flag, /* flag to decide if scene-intersect kernel should use queues to fetch ray index */
+    ccl_global unsigned int *work_pools, /* Work pool for each work group */
+    unsigned int num_samples,
+    ccl_global float *buffer)
 {
 #ifdef KERNEL_STUB
-	STUB_ASSERT(KERNEL_ARCH, data_init);
+  STUB_ASSERT(KERNEL_ARCH, data_init);
 #else
 
-#ifdef __KERNEL_OPENCL__
-	kg->data = data;
-#endif
+#  ifdef __KERNEL_OPENCL__
+  kg->data = data;
+#  endif
 
-	kernel_split_params.tile.x = sx;
-	kernel_split_params.tile.y = sy;
-	kernel_split_params.tile.w = sw;
-	kernel_split_params.tile.h = sh;
+  kernel_split_params.tile.x = sx;
+  kernel_split_params.tile.y = sy;
+  kernel_split_params.tile.w = sw;
+  kernel_split_params.tile.h = sh;
 
-	kernel_split_params.tile.start_sample = start_sample;
-	kernel_split_params.tile.num_samples = num_samples;
+  kernel_split_params.tile.start_sample = start_sample;
+  kernel_split_params.tile.num_samples = num_samples;
 
-	kernel_split_params.tile.offset = offset;
-	kernel_split_params.tile.stride = stride;
+  kernel_split_params.tile.offset = offset;
+  kernel_split_params.tile.stride = stride;
 
-	kernel_split_params.tile.buffer = buffer;
+  kernel_split_params.tile.buffer = buffer;
 
-	kernel_split_params.total_work_size = sw * sh * num_samples;
+  kernel_split_params.total_work_size = sw * sh * num_samples;
 
-	kernel_split_params.work_pools = work_pools;
+  kernel_split_params.work_pools = work_pools;
 
-	kernel_split_params.queue_index = Queue_index;
-	kernel_split_params.queue_size = queuesize;
-	kernel_split_params.use_queues_flag = use_queues_flag;
+  kernel_split_params.queue_index = Queue_index;
+  kernel_split_params.queue_size = queuesize;
+  kernel_split_params.use_queues_flag = use_queues_flag;
 
-	split_data_init(kg, &kernel_split_state, num_elements, split_data_buffer, ray_state);
+  split_data_init(kg, &kernel_split_state, num_elements, split_data_buffer, ray_state);
 
-#ifdef __KERNEL_OPENCL__
-	kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS);
-	kernel_set_buffer_info(kg);
-#endif
+#  ifdef __KERNEL_OPENCL__
+  kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS);
+  kernel_set_buffer_info(kg);
+#  endif
+
+  int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+
+  /* Initialize queue data and queue index. */
+  if (thread_index < queuesize) {
+    for (int i = 0; i < NUM_QUEUES; i++) {
+      kernel_split_state.queue_data[i * queuesize + thread_index] = QUEUE_EMPTY_SLOT;
+    }
+  }
+
+  if (thread_index == 0) {
+    for (int i = 0; i < NUM_QUEUES; i++) {
+      Queue_index[i] = 0;
+    }
 
-	int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
-
-	/* Initialize queue data and queue index. */
-	if(thread_index < queuesize) {
-		for(int i = 0; i < NUM_QUEUES; i++) {
-			kernel_split_state.queue_data[i * queuesize + thread_index] = QUEUE_EMPTY_SLOT;
-		}
-	}
-
-	if(thread_index == 0) {
-		for(int i = 0; i < NUM_QUEUES; i++) {
-			Queue_index[i] = 0;
-		}
-
-		/* The scene-intersect kernel should not use the queues very first time.
-		 * since the queue would be empty.
-		 */
-		*use_queues_flag = 0;
-	}
-#endif  /* KERENL_STUB */
+    /* The scene-intersect kernel should not use the queues the very first time,
+     * since the queues would be empty.
+     */
+    *use_queues_flag = 0;
+  }
+#endif /* KERNEL_STUB */
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_direct_lighting.h b/intern/cycles/kernel/split/kernel_direct_lighting.h
index ca79602c565..b2ca59d60cc 100644
--- a/intern/cycles/kernel/split/kernel_direct_lighting.h
+++ b/intern/cycles/kernel/split/kernel_direct_lighting.h
@@ -43,116 +43,111 @@ CCL_NAMESPACE_BEGIN
 ccl_device void kernel_direct_lighting(KernelGlobals *kg,
                                        ccl_local_param unsigned int *local_queue_atomics)
 {
-	if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
-		*local_queue_atomics = 0;
-	}
-	ccl_barrier(CCL_LOCAL_MEM_FENCE);
-
-	char enqueue_flag = 0;
-	int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
-	ray_index = get_ray_index(kg, ray_index,
-	                          QUEUE_ACTIVE_AND_REGENERATED_RAYS,
-	                          kernel_split_state.queue_data,
-	                          kernel_split_params.queue_size,
-	                          0);
-
-	if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE)) {
-		ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
-		ShaderData *sd = kernel_split_sd(sd, ray_index);
-
-		/* direct lighting */
+  if (ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
+    *local_queue_atomics = 0;
+  }
+  ccl_barrier(CCL_LOCAL_MEM_FENCE);
+
+  char enqueue_flag = 0;
+  int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+  ray_index = get_ray_index(kg,
+                            ray_index,
+                            QUEUE_ACTIVE_AND_REGENERATED_RAYS,
+                            kernel_split_state.queue_data,
+                            kernel_split_params.queue_size,
+                            0);
+
+  if (IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE)) {
+    ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+    ShaderData *sd = kernel_split_sd(sd, ray_index);
+
+    /* direct lighting */
 #ifdef __EMISSION__
-		bool flag = (kernel_data.integrator.use_direct_light &&
-		             (sd->flag & SD_BSDF_HAS_EVAL));
+    bool flag = (kernel_data.integrator.use_direct_light && (sd->flag & SD_BSDF_HAS_EVAL));
 
 #  ifdef __BRANCHED_PATH__
-		if(flag && kernel_data.integrator.branched) {
-			flag = false;
-			enqueue_flag = 1;
-		}
-#  endif  /* __BRANCHED_PATH__ */
+    if (flag && kernel_data.integrator.branched) {
+      flag = false;
+      enqueue_flag = 1;
+    }
+#  endif /* __BRANCHED_PATH__ */
 
 #  ifdef __SHADOW_TRICKS__
-		if(flag && state->flag & PATH_RAY_SHADOW_CATCHER) {
-			flag = false;
-			enqueue_flag = 1;
-		}
-#  endif  /* __SHADOW_TRICKS__ */
-
-		if(flag) {
-			/* Sample illumination from lights to find path contribution. */
-			float light_u, light_v;
-			path_state_rng_2D(kg, state, PRNG_LIGHT_U, &light_u, &light_v);
-			float terminate = path_state_rng_light_termination(kg, state);
-
-			LightSample ls;
-			if(light_sample(kg,
-			                light_u, light_v,
-			                sd->time,
-			                sd->P,
-			                state->bounce,
-			                &ls)) {
-
-				Ray light_ray;
-				light_ray.time = sd->time;
-
-				BsdfEval L_light;
-				bool is_lamp;
-				if(direct_emission(kg,
-				                   sd,
-				                   AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]),
-				                   &ls,
-				                   state,
-				                   &light_ray,
-				                   &L_light,
-				                   &is_lamp,
-				                   terminate))
-				{
-					/* Write intermediate data to global memory to access from
-					 * the next kernel.
-					 */
-					kernel_split_state.light_ray[ray_index] = light_ray;
-					kernel_split_state.bsdf_eval[ray_index] = L_light;
-					kernel_split_state.is_lamp[ray_index] = is_lamp;
-					/* Mark ray state for next shadow kernel. */
-					enqueue_flag = 1;
-				}
-			}
-		}
-#endif  /* __EMISSION__ */
-	}
+    if (flag && state->flag & PATH_RAY_SHADOW_CATCHER) {
+      flag = false;
+      enqueue_flag = 1;
+    }
+#  endif /* __SHADOW_TRICKS__ */
+
+    if (flag) {
+      /* Sample illumination from lights to find path contribution. */
+      float light_u, light_v;
+      path_state_rng_2D(kg, state, PRNG_LIGHT_U, &light_u, &light_v);
+      float terminate = path_state_rng_light_termination(kg, state);
+
+      LightSample ls;
+      if (light_sample(kg, light_u, light_v, sd->time, sd->P, state->bounce, &ls)) {
+
+        Ray light_ray;
+        light_ray.time = sd->time;
+
+        BsdfEval L_light;
+        bool is_lamp;
+        if (direct_emission(kg,
+                            sd,
+                            AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]),
+                            &ls,
+                            state,
+                            &light_ray,
+                            &L_light,
+                            &is_lamp,
+                            terminate)) {
+          /* Write intermediate data to global memory to access from
+           * the next kernel.
+           */
+          kernel_split_state.light_ray[ray_index] = light_ray;
+          kernel_split_state.bsdf_eval[ray_index] = L_light;
+          kernel_split_state.is_lamp[ray_index] = is_lamp;
+          /* Mark ray state for next shadow kernel. */
+          enqueue_flag = 1;
+        }
+      }
+    }
+#endif /* __EMISSION__ */
+  }
 
 #ifdef __EMISSION__
-	/* Enqueue RAY_SHADOW_RAY_CAST_DL rays. */
-	enqueue_ray_index_local(ray_index,
-	                        QUEUE_SHADOW_RAY_CAST_DL_RAYS,
-	                        enqueue_flag,
-	                        kernel_split_params.queue_size,
-	                        local_queue_atomics,
-	                        kernel_split_state.queue_data,
-	                        kernel_split_params.queue_index);
+  /* Enqueue RAY_SHADOW_RAY_CAST_DL rays. */
+  enqueue_ray_index_local(ray_index,
+                          QUEUE_SHADOW_RAY_CAST_DL_RAYS,
+                          enqueue_flag,
+                          kernel_split_params.queue_size,
+                          local_queue_atomics,
+                          kernel_split_state.queue_data,
+                          kernel_split_params.queue_index);
 #endif
 
 #ifdef __BRANCHED_PATH__
-	/* Enqueue RAY_LIGHT_INDIRECT_NEXT_ITER rays
-	 * this is the last kernel before next_iteration_setup that uses local atomics so we do this here
-	 */
-	ccl_barrier(CCL_LOCAL_MEM_FENCE);
-	if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
-		*local_queue_atomics = 0;
-	}
-	ccl_barrier(CCL_LOCAL_MEM_FENCE);
-
-	ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
-	enqueue_ray_index_local(ray_index,
-	                        QUEUE_LIGHT_INDIRECT_ITER,
-	                        IS_STATE(kernel_split_state.ray_state, ray_index, RAY_LIGHT_INDIRECT_NEXT_ITER),
-	                        kernel_split_params.queue_size,
-	                        local_queue_atomics,
-	                        kernel_split_state.queue_data,
-	                        kernel_split_params.queue_index);
-
-#endif  /* __BRANCHED_PATH__ */
+  /* Enqueue RAY_LIGHT_INDIRECT_NEXT_ITER rays
+   * this is the last kernel before next_iteration_setup that uses local atomics so we do this here
+   */
+  ccl_barrier(CCL_LOCAL_MEM_FENCE);
+  if (ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
+    *local_queue_atomics = 0;
+  }
+  ccl_barrier(CCL_LOCAL_MEM_FENCE);
+
+  ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+  enqueue_ray_index_local(
+      ray_index,
+      QUEUE_LIGHT_INDIRECT_ITER,
+      IS_STATE(kernel_split_state.ray_state, ray_index, RAY_LIGHT_INDIRECT_NEXT_ITER),
+      kernel_split_params.queue_size,
+      local_queue_atomics,
+      kernel_split_state.queue_data,
+      kernel_split_params.queue_index);
+
+#endif /* __BRANCHED_PATH__ */
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_do_volume.h b/intern/cycles/kernel/split/kernel_do_volume.h
index fb5bd3d48dd..45b839db05f 100644
--- a/intern/cycles/kernel/split/kernel_do_volume.h
+++ b/intern/cycles/kernel/split/kernel_do_volume.h
@@ -18,203 +18,210 @@ CCL_NAMESPACE_BEGIN
 
 #if defined(__BRANCHED_PATH__) && defined(__VOLUME__)
 
-ccl_device_inline void kernel_split_branched_path_volume_indirect_light_init(KernelGlobals *kg, int ray_index)
+ccl_device_inline void kernel_split_branched_path_volume_indirect_light_init(KernelGlobals *kg,
+                                                                             int ray_index)
 {
-	kernel_split_branched_path_indirect_loop_init(kg, ray_index);
+  kernel_split_branched_path_indirect_loop_init(kg, ray_index);
 
-	ADD_RAY_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_VOLUME_INDIRECT);
+  ADD_RAY_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_VOLUME_INDIRECT);
 }
 
-ccl_device_noinline bool kernel_split_branched_path_volume_indirect_light_iter(KernelGlobals *kg, int ray_index)
+ccl_device_noinline bool kernel_split_branched_path_volume_indirect_light_iter(KernelGlobals *kg,
+                                                                               int ray_index)
 {
-	SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
+  SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
 
-	ShaderData *sd = kernel_split_sd(sd, ray_index);
-	PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
-	ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
+  ShaderData *sd = kernel_split_sd(sd, ray_index);
+  PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+  ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
 
-	/* GPU: no decoupled ray marching, scatter probalistically */
-	int num_samples = kernel_data.integrator.volume_samples;
-	float num_samples_inv = 1.0f/num_samples;
+  /* GPU: no decoupled ray marching, scatter probabilistically */
+  int num_samples = kernel_data.integrator.volume_samples;
+  float num_samples_inv = 1.0f / num_samples;
 
-	Ray volume_ray = branched_state->ray;
-	volume_ray.t = (!IS_STATE(&branched_state->ray_state, 0, RAY_HIT_BACKGROUND)) ? branched_state->isect.t : FLT_MAX;
+  Ray volume_ray = branched_state->ray;
+  volume_ray.t = (!IS_STATE(&branched_state->ray_state, 0, RAY_HIT_BACKGROUND)) ?
+                     branched_state->isect.t :
+                     FLT_MAX;
 
-	bool heterogeneous = volume_stack_is_heterogeneous(kg, branched_state->path_state.volume_stack);
+  bool heterogeneous = volume_stack_is_heterogeneous(kg, branched_state->path_state.volume_stack);
 
-	for(int j = branched_state->next_sample; j < num_samples; j++) {
-		ccl_global PathState *ps = &kernel_split_state.path_state[ray_index];
-		*ps = branched_state->path_state;
+  for (int j = branched_state->next_sample; j < num_samples; j++) {
+    ccl_global PathState *ps = &kernel_split_state.path_state[ray_index];
+    *ps = branched_state->path_state;
 
-		ccl_global Ray *pray = &kernel_split_state.ray[ray_index];
-		*pray = branched_state->ray;
+    ccl_global Ray *pray = &kernel_split_state.ray[ray_index];
+    *pray = branched_state->ray;
 
-		ccl_global float3 *tp = &kernel_split_state.throughput[ray_index];
-		*tp = branched_state->throughput * num_samples_inv;
+    ccl_global float3 *tp = &kernel_split_state.throughput[ray_index];
+    *tp = branched_state->throughput * num_samples_inv;
 
-		/* branch RNG state */
-		path_state_branch(ps, j, num_samples);
+    /* branch RNG state */
+    path_state_branch(ps, j, num_samples);
 
-		/* integrate along volume segment with distance sampling */
-		VolumeIntegrateResult result = kernel_volume_integrate(
-			kg, ps, sd, &volume_ray, L, tp, heterogeneous);
+    /* integrate along volume segment with distance sampling */
+    VolumeIntegrateResult result = kernel_volume_integrate(
+        kg, ps, sd, &volume_ray, L, tp, heterogeneous);
 
 #  ifdef __VOLUME_SCATTER__
-		if(result == VOLUME_PATH_SCATTERED) {
-			/* direct lighting */
-			kernel_path_volume_connect_light(kg, sd, emission_sd, *tp, &branched_state->path_state, L);
-
-			/* indirect light bounce */
-			if(!kernel_path_volume_bounce(kg, sd, tp, ps, &L->state, pray)) {
-				continue;
-			}
-
-			/* start the indirect path */
-			branched_state->next_closure = 0;
-			branched_state->next_sample = j+1;
-
-			/* Attempting to share too many samples is slow for volumes as it causes us to
-			 * loop here more and have many calls to kernel_volume_integrate which evaluates
-			 * shaders. The many expensive shader evaluations cause the work load to become
-			 * unbalanced and many threads to become idle in this kernel. Limiting the
-			 * number of shared samples here helps quite a lot.
-			 */
-			if(branched_state->shared_sample_count < 2) {
-				if(kernel_split_branched_indirect_start_shared(kg, ray_index)) {
-					continue;
-				}
-			}
-
-			return true;
-		}
+    if (result == VOLUME_PATH_SCATTERED) {
+      /* direct lighting */
+      kernel_path_volume_connect_light(kg, sd, emission_sd, *tp, &branched_state->path_state, L);
+
+      /* indirect light bounce */
+      if (!kernel_path_volume_bounce(kg, sd, tp, ps, &L->state, pray)) {
+        continue;
+      }
+
+      /* start the indirect path */
+      branched_state->next_closure = 0;
+      branched_state->next_sample = j + 1;
+
+      /* Attempting to share too many samples is slow for volumes as it causes us to
+       * loop here more and have many calls to kernel_volume_integrate which evaluates
+       * shaders. The many expensive shader evaluations cause the work load to become
+       * unbalanced and many threads to become idle in this kernel. Limiting the
+       * number of shared samples here helps quite a lot.
+       */
+      if (branched_state->shared_sample_count < 2) {
+        if (kernel_split_branched_indirect_start_shared(kg, ray_index)) {
+          continue;
+        }
+      }
+
+      return true;
+    }
 #  endif
-	}
+  }
 
-	branched_state->next_sample = num_samples;
+  branched_state->next_sample = num_samples;
 
-	branched_state->waiting_on_shared_samples = (branched_state->shared_sample_count > 0);
-	if(branched_state->waiting_on_shared_samples) {
-		return true;
-	}
+  branched_state->waiting_on_shared_samples = (branched_state->shared_sample_count > 0);
+  if (branched_state->waiting_on_shared_samples) {
+    return true;
+  }
 
-	kernel_split_branched_path_indirect_loop_end(kg, ray_index);
+  kernel_split_branched_path_indirect_loop_end(kg, ray_index);
 
-	/* todo: avoid this calculation using decoupled ray marching */
-	float3 throughput = kernel_split_state.throughput[ray_index];
-	kernel_volume_shadow(kg, emission_sd, &kernel_split_state.path_state[ray_index], &volume_ray, &throughput);
-	kernel_split_state.throughput[ray_index] = throughput;
+  /* todo: avoid this calculation using decoupled ray marching */
+  float3 throughput = kernel_split_state.throughput[ray_index];
+  kernel_volume_shadow(
+      kg, emission_sd, &kernel_split_state.path_state[ray_index], &volume_ray, &throughput);
+  kernel_split_state.throughput[ray_index] = throughput;
 
-	return false;
+  return false;
 }
 
-#endif  /* __BRANCHED_PATH__ && __VOLUME__ */
+#endif /* __BRANCHED_PATH__ && __VOLUME__ */
 
 ccl_device void kernel_do_volume(KernelGlobals *kg)
 {
 #ifdef __VOLUME__
-	/* We will empty this queue in this kernel. */
-	if(ccl_global_id(0) == 0 && ccl_global_id(1) == 0) {
-		kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0;
+  /* We will empty this queue in this kernel. */
+  if (ccl_global_id(0) == 0 && ccl_global_id(1) == 0) {
+    kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0;
 #  ifdef __BRANCHED_PATH__
-		kernel_split_params.queue_index[QUEUE_VOLUME_INDIRECT_ITER] = 0;
-#  endif  /* __BRANCHED_PATH__ */
-	}
-
-	int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
-
-	if(*kernel_split_params.use_queues_flag) {
-		ray_index = get_ray_index(kg, ray_index,
-		                          QUEUE_ACTIVE_AND_REGENERATED_RAYS,
-		                          kernel_split_state.queue_data,
-		                          kernel_split_params.queue_size,
-		                          1);
-	}
-
-	ccl_global char *ray_state = kernel_split_state.ray_state;
-
-	PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
-	ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
-
-	if(IS_STATE(ray_state, ray_index, RAY_ACTIVE) ||
-	   IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) {
-		ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
-		ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
-		ccl_global Intersection *isect = &kernel_split_state.isect[ray_index];
-		ShaderData *sd = kernel_split_sd(sd, ray_index);
-		ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
-
-		bool hit = ! IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND);
-
-		/* Sanitize volume stack. */
-		if(!hit) {
-			kernel_volume_clean_stack(kg, state->volume_stack);
-		}
-		/* volume attenuation, emission, scatter */
-		if(state->volume_stack[0].shader != SHADER_NONE) {
-			Ray volume_ray = *ray;
-			volume_ray.t = (hit)? isect->t: FLT_MAX;
+    kernel_split_params.queue_index[QUEUE_VOLUME_INDIRECT_ITER] = 0;
+#  endif /* __BRANCHED_PATH__ */
+  }
+
+  int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+
+  if (*kernel_split_params.use_queues_flag) {
+    ray_index = get_ray_index(kg,
+                              ray_index,
+                              QUEUE_ACTIVE_AND_REGENERATED_RAYS,
+                              kernel_split_state.queue_data,
+                              kernel_split_params.queue_size,
+                              1);
+  }
+
+  ccl_global char *ray_state = kernel_split_state.ray_state;
+
+  PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+  ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+
+  if (IS_STATE(ray_state, ray_index, RAY_ACTIVE) ||
+      IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) {
+    ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
+    ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
+    ccl_global Intersection *isect = &kernel_split_state.isect[ray_index];
+    ShaderData *sd = kernel_split_sd(sd, ray_index);
+    ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
+
+    bool hit = !IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND);
+
+    /* Sanitize volume stack. */
+    if (!hit) {
+      kernel_volume_clean_stack(kg, state->volume_stack);
+    }
+    /* volume attenuation, emission, scatter */
+    if (state->volume_stack[0].shader != SHADER_NONE) {
+      Ray volume_ray = *ray;
+      volume_ray.t = (hit) ? isect->t : FLT_MAX;
 
 #  ifdef __BRANCHED_PATH__
-			if(!kernel_data.integrator.branched || IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT)) {
-#  endif  /* __BRANCHED_PATH__ */
-				bool heterogeneous = volume_stack_is_heterogeneous(kg, state->volume_stack);
+      if (!kernel_data.integrator.branched ||
+          IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT)) {
+#  endif /* __BRANCHED_PATH__ */
+        bool heterogeneous = volume_stack_is_heterogeneous(kg, state->volume_stack);
 
-				{
-					/* integrate along volume segment with distance sampling */
-					VolumeIntegrateResult result = kernel_volume_integrate(
-						kg, state, sd, &volume_ray, L, throughput, heterogeneous);
+        {
+          /* integrate along volume segment with distance sampling */
+          VolumeIntegrateResult result = kernel_volume_integrate(
+              kg, state, sd, &volume_ray, L, throughput, heterogeneous);
 
 #  ifdef __VOLUME_SCATTER__
-					if(result == VOLUME_PATH_SCATTERED) {
-						/* direct lighting */
-						kernel_path_volume_connect_light(kg, sd, emission_sd, *throughput, state, L);
-
-						/* indirect light bounce */
-						if(kernel_path_volume_bounce(kg, sd, throughput, state, &L->state, ray)) {
-							ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
-						}
-						else {
-							kernel_split_path_end(kg, ray_index);
-						}
-					}
-#  endif  /* __VOLUME_SCATTER__ */
-				}
+          if (result == VOLUME_PATH_SCATTERED) {
+            /* direct lighting */
+            kernel_path_volume_connect_light(kg, sd, emission_sd, *throughput, state, L);
+
+            /* indirect light bounce */
+            if (kernel_path_volume_bounce(kg, sd, throughput, state, &L->state, ray)) {
+              ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
+            }
+            else {
+              kernel_split_path_end(kg, ray_index);
+            }
+          }
+#  endif /* __VOLUME_SCATTER__ */
+        }
 
 #  ifdef __BRANCHED_PATH__
-			}
-			else {
-				kernel_split_branched_path_volume_indirect_light_init(kg, ray_index);
-
-				if(kernel_split_branched_path_volume_indirect_light_iter(kg, ray_index)) {
-					ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
-				}
-			}
-#  endif  /* __BRANCHED_PATH__ */
-		}
-	}
+      }
+      else {
+        kernel_split_branched_path_volume_indirect_light_init(kg, ray_index);
+
+        if (kernel_split_branched_path_volume_indirect_light_iter(kg, ray_index)) {
+          ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
+        }
+      }
+#  endif /* __BRANCHED_PATH__ */
+    }
+  }
 
 #  ifdef __BRANCHED_PATH__
-	/* iter loop */
-	ray_index = get_ray_index(kg, ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0),
-	                          QUEUE_VOLUME_INDIRECT_ITER,
-	                          kernel_split_state.queue_data,
-	                          kernel_split_params.queue_size,
-	                          1);
-
-	if(IS_STATE(ray_state, ray_index, RAY_VOLUME_INDIRECT_NEXT_ITER)) {
-		/* for render passes, sum and reset indirect light pass variables
-		 * for the next samples */
-		path_radiance_sum_indirect(&kernel_split_state.path_radiance[ray_index]);
-		path_radiance_reset_indirect(&kernel_split_state.path_radiance[ray_index]);
-
-		if(kernel_split_branched_path_volume_indirect_light_iter(kg, ray_index)) {
-			ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
-		}
-	}
-#  endif  /* __BRANCHED_PATH__ */
-
-#endif  /* __VOLUME__ */
+  /* iter loop */
+  ray_index = get_ray_index(kg,
+                            ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0),
+                            QUEUE_VOLUME_INDIRECT_ITER,
+                            kernel_split_state.queue_data,
+                            kernel_split_params.queue_size,
+                            1);
+
+  if (IS_STATE(ray_state, ray_index, RAY_VOLUME_INDIRECT_NEXT_ITER)) {
+    /* for render passes, sum and reset indirect light pass variables
+     * for the next samples */
+    path_radiance_sum_indirect(&kernel_split_state.path_radiance[ray_index]);
+    path_radiance_reset_indirect(&kernel_split_state.path_radiance[ray_index]);
+
+    if (kernel_split_branched_path_volume_indirect_light_iter(kg, ray_index)) {
+      ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
+    }
+  }
+#  endif /* __BRANCHED_PATH__ */
+
+#endif /* __VOLUME__ */
 }
 
-
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_enqueue_inactive.h b/intern/cycles/kernel/split/kernel_enqueue_inactive.h
index 496355bbc3a..31d2daef616 100644
--- a/intern/cycles/kernel/split/kernel_enqueue_inactive.h
+++ b/intern/cycles/kernel/split/kernel_enqueue_inactive.h
@@ -20,27 +20,27 @@ ccl_device void kernel_enqueue_inactive(KernelGlobals *kg,
                                         ccl_local_param unsigned int *local_queue_atomics)
 {
 #ifdef __BRANCHED_PATH__
-	/* Enqeueue RAY_INACTIVE rays into QUEUE_INACTIVE_RAYS queue. */
-	if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
-		*local_queue_atomics = 0;
-	}
-	ccl_barrier(CCL_LOCAL_MEM_FENCE);
+  /* Enqueue RAY_INACTIVE rays into QUEUE_INACTIVE_RAYS queue. */
+  if (ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
+    *local_queue_atomics = 0;
+  }
+  ccl_barrier(CCL_LOCAL_MEM_FENCE);
 
-	int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+  int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
 
-	char enqueue_flag = 0;
-	if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_INACTIVE)) {
-		enqueue_flag = 1;
-	}
+  char enqueue_flag = 0;
+  if (IS_STATE(kernel_split_state.ray_state, ray_index, RAY_INACTIVE)) {
+    enqueue_flag = 1;
+  }
 
-	enqueue_ray_index_local(ray_index,
-	                        QUEUE_INACTIVE_RAYS,
-	                        enqueue_flag,
-	                        kernel_split_params.queue_size,
-	                        local_queue_atomics,
-	                        kernel_split_state.queue_data,
-	                        kernel_split_params.queue_index);
-#endif  /* __BRANCHED_PATH__ */
+  enqueue_ray_index_local(ray_index,
+                          QUEUE_INACTIVE_RAYS,
+                          enqueue_flag,
+                          kernel_split_params.queue_size,
+                          local_queue_atomics,
+                          kernel_split_state.queue_data,
+                          kernel_split_params.queue_index);
+#endif /* __BRANCHED_PATH__ */
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h b/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
index f14eecec2f2..63bc5a8e0ce 100644
--- a/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
+++ b/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
@@ -54,120 +54,112 @@ CCL_NAMESPACE_BEGIN
  */
 
 ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(
-        KernelGlobals *kg,
-        ccl_local_param BackgroundAOLocals *locals)
+    KernelGlobals *kg, ccl_local_param BackgroundAOLocals *locals)
 {
-	if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
-		locals->queue_atomics_bg = 0;
-		locals->queue_atomics_ao = 0;
-	}
-	ccl_barrier(CCL_LOCAL_MEM_FENCE);
+  if (ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
+    locals->queue_atomics_bg = 0;
+    locals->queue_atomics_ao = 0;
+  }
+  ccl_barrier(CCL_LOCAL_MEM_FENCE);
 
 #ifdef __AO__
-	char enqueue_flag = 0;
+  char enqueue_flag = 0;
 #endif
-	int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
-	ray_index = get_ray_index(kg, ray_index,
-	                          QUEUE_ACTIVE_AND_REGENERATED_RAYS,
-	                          kernel_split_state.queue_data,
-	                          kernel_split_params.queue_size,
-	                          0);
+  int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+  ray_index = get_ray_index(kg,
+                            ray_index,
+                            QUEUE_ACTIVE_AND_REGENERATED_RAYS,
+                            kernel_split_state.queue_data,
+                            kernel_split_params.queue_size,
+                            0);
 
 #ifdef __COMPUTE_DEVICE_GPU__
-	/* If we are executing on a GPU device, we exit all threads that are not
-	 * required.
-	 *
-	 * If we are executing on a CPU device, then we need to keep all threads
-	 * active since we have barrier() calls later in the kernel. CPU devices,
-	 * expect all threads to execute barrier statement.
-	 */
-	if(ray_index == QUEUE_EMPTY_SLOT) {
-		return;
-	}
-#endif  /* __COMPUTE_DEVICE_GPU__ */
+  /* If we are executing on a GPU device, we exit all threads that are not
+   * required.
+   *
+   * If we are executing on a CPU device, then we need to keep all threads
+   * active since we have barrier() calls later in the kernel. CPU devices,
+   * expect all threads to execute barrier statement.
+   */
+  if (ray_index == QUEUE_EMPTY_SLOT) {
+    return;
+  }
+#endif /* __COMPUTE_DEVICE_GPU__ */
 
 #ifndef __COMPUTE_DEVICE_GPU__
-	if(ray_index != QUEUE_EMPTY_SLOT) {
+  if (ray_index != QUEUE_EMPTY_SLOT) {
 #endif
 
-	ccl_global PathState *state = 0x0;
-	float3 throughput;
-
-	ccl_global char *ray_state = kernel_split_state.ray_state;
-	ShaderData *sd = kernel_split_sd(sd, ray_index);
-
-	if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
-		uint buffer_offset = kernel_split_state.buffer_offset[ray_index];
-		ccl_global float *buffer = kernel_split_params.tile.buffer + buffer_offset;
-
-		ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
-		ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
-		PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
-
-		throughput = kernel_split_state.throughput[ray_index];
-		state = &kernel_split_state.path_state[ray_index];
-
-		if(!kernel_path_shader_apply(kg,
-		                             sd,
-		                             state,
-		                             ray,
-		                             throughput,
-		                             emission_sd,
-		                             L,
-		                             buffer))
-		{
-			kernel_split_path_end(kg, ray_index);
-		}
-	}
-
-	if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
-		/* Path termination. this is a strange place to put the termination, it's
-		 * mainly due to the mixed in MIS that we use. gives too many unneeded
-		 * shader evaluations, only need emission if we are going to terminate.
-		 */
-		float probability = path_state_continuation_probability(kg, state, throughput);
-
-		if(probability == 0.0f) {
-			kernel_split_path_end(kg, ray_index);
-		}
-		else if(probability < 1.0f) {
-			float terminate = path_state_rng_1D(kg, state, PRNG_TERMINATE);
-			if(terminate >= probability) {
-				kernel_split_path_end(kg, ray_index);
-			}
-			else {
-				kernel_split_state.throughput[ray_index] = throughput/probability;
-			}
-		}
-
-		if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
-			PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
-			kernel_update_denoising_features(kg, sd, state, L);
-		}
-	}
+    ccl_global PathState *state = 0x0;
+    float3 throughput;
+
+    ccl_global char *ray_state = kernel_split_state.ray_state;
+    ShaderData *sd = kernel_split_sd(sd, ray_index);
+
+    if (IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
+      uint buffer_offset = kernel_split_state.buffer_offset[ray_index];
+      ccl_global float *buffer = kernel_split_params.tile.buffer + buffer_offset;
+
+      ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
+      ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
+      PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+
+      throughput = kernel_split_state.throughput[ray_index];
+      state = &kernel_split_state.path_state[ray_index];
+
+      if (!kernel_path_shader_apply(kg, sd, state, ray, throughput, emission_sd, L, buffer)) {
+        kernel_split_path_end(kg, ray_index);
+      }
+    }
+
+    if (IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
+      /* Path termination. this is a strange place to put the termination, it's
+     * mainly due to the mixed in MIS that we use. gives too many unneeded
+     * shader evaluations, only need emission if we are going to terminate.
+     */
+      float probability = path_state_continuation_probability(kg, state, throughput);
+
+      if (probability == 0.0f) {
+        kernel_split_path_end(kg, ray_index);
+      }
+      else if (probability < 1.0f) {
+        float terminate = path_state_rng_1D(kg, state, PRNG_TERMINATE);
+        if (terminate >= probability) {
+          kernel_split_path_end(kg, ray_index);
+        }
+        else {
+          kernel_split_state.throughput[ray_index] = throughput / probability;
+        }
+      }
+
+      if (IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
+        PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+        kernel_update_denoising_features(kg, sd, state, L);
+      }
+    }
 
 #ifdef __AO__
-	if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
-		/* ambient occlusion */
-		if(kernel_data.integrator.use_ambient_occlusion) {
-			enqueue_flag = 1;
-		}
-	}
-#endif  /* __AO__ */
+    if (IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
+      /* ambient occlusion */
+      if (kernel_data.integrator.use_ambient_occlusion) {
+        enqueue_flag = 1;
+      }
+    }
+#endif /* __AO__ */
 
 #ifndef __COMPUTE_DEVICE_GPU__
-	}
+  }
 #endif
 
 #ifdef __AO__
-	/* Enqueue to-shadow-ray-cast rays. */
-	enqueue_ray_index_local(ray_index,
-	                        QUEUE_SHADOW_RAY_CAST_AO_RAYS,
-	                        enqueue_flag,
-	                        kernel_split_params.queue_size,
-	                        &locals->queue_atomics_ao,
-	                        kernel_split_state.queue_data,
-	                        kernel_split_params.queue_index);
+  /* Enqueue to-shadow-ray-cast rays. */
+  enqueue_ray_index_local(ray_index,
+                          QUEUE_SHADOW_RAY_CAST_AO_RAYS,
+                          enqueue_flag,
+                          kernel_split_params.queue_size,
+                          &locals->queue_atomics_ao,
+                          kernel_split_state.queue_data,
+                          kernel_split_params.queue_index);
 #endif
 }
 
diff --git a/intern/cycles/kernel/split/kernel_indirect_background.h b/intern/cycles/kernel/split/kernel_indirect_background.h
index 4cf88a02590..b1c65f61e2c 100644
--- a/intern/cycles/kernel/split/kernel_indirect_background.h
+++ b/intern/cycles/kernel/split/kernel_indirect_background.h
@@ -18,48 +18,50 @@ CCL_NAMESPACE_BEGIN
 
 ccl_device void kernel_indirect_background(KernelGlobals *kg)
 {
-	ccl_global char *ray_state = kernel_split_state.ray_state;
+  ccl_global char *ray_state = kernel_split_state.ray_state;
 
-	int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
-	int ray_index;
+  int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+  int ray_index;
 
-	if(kernel_data.integrator.ao_bounces != INT_MAX) {
-		ray_index = get_ray_index(kg, thread_index,
-		                          QUEUE_ACTIVE_AND_REGENERATED_RAYS,
-		                          kernel_split_state.queue_data,
-		                          kernel_split_params.queue_size,
-		                          0);
+  if (kernel_data.integrator.ao_bounces != INT_MAX) {
+    ray_index = get_ray_index(kg,
+                              thread_index,
+                              QUEUE_ACTIVE_AND_REGENERATED_RAYS,
+                              kernel_split_state.queue_data,
+                              kernel_split_params.queue_size,
+                              0);
 
-		if(ray_index != QUEUE_EMPTY_SLOT) {
-			if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
-				ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
-				if(path_state_ao_bounce(kg, state)) {
-					kernel_split_path_end(kg, ray_index);
-				}
-			}
-		}
-	}
+    if (ray_index != QUEUE_EMPTY_SLOT) {
+      if (IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
+        ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+        if (path_state_ao_bounce(kg, state)) {
+          kernel_split_path_end(kg, ray_index);
+        }
+      }
+    }
+  }
 
-	ray_index = get_ray_index(kg, thread_index,
-	                          QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
-	                          kernel_split_state.queue_data,
-	                          kernel_split_params.queue_size,
-	                          0);
+  ray_index = get_ray_index(kg,
+                            thread_index,
+                            QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
+                            kernel_split_state.queue_data,
+                            kernel_split_params.queue_size,
+                            0);
 
-	if(ray_index == QUEUE_EMPTY_SLOT) {
-		return;
-	}
+  if (ray_index == QUEUE_EMPTY_SLOT) {
+    return;
+  }
 
-	if(IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) {
-		ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
-		PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
-		ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
-		float3 throughput = kernel_split_state.throughput[ray_index];
-		ShaderData *sd = kernel_split_sd(sd, ray_index);
+  if (IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) {
+    ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+    PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+    ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
+    float3 throughput = kernel_split_state.throughput[ray_index];
+    ShaderData *sd = kernel_split_sd(sd, ray_index);
 
-		kernel_path_background(kg, state, ray, throughput, sd, L);
-		kernel_split_path_end(kg, ray_index);
-	}
+    kernel_path_background(kg, state, ray, throughput, sd, L);
+    kernel_split_path_end(kg, ray_index);
+  }
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_indirect_subsurface.h b/intern/cycles/kernel/split/kernel_indirect_subsurface.h
index 236c94e983c..3f48f8d6f56 100644
--- a/intern/cycles/kernel/split/kernel_indirect_subsurface.h
+++ b/intern/cycles/kernel/split/kernel_indirect_subsurface.h
@@ -18,53 +18,50 @@ CCL_NAMESPACE_BEGIN
 
 ccl_device void kernel_indirect_subsurface(KernelGlobals *kg)
 {
-	int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
-	if(thread_index == 0) {
-		/* We will empty both queues in this kernel. */
-		kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0;
-		kernel_split_params.queue_index[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = 0;
-	}
+  int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+  if (thread_index == 0) {
+    /* We will empty both queues in this kernel. */
+    kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0;
+    kernel_split_params.queue_index[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = 0;
+  }
 
-	int ray_index;
-	get_ray_index(kg, thread_index,
-	              QUEUE_ACTIVE_AND_REGENERATED_RAYS,
-	              kernel_split_state.queue_data,
-	              kernel_split_params.queue_size,
-	              1);
-	ray_index = get_ray_index(kg, thread_index,
-	                          QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
-	                          kernel_split_state.queue_data,
-	                          kernel_split_params.queue_size,
-	                          1);
+  int ray_index;
+  get_ray_index(kg,
+                thread_index,
+                QUEUE_ACTIVE_AND_REGENERATED_RAYS,
+                kernel_split_state.queue_data,
+                kernel_split_params.queue_size,
+                1);
+  ray_index = get_ray_index(kg,
+                            thread_index,
+                            QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
+                            kernel_split_state.queue_data,
+                            kernel_split_params.queue_size,
+                            1);
 
 #ifdef __SUBSURFACE__
-	if(ray_index == QUEUE_EMPTY_SLOT) {
-		return;
-	}
+  if (ray_index == QUEUE_EMPTY_SLOT) {
+    return;
+  }
 
-	ccl_global char *ray_state = kernel_split_state.ray_state;
-	ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
-	PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
-	ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
-	ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
+  ccl_global char *ray_state = kernel_split_state.ray_state;
+  ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+  PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+  ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
+  ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
 
-	if(IS_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER)) {
-		ccl_addr_space SubsurfaceIndirectRays *ss_indirect = &kernel_split_state.ss_rays[ray_index];
+  if (IS_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER)) {
+    ccl_addr_space SubsurfaceIndirectRays *ss_indirect = &kernel_split_state.ss_rays[ray_index];
 
-		/* Trace indirect subsurface rays by restarting the loop. this uses less
-		 * stack memory than invoking kernel_path_indirect.
-		 */
-		if(ss_indirect->num_rays) {
-			kernel_path_subsurface_setup_indirect(kg,
-			                                      ss_indirect,
-			                                      state,
-			                                      ray,
-			                                      L,
-			                                      throughput);
-			ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
-		}
-	}
-#endif  /* __SUBSURFACE__ */
+    /* Trace indirect subsurface rays by restarting the loop. this uses less
+     * stack memory than invoking kernel_path_indirect.
+     */
+    if (ss_indirect->num_rays) {
+      kernel_path_subsurface_setup_indirect(kg, ss_indirect, state, ray, L, throughput);
+      ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
+    }
+  }
+#endif /* __SUBSURFACE__ */
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_lamp_emission.h b/intern/cycles/kernel/split/kernel_lamp_emission.h
index 5b2c554b922..7ecb099208d 100644
--- a/intern/cycles/kernel/split/kernel_lamp_emission.h
+++ b/intern/cycles/kernel/split/kernel_lamp_emission.h
@@ -23,45 +23,45 @@ CCL_NAMESPACE_BEGIN
 ccl_device void kernel_lamp_emission(KernelGlobals *kg)
 {
 #ifndef __VOLUME__
-	/* We will empty this queue in this kernel. */
-	if(ccl_global_id(0) == 0 && ccl_global_id(1) == 0) {
-		kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0;
-	}
+  /* We will empty this queue in this kernel. */
+  if (ccl_global_id(0) == 0 && ccl_global_id(1) == 0) {
+    kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0;
+  }
 #endif
-	/* Fetch use_queues_flag. */
-	char local_use_queues_flag = *kernel_split_params.use_queues_flag;
-	ccl_barrier(CCL_LOCAL_MEM_FENCE);
+  /* Fetch use_queues_flag. */
+  char local_use_queues_flag = *kernel_split_params.use_queues_flag;
+  ccl_barrier(CCL_LOCAL_MEM_FENCE);
 
-	int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
-	if(local_use_queues_flag) {
-		ray_index = get_ray_index(kg, ray_index,
-		                          QUEUE_ACTIVE_AND_REGENERATED_RAYS,
-		                          kernel_split_state.queue_data,
-		                          kernel_split_params.queue_size,
+  int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+  if (local_use_queues_flag) {
+    ray_index = get_ray_index(kg,
+                              ray_index,
+                              QUEUE_ACTIVE_AND_REGENERATED_RAYS,
+                              kernel_split_state.queue_data,
+                              kernel_split_params.queue_size,
 #ifndef __VOLUME__
-		                          1
+                              1
 #else
-		                          0
+                              0
 #endif
-		                          );
-		if(ray_index == QUEUE_EMPTY_SLOT) {
-			return;
-		}
-	}
+    );
+    if (ray_index == QUEUE_EMPTY_SLOT) {
+      return;
+    }
+  }
 
-	if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE) ||
-	   IS_STATE(kernel_split_state.ray_state, ray_index, RAY_HIT_BACKGROUND))
-	{
-		PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
-		ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+  if (IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE) ||
+      IS_STATE(kernel_split_state.ray_state, ray_index, RAY_HIT_BACKGROUND)) {
+    PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+    ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
 
-		float3 throughput = kernel_split_state.throughput[ray_index];
-		Ray ray = kernel_split_state.ray[ray_index];
-		ccl_global Intersection *isect = &kernel_split_state.isect[ray_index];
-		ShaderData *sd = kernel_split_sd(sd, ray_index);
+    float3 throughput = kernel_split_state.throughput[ray_index];
+    Ray ray = kernel_split_state.ray[ray_index];
+    ccl_global Intersection *isect = &kernel_split_state.isect[ray_index];
+    ShaderData *sd = kernel_split_sd(sd, ray_index);
 
-		kernel_path_lamp_emission(kg, state, &ray, throughput, isect, sd, L);
-	}
+    kernel_path_lamp_emission(kg, state, &ray, throughput, isect, sd, L);
+  }
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_next_iteration_setup.h b/intern/cycles/kernel/split/kernel_next_iteration_setup.h
index e388955f1af..781ce869374 100644
--- a/intern/cycles/kernel/split/kernel_next_iteration_setup.h
+++ b/intern/cycles/kernel/split/kernel_next_iteration_setup.h
@@ -48,217 +48,211 @@ CCL_NAMESPACE_BEGIN
 #ifdef __BRANCHED_PATH__
 ccl_device_inline void kernel_split_branched_indirect_light_init(KernelGlobals *kg, int ray_index)
 {
-	kernel_split_branched_path_indirect_loop_init(kg, ray_index);
+  kernel_split_branched_path_indirect_loop_init(kg, ray_index);
 
-	ADD_RAY_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_LIGHT_INDIRECT);
+  ADD_RAY_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_LIGHT_INDIRECT);
 }
 
 ccl_device void kernel_split_branched_transparent_bounce(KernelGlobals *kg, int ray_index)
 {
-	ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
-	ShaderData *sd = kernel_split_sd(sd, ray_index);
-	ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
-	ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
+  ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
+  ShaderData *sd = kernel_split_sd(sd, ray_index);
+  ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+  ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
 
 #  ifdef __VOLUME__
-	if(!(sd->flag & SD_HAS_ONLY_VOLUME)) {
+  if (!(sd->flag & SD_HAS_ONLY_VOLUME)) {
 #  endif
-		/* continue in case of transparency */
-		*throughput *= shader_bsdf_transparency(kg, sd);
+    /* continue in case of transparency */
+    *throughput *= shader_bsdf_transparency(kg, sd);
 
-		if(is_zero(*throughput)) {
-			kernel_split_path_end(kg, ray_index);
-			return;
-		}
+    if (is_zero(*throughput)) {
+      kernel_split_path_end(kg, ray_index);
+      return;
+    }
 
-		/* Update Path State */
-		path_state_next(kg, state, LABEL_TRANSPARENT);
+    /* Update Path State */
+    path_state_next(kg, state, LABEL_TRANSPARENT);
 #  ifdef __VOLUME__
-	}
-	else {
-		if(!path_state_volume_next(kg, state)) {
-			kernel_split_path_end(kg, ray_index);
-			return;
-		}
-	}
+  }
+  else {
+    if (!path_state_volume_next(kg, state)) {
+      kernel_split_path_end(kg, ray_index);
+      return;
+    }
+  }
 #  endif
 
-	ray->P = ray_offset(sd->P, -sd->Ng);
-	ray->t -= sd->ray_length; /* clipping works through transparent */
+  ray->P = ray_offset(sd->P, -sd->Ng);
+  ray->t -= sd->ray_length; /* clipping works through transparent */
 
 #  ifdef __RAY_DIFFERENTIALS__
-	ray->dP = sd->dP;
-	ray->dD.dx = -sd->dI.dx;
-	ray->dD.dy = -sd->dI.dy;
-#  endif  /* __RAY_DIFFERENTIALS__ */
+  ray->dP = sd->dP;
+  ray->dD.dx = -sd->dI.dx;
+  ray->dD.dy = -sd->dI.dy;
+#  endif /* __RAY_DIFFERENTIALS__ */
 
 #  ifdef __VOLUME__
-	/* enter/exit volume */
-	kernel_volume_stack_enter_exit(kg, sd, state->volume_stack);
-#  endif  /* __VOLUME__ */
+  /* enter/exit volume */
+  kernel_volume_stack_enter_exit(kg, sd, state->volume_stack);
+#  endif /* __VOLUME__ */
 }
-#endif  /* __BRANCHED_PATH__ */
+#endif /* __BRANCHED_PATH__ */
 
 ccl_device void kernel_next_iteration_setup(KernelGlobals *kg,
                                             ccl_local_param unsigned int *local_queue_atomics)
 {
-	if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
-		*local_queue_atomics = 0;
-	}
-	ccl_barrier(CCL_LOCAL_MEM_FENCE);
-
-	if(ccl_global_id(0) == 0 && ccl_global_id(1) == 0) {
-		/* If we are here, then it means that scene-intersect kernel
-		* has already been executed atleast once. From the next time,
-		* scene-intersect kernel may operate on queues to fetch ray index
-		*/
-		*kernel_split_params.use_queues_flag = 1;
-
-		/* Mark queue indices of QUEUE_SHADOW_RAY_CAST_AO_RAYS and
-		 * QUEUE_SHADOW_RAY_CAST_DL_RAYS queues that were made empty during the
-		 * previous kernel.
-		 */
-		kernel_split_params.queue_index[QUEUE_SHADOW_RAY_CAST_AO_RAYS] = 0;
-		kernel_split_params.queue_index[QUEUE_SHADOW_RAY_CAST_DL_RAYS] = 0;
-	}
-
-	int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
-	ray_index = get_ray_index(kg, ray_index,
-	                          QUEUE_ACTIVE_AND_REGENERATED_RAYS,
-	                          kernel_split_state.queue_data,
-	                          kernel_split_params.queue_size,
-	                          0);
-
-	ccl_global char *ray_state = kernel_split_state.ray_state;
-
-#  ifdef __VOLUME__
-	/* Reactivate only volume rays here, most surface work was skipped. */
-	if(IS_STATE(ray_state, ray_index, RAY_HAS_ONLY_VOLUME)) {
-		ASSIGN_RAY_STATE(ray_state, ray_index, RAY_ACTIVE);
-	}
-#  endif
+  if (ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
+    *local_queue_atomics = 0;
+  }
+  ccl_barrier(CCL_LOCAL_MEM_FENCE);
+
+  if (ccl_global_id(0) == 0 && ccl_global_id(1) == 0) {
+    /* If we are here, then it means that scene-intersect kernel
+    * has already been executed atleast once. From the next time,
+    * scene-intersect kernel may operate on queues to fetch ray index
+    */
+    *kernel_split_params.use_queues_flag = 1;
+
+    /* Mark queue indices of QUEUE_SHADOW_RAY_CAST_AO_RAYS and
+     * QUEUE_SHADOW_RAY_CAST_DL_RAYS queues that were made empty during the
+     * previous kernel.
+     */
+    kernel_split_params.queue_index[QUEUE_SHADOW_RAY_CAST_AO_RAYS] = 0;
+    kernel_split_params.queue_index[QUEUE_SHADOW_RAY_CAST_DL_RAYS] = 0;
+  }
+
+  int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+  ray_index = get_ray_index(kg,
+                            ray_index,
+                            QUEUE_ACTIVE_AND_REGENERATED_RAYS,
+                            kernel_split_state.queue_data,
+                            kernel_split_params.queue_size,
+                            0);
+
+  ccl_global char *ray_state = kernel_split_state.ray_state;
+
+#ifdef __VOLUME__
+  /* Reactivate only volume rays here, most surface work was skipped. */
+  if (IS_STATE(ray_state, ray_index, RAY_HAS_ONLY_VOLUME)) {
+    ASSIGN_RAY_STATE(ray_state, ray_index, RAY_ACTIVE);
+  }
+#endif
 
-	bool active = IS_STATE(ray_state, ray_index, RAY_ACTIVE);
-	if(active) {
-		ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
-		ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
-		ShaderData *sd = kernel_split_sd(sd, ray_index);
-		ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
-		PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+  bool active = IS_STATE(ray_state, ray_index, RAY_ACTIVE);
+  if (active) {
+    ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
+    ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
+    ShaderData *sd = kernel_split_sd(sd, ray_index);
+    ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+    PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
 
 #ifdef __BRANCHED_PATH__
-		if(!kernel_data.integrator.branched || IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT)) {
+    if (!kernel_data.integrator.branched || IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT)) {
 #endif
-			/* Compute direct lighting and next bounce. */
-			if(!kernel_path_surface_bounce(kg, sd, throughput, state, &L->state, ray)) {
-				kernel_split_path_end(kg, ray_index);
-			}
+      /* Compute direct lighting and next bounce. */
+      if (!kernel_path_surface_bounce(kg, sd, throughput, state, &L->state, ray)) {
+        kernel_split_path_end(kg, ray_index);
+      }
 #ifdef __BRANCHED_PATH__
-		}
-		else if(sd->flag & SD_HAS_ONLY_VOLUME) {
-			kernel_split_branched_transparent_bounce(kg, ray_index);
-		}
-		else {
-			kernel_split_branched_indirect_light_init(kg, ray_index);
-
-			if(kernel_split_branched_path_surface_indirect_light_iter(kg,
-			                                                          ray_index,
-			                                                          1.0f,
-			                                                          kernel_split_sd(branched_state_sd, ray_index),
-			                                                          true,
-			                                                          true))
-			{
-				ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
-			}
-			else {
-				kernel_split_branched_path_indirect_loop_end(kg, ray_index);
-				kernel_split_branched_transparent_bounce(kg, ray_index);
-			}
-		}
-#endif  /* __BRANCHED_PATH__ */
-	}
-
-	/* Enqueue RAY_UPDATE_BUFFER rays. */
-	enqueue_ray_index_local(ray_index,
-	                        QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
-	                        IS_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER) && active,
-	                        kernel_split_params.queue_size,
-	                        local_queue_atomics,
-	                        kernel_split_state.queue_data,
-	                        kernel_split_params.queue_index);
+    }
+    else if (sd->flag & SD_HAS_ONLY_VOLUME) {
+      kernel_split_branched_transparent_bounce(kg, ray_index);
+    }
+    else {
+      kernel_split_branched_indirect_light_init(kg, ray_index);
+
+      if (kernel_split_branched_path_surface_indirect_light_iter(
+              kg, ray_index, 1.0f, kernel_split_sd(branched_state_sd, ray_index), true, true)) {
+        ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
+      }
+      else {
+        kernel_split_branched_path_indirect_loop_end(kg, ray_index);
+        kernel_split_branched_transparent_bounce(kg, ray_index);
+      }
+    }
+#endif /* __BRANCHED_PATH__ */
+  }
+
+  /* Enqueue RAY_UPDATE_BUFFER rays. */
+  enqueue_ray_index_local(ray_index,
+                          QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
+                          IS_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER) && active,
+                          kernel_split_params.queue_size,
+                          local_queue_atomics,
+                          kernel_split_state.queue_data,
+                          kernel_split_params.queue_index);
 
 #ifdef __BRANCHED_PATH__
-	/* iter loop */
-	if(ccl_global_id(0) == 0 && ccl_global_id(1) == 0) {
-		kernel_split_params.queue_index[QUEUE_LIGHT_INDIRECT_ITER] = 0;
-	}
-
-	ray_index = get_ray_index(kg, ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0),
-	                          QUEUE_LIGHT_INDIRECT_ITER,
-	                          kernel_split_state.queue_data,
-	                          kernel_split_params.queue_size,
-	                          1);
-
-	if(IS_STATE(ray_state, ray_index, RAY_LIGHT_INDIRECT_NEXT_ITER)) {
-		/* for render passes, sum and reset indirect light pass variables
-		 * for the next samples */
-		PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
-
-		path_radiance_sum_indirect(L);
-		path_radiance_reset_indirect(L);
-
-		if(kernel_split_branched_path_surface_indirect_light_iter(kg,
-		                                                          ray_index,
-		                                                          1.0f,
-		                                                          kernel_split_sd(branched_state_sd, ray_index),
-		                                                          true,
-		                                                          true))
-		{
-			ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
-		}
-		else {
-			kernel_split_branched_path_indirect_loop_end(kg, ray_index);
-			kernel_split_branched_transparent_bounce(kg, ray_index);
-		}
-	}
+  /* iter loop */
+  if (ccl_global_id(0) == 0 && ccl_global_id(1) == 0) {
+    kernel_split_params.queue_index[QUEUE_LIGHT_INDIRECT_ITER] = 0;
+  }
+
+  ray_index = get_ray_index(kg,
+                            ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0),
+                            QUEUE_LIGHT_INDIRECT_ITER,
+                            kernel_split_state.queue_data,
+                            kernel_split_params.queue_size,
+                            1);
+
+  if (IS_STATE(ray_state, ray_index, RAY_LIGHT_INDIRECT_NEXT_ITER)) {
+    /* for render passes, sum and reset indirect light pass variables
+     * for the next samples */
+    PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+
+    path_radiance_sum_indirect(L);
+    path_radiance_reset_indirect(L);
+
+    if (kernel_split_branched_path_surface_indirect_light_iter(
+            kg, ray_index, 1.0f, kernel_split_sd(branched_state_sd, ray_index), true, true)) {
+      ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
+    }
+    else {
+      kernel_split_branched_path_indirect_loop_end(kg, ray_index);
+      kernel_split_branched_transparent_bounce(kg, ray_index);
+    }
+  }
 
 #  ifdef __VOLUME__
-	/* Enqueue RAY_VOLUME_INDIRECT_NEXT_ITER rays */
-	ccl_barrier(CCL_LOCAL_MEM_FENCE);
-	if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
-		*local_queue_atomics = 0;
-	}
-	ccl_barrier(CCL_LOCAL_MEM_FENCE);
-
-	ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
-	enqueue_ray_index_local(ray_index,
-	                        QUEUE_VOLUME_INDIRECT_ITER,
-	                        IS_STATE(kernel_split_state.ray_state, ray_index, RAY_VOLUME_INDIRECT_NEXT_ITER),
-	                        kernel_split_params.queue_size,
-	                        local_queue_atomics,
-	                        kernel_split_state.queue_data,
-	                        kernel_split_params.queue_index);
-
-#  endif  /* __VOLUME__ */
+  /* Enqueue RAY_VOLUME_INDIRECT_NEXT_ITER rays */
+  ccl_barrier(CCL_LOCAL_MEM_FENCE);
+  if (ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
+    *local_queue_atomics = 0;
+  }
+  ccl_barrier(CCL_LOCAL_MEM_FENCE);
+
+  ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+  enqueue_ray_index_local(
+      ray_index,
+      QUEUE_VOLUME_INDIRECT_ITER,
+      IS_STATE(kernel_split_state.ray_state, ray_index, RAY_VOLUME_INDIRECT_NEXT_ITER),
+      kernel_split_params.queue_size,
+      local_queue_atomics,
+      kernel_split_state.queue_data,
+      kernel_split_params.queue_index);
+
+#  endif /* __VOLUME__ */
 
 #  ifdef __SUBSURFACE__
-	/* Enqueue RAY_SUBSURFACE_INDIRECT_NEXT_ITER rays */
-	ccl_barrier(CCL_LOCAL_MEM_FENCE);
-	if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
-		*local_queue_atomics = 0;
-	}
-	ccl_barrier(CCL_LOCAL_MEM_FENCE);
-
-	ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
-	enqueue_ray_index_local(ray_index,
-	                        QUEUE_SUBSURFACE_INDIRECT_ITER,
-	                        IS_STATE(kernel_split_state.ray_state, ray_index, RAY_SUBSURFACE_INDIRECT_NEXT_ITER),
-	                        kernel_split_params.queue_size,
-	                        local_queue_atomics,
-	                        kernel_split_state.queue_data,
-	                        kernel_split_params.queue_index);
-#  endif  /* __SUBSURFACE__ */
-#endif  /* __BRANCHED_PATH__ */
+  /* Enqueue RAY_SUBSURFACE_INDIRECT_NEXT_ITER rays */
+  ccl_barrier(CCL_LOCAL_MEM_FENCE);
+  if (ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
+    *local_queue_atomics = 0;
+  }
+  ccl_barrier(CCL_LOCAL_MEM_FENCE);
+
+  ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+  enqueue_ray_index_local(
+      ray_index,
+      QUEUE_SUBSURFACE_INDIRECT_ITER,
+      IS_STATE(kernel_split_state.ray_state, ray_index, RAY_SUBSURFACE_INDIRECT_NEXT_ITER),
+      kernel_split_params.queue_size,
+      local_queue_atomics,
+      kernel_split_state.queue_data,
+      kernel_split_params.queue_index);
+#  endif /* __SUBSURFACE__ */
+#endif   /* __BRANCHED_PATH__ */
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_path_init.h b/intern/cycles/kernel/split/kernel_path_init.h
index fdd54225b07..3faa3208341 100644
--- a/intern/cycles/kernel/split/kernel_path_init.h
+++ b/intern/cycles/kernel/split/kernel_path_init.h
@@ -21,61 +21,59 @@ CCL_NAMESPACE_BEGIN
  *
  * Ray state of rays outside the tile-boundary will be marked RAY_INACTIVE
  */
-ccl_device void kernel_path_init(KernelGlobals *kg) {
-	int ray_index = ccl_global_id(0) + ccl_global_id(1) * ccl_global_size(0);
+ccl_device void kernel_path_init(KernelGlobals *kg)
+{
+  int ray_index = ccl_global_id(0) + ccl_global_id(1) * ccl_global_size(0);
 
-	/* This is the first assignment to ray_state;
-	 * So we dont use ASSIGN_RAY_STATE macro.
-	 */
-	kernel_split_state.ray_state[ray_index] = RAY_ACTIVE;
+  /* This is the first assignment to ray_state;
+   * So we don't use ASSIGN_RAY_STATE macro.
+   */
+  kernel_split_state.ray_state[ray_index] = RAY_ACTIVE;
 
-	/* Get work. */
-	ccl_global uint *work_pools = kernel_split_params.work_pools;
-	uint total_work_size = kernel_split_params.total_work_size;
-	uint work_index;
+  /* Get work. */
+  ccl_global uint *work_pools = kernel_split_params.work_pools;
+  uint total_work_size = kernel_split_params.total_work_size;
+  uint work_index;
 
-	if(!get_next_work(kg, work_pools, total_work_size, ray_index, &work_index)) {
-		/* No more work, mark ray as inactive */
-		kernel_split_state.ray_state[ray_index] = RAY_INACTIVE;
+  if (!get_next_work(kg, work_pools, total_work_size, ray_index, &work_index)) {
+    /* No more work, mark ray as inactive */
+    kernel_split_state.ray_state[ray_index] = RAY_INACTIVE;
 
-		return;
-	}
+    return;
+  }
 
-	ccl_global WorkTile *tile = &kernel_split_params.tile;
-	uint x, y, sample;
-	get_work_pixel(tile, work_index, &x, &y, &sample);
+  ccl_global WorkTile *tile = &kernel_split_params.tile;
+  uint x, y, sample;
+  get_work_pixel(tile, work_index, &x, &y, &sample);
 
-	/* Store buffer offset for writing to passes. */
-	uint buffer_offset = (tile->offset + x + y*tile->stride) * kernel_data.film.pass_stride;
-	kernel_split_state.buffer_offset[ray_index] = buffer_offset;
+  /* Store buffer offset for writing to passes. */
+  uint buffer_offset = (tile->offset + x + y * tile->stride) * kernel_data.film.pass_stride;
+  kernel_split_state.buffer_offset[ray_index] = buffer_offset;
 
-	/* Initialize random numbers and ray. */
-	uint rng_hash;
-	kernel_path_trace_setup(kg,
-	                        sample,
-	                        x, y,
-	                        &rng_hash,
-	                        &kernel_split_state.ray[ray_index]);
+  /* Initialize random numbers and ray. */
+  uint rng_hash;
+  kernel_path_trace_setup(kg, sample, x, y, &rng_hash, &kernel_split_state.ray[ray_index]);
 
-	if(kernel_split_state.ray[ray_index].t != 0.0f) {
-		/* Initialize throughput, path radiance, Ray, PathState;
-		 * These rays proceed with path-iteration.
-		 */
-		kernel_split_state.throughput[ray_index] = make_float3(1.0f, 1.0f, 1.0f);
-		path_radiance_init(&kernel_split_state.path_radiance[ray_index], kernel_data.film.use_light_pass);
-		path_state_init(kg,
-		                AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]),
-		                &kernel_split_state.path_state[ray_index],
-		                rng_hash,
-		                sample,
-		                &kernel_split_state.ray[ray_index]);
+  if (kernel_split_state.ray[ray_index].t != 0.0f) {
+    /* Initialize throughput, path radiance, Ray, PathState;
+     * These rays proceed with path-iteration.
+     */
+    kernel_split_state.throughput[ray_index] = make_float3(1.0f, 1.0f, 1.0f);
+    path_radiance_init(&kernel_split_state.path_radiance[ray_index],
+                       kernel_data.film.use_light_pass);
+    path_state_init(kg,
+                    AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]),
+                    &kernel_split_state.path_state[ray_index],
+                    rng_hash,
+                    sample,
+                    &kernel_split_state.ray[ray_index]);
 #ifdef __SUBSURFACE__
-		kernel_path_subsurface_init_indirect(&kernel_split_state.ss_rays[ray_index]);
+    kernel_path_subsurface_init_indirect(&kernel_split_state.ss_rays[ray_index]);
 #endif
-	}
-	else {
-		ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_TO_REGENERATE);
-	}
+  }
+  else {
+    ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_TO_REGENERATE);
+  }
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_queue_enqueue.h b/intern/cycles/kernel/split/kernel_queue_enqueue.h
index df67fabab19..2db87f7a671 100644
--- a/intern/cycles/kernel/split/kernel_queue_enqueue.h
+++ b/intern/cycles/kernel/split/kernel_queue_enqueue.h
@@ -35,58 +35,53 @@ CCL_NAMESPACE_BEGIN
  *   - QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be filled with
  *     RAY_TO_REGENERATE, RAY_UPDATE_BUFFER, RAY_HIT_BACKGROUND rays.
  */
-ccl_device void kernel_queue_enqueue(KernelGlobals *kg,
-                                     ccl_local_param QueueEnqueueLocals *locals)
+ccl_device void kernel_queue_enqueue(KernelGlobals *kg, ccl_local_param QueueEnqueueLocals *locals)
 {
-	/* We have only 2 cases (Hit/Not-Hit) */
-	int lidx = ccl_local_id(1) * ccl_local_size(0) + ccl_local_id(0);
-	int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+  /* We have only 2 cases (Hit/Not-Hit) */
+  int lidx = ccl_local_id(1) * ccl_local_size(0) + ccl_local_id(0);
+  int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
 
-	if(lidx == 0) {
-		locals->queue_atomics[0] = 0;
-		locals->queue_atomics[1] = 0;
-	}
-	ccl_barrier(CCL_LOCAL_MEM_FENCE);
+  if (lidx == 0) {
+    locals->queue_atomics[0] = 0;
+    locals->queue_atomics[1] = 0;
+  }
+  ccl_barrier(CCL_LOCAL_MEM_FENCE);
 
-	int queue_number = -1;
+  int queue_number = -1;
 
-	if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_HIT_BACKGROUND) ||
-	   IS_STATE(kernel_split_state.ray_state, ray_index, RAY_UPDATE_BUFFER) ||
-	   IS_STATE(kernel_split_state.ray_state, ray_index, RAY_TO_REGENERATE)) {
-		queue_number = QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS;
-	}
-	else if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE) ||
-	        IS_STATE(kernel_split_state.ray_state, ray_index, RAY_HAS_ONLY_VOLUME) ||
-	        IS_STATE(kernel_split_state.ray_state, ray_index, RAY_REGENERATED)) {
-		queue_number = QUEUE_ACTIVE_AND_REGENERATED_RAYS;
-	}
+  if (IS_STATE(kernel_split_state.ray_state, ray_index, RAY_HIT_BACKGROUND) ||
+      IS_STATE(kernel_split_state.ray_state, ray_index, RAY_UPDATE_BUFFER) ||
+      IS_STATE(kernel_split_state.ray_state, ray_index, RAY_TO_REGENERATE)) {
+    queue_number = QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS;
+  }
+  else if (IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE) ||
+           IS_STATE(kernel_split_state.ray_state, ray_index, RAY_HAS_ONLY_VOLUME) ||
+           IS_STATE(kernel_split_state.ray_state, ray_index, RAY_REGENERATED)) {
+    queue_number = QUEUE_ACTIVE_AND_REGENERATED_RAYS;
+  }
 
-	unsigned int my_lqidx;
-	if(queue_number != -1) {
-		my_lqidx = get_local_queue_index(queue_number, locals->queue_atomics);
-	}
-	ccl_barrier(CCL_LOCAL_MEM_FENCE);
+  unsigned int my_lqidx;
+  if (queue_number != -1) {
+    my_lqidx = get_local_queue_index(queue_number, locals->queue_atomics);
+  }
+  ccl_barrier(CCL_LOCAL_MEM_FENCE);
 
-	if(lidx == 0) {
-		locals->queue_atomics[QUEUE_ACTIVE_AND_REGENERATED_RAYS] =
-		        get_global_per_queue_offset(QUEUE_ACTIVE_AND_REGENERATED_RAYS,
-		                                    locals->queue_atomics,
-		                                    kernel_split_params.queue_index);
-		locals->queue_atomics[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] =
-		        get_global_per_queue_offset(QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
-		                                    locals->queue_atomics,
-		                                    kernel_split_params.queue_index);
-	}
-	ccl_barrier(CCL_LOCAL_MEM_FENCE);
+  if (lidx == 0) {
+    locals->queue_atomics[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = get_global_per_queue_offset(
+        QUEUE_ACTIVE_AND_REGENERATED_RAYS, locals->queue_atomics, kernel_split_params.queue_index);
+    locals->queue_atomics[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = get_global_per_queue_offset(
+        QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
+        locals->queue_atomics,
+        kernel_split_params.queue_index);
+  }
+  ccl_barrier(CCL_LOCAL_MEM_FENCE);
 
-	unsigned int my_gqidx;
-	if(queue_number != -1) {
-		my_gqidx = get_global_queue_index(queue_number,
-		                                  kernel_split_params.queue_size,
-		                                  my_lqidx,
-		                                  locals->queue_atomics);
-		kernel_split_state.queue_data[my_gqidx] = ray_index;
-	}
+  unsigned int my_gqidx;
+  if (queue_number != -1) {
+    my_gqidx = get_global_queue_index(
+        queue_number, kernel_split_params.queue_size, my_lqidx, locals->queue_atomics);
+    kernel_split_state.queue_data[my_gqidx] = ray_index;
+  }
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_scene_intersect.h b/intern/cycles/kernel/split/kernel_scene_intersect.h
index f5378bc172b..5fef3e045f8 100644
--- a/intern/cycles/kernel/split/kernel_scene_intersect.h
+++ b/intern/cycles/kernel/split/kernel_scene_intersect.h
@@ -25,55 +25,56 @@ CCL_NAMESPACE_BEGIN
  */
 ccl_device void kernel_scene_intersect(KernelGlobals *kg)
 {
-	/* Fetch use_queues_flag */
-	char local_use_queues_flag = *kernel_split_params.use_queues_flag;
-	ccl_barrier(CCL_LOCAL_MEM_FENCE);
+  /* Fetch use_queues_flag */
+  char local_use_queues_flag = *kernel_split_params.use_queues_flag;
+  ccl_barrier(CCL_LOCAL_MEM_FENCE);
 
-	int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
-	if(local_use_queues_flag) {
-		ray_index = get_ray_index(kg, ray_index,
-		                          QUEUE_ACTIVE_AND_REGENERATED_RAYS,
-		                          kernel_split_state.queue_data,
-		                          kernel_split_params.queue_size,
-		                          0);
+  int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+  if (local_use_queues_flag) {
+    ray_index = get_ray_index(kg,
+                              ray_index,
+                              QUEUE_ACTIVE_AND_REGENERATED_RAYS,
+                              kernel_split_state.queue_data,
+                              kernel_split_params.queue_size,
+                              0);
 
-		if(ray_index == QUEUE_EMPTY_SLOT) {
-			return;
-		}
-	}
+    if (ray_index == QUEUE_EMPTY_SLOT) {
+      return;
+    }
+  }
 
-	/* All regenerated rays become active here */
-	if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_REGENERATED)) {
+  /* All regenerated rays become active here */
+  if (IS_STATE(kernel_split_state.ray_state, ray_index, RAY_REGENERATED)) {
 #ifdef __BRANCHED_PATH__
-		if(kernel_split_state.branched_state[ray_index].waiting_on_shared_samples) {
-			kernel_split_path_end(kg, ray_index);
-		}
-		else
-#endif  /* __BRANCHED_PATH__ */
-		{
-			ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE);
-		}
-	}
+    if (kernel_split_state.branched_state[ray_index].waiting_on_shared_samples) {
+      kernel_split_path_end(kg, ray_index);
+    }
+    else
+#endif /* __BRANCHED_PATH__ */
+    {
+      ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE);
+    }
+  }
 
-	if(!IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE)) {
-		return;
-	}
+  if (!IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE)) {
+    return;
+  }
 
-	ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
-	Ray ray = kernel_split_state.ray[ray_index];
-	PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+  ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+  Ray ray = kernel_split_state.ray[ray_index];
+  PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
 
-	Intersection isect;
-	bool hit = kernel_path_scene_intersect(kg, state, &ray, &isect, L);
-	kernel_split_state.isect[ray_index] = isect;
+  Intersection isect;
+  bool hit = kernel_path_scene_intersect(kg, state, &ray, &isect, L);
+  kernel_split_state.isect[ray_index] = isect;
 
-	if(!hit) {
-		/* Change the state of rays that hit the background;
-		 * These rays undergo special processing in the
-		 * background_bufferUpdate kernel.
-		 */
-		ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_HIT_BACKGROUND);
-	}
+  if (!hit) {
+    /* Change the state of rays that hit the background;
+     * These rays undergo special processing in the
+     * background_bufferUpdate kernel.
+     */
+    ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_HIT_BACKGROUND);
+  }
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_shader_eval.h b/intern/cycles/kernel/split/kernel_shader_eval.h
index 2bc2d300699..8e39c9797e5 100644
--- a/intern/cycles/kernel/split/kernel_shader_eval.h
+++ b/intern/cycles/kernel/split/kernel_shader_eval.h
@@ -22,45 +22,46 @@ CCL_NAMESPACE_BEGIN
 ccl_device void kernel_shader_eval(KernelGlobals *kg)
 {
 
-	int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
-	/* Sorting on cuda split is not implemented */
+  int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+  /* Sorting on cuda split is not implemented */
 #ifdef __KERNEL_CUDA__
-	int queue_index = kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS];
+  int queue_index = kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS];
 #else
-	int queue_index = kernel_split_params.queue_index[QUEUE_SHADER_SORTED_RAYS];
+  int queue_index = kernel_split_params.queue_index[QUEUE_SHADER_SORTED_RAYS];
 #endif
-	if(ray_index >= queue_index) {
-		return;
-	}
-	ray_index = get_ray_index(kg, ray_index,
+  if (ray_index >= queue_index) {
+    return;
+  }
+  ray_index = get_ray_index(kg,
+                            ray_index,
 #ifdef __KERNEL_CUDA__
-	                          QUEUE_ACTIVE_AND_REGENERATED_RAYS,
+                            QUEUE_ACTIVE_AND_REGENERATED_RAYS,
 #else
-	                          QUEUE_SHADER_SORTED_RAYS,
+                            QUEUE_SHADER_SORTED_RAYS,
 #endif
-	                          kernel_split_state.queue_data,
-	                          kernel_split_params.queue_size,
-	                          0);
+                            kernel_split_state.queue_data,
+                            kernel_split_params.queue_size,
+                            0);
 
-	if(ray_index == QUEUE_EMPTY_SLOT) {
-		return;
-	}
+  if (ray_index == QUEUE_EMPTY_SLOT) {
+    return;
+  }
 
-	ccl_global char *ray_state = kernel_split_state.ray_state;
-	if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
-		ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+  ccl_global char *ray_state = kernel_split_state.ray_state;
+  if (IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
+    ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
 
-		shader_eval_surface(kg, kernel_split_sd(sd, ray_index), state, state->flag);
+    shader_eval_surface(kg, kernel_split_sd(sd, ray_index), state, state->flag);
 #ifdef __BRANCHED_PATH__
-		if(kernel_data.integrator.branched) {
-			shader_merge_closures(kernel_split_sd(sd, ray_index));
-		}
-		else
+    if (kernel_data.integrator.branched) {
+      shader_merge_closures(kernel_split_sd(sd, ray_index));
+    }
+    else
 #endif
-		{
-			shader_prepare_closures(kernel_split_sd(sd, ray_index), state);
-		}
-	}
+    {
+      shader_prepare_closures(kernel_split_sd(sd, ray_index), state);
+    }
+  }
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_shader_setup.h b/intern/cycles/kernel/split/kernel_shader_setup.h
index ea3ec2ec83f..da332db2c98 100644
--- a/intern/cycles/kernel/split/kernel_shader_setup.h
+++ b/intern/cycles/kernel/split/kernel_shader_setup.h
@@ -25,54 +25,52 @@ CCL_NAMESPACE_BEGIN
 ccl_device void kernel_shader_setup(KernelGlobals *kg,
                                     ccl_local_param unsigned int *local_queue_atomics)
 {
-	/* Enqeueue RAY_TO_REGENERATE rays into QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS queue. */
-	if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
-		*local_queue_atomics = 0;
-	}
-	ccl_barrier(CCL_LOCAL_MEM_FENCE);
+  /* Enqueue RAY_TO_REGENERATE rays into QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS queue. */
+  if (ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
+    *local_queue_atomics = 0;
+  }
+  ccl_barrier(CCL_LOCAL_MEM_FENCE);
 
-	int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
-	int queue_index = kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS];
-	if(ray_index >= queue_index) {
-		return;
-	}
-	ray_index = get_ray_index(kg, ray_index,
-	                          QUEUE_ACTIVE_AND_REGENERATED_RAYS,
-	                          kernel_split_state.queue_data,
-	                          kernel_split_params.queue_size,
-	                          0);
+  int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+  int queue_index = kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS];
+  if (ray_index >= queue_index) {
+    return;
+  }
+  ray_index = get_ray_index(kg,
+                            ray_index,
+                            QUEUE_ACTIVE_AND_REGENERATED_RAYS,
+                            kernel_split_state.queue_data,
+                            kernel_split_params.queue_size,
+                            0);
 
-	if(ray_index == QUEUE_EMPTY_SLOT) {
-		return;
-	}
+  if (ray_index == QUEUE_EMPTY_SLOT) {
+    return;
+  }
 
-	char enqueue_flag = (IS_STATE(kernel_split_state.ray_state, ray_index, RAY_TO_REGENERATE)) ? 1 : 0;
-	enqueue_ray_index_local(ray_index,
-	                        QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
-	                        enqueue_flag,
-	                        kernel_split_params.queue_size,
-	                        local_queue_atomics,
-	                        kernel_split_state.queue_data,
-	                        kernel_split_params.queue_index);
+  char enqueue_flag = (IS_STATE(kernel_split_state.ray_state, ray_index, RAY_TO_REGENERATE)) ? 1 :
+                                                                                               0;
+  enqueue_ray_index_local(ray_index,
+                          QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
+                          enqueue_flag,
+                          kernel_split_params.queue_size,
+                          local_queue_atomics,
+                          kernel_split_state.queue_data,
+                          kernel_split_params.queue_index);
 
-	/* Continue on with shader evaluation. */
-	if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE)) {
-		Intersection isect = kernel_split_state.isect[ray_index];
-		Ray ray = kernel_split_state.ray[ray_index];
-		ShaderData *sd = kernel_split_sd(sd, ray_index);
+  /* Continue on with shader evaluation. */
+  if (IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE)) {
+    Intersection isect = kernel_split_state.isect[ray_index];
+    Ray ray = kernel_split_state.ray[ray_index];
+    ShaderData *sd = kernel_split_sd(sd, ray_index);
 
-		shader_setup_from_ray(kg,
-		                      sd,
-		                      &isect,
-		                      &ray);
+    shader_setup_from_ray(kg, sd, &isect, &ray);
 
 #ifdef __VOLUME__
-		if(sd->flag & SD_HAS_ONLY_VOLUME) {
-			ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_HAS_ONLY_VOLUME);
-		}
+    if (sd->flag & SD_HAS_ONLY_VOLUME) {
+      ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_HAS_ONLY_VOLUME);
+    }
 #endif
-	}
-
+  }
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_shader_sort.h b/intern/cycles/kernel/split/kernel_shader_sort.h
index 666355de334..95d33a42014 100644
--- a/intern/cycles/kernel/split/kernel_shader_sort.h
+++ b/intern/cycles/kernel/split/kernel_shader_sort.h
@@ -16,82 +16,82 @@
 
 CCL_NAMESPACE_BEGIN
 
-
-ccl_device void kernel_shader_sort(KernelGlobals *kg,
-                                   ccl_local_param ShaderSortLocals *locals)
+ccl_device void kernel_shader_sort(KernelGlobals *kg, ccl_local_param ShaderSortLocals *locals)
 {
 #ifndef __KERNEL_CUDA__
-	int tid = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
-	uint qsize = kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS];
-	if(tid == 0) {
-		kernel_split_params.queue_index[QUEUE_SHADER_SORTED_RAYS] = qsize;
-	}
+  int tid = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+  uint qsize = kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS];
+  if (tid == 0) {
+    kernel_split_params.queue_index[QUEUE_SHADER_SORTED_RAYS] = qsize;
+  }
 
-	uint offset = (tid/SHADER_SORT_LOCAL_SIZE)*SHADER_SORT_BLOCK_SIZE;
-	if(offset >= qsize) {
-		return;
-	}
+  uint offset = (tid / SHADER_SORT_LOCAL_SIZE) * SHADER_SORT_BLOCK_SIZE;
+  if (offset >= qsize) {
+    return;
+  }
 
-	int lid = ccl_local_id(1) * ccl_local_size(0) + ccl_local_id(0);
-	uint input = QUEUE_ACTIVE_AND_REGENERATED_RAYS * (kernel_split_params.queue_size);
-	uint output = QUEUE_SHADER_SORTED_RAYS * (kernel_split_params.queue_size);
-	ccl_local uint *local_value = &locals->local_value[0];
-	ccl_local ushort *local_index = &locals->local_index[0];
+  int lid = ccl_local_id(1) * ccl_local_size(0) + ccl_local_id(0);
+  uint input = QUEUE_ACTIVE_AND_REGENERATED_RAYS * (kernel_split_params.queue_size);
+  uint output = QUEUE_SHADER_SORTED_RAYS * (kernel_split_params.queue_size);
+  ccl_local uint *local_value = &locals->local_value[0];
+  ccl_local ushort *local_index = &locals->local_index[0];
 
-	/* copy to local memory */
-	for(uint i = 0; i < SHADER_SORT_BLOCK_SIZE; i += SHADER_SORT_LOCAL_SIZE) {
-		uint idx = offset + i + lid;
-		uint add = input + idx;
-		uint value = (~0);
-		if(idx < qsize) {
-			int ray_index = kernel_split_state.queue_data[add];
-			bool valid = (ray_index != QUEUE_EMPTY_SLOT) && IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE);
-			if(valid) {
-				value = kernel_split_sd(sd, ray_index)->shader & SHADER_MASK;
-			}
-		}
-		local_value[i + lid] = value;
-		local_index[i + lid] = i + lid;
-	}
-	ccl_barrier(CCL_LOCAL_MEM_FENCE);
+  /* copy to local memory */
+  for (uint i = 0; i < SHADER_SORT_BLOCK_SIZE; i += SHADER_SORT_LOCAL_SIZE) {
+    uint idx = offset + i + lid;
+    uint add = input + idx;
+    uint value = (~0);
+    if (idx < qsize) {
+      int ray_index = kernel_split_state.queue_data[add];
+      bool valid = (ray_index != QUEUE_EMPTY_SLOT) &&
+                   IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE);
+      if (valid) {
+        value = kernel_split_sd(sd, ray_index)->shader & SHADER_MASK;
+      }
+    }
+    local_value[i + lid] = value;
+    local_index[i + lid] = i + lid;
+  }
+  ccl_barrier(CCL_LOCAL_MEM_FENCE);
 
-	/* skip sorting for cpu split kernel */
+  /* skip sorting for cpu split kernel */
 #  ifdef __KERNEL_OPENCL__
 
-	/* bitonic sort */
-	for(uint length = 1; length < SHADER_SORT_BLOCK_SIZE; length <<= 1) {
-		for(uint inc = length; inc > 0; inc >>= 1) {
-			for(uint ii = 0; ii < SHADER_SORT_BLOCK_SIZE; ii += SHADER_SORT_LOCAL_SIZE) {
-				uint i = lid + ii;
-				bool direction = ((i & (length << 1)) != 0);
-				uint j = i ^ inc;
-				ushort ioff = local_index[i];
-				ushort joff = local_index[j];
-				uint iKey = local_value[ioff];
-				uint jKey = local_value[joff];
-				bool smaller = (jKey < iKey) || (jKey == iKey && j < i);
-				bool swap = smaller ^ (j < i) ^ direction;
-				ccl_barrier(CCL_LOCAL_MEM_FENCE);
-				local_index[i] = (swap) ? joff : ioff;
-				local_index[j] = (swap) ? ioff : joff;
-				ccl_barrier(CCL_LOCAL_MEM_FENCE);
-			}
-		}
-	}
-#  endif  /* __KERNEL_OPENCL__ */
+  /* bitonic sort */
+  for (uint length = 1; length < SHADER_SORT_BLOCK_SIZE; length <<= 1) {
+    for (uint inc = length; inc > 0; inc >>= 1) {
+      for (uint ii = 0; ii < SHADER_SORT_BLOCK_SIZE; ii += SHADER_SORT_LOCAL_SIZE) {
+        uint i = lid + ii;
+        bool direction = ((i & (length << 1)) != 0);
+        uint j = i ^ inc;
+        ushort ioff = local_index[i];
+        ushort joff = local_index[j];
+        uint iKey = local_value[ioff];
+        uint jKey = local_value[joff];
+        bool smaller = (jKey < iKey) || (jKey == iKey && j < i);
+        bool swap = smaller ^ (j < i) ^ direction;
+        ccl_barrier(CCL_LOCAL_MEM_FENCE);
+        local_index[i] = (swap) ? joff : ioff;
+        local_index[j] = (swap) ? ioff : joff;
+        ccl_barrier(CCL_LOCAL_MEM_FENCE);
+      }
+    }
+  }
+#  endif /* __KERNEL_OPENCL__ */
 
-	/* copy to destination */
-	for(uint i = 0; i < SHADER_SORT_BLOCK_SIZE; i += SHADER_SORT_LOCAL_SIZE) {
-		uint idx = offset + i + lid;
-		uint lidx = local_index[i + lid];
-		uint outi = output + idx;
-		uint ini = input + offset + lidx;
-		uint value = local_value[lidx];
-		if(idx < qsize) {
-			kernel_split_state.queue_data[outi] = (value == (~0)) ? QUEUE_EMPTY_SLOT : kernel_split_state.queue_data[ini];
-		}
-	}
-#endif  /* __KERNEL_CUDA__ */
+  /* copy to destination */
+  for (uint i = 0; i < SHADER_SORT_BLOCK_SIZE; i += SHADER_SORT_LOCAL_SIZE) {
+    uint idx = offset + i + lid;
+    uint lidx = local_index[i + lid];
+    uint outi = output + idx;
+    uint ini = input + offset + lidx;
+    uint value = local_value[lidx];
+    if (idx < qsize) {
+      kernel_split_state.queue_data[outi] = (value == (~0)) ? QUEUE_EMPTY_SLOT :
+                                                              kernel_split_state.queue_data[ini];
+    }
+  }
+#endif /* __KERNEL_CUDA__ */
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_shadow_blocked_ao.h b/intern/cycles/kernel/split/kernel_shadow_blocked_ao.h
index fb08112503a..5d772fc597b 100644
--- a/intern/cycles/kernel/split/kernel_shadow_blocked_ao.h
+++ b/intern/cycles/kernel/split/kernel_shadow_blocked_ao.h
@@ -19,35 +19,40 @@ CCL_NAMESPACE_BEGIN
 /* Shadow ray cast for AO. */
 ccl_device void kernel_shadow_blocked_ao(KernelGlobals *kg)
 {
-	unsigned int ao_queue_length = kernel_split_params.queue_index[QUEUE_SHADOW_RAY_CAST_AO_RAYS];
-	ccl_barrier(CCL_LOCAL_MEM_FENCE);
+  unsigned int ao_queue_length = kernel_split_params.queue_index[QUEUE_SHADOW_RAY_CAST_AO_RAYS];
+  ccl_barrier(CCL_LOCAL_MEM_FENCE);
 
-	int ray_index = QUEUE_EMPTY_SLOT;
-	int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
-	if(thread_index < ao_queue_length) {
-		ray_index = get_ray_index(kg, thread_index, QUEUE_SHADOW_RAY_CAST_AO_RAYS,
-		                          kernel_split_state.queue_data, kernel_split_params.queue_size, 1);
-	}
+  int ray_index = QUEUE_EMPTY_SLOT;
+  int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+  if (thread_index < ao_queue_length) {
+    ray_index = get_ray_index(kg,
+                              thread_index,
+                              QUEUE_SHADOW_RAY_CAST_AO_RAYS,
+                              kernel_split_state.queue_data,
+                              kernel_split_params.queue_size,
+                              1);
+  }
 
-	if(ray_index == QUEUE_EMPTY_SLOT) {
-		return;
-	}
+  if (ray_index == QUEUE_EMPTY_SLOT) {
+    return;
+  }
 
-	ShaderData *sd = kernel_split_sd(sd, ray_index);
-	ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
-	PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
-	ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
-	float3 throughput = kernel_split_state.throughput[ray_index];
+  ShaderData *sd = kernel_split_sd(sd, ray_index);
+  ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
+  PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+  ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+  float3 throughput = kernel_split_state.throughput[ray_index];
 
 #ifdef __BRANCHED_PATH__
-	if(!kernel_data.integrator.branched || IS_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_INDIRECT)) {
+  if (!kernel_data.integrator.branched ||
+      IS_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_INDIRECT)) {
 #endif
-		kernel_path_ao(kg, sd, emission_sd, L, state, throughput, shader_bsdf_alpha(kg, sd));
+    kernel_path_ao(kg, sd, emission_sd, L, state, throughput, shader_bsdf_alpha(kg, sd));
 #ifdef __BRANCHED_PATH__
-	}
-	else {
-		kernel_branched_path_ao(kg, sd, emission_sd, L, state, throughput);
-	}
+  }
+  else {
+    kernel_branched_path_ao(kg, sd, emission_sd, L, state, throughput);
+  }
 #endif
 }
 
diff --git a/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h b/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h
index da072fd5f1a..82990ce9fae 100644
--- a/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h
+++ b/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h
@@ -19,89 +19,80 @@ CCL_NAMESPACE_BEGIN
 /* Shadow ray cast for direct visible light. */
 ccl_device void kernel_shadow_blocked_dl(KernelGlobals *kg)
 {
-	unsigned int dl_queue_length = kernel_split_params.queue_index[QUEUE_SHADOW_RAY_CAST_DL_RAYS];
-	ccl_barrier(CCL_LOCAL_MEM_FENCE);
+  unsigned int dl_queue_length = kernel_split_params.queue_index[QUEUE_SHADOW_RAY_CAST_DL_RAYS];
+  ccl_barrier(CCL_LOCAL_MEM_FENCE);
 
-	int ray_index = QUEUE_EMPTY_SLOT;
-	int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
-	if(thread_index < dl_queue_length) {
-		ray_index = get_ray_index(kg, thread_index, QUEUE_SHADOW_RAY_CAST_DL_RAYS,
-		                          kernel_split_state.queue_data, kernel_split_params.queue_size, 1);
-	}
+  int ray_index = QUEUE_EMPTY_SLOT;
+  int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+  if (thread_index < dl_queue_length) {
+    ray_index = get_ray_index(kg,
+                              thread_index,
+                              QUEUE_SHADOW_RAY_CAST_DL_RAYS,
+                              kernel_split_state.queue_data,
+                              kernel_split_params.queue_size,
+                              1);
+  }
 
 #ifdef __BRANCHED_PATH__
-	/* TODO(mai): move this somewhere else? */
-	if(thread_index == 0) {
-		/* Clear QUEUE_INACTIVE_RAYS before next kernel. */
-		kernel_split_params.queue_index[QUEUE_INACTIVE_RAYS] = 0;
-	}
-#endif  /* __BRANCHED_PATH__ */
+  /* TODO(mai): move this somewhere else? */
+  if (thread_index == 0) {
+    /* Clear QUEUE_INACTIVE_RAYS before next kernel. */
+    kernel_split_params.queue_index[QUEUE_INACTIVE_RAYS] = 0;
+  }
+#endif /* __BRANCHED_PATH__ */
 
-	if(ray_index == QUEUE_EMPTY_SLOT)
-		return;
+  if (ray_index == QUEUE_EMPTY_SLOT)
+    return;
 
-	ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
-	Ray ray = kernel_split_state.light_ray[ray_index];
-	PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
-	ShaderData *sd = kernel_split_sd(sd, ray_index);
-	float3 throughput = kernel_split_state.throughput[ray_index];
+  ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+  Ray ray = kernel_split_state.light_ray[ray_index];
+  PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+  ShaderData *sd = kernel_split_sd(sd, ray_index);
+  float3 throughput = kernel_split_state.throughput[ray_index];
 
-	BsdfEval L_light = kernel_split_state.bsdf_eval[ray_index];
-	ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
-	bool is_lamp = kernel_split_state.is_lamp[ray_index];
+  BsdfEval L_light = kernel_split_state.bsdf_eval[ray_index];
+  ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
+  bool is_lamp = kernel_split_state.is_lamp[ray_index];
 
-#  if defined(__BRANCHED_PATH__) || defined(__SHADOW_TRICKS__)
-	bool use_branched = false;
-	int all = 0;
+#if defined(__BRANCHED_PATH__) || defined(__SHADOW_TRICKS__)
+  bool use_branched = false;
+  int all = 0;
 
-	if(state->flag & PATH_RAY_SHADOW_CATCHER) {
-		use_branched = true;
-		all = 1;
-	}
-#    if defined(__BRANCHED_PATH__)
-	else if(kernel_data.integrator.branched) {
-		use_branched = true;
+  if (state->flag & PATH_RAY_SHADOW_CATCHER) {
+    use_branched = true;
+    all = 1;
+  }
+#  if defined(__BRANCHED_PATH__)
+  else if (kernel_data.integrator.branched) {
+    use_branched = true;
 
-		if(IS_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_INDIRECT)) {
-			all = (kernel_data.integrator.sample_all_lights_indirect);
-		}
-		else
-		{
-			all = (kernel_data.integrator.sample_all_lights_direct);
-		}
-	}
-#    endif  /* __BRANCHED_PATH__ */
+    if (IS_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_INDIRECT)) {
+      all = (kernel_data.integrator.sample_all_lights_indirect);
+    }
+    else {
+      all = (kernel_data.integrator.sample_all_lights_direct);
+    }
+  }
+#  endif /* __BRANCHED_PATH__ */
 
-	if(use_branched) {
-		kernel_branched_path_surface_connect_light(kg,
-		                                           sd,
-		                                           emission_sd,
-		                                           state,
-		                                           throughput,
-		                                           1.0f,
-		                                           L,
-		                                           all);
-	}
-	else
-#  endif  /* defined(__BRANCHED_PATH__) || defined(__SHADOW_TRICKS__)*/
-	{
-		/* trace shadow ray */
-		float3 shadow;
+  if (use_branched) {
+    kernel_branched_path_surface_connect_light(
+        kg, sd, emission_sd, state, throughput, 1.0f, L, all);
+  }
+  else
+#endif /* defined(__BRANCHED_PATH__) || defined(__SHADOW_TRICKS__) */
+  {
+    /* trace shadow ray */
+    float3 shadow;
 
-		if(!shadow_blocked(kg,
-		                   sd,
-		                   emission_sd,
-		                   state,
-		                   &ray,
-		                   &shadow))
-		{
-			/* accumulate */
-			path_radiance_accum_light(L, state, throughput, &L_light, shadow, 1.0f, is_lamp);
-		}
-		else {
-			path_radiance_accum_total_light(L, state, throughput, &L_light);
-		}
-	}
+    if (!shadow_blocked(kg, sd, emission_sd, state, &ray, &shadow)) {
+      /* accumulate */
+      path_radiance_accum_light(L, state, throughput, &L_light, shadow, 1.0f, is_lamp);
+    }
+    else {
+      path_radiance_accum_total_light(L, state, throughput, &L_light);
+    }
+  }
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_split_common.h b/intern/cycles/kernel/split/kernel_split_common.h
index 4b86696691a..384bc952460 100644
--- a/intern/cycles/kernel/split/kernel_split_common.h
+++ b/intern/cycles/kernel/split/kernel_split_common.h
@@ -14,8 +14,8 @@
  * limitations under the License.
  */
 
-#ifndef  __KERNEL_SPLIT_H__
-#define  __KERNEL_SPLIT_H__
+#ifndef __KERNEL_SPLIT_H__
+#define __KERNEL_SPLIT_H__
 
 #include "kernel/kernel_math.h"
 #include "kernel/kernel_types.h"
@@ -57,47 +57,48 @@ CCL_NAMESPACE_BEGIN
 
 ccl_device_inline void kernel_split_path_end(KernelGlobals *kg, int ray_index)
 {
-	ccl_global char *ray_state = kernel_split_state.ray_state;
+  ccl_global char *ray_state = kernel_split_state.ray_state;
 
 #ifdef __BRANCHED_PATH__
 #  ifdef __SUBSURFACE__
-	ccl_addr_space SubsurfaceIndirectRays *ss_indirect = &kernel_split_state.ss_rays[ray_index];
-
-	if(ss_indirect->num_rays) {
-		ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
-	}
-	else
-#  endif  /* __SUBSURFACE__ */
-	if(IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT_SHARED)) {
-		int orig_ray = kernel_split_state.branched_state[ray_index].original_ray;
-
-		PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
-		PathRadiance *orig_ray_L = &kernel_split_state.path_radiance[orig_ray];
-
-		path_radiance_sum_indirect(L);
-		path_radiance_accum_sample(orig_ray_L, L);
-
-		atomic_fetch_and_dec_uint32((ccl_global uint*)&kernel_split_state.branched_state[orig_ray].shared_sample_count);
-
-		ASSIGN_RAY_STATE(ray_state, ray_index, RAY_INACTIVE);
-	}
-	else if(IS_FLAG(ray_state, ray_index, RAY_BRANCHED_LIGHT_INDIRECT)) {
-		ASSIGN_RAY_STATE(ray_state, ray_index, RAY_LIGHT_INDIRECT_NEXT_ITER);
-	}
-	else if(IS_FLAG(ray_state, ray_index, RAY_BRANCHED_VOLUME_INDIRECT)) {
-		ASSIGN_RAY_STATE(ray_state, ray_index, RAY_VOLUME_INDIRECT_NEXT_ITER);
-	}
-	else if(IS_FLAG(ray_state, ray_index, RAY_BRANCHED_SUBSURFACE_INDIRECT)) {
-		ASSIGN_RAY_STATE(ray_state, ray_index, RAY_SUBSURFACE_INDIRECT_NEXT_ITER);
-	}
-	else {
-		ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
-	}
+  ccl_addr_space SubsurfaceIndirectRays *ss_indirect = &kernel_split_state.ss_rays[ray_index];
+
+  if (ss_indirect->num_rays) {
+    ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
+  }
+  else
+#  endif /* __SUBSURFACE__ */
+      if (IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT_SHARED)) {
+    int orig_ray = kernel_split_state.branched_state[ray_index].original_ray;
+
+    PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+    PathRadiance *orig_ray_L = &kernel_split_state.path_radiance[orig_ray];
+
+    path_radiance_sum_indirect(L);
+    path_radiance_accum_sample(orig_ray_L, L);
+
+    atomic_fetch_and_dec_uint32(
+        (ccl_global uint *)&kernel_split_state.branched_state[orig_ray].shared_sample_count);
+
+    ASSIGN_RAY_STATE(ray_state, ray_index, RAY_INACTIVE);
+  }
+  else if (IS_FLAG(ray_state, ray_index, RAY_BRANCHED_LIGHT_INDIRECT)) {
+    ASSIGN_RAY_STATE(ray_state, ray_index, RAY_LIGHT_INDIRECT_NEXT_ITER);
+  }
+  else if (IS_FLAG(ray_state, ray_index, RAY_BRANCHED_VOLUME_INDIRECT)) {
+    ASSIGN_RAY_STATE(ray_state, ray_index, RAY_VOLUME_INDIRECT_NEXT_ITER);
+  }
+  else if (IS_FLAG(ray_state, ray_index, RAY_BRANCHED_SUBSURFACE_INDIRECT)) {
+    ASSIGN_RAY_STATE(ray_state, ray_index, RAY_SUBSURFACE_INDIRECT_NEXT_ITER);
+  }
+  else {
+    ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
+  }
 #else
-	ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
+  ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
 #endif
 }
 
 CCL_NAMESPACE_END
 
-#endif  /* __KERNEL_SPLIT_H__ */
+#endif /* __KERNEL_SPLIT_H__ */
diff --git a/intern/cycles/kernel/split/kernel_split_data.h b/intern/cycles/kernel/split/kernel_split_data.h
index 3f6b3977d79..433b1221a37 100644
--- a/intern/cycles/kernel/split/kernel_split_data.h
+++ b/intern/cycles/kernel/split/kernel_split_data.h
@@ -24,22 +24,22 @@ CCL_NAMESPACE_BEGIN
 
 ccl_device_inline uint64_t split_data_buffer_size(KernelGlobals *kg, size_t num_elements)
 {
-	(void) kg;  /* Unused on CPU. */
+  (void)kg; /* Unused on CPU. */
 
-	uint64_t size = 0;
-#define SPLIT_DATA_ENTRY(type, name, num) + align_up(num_elements * num * sizeof(type), 16)
-	size = size SPLIT_DATA_ENTRIES;
+  uint64_t size = 0;
+#define SPLIT_DATA_ENTRY(type, name, num) +align_up(num_elements *num * sizeof(type), 16)
+  size = size SPLIT_DATA_ENTRIES;
 #undef SPLIT_DATA_ENTRY
 
-	uint64_t closure_size = sizeof(ShaderClosure) * (kernel_data.integrator.max_closures-1);
+  uint64_t closure_size = sizeof(ShaderClosure) * (kernel_data.integrator.max_closures - 1);
 
 #ifdef __BRANCHED_PATH__
-	size += align_up(num_elements * (sizeof(ShaderData) + closure_size), 16);
+  size += align_up(num_elements * (sizeof(ShaderData) + closure_size), 16);
 #endif
 
-	size += align_up(num_elements * (sizeof(ShaderData) + closure_size), 16);
+  size += align_up(num_elements * (sizeof(ShaderData) + closure_size), 16);
 
-	return size;
+  return size;
 }
 
 ccl_device_inline void split_data_init(KernelGlobals *kg,
@@ -48,28 +48,29 @@ ccl_device_inline void split_data_init(KernelGlobals *kg,
                                        ccl_global void *data,
                                        ccl_global char *ray_state)
 {
-	(void) kg;  /* Unused on CPU. */
+  (void)kg; /* Unused on CPU. */
 
-	ccl_global char *p = (ccl_global char*)data;
+  ccl_global char *p = (ccl_global char *)data;
 
 #define SPLIT_DATA_ENTRY(type, name, num) \
-	split_data->name = (type*)p; p += align_up(num_elements * num * sizeof(type), 16);
-	SPLIT_DATA_ENTRIES;
+  split_data->name = (type *)p; \
+  p += align_up(num_elements * num * sizeof(type), 16);
+  SPLIT_DATA_ENTRIES;
 #undef SPLIT_DATA_ENTRY
 
-	uint64_t closure_size = sizeof(ShaderClosure) * (kernel_data.integrator.max_closures-1);
+  uint64_t closure_size = sizeof(ShaderClosure) * (kernel_data.integrator.max_closures - 1);
 
 #ifdef __BRANCHED_PATH__
-	split_data->_branched_state_sd = (ShaderData*)p;
-	p += align_up(num_elements * (sizeof(ShaderData) + closure_size), 16);
+  split_data->_branched_state_sd = (ShaderData *)p;
+  p += align_up(num_elements * (sizeof(ShaderData) + closure_size), 16);
 #endif
 
-	split_data->_sd = (ShaderData*)p;
-	p += align_up(num_elements * (sizeof(ShaderData) + closure_size), 16);
+  split_data->_sd = (ShaderData *)p;
+  p += align_up(num_elements * (sizeof(ShaderData) + closure_size), 16);
 
-	split_data->ray_state = ray_state;
+  split_data->ray_state = ray_state;
 }
 
 CCL_NAMESPACE_END
 
-#endif  /* __KERNEL_SPLIT_DATA_H__ */
+#endif /* __KERNEL_SPLIT_DATA_H__ */
diff --git a/intern/cycles/kernel/split/kernel_split_data_types.h b/intern/cycles/kernel/split/kernel_split_data_types.h
index 83df1e2a0a6..6ff3f5bdb55 100644
--- a/intern/cycles/kernel/split/kernel_split_data_types.h
+++ b/intern/cycles/kernel/split/kernel_split_data_types.h
@@ -22,17 +22,17 @@ CCL_NAMESPACE_BEGIN
 /* parameters used by the split kernels, we use a single struct to avoid passing these to each kernel */
 
 typedef struct SplitParams {
-	WorkTile tile;
-	uint total_work_size;
+  WorkTile tile;
+  uint total_work_size;
 
-	ccl_global unsigned int *work_pools;
+  ccl_global unsigned int *work_pools;
 
-	ccl_global int *queue_index;
-	int queue_size;
-	ccl_global char *use_queues_flag;
+  ccl_global int *queue_index;
+  int queue_size;
+  ccl_global char *use_queues_flag;
 
-	/* Place for storing sd->flag. AMD GPU OpenCL compiler workaround */
-	int dummy_sd_flag;
+  /* Place for storing sd->flag. AMD GPU OpenCL compiler workaround */
+  int dummy_sd_flag;
 } SplitParams;
 
 /* Global memory variables [porting]; These memory is used for
@@ -46,98 +46,98 @@ typedef struct SplitParams {
 #ifdef __BRANCHED_PATH__
 
 typedef ccl_global struct SplitBranchedState {
-	/* various state that must be kept and restored after an indirect loop */
-	PathState path_state;
-	float3 throughput;
-	Ray ray;
+  /* various state that must be kept and restored after an indirect loop */
+  PathState path_state;
+  float3 throughput;
+  Ray ray;
 
-	Intersection isect;
+  Intersection isect;
 
-	char ray_state;
+  char ray_state;
 
-	/* indirect loop state */
-	int next_closure;
-	int next_sample;
+  /* indirect loop state */
+  int next_closure;
+  int next_sample;
 
-#ifdef __SUBSURFACE__
-	int ss_next_closure;
-	int ss_next_sample;
-	int next_hit;
-	int num_hits;
-
-	uint lcg_state;
-	LocalIntersection ss_isect;
-#endif  /*__SUBSURFACE__ */
-
-	int shared_sample_count; /* number of branched samples shared with other threads */
-	int original_ray; /* index of original ray when sharing branched samples */
-	bool waiting_on_shared_samples;
+#  ifdef __SUBSURFACE__
+  int ss_next_closure;
+  int ss_next_sample;
+  int next_hit;
+  int num_hits;
+
+  uint lcg_state;
+  LocalIntersection ss_isect;
+#  endif /* __SUBSURFACE__ */
+
+  int shared_sample_count; /* number of branched samples shared with other threads */
+  int original_ray;        /* index of original ray when sharing branched samples */
+  bool waiting_on_shared_samples;
 } SplitBranchedState;
 
-#define SPLIT_DATA_BRANCHED_ENTRIES \
-	SPLIT_DATA_ENTRY( SplitBranchedState, branched_state, 1) \
-	SPLIT_DATA_ENTRY(ShaderData, _branched_state_sd, 0)
+#  define SPLIT_DATA_BRANCHED_ENTRIES \
+    SPLIT_DATA_ENTRY(SplitBranchedState, branched_state, 1) \
+    SPLIT_DATA_ENTRY(ShaderData, _branched_state_sd, 0)
 #else
-#define SPLIT_DATA_BRANCHED_ENTRIES
-#endif  /* __BRANCHED_PATH__ */
+#  define SPLIT_DATA_BRANCHED_ENTRIES
+#endif /* __BRANCHED_PATH__ */
 
 #ifdef __SUBSURFACE__
 #  define SPLIT_DATA_SUBSURFACE_ENTRIES \
-	SPLIT_DATA_ENTRY(ccl_global SubsurfaceIndirectRays, ss_rays, 1)
+    SPLIT_DATA_ENTRY(ccl_global SubsurfaceIndirectRays, ss_rays, 1)
 #else
 #  define SPLIT_DATA_SUBSURFACE_ENTRIES
-#endif  /* __SUBSURFACE__ */
+#endif /* __SUBSURFACE__ */
 
 #ifdef __VOLUME__
-#  define SPLIT_DATA_VOLUME_ENTRIES \
-	SPLIT_DATA_ENTRY(ccl_global PathState, state_shadow, 1)
+#  define SPLIT_DATA_VOLUME_ENTRIES SPLIT_DATA_ENTRY(ccl_global PathState, state_shadow, 1)
 #else
 #  define SPLIT_DATA_VOLUME_ENTRIES
-#endif  /* __VOLUME__ */
+#endif /* __VOLUME__ */
 
 #define SPLIT_DATA_ENTRIES \
-	SPLIT_DATA_ENTRY(ccl_global float3, throughput, 1) \
-	SPLIT_DATA_ENTRY(PathRadiance, path_radiance, 1) \
-	SPLIT_DATA_ENTRY(ccl_global Ray, ray, 1) \
-	SPLIT_DATA_ENTRY(ccl_global PathState, path_state, 1) \
-	SPLIT_DATA_ENTRY(ccl_global Intersection, isect, 1) \
-	SPLIT_DATA_ENTRY(ccl_global BsdfEval, bsdf_eval, 1) \
-	SPLIT_DATA_ENTRY(ccl_global int, is_lamp, 1) \
-	SPLIT_DATA_ENTRY(ccl_global Ray, light_ray, 1) \
-	SPLIT_DATA_ENTRY(ccl_global int, queue_data, (NUM_QUEUES*2)) /* TODO(mai): this is too large? */ \
-	SPLIT_DATA_ENTRY(ccl_global uint, buffer_offset, 1) \
-	SPLIT_DATA_ENTRY(ShaderDataTinyStorage, sd_DL_shadow, 1) \
-	SPLIT_DATA_SUBSURFACE_ENTRIES \
-	SPLIT_DATA_VOLUME_ENTRIES \
-	SPLIT_DATA_BRANCHED_ENTRIES \
-	SPLIT_DATA_ENTRY(ShaderData, _sd, 0)
+  SPLIT_DATA_ENTRY(ccl_global float3, throughput, 1) \
+  SPLIT_DATA_ENTRY(PathRadiance, path_radiance, 1) \
+  SPLIT_DATA_ENTRY(ccl_global Ray, ray, 1) \
+  SPLIT_DATA_ENTRY(ccl_global PathState, path_state, 1) \
+  SPLIT_DATA_ENTRY(ccl_global Intersection, isect, 1) \
+  SPLIT_DATA_ENTRY(ccl_global BsdfEval, bsdf_eval, 1) \
+  SPLIT_DATA_ENTRY(ccl_global int, is_lamp, 1) \
+  SPLIT_DATA_ENTRY(ccl_global Ray, light_ray, 1) \
+  SPLIT_DATA_ENTRY( \
+      ccl_global int, queue_data, (NUM_QUEUES * 2)) /* TODO(mai): this is too large? */ \
+  SPLIT_DATA_ENTRY(ccl_global uint, buffer_offset, 1) \
+  SPLIT_DATA_ENTRY(ShaderDataTinyStorage, sd_DL_shadow, 1) \
+  SPLIT_DATA_SUBSURFACE_ENTRIES \
+  SPLIT_DATA_VOLUME_ENTRIES \
+  SPLIT_DATA_BRANCHED_ENTRIES \
+  SPLIT_DATA_ENTRY(ShaderData, _sd, 0)
 
 /* entries to be copied to inactive rays when sharing branched samples (TODO: which are actually needed?) */
 #define SPLIT_DATA_ENTRIES_BRANCHED_SHARED \
-	SPLIT_DATA_ENTRY(ccl_global float3, throughput, 1) \
-	SPLIT_DATA_ENTRY(PathRadiance, path_radiance, 1) \
-	SPLIT_DATA_ENTRY(ccl_global Ray, ray, 1) \
-	SPLIT_DATA_ENTRY(ccl_global PathState, path_state, 1) \
-	SPLIT_DATA_ENTRY(ccl_global Intersection, isect, 1) \
-	SPLIT_DATA_ENTRY(ccl_global BsdfEval, bsdf_eval, 1) \
-	SPLIT_DATA_ENTRY(ccl_global int, is_lamp, 1) \
-	SPLIT_DATA_ENTRY(ccl_global Ray, light_ray, 1) \
-	SPLIT_DATA_ENTRY(ShaderDataTinyStorage, sd_DL_shadow, 1) \
-	SPLIT_DATA_SUBSURFACE_ENTRIES \
-	SPLIT_DATA_VOLUME_ENTRIES \
-	SPLIT_DATA_BRANCHED_ENTRIES \
-	SPLIT_DATA_ENTRY(ShaderData, _sd, 0)
+  SPLIT_DATA_ENTRY(ccl_global float3, throughput, 1) \
+  SPLIT_DATA_ENTRY(PathRadiance, path_radiance, 1) \
+  SPLIT_DATA_ENTRY(ccl_global Ray, ray, 1) \
+  SPLIT_DATA_ENTRY(ccl_global PathState, path_state, 1) \
+  SPLIT_DATA_ENTRY(ccl_global Intersection, isect, 1) \
+  SPLIT_DATA_ENTRY(ccl_global BsdfEval, bsdf_eval, 1) \
+  SPLIT_DATA_ENTRY(ccl_global int, is_lamp, 1) \
+  SPLIT_DATA_ENTRY(ccl_global Ray, light_ray, 1) \
+  SPLIT_DATA_ENTRY(ShaderDataTinyStorage, sd_DL_shadow, 1) \
+  SPLIT_DATA_SUBSURFACE_ENTRIES \
+  SPLIT_DATA_VOLUME_ENTRIES \
+  SPLIT_DATA_BRANCHED_ENTRIES \
+  SPLIT_DATA_ENTRY(ShaderData, _sd, 0)
 
 /* struct that holds pointers to data in the shared state buffer */
 typedef struct SplitData {
 #define SPLIT_DATA_ENTRY(type, name, num) type *name;
-	SPLIT_DATA_ENTRIES
+  SPLIT_DATA_ENTRIES
 #undef SPLIT_DATA_ENTRY
 
-	/* this is actually in a separate buffer from the rest of the split state data (so it can be read back from
-	 * the host easily) but is still used the same as the other data so we have it here in this struct as well
-	 */
-	ccl_global char *ray_state;
+  /* this is actually in a separate buffer from the rest of the split state data (so it can be read back from
+   * the host easily) but is still used the same as the other data so we have it here in this struct as well
+   */
+  ccl_global char *ray_state;
 } SplitData;
 
 #ifndef __KERNEL_CUDA__
@@ -148,30 +148,30 @@ __device__ SplitData __split_data;
 #  define kernel_split_state (__split_data)
 __device__ SplitParams __split_param_data;
 #  define kernel_split_params (__split_param_data)
-#endif  /* __KERNEL_CUDA__ */
+#endif /* __KERNEL_CUDA__ */
 
-#define kernel_split_sd(sd, ray_index) ((ShaderData*) \
-	( \
-		((ccl_global char*)kernel_split_state._##sd) + \
-		(sizeof(ShaderData) + sizeof(ShaderClosure)*(kernel_data.integrator.max_closures-1)) * (ray_index) \
-	))
+#define kernel_split_sd(sd, ray_index) \
+  ((ShaderData *)(((ccl_global char *)kernel_split_state._##sd) + \
+                  (sizeof(ShaderData) + \
+                   sizeof(ShaderClosure) * (kernel_data.integrator.max_closures - 1)) * \
+                      (ray_index)))
 
 /* Local storage for queue_enqueue kernel. */
 typedef struct QueueEnqueueLocals {
-	uint queue_atomics[2];
+  uint queue_atomics[2];
 } QueueEnqueueLocals;
 
 /* Local storage for holdout_emission_blurring_pathtermination_ao kernel. */
 typedef struct BackgroundAOLocals {
-	uint queue_atomics_bg;
-	uint queue_atomics_ao;
+  uint queue_atomics_bg;
+  uint queue_atomics_ao;
 } BackgroundAOLocals;
 
 typedef struct ShaderSortLocals {
-	uint local_value[SHADER_SORT_BLOCK_SIZE];
-	ushort local_index[SHADER_SORT_BLOCK_SIZE];
+  uint local_value[SHADER_SORT_BLOCK_SIZE];
+  ushort local_index[SHADER_SORT_BLOCK_SIZE];
 } ShaderSortLocals;
 
 CCL_NAMESPACE_END
 
-#endif  /* __KERNEL_SPLIT_DATA_TYPES_H__ */
+#endif /* __KERNEL_SPLIT_DATA_TYPES_H__ */
diff --git a/intern/cycles/kernel/split/kernel_subsurface_scatter.h b/intern/cycles/kernel/split/kernel_subsurface_scatter.h
index 08769fe303b..ba06ae3bc53 100644
--- a/intern/cycles/kernel/split/kernel_subsurface_scatter.h
+++ b/intern/cycles/kernel/split/kernel_subsurface_scatter.h
@@ -18,276 +18,247 @@ CCL_NAMESPACE_BEGIN
 
 #if defined(__BRANCHED_PATH__) && defined(__SUBSURFACE__)
 
-ccl_device_inline void kernel_split_branched_path_subsurface_indirect_light_init(KernelGlobals *kg, int ray_index)
+ccl_device_inline void kernel_split_branched_path_subsurface_indirect_light_init(KernelGlobals *kg,
+                                                                                 int ray_index)
 {
-	kernel_split_branched_path_indirect_loop_init(kg, ray_index);
+  kernel_split_branched_path_indirect_loop_init(kg, ray_index);
 
-	SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
+  SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
 
-	branched_state->ss_next_closure = 0;
-	branched_state->ss_next_sample = 0;
+  branched_state->ss_next_closure = 0;
+  branched_state->ss_next_sample = 0;
 
-	branched_state->num_hits = 0;
-	branched_state->next_hit = 0;
+  branched_state->num_hits = 0;
+  branched_state->next_hit = 0;
 
-	ADD_RAY_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_SUBSURFACE_INDIRECT);
+  ADD_RAY_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_SUBSURFACE_INDIRECT);
 }
 
-ccl_device_noinline bool kernel_split_branched_path_subsurface_indirect_light_iter(KernelGlobals *kg, int ray_index)
+ccl_device_noinline bool kernel_split_branched_path_subsurface_indirect_light_iter(
+    KernelGlobals *kg, int ray_index)
 {
-	SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
-
-	ShaderData *sd = kernel_split_sd(branched_state_sd, ray_index);
-	PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
-	ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
-
-	for(int i = branched_state->ss_next_closure; i < sd->num_closure; i++) {
-		ShaderClosure *sc = &sd->closure[i];
-
-		if(!CLOSURE_IS_BSSRDF(sc->type))
-			continue;
-
-		/* Closure memory will be overwritten, so read required variables now. */
-		Bssrdf *bssrdf = (Bssrdf *)sc;
-		ClosureType bssrdf_type = sc->type;
-		float bssrdf_roughness = bssrdf->roughness;
-
-		/* set up random number generator */
-		if(branched_state->ss_next_sample == 0 && branched_state->next_hit == 0 &&
-		   branched_state->next_closure == 0 && branched_state->next_sample == 0)
-		{
-			branched_state->lcg_state = lcg_state_init_addrspace(&branched_state->path_state,
-			                                                     0x68bc21eb);
-		}
-		int num_samples = kernel_data.integrator.subsurface_samples * 3;
-		float num_samples_inv = 1.0f/num_samples;
-		uint bssrdf_rng_hash = cmj_hash(branched_state->path_state.rng_hash, i);
-
-		/* do subsurface scatter step with copy of shader data, this will
-		 * replace the BSSRDF with a diffuse BSDF closure */
-		for(int j = branched_state->ss_next_sample; j < num_samples; j++) {
-			ccl_global PathState *hit_state = &kernel_split_state.path_state[ray_index];
-			*hit_state = branched_state->path_state;
-			hit_state->rng_hash = bssrdf_rng_hash;
-			path_state_branch(hit_state, j, num_samples);
-
-			ccl_global LocalIntersection *ss_isect = &branched_state->ss_isect;
-			float bssrdf_u, bssrdf_v;
-			path_branched_rng_2D(kg,
-			                     bssrdf_rng_hash,
-			                     hit_state,
-			                     j,
-			                     num_samples,
-			                     PRNG_BSDF_U,
-			                     &bssrdf_u,
-			                     &bssrdf_v);
-
-			/* intersection is expensive so avoid doing multiple times for the same input */
-			if(branched_state->next_hit == 0 && branched_state->next_closure == 0 && branched_state->next_sample == 0) {
-				uint lcg_state = branched_state->lcg_state;
-				LocalIntersection ss_isect_private;
-
-				branched_state->num_hits = subsurface_scatter_multi_intersect(kg,
-				                                                              &ss_isect_private,
-				                                                              sd,
-				                                                              hit_state,
-				                                                              sc,
-				                                                              &lcg_state,
-				                                                              bssrdf_u, bssrdf_v,
-				                                                              true);
-
-				branched_state->lcg_state = lcg_state;
-				*ss_isect = ss_isect_private;
-			}
-
-			hit_state->rng_offset += PRNG_BOUNCE_NUM;
-
-#ifdef __VOLUME__
-			Ray volume_ray = branched_state->ray;
-			bool need_update_volume_stack =
-			        kernel_data.integrator.use_volumes &&
-			        sd->object_flag & SD_OBJECT_INTERSECTS_VOLUME;
-#endif  /* __VOLUME__ */
-
-			/* compute lighting with the BSDF closure */
-			for(int hit = branched_state->next_hit; hit < branched_state->num_hits; hit++) {
-				ShaderData *bssrdf_sd = kernel_split_sd(sd, ray_index);
-				*bssrdf_sd = *sd; /* note: copy happens each iteration of inner loop, this is
-				                   * important as the indirect path will write into bssrdf_sd */
-
-				LocalIntersection ss_isect_private = *ss_isect;
-				subsurface_scatter_multi_setup(kg,
-				                               &ss_isect_private,
-				                               hit,
-				                               bssrdf_sd,
-				                               hit_state,
-				                               bssrdf_type,
-				                               bssrdf_roughness);
-				*ss_isect = ss_isect_private;
-
-#ifdef __VOLUME__
-				if(need_update_volume_stack) {
-					/* Setup ray from previous surface point to the new one. */
-					float3 P = ray_offset(bssrdf_sd->P, -bssrdf_sd->Ng);
-					volume_ray.D = normalize_len(P - volume_ray.P, &volume_ray.t);
-
-					for(int k = 0; k < VOLUME_STACK_SIZE; k++) {
-						hit_state->volume_stack[k] = branched_state->path_state.volume_stack[k];
-					}
-
-					kernel_volume_stack_update_for_subsurface(kg,
-					                                          emission_sd,
-					                                          &volume_ray,
-					                                          hit_state->volume_stack);
-				}
-#endif  /* __VOLUME__ */
-
-#ifdef __EMISSION__
-				if(branched_state->next_closure == 0 && branched_state->next_sample == 0) {
-					/* direct light */
-					if(kernel_data.integrator.use_direct_light) {
-						int all = (kernel_data.integrator.sample_all_lights_direct) ||
-							      (hit_state->flag & PATH_RAY_SHADOW_CATCHER);
-						kernel_branched_path_surface_connect_light(kg,
-						                                           bssrdf_sd,
-						                                           emission_sd,
-						                                           hit_state,
-						                                           branched_state->throughput,
-						                                           num_samples_inv,
-						                                           L,
-						                                           all);
-					}
-				}
-#endif  /* __EMISSION__ */
-
-				/* indirect light */
-				if(kernel_split_branched_path_surface_indirect_light_iter(kg,
-				                                                          ray_index,
-				                                                          num_samples_inv,
-				                                                          bssrdf_sd,
-				                                                          false,
-				                                                          false))
-				{
-					branched_state->ss_next_closure = i;
-					branched_state->ss_next_sample = j;
-					branched_state->next_hit = hit;
-
-					return true;
-				}
-
-				branched_state->next_closure = 0;
-			}
-
-			branched_state->next_hit = 0;
-		}
-
-		branched_state->ss_next_sample = 0;
-	}
-
-	branched_state->ss_next_closure = sd->num_closure;
-
-	branched_state->waiting_on_shared_samples = (branched_state->shared_sample_count > 0);
-	if(branched_state->waiting_on_shared_samples) {
-		return true;
-	}
-
-	kernel_split_branched_path_indirect_loop_end(kg, ray_index);
-
-	return false;
+  SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
+
+  ShaderData *sd = kernel_split_sd(branched_state_sd, ray_index);
+  PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+  ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
+
+  for (int i = branched_state->ss_next_closure; i < sd->num_closure; i++) {
+    ShaderClosure *sc = &sd->closure[i];
+
+    if (!CLOSURE_IS_BSSRDF(sc->type))
+      continue;
+
+    /* Closure memory will be overwritten, so read required variables now. */
+    Bssrdf *bssrdf = (Bssrdf *)sc;
+    ClosureType bssrdf_type = sc->type;
+    float bssrdf_roughness = bssrdf->roughness;
+
+    /* set up random number generator */
+    if (branched_state->ss_next_sample == 0 && branched_state->next_hit == 0 &&
+        branched_state->next_closure == 0 && branched_state->next_sample == 0) {
+      branched_state->lcg_state = lcg_state_init_addrspace(&branched_state->path_state,
+                                                           0x68bc21eb);
+    }
+    int num_samples = kernel_data.integrator.subsurface_samples * 3;
+    float num_samples_inv = 1.0f / num_samples;
+    uint bssrdf_rng_hash = cmj_hash(branched_state->path_state.rng_hash, i);
+
+    /* do subsurface scatter step with copy of shader data, this will
+     * replace the BSSRDF with a diffuse BSDF closure */
+    for (int j = branched_state->ss_next_sample; j < num_samples; j++) {
+      ccl_global PathState *hit_state = &kernel_split_state.path_state[ray_index];
+      *hit_state = branched_state->path_state;
+      hit_state->rng_hash = bssrdf_rng_hash;
+      path_state_branch(hit_state, j, num_samples);
+
+      ccl_global LocalIntersection *ss_isect = &branched_state->ss_isect;
+      float bssrdf_u, bssrdf_v;
+      path_branched_rng_2D(
+          kg, bssrdf_rng_hash, hit_state, j, num_samples, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v);
+
+      /* intersection is expensive so avoid doing multiple times for the same input */
+      if (branched_state->next_hit == 0 && branched_state->next_closure == 0 &&
+          branched_state->next_sample == 0) {
+        uint lcg_state = branched_state->lcg_state;
+        LocalIntersection ss_isect_private;
+
+        branched_state->num_hits = subsurface_scatter_multi_intersect(
+            kg, &ss_isect_private, sd, hit_state, sc, &lcg_state, bssrdf_u, bssrdf_v, true);
+
+        branched_state->lcg_state = lcg_state;
+        *ss_isect = ss_isect_private;
+      }
+
+      hit_state->rng_offset += PRNG_BOUNCE_NUM;
+
+#  ifdef __VOLUME__
+      Ray volume_ray = branched_state->ray;
+      bool need_update_volume_stack = kernel_data.integrator.use_volumes &&
+                                      sd->object_flag & SD_OBJECT_INTERSECTS_VOLUME;
+#  endif /* __VOLUME__ */
+
+      /* compute lighting with the BSDF closure */
+      for (int hit = branched_state->next_hit; hit < branched_state->num_hits; hit++) {
+        ShaderData *bssrdf_sd = kernel_split_sd(sd, ray_index);
+        *bssrdf_sd = *sd; /* note: copy happens each iteration of inner loop, this is
+                           * important as the indirect path will write into bssrdf_sd */
+
+        LocalIntersection ss_isect_private = *ss_isect;
+        subsurface_scatter_multi_setup(
+            kg, &ss_isect_private, hit, bssrdf_sd, hit_state, bssrdf_type, bssrdf_roughness);
+        *ss_isect = ss_isect_private;
+
+#  ifdef __VOLUME__
+        if (need_update_volume_stack) {
+          /* Setup ray from previous surface point to the new one. */
+          float3 P = ray_offset(bssrdf_sd->P, -bssrdf_sd->Ng);
+          volume_ray.D = normalize_len(P - volume_ray.P, &volume_ray.t);
+
+          for (int k = 0; k < VOLUME_STACK_SIZE; k++) {
+            hit_state->volume_stack[k] = branched_state->path_state.volume_stack[k];
+          }
+
+          kernel_volume_stack_update_for_subsurface(
+              kg, emission_sd, &volume_ray, hit_state->volume_stack);
+        }
+#  endif /* __VOLUME__ */
+
+#  ifdef __EMISSION__
+        if (branched_state->next_closure == 0 && branched_state->next_sample == 0) {
+          /* direct light */
+          if (kernel_data.integrator.use_direct_light) {
+            int all = (kernel_data.integrator.sample_all_lights_direct) ||
+                      (hit_state->flag & PATH_RAY_SHADOW_CATCHER);
+            kernel_branched_path_surface_connect_light(kg,
+                                                       bssrdf_sd,
+                                                       emission_sd,
+                                                       hit_state,
+                                                       branched_state->throughput,
+                                                       num_samples_inv,
+                                                       L,
+                                                       all);
+          }
+        }
+#  endif /* __EMISSION__ */
+
+        /* indirect light */
+        if (kernel_split_branched_path_surface_indirect_light_iter(
+                kg, ray_index, num_samples_inv, bssrdf_sd, false, false)) {
+          branched_state->ss_next_closure = i;
+          branched_state->ss_next_sample = j;
+          branched_state->next_hit = hit;
+
+          return true;
+        }
+
+        branched_state->next_closure = 0;
+      }
+
+      branched_state->next_hit = 0;
+    }
+
+    branched_state->ss_next_sample = 0;
+  }
+
+  branched_state->ss_next_closure = sd->num_closure;
+
+  branched_state->waiting_on_shared_samples = (branched_state->shared_sample_count > 0);
+  if (branched_state->waiting_on_shared_samples) {
+    return true;
+  }
+
+  kernel_split_branched_path_indirect_loop_end(kg, ray_index);
+
+  return false;
 }
 
-#endif  /* __BRANCHED_PATH__ && __SUBSURFACE__ */
+#endif /* __BRANCHED_PATH__ && __SUBSURFACE__ */
 
 ccl_device void kernel_subsurface_scatter(KernelGlobals *kg)
 {
-	int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
-	if(thread_index == 0) {
-		/* We will empty both queues in this kernel. */
-		kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0;
-		kernel_split_params.queue_index[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = 0;
-	}
-
-	int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
-	ray_index = get_ray_index(kg, ray_index,
-	                          QUEUE_ACTIVE_AND_REGENERATED_RAYS,
-	                          kernel_split_state.queue_data,
-	                          kernel_split_params.queue_size,
-	                          1);
-	get_ray_index(kg, thread_index,
-	              QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
-	              kernel_split_state.queue_data,
-	              kernel_split_params.queue_size,
-	              1);
+  int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+  if (thread_index == 0) {
+    /* We will empty both queues in this kernel. */
+    kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0;
+    kernel_split_params.queue_index[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = 0;
+  }
+
+  int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+  ray_index = get_ray_index(kg,
+                            ray_index,
+                            QUEUE_ACTIVE_AND_REGENERATED_RAYS,
+                            kernel_split_state.queue_data,
+                            kernel_split_params.queue_size,
+                            1);
+  get_ray_index(kg,
+                thread_index,
+                QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
+                kernel_split_state.queue_data,
+                kernel_split_params.queue_size,
+                1);
 
 #ifdef __SUBSURFACE__
-	ccl_global char *ray_state = kernel_split_state.ray_state;
-
-	if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
-		ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
-		PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
-		ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
-		ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
-		ccl_global SubsurfaceIndirectRays *ss_indirect = &kernel_split_state.ss_rays[ray_index];
-		ShaderData *sd = kernel_split_sd(sd, ray_index);
-		ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
-
-		if(sd->flag & SD_BSSRDF) {
-
-#ifdef __BRANCHED_PATH__
-			if(!kernel_data.integrator.branched ||
-			   IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT))
-			{
-#endif
-				if(kernel_path_subsurface_scatter(kg,
-				                                  sd,
-				                                  emission_sd,
-				                                  L,
-				                                  state,
-				                                  ray,
-				                                  throughput,
-				                                  ss_indirect))
-				{
-					kernel_split_path_end(kg, ray_index);
-				}
-#ifdef __BRANCHED_PATH__
-			}
-			else {
-				kernel_split_branched_path_subsurface_indirect_light_init(kg, ray_index);
-
-				if(kernel_split_branched_path_subsurface_indirect_light_iter(kg, ray_index)) {
-					ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
-				}
-			}
-#endif
-		}
-	}
+  ccl_global char *ray_state = kernel_split_state.ray_state;
+
+  if (IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
+    ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+    PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+    ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
+    ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
+    ccl_global SubsurfaceIndirectRays *ss_indirect = &kernel_split_state.ss_rays[ray_index];
+    ShaderData *sd = kernel_split_sd(sd, ray_index);
+    ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
+
+    if (sd->flag & SD_BSSRDF) {
 
 #  ifdef __BRANCHED_PATH__
-	if(ccl_global_id(0) == 0 && ccl_global_id(1) == 0) {
-		kernel_split_params.queue_index[QUEUE_SUBSURFACE_INDIRECT_ITER] = 0;
-	}
-
-	/* iter loop */
-	ray_index = get_ray_index(kg, ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0),
-	                          QUEUE_SUBSURFACE_INDIRECT_ITER,
-	                          kernel_split_state.queue_data,
-	                          kernel_split_params.queue_size,
-	                          1);
-
-	if(IS_STATE(ray_state, ray_index, RAY_SUBSURFACE_INDIRECT_NEXT_ITER)) {
-		/* for render passes, sum and reset indirect light pass variables
-		 * for the next samples */
-		path_radiance_sum_indirect(&kernel_split_state.path_radiance[ray_index]);
-		path_radiance_reset_indirect(&kernel_split_state.path_radiance[ray_index]);
-
-		if(kernel_split_branched_path_subsurface_indirect_light_iter(kg, ray_index)) {
-			ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
-		}
-	}
-#  endif  /* __BRANCHED_PATH__ */
-
-#endif  /* __SUBSURFACE__ */
+      if (!kernel_data.integrator.branched ||
+          IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT)) {
+#  endif
+        if (kernel_path_subsurface_scatter(
+                kg, sd, emission_sd, L, state, ray, throughput, ss_indirect)) {
+          kernel_split_path_end(kg, ray_index);
+        }
+#  ifdef __BRANCHED_PATH__
+      }
+      else {
+        kernel_split_branched_path_subsurface_indirect_light_init(kg, ray_index);
+
+        if (kernel_split_branched_path_subsurface_indirect_light_iter(kg, ray_index)) {
+          ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
+        }
+      }
+#  endif
+    }
+  }
 
+#  ifdef __BRANCHED_PATH__
+  if (ccl_global_id(0) == 0 && ccl_global_id(1) == 0) {
+    kernel_split_params.queue_index[QUEUE_SUBSURFACE_INDIRECT_ITER] = 0;
+  }
+
+  /* iter loop */
+  ray_index = get_ray_index(kg,
+                            ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0),
+                            QUEUE_SUBSURFACE_INDIRECT_ITER,
+                            kernel_split_state.queue_data,
+                            kernel_split_params.queue_size,
+                            1);
+
+  if (IS_STATE(ray_state, ray_index, RAY_SUBSURFACE_INDIRECT_NEXT_ITER)) {
+    /* for render passes, sum and reset indirect light pass variables
+     * for the next samples */
+    path_radiance_sum_indirect(&kernel_split_state.path_radiance[ray_index]);
+    path_radiance_reset_indirect(&kernel_split_state.path_radiance[ray_index]);
+
+    if (kernel_split_branched_path_subsurface_indirect_light_iter(kg, ray_index)) {
+      ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
+    }
+  }
+#  endif /* __BRANCHED_PATH__ */
+
+#endif /* __SUBSURFACE__ */
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm.h b/intern/cycles/kernel/svm/svm.h
index ccb9aef7a5b..4a386afa5de 100644
--- a/intern/cycles/kernel/svm/svm.h
+++ b/intern/cycles/kernel/svm/svm.h
@@ -46,92 +46,102 @@ CCL_NAMESPACE_BEGIN
 
 ccl_device_inline float3 stack_load_float3(float *stack, uint a)
 {
-	kernel_assert(a+2 < SVM_STACK_SIZE);
+  kernel_assert(a + 2 < SVM_STACK_SIZE);
 
-	return make_float3(stack[a+0], stack[a+1], stack[a+2]);
+  return make_float3(stack[a + 0], stack[a + 1], stack[a + 2]);
 }
 
 ccl_device_inline void stack_store_float3(float *stack, uint a, float3 f)
 {
-	kernel_assert(a+2 < SVM_STACK_SIZE);
+  kernel_assert(a + 2 < SVM_STACK_SIZE);
 
-	stack[a+0] = f.x;
-	stack[a+1] = f.y;
-	stack[a+2] = f.z;
+  stack[a + 0] = f.x;
+  stack[a + 1] = f.y;
+  stack[a + 2] = f.z;
 }
 
 ccl_device_inline float stack_load_float(float *stack, uint a)
 {
-	kernel_assert(a < SVM_STACK_SIZE);
+  kernel_assert(a < SVM_STACK_SIZE);
 
-	return stack[a];
+  return stack[a];
 }
 
 ccl_device_inline float stack_load_float_default(float *stack, uint a, uint value)
 {
-	return (a == (uint)SVM_STACK_INVALID)? __uint_as_float(value): stack_load_float(stack, a);
+  return (a == (uint)SVM_STACK_INVALID) ? __uint_as_float(value) : stack_load_float(stack, a);
 }
 
 ccl_device_inline void stack_store_float(float *stack, uint a, float f)
 {
-	kernel_assert(a < SVM_STACK_SIZE);
+  kernel_assert(a < SVM_STACK_SIZE);
 
-	stack[a] = f;
+  stack[a] = f;
 }
 
 ccl_device_inline int stack_load_int(float *stack, uint a)
 {
-	kernel_assert(a < SVM_STACK_SIZE);
+  kernel_assert(a < SVM_STACK_SIZE);
 
-	return __float_as_int(stack[a]);
+  return __float_as_int(stack[a]);
 }
 
 ccl_device_inline int stack_load_int_default(float *stack, uint a, uint value)
 {
-	return (a == (uint)SVM_STACK_INVALID)? (int)value: stack_load_int(stack, a);
+  return (a == (uint)SVM_STACK_INVALID) ? (int)value : stack_load_int(stack, a);
 }
 
 ccl_device_inline void stack_store_int(float *stack, uint a, int i)
 {
-	kernel_assert(a < SVM_STACK_SIZE);
+  kernel_assert(a < SVM_STACK_SIZE);
 
-	stack[a] = __int_as_float(i);
+  stack[a] = __int_as_float(i);
 }
 
 ccl_device_inline bool stack_valid(uint a)
 {
-	return a != (uint)SVM_STACK_INVALID;
+  return a != (uint)SVM_STACK_INVALID;
 }
 
 /* Reading Nodes */
 
 ccl_device_inline uint4 read_node(KernelGlobals *kg, int *offset)
 {
-	uint4 node = kernel_tex_fetch(__svm_nodes, *offset);
-	(*offset)++;
-	return node;
+  uint4 node = kernel_tex_fetch(__svm_nodes, *offset);
+  (*offset)++;
+  return node;
 }
 
 ccl_device_inline float4 read_node_float(KernelGlobals *kg, int *offset)
 {
-	uint4 node = kernel_tex_fetch(__svm_nodes, *offset);
-	float4 f = make_float4(__uint_as_float(node.x), __uint_as_float(node.y), __uint_as_float(node.z), __uint_as_float(node.w));
-	(*offset)++;
-	return f;
+  uint4 node = kernel_tex_fetch(__svm_nodes, *offset);
+  float4 f = make_float4(__uint_as_float(node.x),
+                         __uint_as_float(node.y),
+                         __uint_as_float(node.z),
+                         __uint_as_float(node.w));
+  (*offset)++;
+  return f;
 }
 
 ccl_device_inline float4 fetch_node_float(KernelGlobals *kg, int offset)
 {
-	uint4 node = kernel_tex_fetch(__svm_nodes, offset);
-	return make_float4(__uint_as_float(node.x), __uint_as_float(node.y), __uint_as_float(node.z), __uint_as_float(node.w));
+  uint4 node = kernel_tex_fetch(__svm_nodes, offset);
+  return make_float4(__uint_as_float(node.x),
+                     __uint_as_float(node.y),
+                     __uint_as_float(node.z),
+                     __uint_as_float(node.w));
 }
 
 ccl_device_inline void decode_node_uchar4(uint i, uint *x, uint *y, uint *z, uint *w)
 {
-	if(x) *x = (i & 0xFF);
-	if(y) *y = ((i >> 8) & 0xFF);
-	if(z) *z = ((i >> 16) & 0xFF);
-	if(w) *w = ((i >> 24) & 0xFF);
+  if (x)
+    *x = (i & 0xFF);
+  if (y)
+    *y = ((i >> 8) & 0xFF);
+  if (z)
+    *z = ((i >> 16) & 0xFF);
+  if (w)
+    *w = ((i >> 24) & 0xFF);
 }
 
 CCL_NAMESPACE_END
@@ -194,302 +204,310 @@ CCL_NAMESPACE_BEGIN
 #define NODES_FEATURE(feature) ((__NODES_FEATURES__ & (feature)) != 0)
 
 /* Main Interpreter Loop */
-ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, ccl_addr_space PathState *state, ShaderType type, int path_flag)
+ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg,
+                                        ShaderData *sd,
+                                        ccl_addr_space PathState *state,
+                                        ShaderType type,
+                                        int path_flag)
 {
-	float stack[SVM_STACK_SIZE];
-	int offset = sd->shader & SHADER_MASK;
+  float stack[SVM_STACK_SIZE];
+  int offset = sd->shader & SHADER_MASK;
 
-	while(1) {
-		uint4 node = read_node(kg, &offset);
+  while (1) {
+    uint4 node = read_node(kg, &offset);
 
-		switch(node.x) {
+    switch (node.x) {
 #if NODES_GROUP(NODE_GROUP_LEVEL_0)
-			case NODE_SHADER_JUMP: {
-				if(type == SHADER_TYPE_SURFACE) offset = node.y;
-				else if(type == SHADER_TYPE_VOLUME) offset = node.z;
-				else if(type == SHADER_TYPE_DISPLACEMENT) offset = node.w;
-				else return;
-				break;
-			}
-			case NODE_CLOSURE_BSDF:
-				svm_node_closure_bsdf(kg, sd, stack, node, type, path_flag, &offset);
-				break;
-			case NODE_CLOSURE_EMISSION:
-				svm_node_closure_emission(sd, stack, node);
-				break;
-			case NODE_CLOSURE_BACKGROUND:
-				svm_node_closure_background(sd, stack, node);
-				break;
-			case NODE_CLOSURE_SET_WEIGHT:
-				svm_node_closure_set_weight(sd, node.y, node.z, node.w);
-				break;
-			case NODE_CLOSURE_WEIGHT:
-				svm_node_closure_weight(sd, stack, node.y);
-				break;
-			case NODE_EMISSION_WEIGHT:
-				svm_node_emission_weight(kg, sd, stack, node);
-				break;
-			case NODE_MIX_CLOSURE:
-				svm_node_mix_closure(sd, stack, node);
-				break;
-			case NODE_JUMP_IF_ZERO:
-				if(stack_load_float(stack, node.z) == 0.0f)
-					offset += node.y;
-				break;
-			case NODE_JUMP_IF_ONE:
-				if(stack_load_float(stack, node.z) == 1.0f)
-					offset += node.y;
-				break;
-			case NODE_GEOMETRY:
-				svm_node_geometry(kg, sd, stack, node.y, node.z);
-				break;
-			case NODE_CONVERT:
-				svm_node_convert(kg, sd, stack, node.y, node.z, node.w);
-				break;
-			case NODE_TEX_COORD:
-				svm_node_tex_coord(kg, sd, path_flag, stack, node, &offset);
-				break;
-			case NODE_VALUE_F:
-				svm_node_value_f(kg, sd, stack, node.y, node.z);
-				break;
-			case NODE_VALUE_V:
-				svm_node_value_v(kg, sd, stack, node.y, &offset);
-				break;
-			case NODE_ATTR:
-				svm_node_attr(kg, sd, stack, node);
-				break;
+      case NODE_SHADER_JUMP: {
+        if (type == SHADER_TYPE_SURFACE)
+          offset = node.y;
+        else if (type == SHADER_TYPE_VOLUME)
+          offset = node.z;
+        else if (type == SHADER_TYPE_DISPLACEMENT)
+          offset = node.w;
+        else
+          return;
+        break;
+      }
+      case NODE_CLOSURE_BSDF:
+        svm_node_closure_bsdf(kg, sd, stack, node, type, path_flag, &offset);
+        break;
+      case NODE_CLOSURE_EMISSION:
+        svm_node_closure_emission(sd, stack, node);
+        break;
+      case NODE_CLOSURE_BACKGROUND:
+        svm_node_closure_background(sd, stack, node);
+        break;
+      case NODE_CLOSURE_SET_WEIGHT:
+        svm_node_closure_set_weight(sd, node.y, node.z, node.w);
+        break;
+      case NODE_CLOSURE_WEIGHT:
+        svm_node_closure_weight(sd, stack, node.y);
+        break;
+      case NODE_EMISSION_WEIGHT:
+        svm_node_emission_weight(kg, sd, stack, node);
+        break;
+      case NODE_MIX_CLOSURE:
+        svm_node_mix_closure(sd, stack, node);
+        break;
+      case NODE_JUMP_IF_ZERO:
+        if (stack_load_float(stack, node.z) == 0.0f)
+          offset += node.y;
+        break;
+      case NODE_JUMP_IF_ONE:
+        if (stack_load_float(stack, node.z) == 1.0f)
+          offset += node.y;
+        break;
+      case NODE_GEOMETRY:
+        svm_node_geometry(kg, sd, stack, node.y, node.z);
+        break;
+      case NODE_CONVERT:
+        svm_node_convert(kg, sd, stack, node.y, node.z, node.w);
+        break;
+      case NODE_TEX_COORD:
+        svm_node_tex_coord(kg, sd, path_flag, stack, node, &offset);
+        break;
+      case NODE_VALUE_F:
+        svm_node_value_f(kg, sd, stack, node.y, node.z);
+        break;
+      case NODE_VALUE_V:
+        svm_node_value_v(kg, sd, stack, node.y, &offset);
+        break;
+      case NODE_ATTR:
+        svm_node_attr(kg, sd, stack, node);
+        break;
 #  if NODES_FEATURE(NODE_FEATURE_BUMP)
-			case NODE_GEOMETRY_BUMP_DX:
-				svm_node_geometry_bump_dx(kg, sd, stack, node.y, node.z);
-				break;
-			case NODE_GEOMETRY_BUMP_DY:
-				svm_node_geometry_bump_dy(kg, sd, stack, node.y, node.z);
-				break;
-			case NODE_SET_DISPLACEMENT:
-				svm_node_set_displacement(kg, sd, stack, node.y);
-				break;
-			case NODE_DISPLACEMENT:
-				svm_node_displacement(kg, sd, stack, node);
-				break;
-			case NODE_VECTOR_DISPLACEMENT:
-				svm_node_vector_displacement(kg, sd, stack, node, &offset);
-				break;
-#  endif  /* NODES_FEATURE(NODE_FEATURE_BUMP) */
+      case NODE_GEOMETRY_BUMP_DX:
+        svm_node_geometry_bump_dx(kg, sd, stack, node.y, node.z);
+        break;
+      case NODE_GEOMETRY_BUMP_DY:
+        svm_node_geometry_bump_dy(kg, sd, stack, node.y, node.z);
+        break;
+      case NODE_SET_DISPLACEMENT:
+        svm_node_set_displacement(kg, sd, stack, node.y);
+        break;
+      case NODE_DISPLACEMENT:
+        svm_node_displacement(kg, sd, stack, node);
+        break;
+      case NODE_VECTOR_DISPLACEMENT:
+        svm_node_vector_displacement(kg, sd, stack, node, &offset);
+        break;
+#  endif /* NODES_FEATURE(NODE_FEATURE_BUMP) */
 #  ifdef __TEXTURES__
-			case NODE_TEX_IMAGE:
-				svm_node_tex_image(kg, sd, stack, node);
-				break;
-			case NODE_TEX_IMAGE_BOX:
-				svm_node_tex_image_box(kg, sd, stack, node);
-				break;
-			case NODE_TEX_NOISE:
-				svm_node_tex_noise(kg, sd, stack, node, &offset);
-				break;
-#  endif  /* __TEXTURES__ */
+      case NODE_TEX_IMAGE:
+        svm_node_tex_image(kg, sd, stack, node);
+        break;
+      case NODE_TEX_IMAGE_BOX:
+        svm_node_tex_image_box(kg, sd, stack, node);
+        break;
+      case NODE_TEX_NOISE:
+        svm_node_tex_noise(kg, sd, stack, node, &offset);
+        break;
+#  endif /* __TEXTURES__ */
 #  ifdef __EXTRA_NODES__
 #    if NODES_FEATURE(NODE_FEATURE_BUMP)
-			case NODE_SET_BUMP:
-				svm_node_set_bump(kg, sd, stack, node);
-				break;
-			case NODE_ATTR_BUMP_DX:
-				svm_node_attr_bump_dx(kg, sd, stack, node);
-				break;
-			case NODE_ATTR_BUMP_DY:
-				svm_node_attr_bump_dy(kg, sd, stack, node);
-				break;
-			case NODE_TEX_COORD_BUMP_DX:
-				svm_node_tex_coord_bump_dx(kg, sd, path_flag, stack, node, &offset);
-				break;
-			case NODE_TEX_COORD_BUMP_DY:
-				svm_node_tex_coord_bump_dy(kg, sd, path_flag, stack, node, &offset);
-				break;
-			case NODE_CLOSURE_SET_NORMAL:
-				svm_node_set_normal(kg, sd, stack, node.y, node.z);
-				break;
+      case NODE_SET_BUMP:
+        svm_node_set_bump(kg, sd, stack, node);
+        break;
+      case NODE_ATTR_BUMP_DX:
+        svm_node_attr_bump_dx(kg, sd, stack, node);
+        break;
+      case NODE_ATTR_BUMP_DY:
+        svm_node_attr_bump_dy(kg, sd, stack, node);
+        break;
+      case NODE_TEX_COORD_BUMP_DX:
+        svm_node_tex_coord_bump_dx(kg, sd, path_flag, stack, node, &offset);
+        break;
+      case NODE_TEX_COORD_BUMP_DY:
+        svm_node_tex_coord_bump_dy(kg, sd, path_flag, stack, node, &offset);
+        break;
+      case NODE_CLOSURE_SET_NORMAL:
+        svm_node_set_normal(kg, sd, stack, node.y, node.z);
+        break;
 #      if NODES_FEATURE(NODE_FEATURE_BUMP_STATE)
-			case NODE_ENTER_BUMP_EVAL:
-				svm_node_enter_bump_eval(kg, sd, stack, node.y);
-				break;
-			case NODE_LEAVE_BUMP_EVAL:
-				svm_node_leave_bump_eval(kg, sd, stack, node.y);
-				break;
-#      endif  /* NODES_FEATURE(NODE_FEATURE_BUMP_STATE) */
-#    endif  /* NODES_FEATURE(NODE_FEATURE_BUMP) */
-			case NODE_HSV:
-				svm_node_hsv(kg, sd, stack, node, &offset);
-				break;
-#  endif  /* __EXTRA_NODES__ */
-#endif  /* NODES_GROUP(NODE_GROUP_LEVEL_0) */
+      case NODE_ENTER_BUMP_EVAL:
+        svm_node_enter_bump_eval(kg, sd, stack, node.y);
+        break;
+      case NODE_LEAVE_BUMP_EVAL:
+        svm_node_leave_bump_eval(kg, sd, stack, node.y);
+        break;
+#      endif /* NODES_FEATURE(NODE_FEATURE_BUMP_STATE) */
+#    endif   /* NODES_FEATURE(NODE_FEATURE_BUMP) */
+      case NODE_HSV:
+        svm_node_hsv(kg, sd, stack, node, &offset);
+        break;
+#  endif /* __EXTRA_NODES__ */
+#endif   /* NODES_GROUP(NODE_GROUP_LEVEL_0) */
 
 #if NODES_GROUP(NODE_GROUP_LEVEL_1)
-			case NODE_CLOSURE_HOLDOUT:
-				svm_node_closure_holdout(sd, stack, node);
-				break;
-			case NODE_FRESNEL:
-				svm_node_fresnel(sd, stack, node.y, node.z, node.w);
-				break;
-			case NODE_LAYER_WEIGHT:
-				svm_node_layer_weight(sd, stack, node);
-				break;
+      case NODE_CLOSURE_HOLDOUT:
+        svm_node_closure_holdout(sd, stack, node);
+        break;
+      case NODE_FRESNEL:
+        svm_node_fresnel(sd, stack, node.y, node.z, node.w);
+        break;
+      case NODE_LAYER_WEIGHT:
+        svm_node_layer_weight(sd, stack, node);
+        break;
 #  if NODES_FEATURE(NODE_FEATURE_VOLUME)
-			case NODE_CLOSURE_VOLUME:
-				svm_node_closure_volume(kg, sd, stack, node, type);
-				break;
-			case NODE_PRINCIPLED_VOLUME:
-				svm_node_principled_volume(kg, sd, stack, node, type, path_flag, &offset);
-				break;
-#  endif  /* NODES_FEATURE(NODE_FEATURE_VOLUME) */
+      case NODE_CLOSURE_VOLUME:
+        svm_node_closure_volume(kg, sd, stack, node, type);
+        break;
+      case NODE_PRINCIPLED_VOLUME:
+        svm_node_principled_volume(kg, sd, stack, node, type, path_flag, &offset);
+        break;
+#  endif /* NODES_FEATURE(NODE_FEATURE_VOLUME) */
 #  ifdef __EXTRA_NODES__
-			case NODE_MATH:
-				svm_node_math(kg, sd, stack, node.y, node.z, node.w, &offset);
-				break;
-			case NODE_VECTOR_MATH:
-				svm_node_vector_math(kg, sd, stack, node.y, node.z, node.w, &offset);
-				break;
-			case NODE_RGB_RAMP:
-				svm_node_rgb_ramp(kg, sd, stack, node, &offset);
-				break;
-			case NODE_GAMMA:
-				svm_node_gamma(sd, stack, node.y, node.z, node.w);
-				break;
-			case NODE_BRIGHTCONTRAST:
-				svm_node_brightness(sd, stack, node.y, node.z, node.w);
-				break;
-			case NODE_LIGHT_PATH:
-				svm_node_light_path(sd, state, stack, node.y, node.z, path_flag);
-				break;
-			case NODE_OBJECT_INFO:
-				svm_node_object_info(kg, sd, stack, node.y, node.z);
-				break;
-			case NODE_PARTICLE_INFO:
-				svm_node_particle_info(kg, sd, stack, node.y, node.z);
-				break;
+      case NODE_MATH:
+        svm_node_math(kg, sd, stack, node.y, node.z, node.w, &offset);
+        break;
+      case NODE_VECTOR_MATH:
+        svm_node_vector_math(kg, sd, stack, node.y, node.z, node.w, &offset);
+        break;
+      case NODE_RGB_RAMP:
+        svm_node_rgb_ramp(kg, sd, stack, node, &offset);
+        break;
+      case NODE_GAMMA:
+        svm_node_gamma(sd, stack, node.y, node.z, node.w);
+        break;
+      case NODE_BRIGHTCONTRAST:
+        svm_node_brightness(sd, stack, node.y, node.z, node.w);
+        break;
+      case NODE_LIGHT_PATH:
+        svm_node_light_path(sd, state, stack, node.y, node.z, path_flag);
+        break;
+      case NODE_OBJECT_INFO:
+        svm_node_object_info(kg, sd, stack, node.y, node.z);
+        break;
+      case NODE_PARTICLE_INFO:
+        svm_node_particle_info(kg, sd, stack, node.y, node.z);
+        break;
 #    ifdef __HAIR__
 #      if NODES_FEATURE(NODE_FEATURE_HAIR)
-			case NODE_HAIR_INFO:
-				svm_node_hair_info(kg, sd, stack, node.y, node.z);
-				break;
-#      endif  /* NODES_FEATURE(NODE_FEATURE_HAIR) */
-#    endif  /* __HAIR__ */
-#  endif  /* __EXTRA_NODES__ */
-#endif  /* NODES_GROUP(NODE_GROUP_LEVEL_1) */
+      case NODE_HAIR_INFO:
+        svm_node_hair_info(kg, sd, stack, node.y, node.z);
+        break;
+#      endif /* NODES_FEATURE(NODE_FEATURE_HAIR) */
+#    endif   /* __HAIR__ */
+#  endif     /* __EXTRA_NODES__ */
+#endif       /* NODES_GROUP(NODE_GROUP_LEVEL_1) */
 
 #if NODES_GROUP(NODE_GROUP_LEVEL_2)
-			case NODE_MAPPING:
-				svm_node_mapping(kg, sd, stack, node.y, node.z, &offset);
-				break;
-			case NODE_MIN_MAX:
-				svm_node_min_max(kg, sd, stack, node.y, node.z, &offset);
-				break;
-			case NODE_CAMERA:
-				svm_node_camera(kg, sd, stack, node.y, node.z, node.w);
-				break;
+      case NODE_MAPPING:
+        svm_node_mapping(kg, sd, stack, node.y, node.z, &offset);
+        break;
+      case NODE_MIN_MAX:
+        svm_node_min_max(kg, sd, stack, node.y, node.z, &offset);
+        break;
+      case NODE_CAMERA:
+        svm_node_camera(kg, sd, stack, node.y, node.z, node.w);
+        break;
 #  ifdef __TEXTURES__
-			case NODE_TEX_ENVIRONMENT:
-				svm_node_tex_environment(kg, sd, stack, node);
-				break;
-			case NODE_TEX_SKY:
-				svm_node_tex_sky(kg, sd, stack, node, &offset);
-				break;
-			case NODE_TEX_GRADIENT:
-				svm_node_tex_gradient(sd, stack, node);
-				break;
-			case NODE_TEX_VORONOI:
-				svm_node_tex_voronoi(kg, sd, stack, node, &offset);
-				break;
-			case NODE_TEX_MUSGRAVE:
-				svm_node_tex_musgrave(kg, sd, stack, node, &offset);
-				break;
-			case NODE_TEX_WAVE:
-				svm_node_tex_wave(kg, sd, stack, node, &offset);
-				break;
-			case NODE_TEX_MAGIC:
-				svm_node_tex_magic(kg, sd, stack, node, &offset);
-				break;
-			case NODE_TEX_CHECKER:
-				svm_node_tex_checker(kg, sd, stack, node);
-				break;
-			case NODE_TEX_BRICK:
-				svm_node_tex_brick(kg, sd, stack, node, &offset);
-				break;
-#  endif  /* __TEXTURES__ */
+      case NODE_TEX_ENVIRONMENT:
+        svm_node_tex_environment(kg, sd, stack, node);
+        break;
+      case NODE_TEX_SKY:
+        svm_node_tex_sky(kg, sd, stack, node, &offset);
+        break;
+      case NODE_TEX_GRADIENT:
+        svm_node_tex_gradient(sd, stack, node);
+        break;
+      case NODE_TEX_VORONOI:
+        svm_node_tex_voronoi(kg, sd, stack, node, &offset);
+        break;
+      case NODE_TEX_MUSGRAVE:
+        svm_node_tex_musgrave(kg, sd, stack, node, &offset);
+        break;
+      case NODE_TEX_WAVE:
+        svm_node_tex_wave(kg, sd, stack, node, &offset);
+        break;
+      case NODE_TEX_MAGIC:
+        svm_node_tex_magic(kg, sd, stack, node, &offset);
+        break;
+      case NODE_TEX_CHECKER:
+        svm_node_tex_checker(kg, sd, stack, node);
+        break;
+      case NODE_TEX_BRICK:
+        svm_node_tex_brick(kg, sd, stack, node, &offset);
+        break;
+#  endif /* __TEXTURES__ */
 #  ifdef __EXTRA_NODES__
-			case NODE_NORMAL:
-				svm_node_normal(kg, sd, stack, node.y, node.z, node.w, &offset);
-				break;
-			case NODE_LIGHT_FALLOFF:
-				svm_node_light_falloff(sd, stack, node);
-				break;
-			case NODE_IES:
-				svm_node_ies(kg, sd, stack, node, &offset);
-				break;
-#  endif  /* __EXTRA_NODES__ */
-#endif  /* NODES_GROUP(NODE_GROUP_LEVEL_2) */
+      case NODE_NORMAL:
+        svm_node_normal(kg, sd, stack, node.y, node.z, node.w, &offset);
+        break;
+      case NODE_LIGHT_FALLOFF:
+        svm_node_light_falloff(sd, stack, node);
+        break;
+      case NODE_IES:
+        svm_node_ies(kg, sd, stack, node, &offset);
+        break;
+#  endif /* __EXTRA_NODES__ */
+#endif   /* NODES_GROUP(NODE_GROUP_LEVEL_2) */
 
 #if NODES_GROUP(NODE_GROUP_LEVEL_3)
-			case NODE_RGB_CURVES:
-			case NODE_VECTOR_CURVES:
-				svm_node_curves(kg, sd, stack, node, &offset);
-				break;
-			case NODE_TANGENT:
-				svm_node_tangent(kg, sd, stack, node);
-				break;
-			case NODE_NORMAL_MAP:
-				svm_node_normal_map(kg, sd, stack, node);
-				break;
+      case NODE_RGB_CURVES:
+      case NODE_VECTOR_CURVES:
+        svm_node_curves(kg, sd, stack, node, &offset);
+        break;
+      case NODE_TANGENT:
+        svm_node_tangent(kg, sd, stack, node);
+        break;
+      case NODE_NORMAL_MAP:
+        svm_node_normal_map(kg, sd, stack, node);
+        break;
 #  ifdef __EXTRA_NODES__
-			case NODE_INVERT:
-				svm_node_invert(sd, stack, node.y, node.z, node.w);
-				break;
-			case NODE_MIX:
-				svm_node_mix(kg, sd, stack, node.y, node.z, node.w, &offset);
-				break;
-			case NODE_SEPARATE_VECTOR:
-				svm_node_separate_vector(sd, stack, node.y, node.z, node.w);
-				break;
-			case NODE_COMBINE_VECTOR:
-				svm_node_combine_vector(sd, stack, node.y, node.z, node.w);
-				break;
-			case NODE_SEPARATE_HSV:
-				svm_node_separate_hsv(kg, sd, stack, node.y, node.z, node.w, &offset);
-				break;
-			case NODE_COMBINE_HSV:
-				svm_node_combine_hsv(kg, sd, stack, node.y, node.z, node.w, &offset);
-				break;
-			case NODE_VECTOR_TRANSFORM:
-				svm_node_vector_transform(kg, sd, stack, node);
-				break;
-			case NODE_WIREFRAME:
-				svm_node_wireframe(kg, sd, stack, node);
-				break;
-			case NODE_WAVELENGTH:
-				svm_node_wavelength(kg, sd, stack, node.y, node.z);
-				break;
-			case NODE_BLACKBODY:
-				svm_node_blackbody(kg, sd, stack, node.y, node.z);
-				break;
-#  endif  /* __EXTRA_NODES__ */
+      case NODE_INVERT:
+        svm_node_invert(sd, stack, node.y, node.z, node.w);
+        break;
+      case NODE_MIX:
+        svm_node_mix(kg, sd, stack, node.y, node.z, node.w, &offset);
+        break;
+      case NODE_SEPARATE_VECTOR:
+        svm_node_separate_vector(sd, stack, node.y, node.z, node.w);
+        break;
+      case NODE_COMBINE_VECTOR:
+        svm_node_combine_vector(sd, stack, node.y, node.z, node.w);
+        break;
+      case NODE_SEPARATE_HSV:
+        svm_node_separate_hsv(kg, sd, stack, node.y, node.z, node.w, &offset);
+        break;
+      case NODE_COMBINE_HSV:
+        svm_node_combine_hsv(kg, sd, stack, node.y, node.z, node.w, &offset);
+        break;
+      case NODE_VECTOR_TRANSFORM:
+        svm_node_vector_transform(kg, sd, stack, node);
+        break;
+      case NODE_WIREFRAME:
+        svm_node_wireframe(kg, sd, stack, node);
+        break;
+      case NODE_WAVELENGTH:
+        svm_node_wavelength(kg, sd, stack, node.y, node.z);
+        break;
+      case NODE_BLACKBODY:
+        svm_node_blackbody(kg, sd, stack, node.y, node.z);
+        break;
+#  endif /* __EXTRA_NODES__ */
 #  if NODES_FEATURE(NODE_FEATURE_VOLUME)
-			case NODE_TEX_VOXEL:
-				svm_node_tex_voxel(kg, sd, stack, node, &offset);
-				break;
-#  endif  /* NODES_FEATURE(NODE_FEATURE_VOLUME) */
+      case NODE_TEX_VOXEL:
+        svm_node_tex_voxel(kg, sd, stack, node, &offset);
+        break;
+#  endif /* NODES_FEATURE(NODE_FEATURE_VOLUME) */
 #  ifdef __SHADER_RAYTRACE__
-			case NODE_BEVEL:
-				svm_node_bevel(kg, sd, state, stack, node);
-				break;
-			case NODE_AMBIENT_OCCLUSION:
-				svm_node_ao(kg, sd, state, stack, node);
-				break;
-#  endif  /* __SHADER_RAYTRACE__ */
-#endif  /* NODES_GROUP(NODE_GROUP_LEVEL_3) */
-			case NODE_END:
-				return;
-			default:
-				kernel_assert(!"Unknown node type was passed to the SVM machine");
-				return;
-		}
-	}
+      case NODE_BEVEL:
+        svm_node_bevel(kg, sd, state, stack, node);
+        break;
+      case NODE_AMBIENT_OCCLUSION:
+        svm_node_ao(kg, sd, state, stack, node);
+        break;
+#  endif /* __SHADER_RAYTRACE__ */
+#endif   /* NODES_GROUP(NODE_GROUP_LEVEL_3) */
+      case NODE_END:
+        return;
+      default:
+        kernel_assert(!"Unknown node type was passed to the SVM machine");
+        return;
+    }
+  }
 }
 
 #undef NODES_GROUP
@@ -497,4 +515,4 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, ccl_a
 
 CCL_NAMESPACE_END
 
-#endif  /* __SVM_H__ */
+#endif /* __SVM_H__ */
diff --git a/intern/cycles/kernel/svm/svm_ao.h b/intern/cycles/kernel/svm/svm_ao.h
index 0744ec1768f..06076175c40 100644
--- a/intern/cycles/kernel/svm/svm_ao.h
+++ b/intern/cycles/kernel/svm/svm_ao.h
@@ -24,95 +24,82 @@ ccl_device_noinline float svm_ao(KernelGlobals *kg,
                                  int num_samples,
                                  int flags)
 {
-	if(flags & NODE_AO_GLOBAL_RADIUS) {
-		max_dist = kernel_data.background.ao_distance;
-	}
-
-	/* Early out if no sampling needed. */
-	if(max_dist <= 0.0f || num_samples < 1 || sd->object == OBJECT_NONE) {
-		return 1.0f;
-	}
-
-	/* Can't raytrace from shaders like displacement, before BVH exists. */
-	if (kernel_data.bvh.bvh_layout == BVH_LAYOUT_NONE) {
-		return 1.0f;
-	}
-
-	if(flags & NODE_AO_INSIDE) {
-		N = -N;
-	}
-
-	float3 T, B;
-	make_orthonormals(N, &T, &B);
-
-	int unoccluded = 0;
-	for(int sample = 0; sample < num_samples; sample++) {
-		float disk_u, disk_v;
-		path_branched_rng_2D(kg, state->rng_hash, state, sample, num_samples,
-		                     PRNG_BEVEL_U, &disk_u, &disk_v);
-
-		float2 d = concentric_sample_disk(disk_u, disk_v);
-		float3 D = make_float3(d.x, d.y, safe_sqrtf(1.0f - dot(d, d)));
-
-		/* Create ray. */
-		Ray ray;
-		ray.P = ray_offset(sd->P, N);
-		ray.D = D.x*T + D.y*B + D.z*N;
-		ray.t = max_dist;
-		ray.time = sd->time;
-		ray.dP = sd->dP;
-		ray.dD = differential3_zero();
-
-		if(flags & NODE_AO_ONLY_LOCAL) {
-			if(!scene_intersect_local(kg,
-			                          ray,
-			                          NULL,
-			                          sd->object,
-			                          NULL,
-			                          0)) {
-				unoccluded++;
-			}
-		}
-		else {
-			Intersection isect;
-			if(!scene_intersect(kg,
-			                    ray,
-			                    PATH_RAY_SHADOW_OPAQUE,
-			                    &isect,
-			                    NULL,
-			                    0.0f, 0.0f)) {
-				unoccluded++;
-			}
-		}
-	}
-
-	return ((float) unoccluded) / num_samples;
+  if (flags & NODE_AO_GLOBAL_RADIUS) {
+    max_dist = kernel_data.background.ao_distance;
+  }
+
+  /* Early out if no sampling needed. */
+  if (max_dist <= 0.0f || num_samples < 1 || sd->object == OBJECT_NONE) {
+    return 1.0f;
+  }
+
+  /* Can't raytrace from shaders like displacement, before BVH exists. */
+  if (kernel_data.bvh.bvh_layout == BVH_LAYOUT_NONE) {
+    return 1.0f;
+  }
+
+  if (flags & NODE_AO_INSIDE) {
+    N = -N;
+  }
+
+  float3 T, B;
+  make_orthonormals(N, &T, &B);
+
+  int unoccluded = 0;
+  for (int sample = 0; sample < num_samples; sample++) {
+    float disk_u, disk_v;
+    path_branched_rng_2D(
+        kg, state->rng_hash, state, sample, num_samples, PRNG_BEVEL_U, &disk_u, &disk_v);
+
+    float2 d = concentric_sample_disk(disk_u, disk_v);
+    float3 D = make_float3(d.x, d.y, safe_sqrtf(1.0f - dot(d, d)));
+
+    /* Create ray. */
+    Ray ray;
+    ray.P = ray_offset(sd->P, N);
+    ray.D = D.x * T + D.y * B + D.z * N;
+    ray.t = max_dist;
+    ray.time = sd->time;
+    ray.dP = sd->dP;
+    ray.dD = differential3_zero();
+
+    if (flags & NODE_AO_ONLY_LOCAL) {
+      if (!scene_intersect_local(kg, ray, NULL, sd->object, NULL, 0)) {
+        unoccluded++;
+      }
+    }
+    else {
+      Intersection isect;
+      if (!scene_intersect(kg, ray, PATH_RAY_SHADOW_OPAQUE, &isect, NULL, 0.0f, 0.0f)) {
+        unoccluded++;
+      }
+    }
+  }
+
+  return ((float)unoccluded) / num_samples;
 }
 
-ccl_device void svm_node_ao(KernelGlobals *kg,
-                            ShaderData *sd,
-                            ccl_addr_space PathState *state,
-                            float *stack,
-                            uint4 node)
+ccl_device void svm_node_ao(
+    KernelGlobals *kg, ShaderData *sd, ccl_addr_space PathState *state, float *stack, uint4 node)
 {
-	uint flags, dist_offset, normal_offset, out_ao_offset;
-	decode_node_uchar4(node.y, &flags, &dist_offset, &normal_offset, &out_ao_offset);
+  uint flags, dist_offset, normal_offset, out_ao_offset;
+  decode_node_uchar4(node.y, &flags, &dist_offset, &normal_offset, &out_ao_offset);
 
-	uint color_offset, out_color_offset, samples;
-	decode_node_uchar4(node.z, &color_offset, &out_color_offset, &samples, NULL);
+  uint color_offset, out_color_offset, samples;
+  decode_node_uchar4(node.z, &color_offset, &out_color_offset, &samples, NULL);
 
-	float dist = stack_load_float_default(stack, dist_offset, node.w);
-	float3 normal = stack_valid(normal_offset)? stack_load_float3(stack, normal_offset): sd->N;
-	float ao = svm_ao(kg, sd, normal, state, dist, samples, flags);
+  float dist = stack_load_float_default(stack, dist_offset, node.w);
+  float3 normal = stack_valid(normal_offset) ? stack_load_float3(stack, normal_offset) : sd->N;
+  float ao = svm_ao(kg, sd, normal, state, dist, samples, flags);
 
-	if(stack_valid(out_ao_offset)) {
-		stack_store_float(stack, out_ao_offset, ao);
-	}
+  if (stack_valid(out_ao_offset)) {
+    stack_store_float(stack, out_ao_offset, ao);
+  }
 
-	if(stack_valid(out_color_offset)) {
-		float3 color = stack_load_float3(stack, color_offset);
-		stack_store_float3(stack, out_color_offset, ao * color);
-	}
+  if (stack_valid(out_color_offset)) {
+    float3 color = stack_load_float3(stack, color_offset);
+    stack_store_float3(stack, out_color_offset, ao * color);
+  }
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_attribute.h b/intern/cycles/kernel/svm/svm_attribute.h
index c2366df71d0..a67cfe91a30 100644
--- a/intern/cycles/kernel/svm/svm_attribute.h
+++ b/intern/cycles/kernel/svm/svm_attribute.h
@@ -18,67 +18,66 @@ CCL_NAMESPACE_BEGIN
 
 /* Attribute Node */
 
-ccl_device AttributeDescriptor svm_node_attr_init(KernelGlobals *kg, ShaderData *sd,
-	uint4 node, NodeAttributeType *type,
-	uint *out_offset)
+ccl_device AttributeDescriptor svm_node_attr_init(
+    KernelGlobals *kg, ShaderData *sd, uint4 node, NodeAttributeType *type, uint *out_offset)
 {
-	*out_offset = node.z;
-	*type = (NodeAttributeType)node.w;
+  *out_offset = node.z;
+  *type = (NodeAttributeType)node.w;
 
-	AttributeDescriptor desc;
+  AttributeDescriptor desc;
 
-	if(sd->object != OBJECT_NONE) {
-		desc = find_attribute(kg, sd, node.y);
-		if(desc.offset == ATTR_STD_NOT_FOUND) {
-			desc = attribute_not_found();
-			desc.offset = 0;
-			desc.type = (NodeAttributeType)node.w;
-		}
-	}
-	else {
-		/* background */
-		desc = attribute_not_found();
-		desc.offset = 0;
-		desc.type = (NodeAttributeType)node.w;
-	}
+  if (sd->object != OBJECT_NONE) {
+    desc = find_attribute(kg, sd, node.y);
+    if (desc.offset == ATTR_STD_NOT_FOUND) {
+      desc = attribute_not_found();
+      desc.offset = 0;
+      desc.type = (NodeAttributeType)node.w;
+    }
+  }
+  else {
+    /* background */
+    desc = attribute_not_found();
+    desc.offset = 0;
+    desc.type = (NodeAttributeType)node.w;
+  }
 
-	return desc;
+  return desc;
 }
 
 ccl_device void svm_node_attr(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
 {
-	NodeAttributeType type;
-	uint out_offset;
-	AttributeDescriptor desc = svm_node_attr_init(kg, sd, node, &type, &out_offset);
+  NodeAttributeType type;
+  uint out_offset;
+  AttributeDescriptor desc = svm_node_attr_init(kg, sd, node, &type, &out_offset);
 
-	/* fetch and store attribute */
-	if(desc.type == NODE_ATTR_FLOAT) {
-		float f = primitive_attribute_float(kg, sd, desc, NULL, NULL);
-		if(type == NODE_ATTR_FLOAT) {
-			stack_store_float(stack, out_offset, f);
-		}
-		else {
-			stack_store_float3(stack, out_offset, make_float3(f, f, f));
-		}
-	}
-	else if(desc.type == NODE_ATTR_FLOAT2) {
-		float2 f = primitive_attribute_float2(kg, sd, desc, NULL, NULL);
-		if(type == NODE_ATTR_FLOAT) {
-			stack_store_float(stack, out_offset, f.x);
-		}
-		else {
-			stack_store_float3(stack, out_offset, make_float3(f.x, f.y, 0.0f));
-		}
-	}
-	else {
-		float3 f = primitive_attribute_float3(kg, sd, desc, NULL, NULL);
-		if(type == NODE_ATTR_FLOAT) {
-			stack_store_float(stack, out_offset, average(f));
-		}
-		else {
-			stack_store_float3(stack, out_offset, f);
-		}
-	}
+  /* fetch and store attribute */
+  if (desc.type == NODE_ATTR_FLOAT) {
+    float f = primitive_attribute_float(kg, sd, desc, NULL, NULL);
+    if (type == NODE_ATTR_FLOAT) {
+      stack_store_float(stack, out_offset, f);
+    }
+    else {
+      stack_store_float3(stack, out_offset, make_float3(f, f, f));
+    }
+  }
+  else if (desc.type == NODE_ATTR_FLOAT2) {
+    float2 f = primitive_attribute_float2(kg, sd, desc, NULL, NULL);
+    if (type == NODE_ATTR_FLOAT) {
+      stack_store_float(stack, out_offset, f.x);
+    }
+    else {
+      stack_store_float3(stack, out_offset, make_float3(f.x, f.y, 0.0f));
+    }
+  }
+  else {
+    float3 f = primitive_attribute_float3(kg, sd, desc, NULL, NULL);
+    if (type == NODE_ATTR_FLOAT) {
+      stack_store_float(stack, out_offset, average(f));
+    }
+    else {
+      stack_store_float3(stack, out_offset, f);
+    }
+  }
 }
 
 #ifndef __KERNEL_CUDA__
@@ -86,43 +85,44 @@ ccl_device
 #else
 ccl_device_noinline
 #endif
-void svm_node_attr_bump_dx(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
+    void
+    svm_node_attr_bump_dx(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
 {
-	NodeAttributeType type;
-	uint out_offset;
-	AttributeDescriptor desc = svm_node_attr_init(kg, sd, node, &type, &out_offset);
+  NodeAttributeType type;
+  uint out_offset;
+  AttributeDescriptor desc = svm_node_attr_init(kg, sd, node, &type, &out_offset);
 
-	/* fetch and store attribute */
-	if(desc.type == NODE_ATTR_FLOAT) {
-		float dx;
-		float f = primitive_surface_attribute_float(kg, sd, desc, &dx, NULL);
-		if(type == NODE_ATTR_FLOAT) {
-			stack_store_float(stack, out_offset, f+dx);
-		}
-		else {
-			stack_store_float3(stack, out_offset, make_float3(f+dx, f+dx, f+dx));
-		}
-	}
-	else if(desc.type == NODE_ATTR_FLOAT2) {
-		float2 dx;
-		float2 f = primitive_attribute_float2(kg, sd, desc, &dx, NULL);
-		if (type == NODE_ATTR_FLOAT) {
-			stack_store_float(stack, out_offset, f.x + dx.x);
-		}
-		else {
-			stack_store_float3(stack, out_offset, make_float3(f.x+dx.x, f.y+dx.y, 0.0f));
-		}
-	}
-	else {
-		float3 dx;
-		float3 f = primitive_surface_attribute_float3(kg, sd, desc, &dx, NULL);
-		if(type == NODE_ATTR_FLOAT) {
-			stack_store_float(stack, out_offset, average(f+dx));
-		}
-		else {
-			stack_store_float3(stack, out_offset, f+dx);
-		}
-	}
+  /* fetch and store attribute */
+  if (desc.type == NODE_ATTR_FLOAT) {
+    float dx;
+    float f = primitive_surface_attribute_float(kg, sd, desc, &dx, NULL);
+    if (type == NODE_ATTR_FLOAT) {
+      stack_store_float(stack, out_offset, f + dx);
+    }
+    else {
+      stack_store_float3(stack, out_offset, make_float3(f + dx, f + dx, f + dx));
+    }
+  }
+  else if (desc.type == NODE_ATTR_FLOAT2) {
+    float2 dx;
+    float2 f = primitive_attribute_float2(kg, sd, desc, &dx, NULL);
+    if (type == NODE_ATTR_FLOAT) {
+      stack_store_float(stack, out_offset, f.x + dx.x);
+    }
+    else {
+      stack_store_float3(stack, out_offset, make_float3(f.x + dx.x, f.y + dx.y, 0.0f));
+    }
+  }
+  else {
+    float3 dx;
+    float3 f = primitive_surface_attribute_float3(kg, sd, desc, &dx, NULL);
+    if (type == NODE_ATTR_FLOAT) {
+      stack_store_float(stack, out_offset, average(f + dx));
+    }
+    else {
+      stack_store_float3(stack, out_offset, f + dx);
+    }
+  }
 }
 
 #ifndef __KERNEL_CUDA__
@@ -130,46 +130,44 @@ ccl_device
 #else
 ccl_device_noinline
 #endif
-void svm_node_attr_bump_dy(KernelGlobals *kg,
-                           ShaderData *sd,
-                           float *stack,
-                           uint4 node)
+    void
+    svm_node_attr_bump_dy(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
 {
-	NodeAttributeType type;
-	uint out_offset;
-	AttributeDescriptor desc = svm_node_attr_init(kg, sd, node, &type, &out_offset);
+  NodeAttributeType type;
+  uint out_offset;
+  AttributeDescriptor desc = svm_node_attr_init(kg, sd, node, &type, &out_offset);
 
-	/* fetch and store attribute */
-	if(desc.type == NODE_ATTR_FLOAT) {
-		float dy;
-		float f = primitive_surface_attribute_float(kg, sd, desc, NULL, &dy);
-		if(type == NODE_ATTR_FLOAT) {
-			stack_store_float(stack, out_offset, f+dy);
-		}
-		else {
-			stack_store_float3(stack, out_offset, make_float3(f+dy, f+dy, f+dy));
-		}
-	}
-	else if(desc.type == NODE_ATTR_FLOAT2) {
-		float2 dy;
-		float2 f = primitive_attribute_float2(kg, sd, desc, NULL, &dy);
-		if(type == NODE_ATTR_FLOAT) {
-			stack_store_float(stack, out_offset, f.x + dy.x);
-		}
-		else {
-			stack_store_float3(stack, out_offset, make_float3(f.x+dy.x, f.y+dy.y, 0.0f));
-		}
-	}
-	else {
-		float3 dy;
-		float3 f = primitive_surface_attribute_float3(kg, sd, desc, NULL, &dy);
-		if(type == NODE_ATTR_FLOAT) {
-			stack_store_float(stack, out_offset, average(f+dy));
-		}
-		else {
-			stack_store_float3(stack, out_offset, f+dy);
-		}
-	}
+  /* fetch and store attribute */
+  if (desc.type == NODE_ATTR_FLOAT) {
+    float dy;
+    float f = primitive_surface_attribute_float(kg, sd, desc, NULL, &dy);
+    if (type == NODE_ATTR_FLOAT) {
+      stack_store_float(stack, out_offset, f + dy);
+    }
+    else {
+      stack_store_float3(stack, out_offset, make_float3(f + dy, f + dy, f + dy));
+    }
+  }
+  else if (desc.type == NODE_ATTR_FLOAT2) {
+    float2 dy;
+    float2 f = primitive_attribute_float2(kg, sd, desc, NULL, &dy);
+    if (type == NODE_ATTR_FLOAT) {
+      stack_store_float(stack, out_offset, f.x + dy.x);
+    }
+    else {
+      stack_store_float3(stack, out_offset, make_float3(f.x + dy.x, f.y + dy.y, 0.0f));
+    }
+  }
+  else {
+    float3 dy;
+    float3 f = primitive_surface_attribute_float3(kg, sd, desc, NULL, &dy);
+    if (type == NODE_ATTR_FLOAT) {
+      stack_store_float(stack, out_offset, average(f + dy));
+    }
+    else {
+      stack_store_float3(stack, out_offset, f + dy);
+    }
+  }
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_bevel.h b/intern/cycles/kernel/svm/svm_bevel.h
index b5bb9df422b..fcf28e96e98 100644
--- a/intern/cycles/kernel/svm/svm_bevel.h
+++ b/intern/cycles/kernel/svm/svm_bevel.h
@@ -22,215 +22,196 @@ CCL_NAMESPACE_BEGIN
  * http://library.imageworks.com/pdfs/imageworks-library-BSSRDF-sampling.pdf
  */
 
-ccl_device_noinline float3 svm_bevel(
-	KernelGlobals *kg,
-	ShaderData *sd,
-	ccl_addr_space PathState *state,
-	float radius,
-	int num_samples)
+ccl_device_noinline float3 svm_bevel(KernelGlobals *kg,
+                                     ShaderData *sd,
+                                     ccl_addr_space PathState *state,
+                                     float radius,
+                                     int num_samples)
 {
-	/* Early out if no sampling needed. */
-	if(radius <= 0.0f || num_samples < 1 || sd->object == OBJECT_NONE) {
-		return sd->N;
-	}
-
-	/* Can't raytrace from shaders like displacement, before BVH exists. */
-	if (kernel_data.bvh.bvh_layout == BVH_LAYOUT_NONE) {
-		return sd->N;
-	}
-
-	/* Don't bevel for blurry indirect rays. */
-	if(state->min_ray_pdf < 8.0f) {
-		return sd->N;
-	}
-
-	/* Setup for multi intersection. */
-	LocalIntersection isect;
-	uint lcg_state = lcg_state_init_addrspace(state, 0x64c6a40e);
-
-	/* Sample normals from surrounding points on surface. */
-	float3 sum_N = make_float3(0.0f, 0.0f, 0.0f);
-
-	for(int sample = 0; sample < num_samples; sample++) {
-		float disk_u, disk_v;
-		path_branched_rng_2D(kg, state->rng_hash, state, sample, num_samples,
-		                     PRNG_BEVEL_U, &disk_u, &disk_v);
-
-		/* Pick random axis in local frame and point on disk. */
-		float3 disk_N, disk_T, disk_B;
-		float pick_pdf_N, pick_pdf_T, pick_pdf_B;
-
-		disk_N = sd->Ng;
-		make_orthonormals(disk_N, &disk_T, &disk_B);
-
-		float axisu = disk_u;
-
-		if(axisu < 0.5f) {
-			pick_pdf_N = 0.5f;
-			pick_pdf_T = 0.25f;
-			pick_pdf_B = 0.25f;
-			disk_u *= 2.0f;
-		}
-		else if(axisu < 0.75f) {
-			float3 tmp = disk_N;
-			disk_N = disk_T;
-			disk_T = tmp;
-			pick_pdf_N = 0.25f;
-			pick_pdf_T = 0.5f;
-			pick_pdf_B = 0.25f;
-			disk_u = (disk_u - 0.5f)*4.0f;
-		}
-		else {
-			float3 tmp = disk_N;
-			disk_N = disk_B;
-			disk_B = tmp;
-			pick_pdf_N = 0.25f;
-			pick_pdf_T = 0.25f;
-			pick_pdf_B = 0.5f;
-			disk_u = (disk_u - 0.75f)*4.0f;
-		}
-
-		/* Sample point on disk. */
-		float phi = M_2PI_F * disk_u;
-		float disk_r = disk_v;
-		float disk_height;
-
-		/* Perhaps find something better than Cubic BSSRDF, but happens to work well. */
-		bssrdf_cubic_sample(radius, 0.0f, disk_r, &disk_r, &disk_height);
-
-		float3 disk_P = (disk_r*cosf(phi)) * disk_T + (disk_r*sinf(phi)) * disk_B;
-
-		/* Create ray. */
-		Ray *ray = &isect.ray;
-		ray->P = sd->P + disk_N*disk_height + disk_P;
-		ray->D = -disk_N;
-		ray->t = 2.0f*disk_height;
-		ray->dP = sd->dP;
-		ray->dD = differential3_zero();
-		ray->time = sd->time;
-
-		/* Intersect with the same object. if multiple intersections are found it
-		 * will use at most LOCAL_MAX_HITS hits, a random subset of all hits. */
-		scene_intersect_local(kg,
-		                      *ray,
-		                      &isect,
-		                      sd->object,
-		                      &lcg_state,
-		                      LOCAL_MAX_HITS);
-
-		int num_eval_hits = min(isect.num_hits, LOCAL_MAX_HITS);
-
-		for(int hit = 0; hit < num_eval_hits; hit++) {
-			/* Quickly retrieve P and Ng without setting up ShaderData. */
-			float3 hit_P;
-			if(sd->type & PRIMITIVE_TRIANGLE) {
-				hit_P = triangle_refine_local(kg,
-				                              sd,
-				                              &isect.hits[hit],
-				                              ray);
-			}
+  /* Early out if no sampling needed. */
+  if (radius <= 0.0f || num_samples < 1 || sd->object == OBJECT_NONE) {
+    return sd->N;
+  }
+
+  /* Can't raytrace from shaders like displacement, before BVH exists. */
+  if (kernel_data.bvh.bvh_layout == BVH_LAYOUT_NONE) {
+    return sd->N;
+  }
+
+  /* Don't bevel for blurry indirect rays. */
+  if (state->min_ray_pdf < 8.0f) {
+    return sd->N;
+  }
+
+  /* Setup for multi intersection. */
+  LocalIntersection isect;
+  uint lcg_state = lcg_state_init_addrspace(state, 0x64c6a40e);
+
+  /* Sample normals from surrounding points on surface. */
+  float3 sum_N = make_float3(0.0f, 0.0f, 0.0f);
+
+  for (int sample = 0; sample < num_samples; sample++) {
+    float disk_u, disk_v;
+    path_branched_rng_2D(
+        kg, state->rng_hash, state, sample, num_samples, PRNG_BEVEL_U, &disk_u, &disk_v);
+
+    /* Pick random axis in local frame and point on disk. */
+    float3 disk_N, disk_T, disk_B;
+    float pick_pdf_N, pick_pdf_T, pick_pdf_B;
+
+    disk_N = sd->Ng;
+    make_orthonormals(disk_N, &disk_T, &disk_B);
+
+    float axisu = disk_u;
+
+    if (axisu < 0.5f) {
+      pick_pdf_N = 0.5f;
+      pick_pdf_T = 0.25f;
+      pick_pdf_B = 0.25f;
+      disk_u *= 2.0f;
+    }
+    else if (axisu < 0.75f) {
+      float3 tmp = disk_N;
+      disk_N = disk_T;
+      disk_T = tmp;
+      pick_pdf_N = 0.25f;
+      pick_pdf_T = 0.5f;
+      pick_pdf_B = 0.25f;
+      disk_u = (disk_u - 0.5f) * 4.0f;
+    }
+    else {
+      float3 tmp = disk_N;
+      disk_N = disk_B;
+      disk_B = tmp;
+      pick_pdf_N = 0.25f;
+      pick_pdf_T = 0.25f;
+      pick_pdf_B = 0.5f;
+      disk_u = (disk_u - 0.75f) * 4.0f;
+    }
+
+    /* Sample point on disk. */
+    float phi = M_2PI_F * disk_u;
+    float disk_r = disk_v;
+    float disk_height;
+
+    /* Perhaps find something better than Cubic BSSRDF, but happens to work well. */
+    bssrdf_cubic_sample(radius, 0.0f, disk_r, &disk_r, &disk_height);
+
+    float3 disk_P = (disk_r * cosf(phi)) * disk_T + (disk_r * sinf(phi)) * disk_B;
+
+    /* Create ray. */
+    Ray *ray = &isect.ray;
+    ray->P = sd->P + disk_N * disk_height + disk_P;
+    ray->D = -disk_N;
+    ray->t = 2.0f * disk_height;
+    ray->dP = sd->dP;
+    ray->dD = differential3_zero();
+    ray->time = sd->time;
+
+    /* Intersect with the same object. if multiple intersections are found it
+     * will use at most LOCAL_MAX_HITS hits, a random subset of all hits. */
+    scene_intersect_local(kg, *ray, &isect, sd->object, &lcg_state, LOCAL_MAX_HITS);
+
+    int num_eval_hits = min(isect.num_hits, LOCAL_MAX_HITS);
+
+    for (int hit = 0; hit < num_eval_hits; hit++) {
+      /* Quickly retrieve P and Ng without setting up ShaderData. */
+      float3 hit_P;
+      if (sd->type & PRIMITIVE_TRIANGLE) {
+        hit_P = triangle_refine_local(kg, sd, &isect.hits[hit], ray);
+      }
 #ifdef __OBJECT_MOTION__
-			else  if(sd->type & PRIMITIVE_MOTION_TRIANGLE) {
-				float3 verts[3];
-				motion_triangle_vertices(
-					kg,
-					sd->object,
-					kernel_tex_fetch(__prim_index, isect.hits[hit].prim),
-					sd->time,
-					verts);
-				hit_P = motion_triangle_refine_local(kg,
-				                                     sd,
-				                                     &isect.hits[hit],
-				                                     ray,
-				                                     verts);
-			}
-#endif  /* __OBJECT_MOTION__ */
-
-			/* Get geometric normal. */
-			float3 hit_Ng = isect.Ng[hit];
-			int object = (isect.hits[hit].object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, isect.hits[hit].prim): isect.hits[hit].object;
-			int object_flag = kernel_tex_fetch(__object_flag, object);
-			if(object_flag & SD_OBJECT_NEGATIVE_SCALE_APPLIED) {
-				hit_Ng = -hit_Ng;
-			}
-
-			/* Compute smooth normal. */
-			float3 N = hit_Ng;
-			int prim = kernel_tex_fetch(__prim_index, isect.hits[hit].prim);
-			int shader = kernel_tex_fetch(__tri_shader, prim);
-
-			if(shader & SHADER_SMOOTH_NORMAL) {
-				float u = isect.hits[hit].u;
-				float v = isect.hits[hit].v;
-
-				if(sd->type & PRIMITIVE_TRIANGLE) {
-					N = triangle_smooth_normal(kg, N, prim, u, v);
-				}
+      else if (sd->type & PRIMITIVE_MOTION_TRIANGLE) {
+        float3 verts[3];
+        motion_triangle_vertices(
+            kg, sd->object, kernel_tex_fetch(__prim_index, isect.hits[hit].prim), sd->time, verts);
+        hit_P = motion_triangle_refine_local(kg, sd, &isect.hits[hit], ray, verts);
+      }
+#endif /* __OBJECT_MOTION__ */
+
+      /* Get geometric normal. */
+      float3 hit_Ng = isect.Ng[hit];
+      int object = (isect.hits[hit].object == OBJECT_NONE) ?
+                       kernel_tex_fetch(__prim_object, isect.hits[hit].prim) :
+                       isect.hits[hit].object;
+      int object_flag = kernel_tex_fetch(__object_flag, object);
+      if (object_flag & SD_OBJECT_NEGATIVE_SCALE_APPLIED) {
+        hit_Ng = -hit_Ng;
+      }
+
+      /* Compute smooth normal. */
+      float3 N = hit_Ng;
+      int prim = kernel_tex_fetch(__prim_index, isect.hits[hit].prim);
+      int shader = kernel_tex_fetch(__tri_shader, prim);
+
+      if (shader & SHADER_SMOOTH_NORMAL) {
+        float u = isect.hits[hit].u;
+        float v = isect.hits[hit].v;
+
+        if (sd->type & PRIMITIVE_TRIANGLE) {
+          N = triangle_smooth_normal(kg, N, prim, u, v);
+        }
 #ifdef __OBJECT_MOTION__
-				else if(sd->type & PRIMITIVE_MOTION_TRIANGLE) {
-					N = motion_triangle_smooth_normal(kg, N, sd->object, prim, u, v, sd->time);
-				}
-#endif  /* __OBJECT_MOTION__ */
-			}
-
-			/* Transform normals to world space. */
-			if(!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
-				object_normal_transform(kg, sd, &N);
-				object_normal_transform(kg, sd, &hit_Ng);
-			}
-
-			/* Probability densities for local frame axes. */
-			float pdf_N = pick_pdf_N * fabsf(dot(disk_N, hit_Ng));
-			float pdf_T = pick_pdf_T * fabsf(dot(disk_T, hit_Ng));
-			float pdf_B = pick_pdf_B * fabsf(dot(disk_B, hit_Ng));
-
-			/* Multiple importance sample between 3 axes, power heuristic
-			 * found to be slightly better than balance heuristic. pdf_N
-			 * in the MIS weight and denominator cancelled out. */
-			float w = pdf_N / (sqr(pdf_N) + sqr(pdf_T) + sqr(pdf_B));
-			if(isect.num_hits > LOCAL_MAX_HITS) {
-				w *= isect.num_hits/(float)LOCAL_MAX_HITS;
-			}
-
-			/* Real distance to sampled point. */
-			float r = len(hit_P - sd->P);
-
-			/* Compute weight. */
-			float pdf = bssrdf_cubic_pdf(radius, 0.0f, r);
-			float disk_pdf = bssrdf_cubic_pdf(radius, 0.0f, disk_r);
-
-			w *= pdf / disk_pdf;
-
-			/* Sum normal and weight. */
-			sum_N += w * N;
-		}
-	}
-
-	/* Normalize. */
-	float3 N = safe_normalize(sum_N);
-	return is_zero(N) ? sd->N : (sd->flag & SD_BACKFACING) ? -N : N;
+        else if (sd->type & PRIMITIVE_MOTION_TRIANGLE) {
+          N = motion_triangle_smooth_normal(kg, N, sd->object, prim, u, v, sd->time);
+        }
+#endif /* __OBJECT_MOTION__ */
+      }
+
+      /* Transform normals to world space. */
+      if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
+        object_normal_transform(kg, sd, &N);
+        object_normal_transform(kg, sd, &hit_Ng);
+      }
+
+      /* Probability densities for local frame axes. */
+      float pdf_N = pick_pdf_N * fabsf(dot(disk_N, hit_Ng));
+      float pdf_T = pick_pdf_T * fabsf(dot(disk_T, hit_Ng));
+      float pdf_B = pick_pdf_B * fabsf(dot(disk_B, hit_Ng));
+
+      /* Multiple importance sample between 3 axes, power heuristic
+       * found to be slightly better than balance heuristic. pdf_N
+       * in the MIS weight and denominator cancelled out. */
+      float w = pdf_N / (sqr(pdf_N) + sqr(pdf_T) + sqr(pdf_B));
+      if (isect.num_hits > LOCAL_MAX_HITS) {
+        w *= isect.num_hits / (float)LOCAL_MAX_HITS;
+      }
+
+      /* Real distance to sampled point. */
+      float r = len(hit_P - sd->P);
+
+      /* Compute weight. */
+      float pdf = bssrdf_cubic_pdf(radius, 0.0f, r);
+      float disk_pdf = bssrdf_cubic_pdf(radius, 0.0f, disk_r);
+
+      w *= pdf / disk_pdf;
+
+      /* Sum normal and weight. */
+      sum_N += w * N;
+    }
+  }
+
+  /* Normalize. */
+  float3 N = safe_normalize(sum_N);
+  return is_zero(N) ? sd->N : (sd->flag & SD_BACKFACING) ? -N : N;
 }
 
 ccl_device void svm_node_bevel(
-	KernelGlobals *kg,
-	ShaderData *sd,
-	ccl_addr_space PathState *state,
-	float *stack,
-	uint4 node)
+    KernelGlobals *kg, ShaderData *sd, ccl_addr_space PathState *state, float *stack, uint4 node)
 {
-	uint num_samples, radius_offset, normal_offset, out_offset;
-	decode_node_uchar4(node.y, &num_samples, &radius_offset, &normal_offset, &out_offset);
+  uint num_samples, radius_offset, normal_offset, out_offset;
+  decode_node_uchar4(node.y, &num_samples, &radius_offset, &normal_offset, &out_offset);
 
-	float radius = stack_load_float(stack, radius_offset);
-	float3 bevel_N = svm_bevel(kg, sd, state, radius, num_samples);
+  float radius = stack_load_float(stack, radius_offset);
+  float3 bevel_N = svm_bevel(kg, sd, state, radius, num_samples);
 
-	if(stack_valid(normal_offset)) {
-		/* Preserve input normal. */
-		float3 ref_N = stack_load_float3(stack, normal_offset);
-		bevel_N = normalize(ref_N + (bevel_N - sd->N));
-	}
+  if (stack_valid(normal_offset)) {
+    /* Preserve input normal. */
+    float3 ref_N = stack_load_float3(stack, normal_offset);
+    bevel_N = normalize(ref_N + (bevel_N - sd->N));
+  }
 
-	stack_store_float3(stack, out_offset, bevel_N);
+  stack_store_float3(stack, out_offset, bevel_N);
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_blackbody.h b/intern/cycles/kernel/svm/svm_blackbody.h
index 51590b18505..adfc50d961e 100644
--- a/intern/cycles/kernel/svm/svm_blackbody.h
+++ b/intern/cycles/kernel/svm/svm_blackbody.h
@@ -34,14 +34,15 @@ CCL_NAMESPACE_BEGIN
 
 /* Blackbody Node */
 
-ccl_device void svm_node_blackbody(KernelGlobals *kg, ShaderData *sd, float *stack, uint temperature_offset, uint col_offset)
+ccl_device void svm_node_blackbody(
+    KernelGlobals *kg, ShaderData *sd, float *stack, uint temperature_offset, uint col_offset)
 {
-	/* Input */
-	float temperature = stack_load_float(stack, temperature_offset);
+  /* Input */
+  float temperature = stack_load_float(stack, temperature_offset);
 
-	float3 color_rgb = svm_math_blackbody_color(temperature);
+  float3 color_rgb = svm_math_blackbody_color(temperature);
 
-	stack_store_float3(stack, col_offset, color_rgb);
+  stack_store_float3(stack, col_offset, color_rgb);
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_brick.h b/intern/cycles/kernel/svm/svm_brick.h
index 744d9ff16c5..b5cbfcc72df 100644
--- a/intern/cycles/kernel/svm/svm_brick.h
+++ b/intern/cycles/kernel/svm/svm_brick.h
@@ -20,101 +20,119 @@ CCL_NAMESPACE_BEGIN
 
 ccl_device_noinline float brick_noise(uint n) /* fast integer noise */
 {
-	uint nn;
-	n = (n + 1013) & 0x7fffffff;
-	n = (n >> 13) ^ n;
-	nn = (n * (n * n * 60493 + 19990303) + 1376312589) & 0x7fffffff;
-	return 0.5f * ((float)nn / 1073741824.0f);
+  uint nn;
+  n = (n + 1013) & 0x7fffffff;
+  n = (n >> 13) ^ n;
+  nn = (n * (n * n * 60493 + 19990303) + 1376312589) & 0x7fffffff;
+  return 0.5f * ((float)nn / 1073741824.0f);
 }
 
-ccl_device_noinline float2 svm_brick(float3 p, float mortar_size, float mortar_smooth, float bias,
-	float brick_width, float row_height, float offset_amount, int offset_frequency,
-	float squash_amount, int squash_frequency)
+ccl_device_noinline float2 svm_brick(float3 p,
+                                     float mortar_size,
+                                     float mortar_smooth,
+                                     float bias,
+                                     float brick_width,
+                                     float row_height,
+                                     float offset_amount,
+                                     int offset_frequency,
+                                     float squash_amount,
+                                     int squash_frequency)
 {
-	int bricknum, rownum;
-	float offset = 0.0f;
-	float x, y;
-
-	rownum = floor_to_int(p.y / row_height);
-
-	if(offset_frequency && squash_frequency) {
-		brick_width *= (rownum % squash_frequency) ? 1.0f : squash_amount; /* squash */
-		offset = (rownum % offset_frequency) ? 0.0f : (brick_width*offset_amount); /* offset */
-	}
-
-	bricknum = floor_to_int((p.x+offset) / brick_width);
-
-	x = (p.x+offset) - brick_width*bricknum;
-	y = p.y - row_height*rownum;
-
-	float tint = saturate((brick_noise((rownum << 16) + (bricknum & 0xFFFF)) + bias));
-	float min_dist = min(min(x, y), min(brick_width - x, row_height - y));
-
-	float mortar;
-	if(min_dist >= mortar_size) {
-		mortar = 0.0f;
-	}
-	else if(mortar_smooth == 0.0f) {
-		mortar = 1.0f;
-	}
-	else {
-		min_dist = 1.0f - min_dist/mortar_size;
-		mortar = (min_dist < mortar_smooth)? smoothstepf(min_dist / mortar_smooth) : 1.0f;
-	}
-
-	return make_float2(tint, mortar);
+  int bricknum, rownum;
+  float offset = 0.0f;
+  float x, y;
+
+  rownum = floor_to_int(p.y / row_height);
+
+  if (offset_frequency && squash_frequency) {
+    brick_width *= (rownum % squash_frequency) ? 1.0f : squash_amount;           /* squash */
+    offset = (rownum % offset_frequency) ? 0.0f : (brick_width * offset_amount); /* offset */
+  }
+
+  bricknum = floor_to_int((p.x + offset) / brick_width);
+
+  x = (p.x + offset) - brick_width * bricknum;
+  y = p.y - row_height * rownum;
+
+  float tint = saturate((brick_noise((rownum << 16) + (bricknum & 0xFFFF)) + bias));
+  float min_dist = min(min(x, y), min(brick_width - x, row_height - y));
+
+  float mortar;
+  if (min_dist >= mortar_size) {
+    mortar = 0.0f;
+  }
+  else if (mortar_smooth == 0.0f) {
+    mortar = 1.0f;
+  }
+  else {
+    min_dist = 1.0f - min_dist / mortar_size;
+    mortar = (min_dist < mortar_smooth) ? smoothstepf(min_dist / mortar_smooth) : 1.0f;
+  }
+
+  return make_float2(tint, mortar);
 }
 
-ccl_device void svm_node_tex_brick(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset)
+ccl_device void svm_node_tex_brick(
+    KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset)
 {
-	uint4 node2 = read_node(kg, offset);
-	uint4 node3 = read_node(kg, offset);
-	uint4 node4 = read_node(kg, offset);
-
-	/* Input and Output Sockets */
-	uint co_offset, color1_offset, color2_offset, mortar_offset, scale_offset;
-	uint mortar_size_offset, bias_offset, brick_width_offset, row_height_offset;
-	uint color_offset, fac_offset, mortar_smooth_offset;
-
-	/* RNA properties */
-	uint offset_frequency, squash_frequency;
-
-	decode_node_uchar4(node.y, &co_offset, &color1_offset, &color2_offset, &mortar_offset);
-	decode_node_uchar4(node.z, &scale_offset, &mortar_size_offset, &bias_offset, &brick_width_offset);
-	decode_node_uchar4(node.w, &row_height_offset, &color_offset, &fac_offset, &mortar_smooth_offset);
-
-	decode_node_uchar4(node2.x, &offset_frequency, &squash_frequency, NULL, NULL);
-
-	float3 co = stack_load_float3(stack, co_offset);
-
-	float3 color1 = stack_load_float3(stack, color1_offset);
-	float3 color2 = stack_load_float3(stack, color2_offset);
-	float3 mortar = stack_load_float3(stack, mortar_offset);
-
-	float scale = stack_load_float_default(stack, scale_offset, node2.y);
-	float mortar_size = stack_load_float_default(stack, mortar_size_offset, node2.z);
-	float mortar_smooth = stack_load_float_default(stack, mortar_smooth_offset, node4.x);
-	float bias = stack_load_float_default(stack, bias_offset, node2.w);
-	float brick_width = stack_load_float_default(stack, brick_width_offset, node3.x);
-	float row_height = stack_load_float_default(stack, row_height_offset, node3.y);
-	float offset_amount = __int_as_float(node3.z);
-	float squash_amount = __int_as_float(node3.w);
-
-	float2 f2 = svm_brick(co*scale, mortar_size, mortar_smooth, bias, brick_width, row_height,
-		offset_amount, offset_frequency, squash_amount, squash_frequency);
-
-	float tint = f2.x;
-	float f = f2.y;
-
-	if(f != 1.0f) {
-		float facm = 1.0f - tint;
-		color1 = facm * color1 + tint * color2;
-	}
-
-	if(stack_valid(color_offset))
-		stack_store_float3(stack, color_offset, color1*(1.0f-f) + mortar*f);
-	if(stack_valid(fac_offset))
-		stack_store_float(stack, fac_offset, f);
+  uint4 node2 = read_node(kg, offset);
+  uint4 node3 = read_node(kg, offset);
+  uint4 node4 = read_node(kg, offset);
+
+  /* Input and Output Sockets */
+  uint co_offset, color1_offset, color2_offset, mortar_offset, scale_offset;
+  uint mortar_size_offset, bias_offset, brick_width_offset, row_height_offset;
+  uint color_offset, fac_offset, mortar_smooth_offset;
+
+  /* RNA properties */
+  uint offset_frequency, squash_frequency;
+
+  decode_node_uchar4(node.y, &co_offset, &color1_offset, &color2_offset, &mortar_offset);
+  decode_node_uchar4(
+      node.z, &scale_offset, &mortar_size_offset, &bias_offset, &brick_width_offset);
+  decode_node_uchar4(
+      node.w, &row_height_offset, &color_offset, &fac_offset, &mortar_smooth_offset);
+
+  decode_node_uchar4(node2.x, &offset_frequency, &squash_frequency, NULL, NULL);
+
+  float3 co = stack_load_float3(stack, co_offset);
+
+  float3 color1 = stack_load_float3(stack, color1_offset);
+  float3 color2 = stack_load_float3(stack, color2_offset);
+  float3 mortar = stack_load_float3(stack, mortar_offset);
+
+  float scale = stack_load_float_default(stack, scale_offset, node2.y);
+  float mortar_size = stack_load_float_default(stack, mortar_size_offset, node2.z);
+  float mortar_smooth = stack_load_float_default(stack, mortar_smooth_offset, node4.x);
+  float bias = stack_load_float_default(stack, bias_offset, node2.w);
+  float brick_width = stack_load_float_default(stack, brick_width_offset, node3.x);
+  float row_height = stack_load_float_default(stack, row_height_offset, node3.y);
+  float offset_amount = __int_as_float(node3.z);
+  float squash_amount = __int_as_float(node3.w);
+
+  float2 f2 = svm_brick(co * scale,
+                        mortar_size,
+                        mortar_smooth,
+                        bias,
+                        brick_width,
+                        row_height,
+                        offset_amount,
+                        offset_frequency,
+                        squash_amount,
+                        squash_frequency);
+
+  float tint = f2.x;
+  float f = f2.y;
+
+  if (f != 1.0f) {
+    float facm = 1.0f - tint;
+    color1 = facm * color1 + tint * color2;
+  }
+
+  if (stack_valid(color_offset))
+    stack_store_float3(stack, color_offset, color1 * (1.0f - f) + mortar * f);
+  if (stack_valid(fac_offset))
+    stack_store_float(stack, fac_offset, f);
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_brightness.h b/intern/cycles/kernel/svm/svm_brightness.h
index d71b0ee0b61..dcd75a2fe8f 100644
--- a/intern/cycles/kernel/svm/svm_brightness.h
+++ b/intern/cycles/kernel/svm/svm_brightness.h
@@ -16,19 +16,20 @@
 
 CCL_NAMESPACE_BEGIN
 
-ccl_device void svm_node_brightness(ShaderData *sd, float *stack, uint in_color, uint out_color, uint node)
+ccl_device void svm_node_brightness(
+    ShaderData *sd, float *stack, uint in_color, uint out_color, uint node)
 {
-	uint bright_offset, contrast_offset;
-	float3 color = stack_load_float3(stack, in_color);
+  uint bright_offset, contrast_offset;
+  float3 color = stack_load_float3(stack, in_color);
 
-	decode_node_uchar4(node, &bright_offset, &contrast_offset, NULL, NULL);
-	float brightness = stack_load_float(stack, bright_offset);
-	float contrast  = stack_load_float(stack, contrast_offset);
+  decode_node_uchar4(node, &bright_offset, &contrast_offset, NULL, NULL);
+  float brightness = stack_load_float(stack, bright_offset);
+  float contrast = stack_load_float(stack, contrast_offset);
 
-	color = svm_brightness_contrast(color, brightness, contrast);
+  color = svm_brightness_contrast(color, brightness, contrast);
 
-	if(stack_valid(out_color))
-		stack_store_float3(stack, out_color, color);
+  if (stack_valid(out_color))
+    stack_store_float3(stack, out_color, color);
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_bump.h b/intern/cycles/kernel/svm/svm_bump.h
index 35aac174409..c9d430a2bba 100644
--- a/intern/cycles/kernel/svm/svm_bump.h
+++ b/intern/cycles/kernel/svm/svm_bump.h
@@ -18,36 +18,42 @@ CCL_NAMESPACE_BEGIN
 
 /* Bump Eval Nodes */
 
-ccl_device void svm_node_enter_bump_eval(KernelGlobals *kg, ShaderData *sd, float *stack, uint offset)
+ccl_device void svm_node_enter_bump_eval(KernelGlobals *kg,
+                                         ShaderData *sd,
+                                         float *stack,
+                                         uint offset)
 {
-	/* save state */
-	stack_store_float3(stack, offset+0, sd->P);
-	stack_store_float3(stack, offset+3, sd->dP.dx);
-	stack_store_float3(stack, offset+6, sd->dP.dy);
-
-	/* set state as if undisplaced */
-	const AttributeDescriptor desc = find_attribute(kg, sd, ATTR_STD_POSITION_UNDISPLACED);
-
-	if(desc.offset != ATTR_STD_NOT_FOUND) {
-		float3 P, dPdx, dPdy;
-		P = primitive_surface_attribute_float3(kg, sd, desc, &dPdx, &dPdy);
-
-		object_position_transform(kg, sd, &P);
-		object_dir_transform(kg, sd, &dPdx);
-		object_dir_transform(kg, sd, &dPdy);
-
-		sd->P = P;
-		sd->dP.dx = dPdx;
-		sd->dP.dy = dPdy;
-	}
+  /* save state */
+  stack_store_float3(stack, offset + 0, sd->P);
+  stack_store_float3(stack, offset + 3, sd->dP.dx);
+  stack_store_float3(stack, offset + 6, sd->dP.dy);
+
+  /* set state as if undisplaced */
+  const AttributeDescriptor desc = find_attribute(kg, sd, ATTR_STD_POSITION_UNDISPLACED);
+
+  if (desc.offset != ATTR_STD_NOT_FOUND) {
+    float3 P, dPdx, dPdy;
+    P = primitive_surface_attribute_float3(kg, sd, desc, &dPdx, &dPdy);
+
+    object_position_transform(kg, sd, &P);
+    object_dir_transform(kg, sd, &dPdx);
+    object_dir_transform(kg, sd, &dPdy);
+
+    sd->P = P;
+    sd->dP.dx = dPdx;
+    sd->dP.dy = dPdy;
+  }
 }
 
-ccl_device void svm_node_leave_bump_eval(KernelGlobals *kg, ShaderData *sd, float *stack, uint offset)
+ccl_device void svm_node_leave_bump_eval(KernelGlobals *kg,
+                                         ShaderData *sd,
+                                         float *stack,
+                                         uint offset)
 {
-	/* restore state */
-	sd->P = stack_load_float3(stack, offset+0);
-	sd->dP.dx = stack_load_float3(stack, offset+3);
-	sd->dP.dy = stack_load_float3(stack, offset+6);
+  /* restore state */
+  sd->P = stack_load_float3(stack, offset + 0);
+  sd->dP.dx = stack_load_float3(stack, offset + 3);
+  sd->dP.dy = stack_load_float3(stack, offset + 6);
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_camera.h b/intern/cycles/kernel/svm/svm_camera.h
index cf90229b53b..21a17acf5f1 100644
--- a/intern/cycles/kernel/svm/svm_camera.h
+++ b/intern/cycles/kernel/svm/svm_camera.h
@@ -16,25 +16,30 @@
 
 CCL_NAMESPACE_BEGIN
 
-ccl_device void svm_node_camera(KernelGlobals *kg, ShaderData *sd, float *stack, uint out_vector, uint out_zdepth, uint out_distance)
+ccl_device void svm_node_camera(KernelGlobals *kg,
+                                ShaderData *sd,
+                                float *stack,
+                                uint out_vector,
+                                uint out_zdepth,
+                                uint out_distance)
 {
-	float distance;
-	float zdepth;
-	float3 vector;
+  float distance;
+  float zdepth;
+  float3 vector;
 
-	Transform tfm = kernel_data.cam.worldtocamera;
-	vector = transform_point(&tfm, sd->P);
-	zdepth = vector.z;
-	distance = len(vector);
+  Transform tfm = kernel_data.cam.worldtocamera;
+  vector = transform_point(&tfm, sd->P);
+  zdepth = vector.z;
+  distance = len(vector);
 
-	if(stack_valid(out_vector))
-		stack_store_float3(stack, out_vector, normalize(vector));
+  if (stack_valid(out_vector))
+    stack_store_float3(stack, out_vector, normalize(vector));
 
-	if(stack_valid(out_zdepth))
-		stack_store_float(stack, out_zdepth, zdepth);
+  if (stack_valid(out_zdepth))
+    stack_store_float(stack, out_zdepth, zdepth);
 
-	if(stack_valid(out_distance))
-		stack_store_float(stack, out_distance, distance);
+  if (stack_valid(out_distance))
+    stack_store_float(stack, out_distance, distance);
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_checker.h b/intern/cycles/kernel/svm/svm_checker.h
index 45e6c181e9e..63b4d1e149b 100644
--- a/intern/cycles/kernel/svm/svm_checker.h
+++ b/intern/cycles/kernel/svm/svm_checker.h
@@ -20,37 +20,37 @@ CCL_NAMESPACE_BEGIN
 
 ccl_device_noinline float svm_checker(float3 p)
 {
-	/* avoid precision issues on unit coordinates */
-	p.x = (p.x + 0.000001f)*0.999999f;
-	p.y = (p.y + 0.000001f)*0.999999f;
-	p.z = (p.z + 0.000001f)*0.999999f;
+  /* avoid precision issues on unit coordinates */
+  p.x = (p.x + 0.000001f) * 0.999999f;
+  p.y = (p.y + 0.000001f) * 0.999999f;
+  p.z = (p.z + 0.000001f) * 0.999999f;
 
-	int xi = abs(float_to_int(floorf(p.x)));
-	int yi = abs(float_to_int(floorf(p.y)));
-	int zi = abs(float_to_int(floorf(p.z)));
+  int xi = abs(float_to_int(floorf(p.x)));
+  int yi = abs(float_to_int(floorf(p.y)));
+  int zi = abs(float_to_int(floorf(p.z)));
 
-	return ((xi % 2 == yi % 2) == (zi % 2))? 1.0f: 0.0f;
+  return ((xi % 2 == yi % 2) == (zi % 2)) ? 1.0f : 0.0f;
 }
 
 ccl_device void svm_node_tex_checker(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
 {
-	uint co_offset, color1_offset, color2_offset, scale_offset;
-	uint color_offset, fac_offset;
+  uint co_offset, color1_offset, color2_offset, scale_offset;
+  uint color_offset, fac_offset;
 
-	decode_node_uchar4(node.y, &co_offset, &color1_offset, &color2_offset, &scale_offset);
-	decode_node_uchar4(node.z, &color_offset, &fac_offset, NULL, NULL);
+  decode_node_uchar4(node.y, &co_offset, &color1_offset, &color2_offset, &scale_offset);
+  decode_node_uchar4(node.z, &color_offset, &fac_offset, NULL, NULL);
 
-	float3 co = stack_load_float3(stack, co_offset);
-	float3 color1 = stack_load_float3(stack, color1_offset);
-	float3 color2 = stack_load_float3(stack, color2_offset);
-	float scale = stack_load_float_default(stack, scale_offset, node.w);
+  float3 co = stack_load_float3(stack, co_offset);
+  float3 color1 = stack_load_float3(stack, color1_offset);
+  float3 color2 = stack_load_float3(stack, color2_offset);
+  float scale = stack_load_float_default(stack, scale_offset, node.w);
 
-	float f = svm_checker(co*scale);
+  float f = svm_checker(co * scale);
 
-	if(stack_valid(color_offset))
-		stack_store_float3(stack, color_offset, (f == 1.0f)? color1: color2);
-	if(stack_valid(fac_offset))
-		stack_store_float(stack, fac_offset, f);
+  if (stack_valid(color_offset))
+    stack_store_float3(stack, color_offset, (f == 1.0f) ? color1 : color2);
+  if (stack_valid(fac_offset))
+    stack_store_float(stack, fac_offset, f);
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_closure.h b/intern/cycles/kernel/svm/svm_closure.h
index a7e87715ed4..270fe4c8615 100644
--- a/intern/cycles/kernel/svm/svm_closure.h
+++ b/intern/cycles/kernel/svm/svm_closure.h
@@ -20,1140 +20,1237 @@ CCL_NAMESPACE_BEGIN
 
 ccl_device_inline float3 sigma_from_concentration(float eumelanin, float pheomelanin)
 {
-	return eumelanin*make_float3(0.506f, 0.841f, 1.653f) + pheomelanin*make_float3(0.343f, 0.733f, 1.924f);
+  return eumelanin * make_float3(0.506f, 0.841f, 1.653f) +
+         pheomelanin * make_float3(0.343f, 0.733f, 1.924f);
 }
 
 ccl_device_inline float3 sigma_from_reflectance(float3 color, float azimuthal_roughness)
 {
-	float x = azimuthal_roughness;
-	float roughness_fac = (((((0.245f*x) + 5.574f)*x - 10.73f)*x + 2.532f)*x - 0.215f)*x + 5.969f;
-	float3 sigma = log3(color) / roughness_fac;
-	return sigma * sigma;
+  float x = azimuthal_roughness;
+  float roughness_fac = (((((0.245f * x) + 5.574f) * x - 10.73f) * x + 2.532f) * x - 0.215f) * x +
+                        5.969f;
+  float3 sigma = log3(color) / roughness_fac;
+  return sigma * sigma;
 }
 
 /* Closure Nodes */
 
-ccl_device void svm_node_glass_setup(ShaderData *sd, MicrofacetBsdf *bsdf, int type, float eta, float roughness, bool refract)
+ccl_device void svm_node_glass_setup(
+    ShaderData *sd, MicrofacetBsdf *bsdf, int type, float eta, float roughness, bool refract)
 {
-	if(type == CLOSURE_BSDF_SHARP_GLASS_ID) {
-		if(refract) {
-			bsdf->alpha_y = 0.0f;
-			bsdf->alpha_x = 0.0f;
-			bsdf->ior = eta;
-			sd->flag |= bsdf_refraction_setup(bsdf);
-		}
-		else {
-			bsdf->alpha_y = 0.0f;
-			bsdf->alpha_x = 0.0f;
-			bsdf->ior = 0.0f;
-			sd->flag |= bsdf_reflection_setup(bsdf);
-		}
-	}
-	else if(type == CLOSURE_BSDF_MICROFACET_BECKMANN_GLASS_ID) {
-		bsdf->alpha_x = roughness;
-		bsdf->alpha_y = roughness;
-		bsdf->ior = eta;
-
-		if(refract)
-			sd->flag |= bsdf_microfacet_beckmann_refraction_setup(bsdf);
-		else
-			sd->flag |= bsdf_microfacet_beckmann_setup(bsdf);
-	}
-	else {
-		bsdf->alpha_x = roughness;
-		bsdf->alpha_y = roughness;
-		bsdf->ior = eta;
-
-		if(refract)
-			sd->flag |= bsdf_microfacet_ggx_refraction_setup(bsdf);
-		else
-			sd->flag |= bsdf_microfacet_ggx_setup(bsdf);
-	}
+  if (type == CLOSURE_BSDF_SHARP_GLASS_ID) {
+    if (refract) {
+      bsdf->alpha_y = 0.0f;
+      bsdf->alpha_x = 0.0f;
+      bsdf->ior = eta;
+      sd->flag |= bsdf_refraction_setup(bsdf);
+    }
+    else {
+      bsdf->alpha_y = 0.0f;
+      bsdf->alpha_x = 0.0f;
+      bsdf->ior = 0.0f;
+      sd->flag |= bsdf_reflection_setup(bsdf);
+    }
+  }
+  else if (type == CLOSURE_BSDF_MICROFACET_BECKMANN_GLASS_ID) {
+    bsdf->alpha_x = roughness;
+    bsdf->alpha_y = roughness;
+    bsdf->ior = eta;
+
+    if (refract)
+      sd->flag |= bsdf_microfacet_beckmann_refraction_setup(bsdf);
+    else
+      sd->flag |= bsdf_microfacet_beckmann_setup(bsdf);
+  }
+  else {
+    bsdf->alpha_x = roughness;
+    bsdf->alpha_y = roughness;
+    bsdf->ior = eta;
+
+    if (refract)
+      sd->flag |= bsdf_microfacet_ggx_refraction_setup(bsdf);
+    else
+      sd->flag |= bsdf_microfacet_ggx_setup(bsdf);
+  }
 }
 
-ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, ShaderType shader_type, int path_flag, int *offset)
+ccl_device void svm_node_closure_bsdf(KernelGlobals *kg,
+                                      ShaderData *sd,
+                                      float *stack,
+                                      uint4 node,
+                                      ShaderType shader_type,
+                                      int path_flag,
+                                      int *offset)
 {
-	uint type, param1_offset, param2_offset;
+  uint type, param1_offset, param2_offset;
 
-	uint mix_weight_offset;
-	decode_node_uchar4(node.y, &type, &param1_offset, &param2_offset, &mix_weight_offset);
-	float mix_weight = (stack_valid(mix_weight_offset)? stack_load_float(stack, mix_weight_offset): 1.0f);
+  uint mix_weight_offset;
+  decode_node_uchar4(node.y, &type, &param1_offset, &param2_offset, &mix_weight_offset);
+  float mix_weight = (stack_valid(mix_weight_offset) ? stack_load_float(stack, mix_weight_offset) :
+                                                       1.0f);
 
-	/* note we read this extra node before weight check, so offset is added */
-	uint4 data_node = read_node(kg, offset);
+  /* note we read this extra node before weight check, so offset is added */
+  uint4 data_node = read_node(kg, offset);
 
-	/* Only compute BSDF for surfaces, transparent variable is shared with volume extinction. */
-	if(mix_weight == 0.0f || shader_type != SHADER_TYPE_SURFACE) {
-		if(type == CLOSURE_BSDF_PRINCIPLED_ID) {
-			/* Read all principled BSDF extra data to get the right offset. */
-			read_node(kg, offset);
-			read_node(kg, offset);
-			read_node(kg, offset);
-			read_node(kg, offset);
-		}
+  /* Only compute BSDF for surfaces, transparent variable is shared with volume extinction. */
+  if (mix_weight == 0.0f || shader_type != SHADER_TYPE_SURFACE) {
+    if (type == CLOSURE_BSDF_PRINCIPLED_ID) {
+      /* Read all principled BSDF extra data to get the right offset. */
+      read_node(kg, offset);
+      read_node(kg, offset);
+      read_node(kg, offset);
+      read_node(kg, offset);
+    }
 
-		return;
-	}
+    return;
+  }
 
-	float3 N = stack_valid(data_node.x)? stack_load_float3(stack, data_node.x): sd->N;
+  float3 N = stack_valid(data_node.x) ? stack_load_float3(stack, data_node.x) : sd->N;
 
-	float param1 = (stack_valid(param1_offset))? stack_load_float(stack, param1_offset): __uint_as_float(node.z);
-	float param2 = (stack_valid(param2_offset))? stack_load_float(stack, param2_offset): __uint_as_float(node.w);
+  float param1 = (stack_valid(param1_offset)) ? stack_load_float(stack, param1_offset) :
+                                                __uint_as_float(node.z);
+  float param2 = (stack_valid(param2_offset)) ? stack_load_float(stack, param2_offset) :
+                                                __uint_as_float(node.w);
 
-	switch(type) {
+  switch (type) {
 #ifdef __PRINCIPLED__
-		case CLOSURE_BSDF_PRINCIPLED_ID: {
-			uint specular_offset, roughness_offset, specular_tint_offset, anisotropic_offset, sheen_offset,
-				sheen_tint_offset, clearcoat_offset, clearcoat_roughness_offset, eta_offset, transmission_offset,
-				anisotropic_rotation_offset, transmission_roughness_offset;
-			uint4 data_node2 = read_node(kg, offset);
-
-			float3 T = stack_load_float3(stack, data_node.y);
-			decode_node_uchar4(data_node.z, &specular_offset, &roughness_offset, &specular_tint_offset, &anisotropic_offset);
-			decode_node_uchar4(data_node.w, &sheen_offset, &sheen_tint_offset, &clearcoat_offset, &clearcoat_roughness_offset);
-			decode_node_uchar4(data_node2.x, &eta_offset, &transmission_offset, &anisotropic_rotation_offset, &transmission_roughness_offset);
-
-			// get Disney principled parameters
-			float metallic = param1;
-			float subsurface = param2;
-			float specular = stack_load_float(stack, specular_offset);
-			float roughness = stack_load_float(stack, roughness_offset);
-			float specular_tint = stack_load_float(stack, specular_tint_offset);
-			float anisotropic = stack_load_float(stack, anisotropic_offset);
-			float sheen = stack_load_float(stack, sheen_offset);
-			float sheen_tint = stack_load_float(stack, sheen_tint_offset);
-			float clearcoat = stack_load_float(stack, clearcoat_offset);
-			float clearcoat_roughness = stack_load_float(stack, clearcoat_roughness_offset);
-			float transmission = stack_load_float(stack, transmission_offset);
-			float anisotropic_rotation = stack_load_float(stack, anisotropic_rotation_offset);
-			float transmission_roughness = stack_load_float(stack, transmission_roughness_offset);
-			float eta = fmaxf(stack_load_float(stack, eta_offset), 1e-5f);
-
-			ClosureType distribution = (ClosureType) data_node2.y;
-			ClosureType subsurface_method = (ClosureType) data_node2.z;
-
-			/* rotate tangent */
-			if(anisotropic_rotation != 0.0f)
-				T = rotate_around_axis(T, N, anisotropic_rotation * M_2PI_F);
-
-			/* calculate ior */
-			float ior = (sd->flag & SD_BACKFACING) ? 1.0f / eta : eta;
-
-			// calculate fresnel for refraction
-			float cosNO = dot(N, sd->I);
-			float fresnel = fresnel_dielectric_cos(cosNO, ior);
-
-			// calculate weights of the diffuse and specular part
-			float diffuse_weight = (1.0f - saturate(metallic)) * (1.0f - saturate(transmission));
-
-			float final_transmission = saturate(transmission) * (1.0f - saturate(metallic));
-			float specular_weight = (1.0f - final_transmission);
-
-			// get the base color
-			uint4 data_base_color = read_node(kg, offset);
-			float3 base_color = stack_valid(data_base_color.x) ? stack_load_float3(stack, data_base_color.x) :
-				make_float3(__uint_as_float(data_base_color.y), __uint_as_float(data_base_color.z), __uint_as_float(data_base_color.w));
-
-			// get the additional clearcoat normal and subsurface scattering radius
-			uint4 data_cn_ssr = read_node(kg, offset);
-			float3 clearcoat_normal = stack_valid(data_cn_ssr.x) ? stack_load_float3(stack, data_cn_ssr.x) : sd->N;
-			float3 subsurface_radius = stack_valid(data_cn_ssr.y) ? stack_load_float3(stack, data_cn_ssr.y) : make_float3(1.0f, 1.0f, 1.0f);
-
-			// get the subsurface color
-			uint4 data_subsurface_color = read_node(kg, offset);
-			float3 subsurface_color = stack_valid(data_subsurface_color.x) ? stack_load_float3(stack, data_subsurface_color.x) :
-				make_float3(__uint_as_float(data_subsurface_color.y), __uint_as_float(data_subsurface_color.z), __uint_as_float(data_subsurface_color.w));
-
-			float3 weight = sd->svm_closure_weight * mix_weight;
-
-#ifdef __SUBSURFACE__
-			float3 mixed_ss_base_color = subsurface_color * subsurface + base_color * (1.0f - subsurface);
-			float3 subsurf_weight = weight * mixed_ss_base_color * diffuse_weight;
-
-			/* disable in case of diffuse ancestor, can't see it well then and
-			 * adds considerably noise due to probabilities of continuing path
-			 * getting lower and lower */
-			if(path_flag & PATH_RAY_DIFFUSE_ANCESTOR) {
-				subsurface = 0.0f;
-
-				/* need to set the base color in this case such that the
-				 * rays get the correctly mixed color after transmitting
-				 * the object */
-				base_color = mixed_ss_base_color;
-			}
-
-			/* diffuse */
-			if(fabsf(average(mixed_ss_base_color)) > CLOSURE_WEIGHT_CUTOFF) {
-				if(subsurface <= CLOSURE_WEIGHT_CUTOFF && diffuse_weight > CLOSURE_WEIGHT_CUTOFF) {
-					float3 diff_weight = weight * base_color * diffuse_weight;
-
-					PrincipledDiffuseBsdf *bsdf = (PrincipledDiffuseBsdf*)bsdf_alloc(sd, sizeof(PrincipledDiffuseBsdf), diff_weight);
-
-					if(bsdf) {
-						bsdf->N = N;
-						bsdf->roughness = roughness;
-
-						/* setup bsdf */
-						sd->flag |= bsdf_principled_diffuse_setup(bsdf);
-					}
-				}
-				else if(subsurface > CLOSURE_WEIGHT_CUTOFF) {
-					Bssrdf *bssrdf = bssrdf_alloc(sd, subsurf_weight);
-
-					if(bssrdf) {
-						bssrdf->radius = subsurface_radius * subsurface;
-						bssrdf->albedo = (subsurface_method == CLOSURE_BSSRDF_PRINCIPLED_ID)? subsurface_color:  mixed_ss_base_color;
-						bssrdf->texture_blur = 0.0f;
-						bssrdf->sharpness = 0.0f;
-						bssrdf->N = N;
-						bssrdf->roughness = roughness;
-
-						/* setup bsdf */
-						sd->flag |= bssrdf_setup(sd, bssrdf, subsurface_method);
-					}
-				}
-			}
-#else
-			/* diffuse */
-			if(diffuse_weight > CLOSURE_WEIGHT_CUTOFF) {
-				float3 diff_weight = weight * base_color * diffuse_weight;
-
-				PrincipledDiffuseBsdf *bsdf = (PrincipledDiffuseBsdf*)bsdf_alloc(sd, sizeof(PrincipledDiffuseBsdf), diff_weight);
-
-				if(bsdf) {
-					bsdf->N = N;
-					bsdf->roughness = roughness;
-
-					/* setup bsdf */
-					sd->flag |= bsdf_principled_diffuse_setup(bsdf);
-				}
-			}
-#endif
-
-			/* sheen */
-			if(diffuse_weight > CLOSURE_WEIGHT_CUTOFF && sheen > CLOSURE_WEIGHT_CUTOFF) {
-				float m_cdlum = linear_rgb_to_gray(kg, base_color);
-				float3 m_ctint = m_cdlum > 0.0f ? base_color / m_cdlum : make_float3(1.0f, 1.0f, 1.0f); // normalize lum. to isolate hue+sat
-
-				/* color of the sheen component */
-				float3 sheen_color = make_float3(1.0f, 1.0f, 1.0f) * (1.0f - sheen_tint) + m_ctint * sheen_tint;
-
-				float3 sheen_weight = weight * sheen * sheen_color * diffuse_weight;
-
-				PrincipledSheenBsdf *bsdf = (PrincipledSheenBsdf*)bsdf_alloc(sd, sizeof(PrincipledSheenBsdf), sheen_weight);
-
-				if(bsdf) {
-					bsdf->N = N;
-
-					/* setup bsdf */
-					sd->flag |= bsdf_principled_sheen_setup(bsdf);
-				}
-			}
-
-			/* specular reflection */
+    case CLOSURE_BSDF_PRINCIPLED_ID: {
+      uint specular_offset, roughness_offset, specular_tint_offset, anisotropic_offset,
+          sheen_offset, sheen_tint_offset, clearcoat_offset, clearcoat_roughness_offset,
+          eta_offset, transmission_offset, anisotropic_rotation_offset,
+          transmission_roughness_offset;
+      uint4 data_node2 = read_node(kg, offset);
+
+      float3 T = stack_load_float3(stack, data_node.y);
+      decode_node_uchar4(data_node.z,
+                         &specular_offset,
+                         &roughness_offset,
+                         &specular_tint_offset,
+                         &anisotropic_offset);
+      decode_node_uchar4(data_node.w,
+                         &sheen_offset,
+                         &sheen_tint_offset,
+                         &clearcoat_offset,
+                         &clearcoat_roughness_offset);
+      decode_node_uchar4(data_node2.x,
+                         &eta_offset,
+                         &transmission_offset,
+                         &anisotropic_rotation_offset,
+                         &transmission_roughness_offset);
+
+      // get Disney principled parameters
+      float metallic = param1;
+      float subsurface = param2;
+      float specular = stack_load_float(stack, specular_offset);
+      float roughness = stack_load_float(stack, roughness_offset);
+      float specular_tint = stack_load_float(stack, specular_tint_offset);
+      float anisotropic = stack_load_float(stack, anisotropic_offset);
+      float sheen = stack_load_float(stack, sheen_offset);
+      float sheen_tint = stack_load_float(stack, sheen_tint_offset);
+      float clearcoat = stack_load_float(stack, clearcoat_offset);
+      float clearcoat_roughness = stack_load_float(stack, clearcoat_roughness_offset);
+      float transmission = stack_load_float(stack, transmission_offset);
+      float anisotropic_rotation = stack_load_float(stack, anisotropic_rotation_offset);
+      float transmission_roughness = stack_load_float(stack, transmission_roughness_offset);
+      float eta = fmaxf(stack_load_float(stack, eta_offset), 1e-5f);
+
+      ClosureType distribution = (ClosureType)data_node2.y;
+      ClosureType subsurface_method = (ClosureType)data_node2.z;
+
+      /* rotate tangent */
+      if (anisotropic_rotation != 0.0f)
+        T = rotate_around_axis(T, N, anisotropic_rotation * M_2PI_F);
+
+      /* calculate ior */
+      float ior = (sd->flag & SD_BACKFACING) ? 1.0f / eta : eta;
+
+      // calculate fresnel for refraction
+      float cosNO = dot(N, sd->I);
+      float fresnel = fresnel_dielectric_cos(cosNO, ior);
+
+      // calculate weights of the diffuse and specular part
+      float diffuse_weight = (1.0f - saturate(metallic)) * (1.0f - saturate(transmission));
+
+      float final_transmission = saturate(transmission) * (1.0f - saturate(metallic));
+      float specular_weight = (1.0f - final_transmission);
+
+      // get the base color
+      uint4 data_base_color = read_node(kg, offset);
+      float3 base_color = stack_valid(data_base_color.x) ?
+                              stack_load_float3(stack, data_base_color.x) :
+                              make_float3(__uint_as_float(data_base_color.y),
+                                          __uint_as_float(data_base_color.z),
+                                          __uint_as_float(data_base_color.w));
+
+      // get the additional clearcoat normal and subsurface scattering radius
+      uint4 data_cn_ssr = read_node(kg, offset);
+      float3 clearcoat_normal = stack_valid(data_cn_ssr.x) ?
+                                    stack_load_float3(stack, data_cn_ssr.x) :
+                                    sd->N;
+      float3 subsurface_radius = stack_valid(data_cn_ssr.y) ?
+                                     stack_load_float3(stack, data_cn_ssr.y) :
+                                     make_float3(1.0f, 1.0f, 1.0f);
+
+      // get the subsurface color
+      uint4 data_subsurface_color = read_node(kg, offset);
+      float3 subsurface_color = stack_valid(data_subsurface_color.x) ?
+                                    stack_load_float3(stack, data_subsurface_color.x) :
+                                    make_float3(__uint_as_float(data_subsurface_color.y),
+                                                __uint_as_float(data_subsurface_color.z),
+                                                __uint_as_float(data_subsurface_color.w));
+
+      float3 weight = sd->svm_closure_weight * mix_weight;
+
+#  ifdef __SUBSURFACE__
+      float3 mixed_ss_base_color = subsurface_color * subsurface +
+                                   base_color * (1.0f - subsurface);
+      float3 subsurf_weight = weight * mixed_ss_base_color * diffuse_weight;
+
+      /* disable in case of diffuse ancestor, can't see it well then and
+       * adds considerably noise due to probabilities of continuing path
+       * getting lower and lower */
+      if (path_flag & PATH_RAY_DIFFUSE_ANCESTOR) {
+        subsurface = 0.0f;
+
+        /* need to set the base color in this case such that the
+         * rays get the correctly mixed color after transmitting
+         * the object */
+        base_color = mixed_ss_base_color;
+      }
+
+      /* diffuse */
+      if (fabsf(average(mixed_ss_base_color)) > CLOSURE_WEIGHT_CUTOFF) {
+        if (subsurface <= CLOSURE_WEIGHT_CUTOFF && diffuse_weight > CLOSURE_WEIGHT_CUTOFF) {
+          float3 diff_weight = weight * base_color * diffuse_weight;
+
+          PrincipledDiffuseBsdf *bsdf = (PrincipledDiffuseBsdf *)bsdf_alloc(
+              sd, sizeof(PrincipledDiffuseBsdf), diff_weight);
+
+          if (bsdf) {
+            bsdf->N = N;
+            bsdf->roughness = roughness;
+
+            /* setup bsdf */
+            sd->flag |= bsdf_principled_diffuse_setup(bsdf);
+          }
+        }
+        else if (subsurface > CLOSURE_WEIGHT_CUTOFF) {
+          Bssrdf *bssrdf = bssrdf_alloc(sd, subsurf_weight);
+
+          if (bssrdf) {
+            bssrdf->radius = subsurface_radius * subsurface;
+            bssrdf->albedo = (subsurface_method == CLOSURE_BSSRDF_PRINCIPLED_ID) ?
+                                 subsurface_color :
+                                 mixed_ss_base_color;
+            bssrdf->texture_blur = 0.0f;
+            bssrdf->sharpness = 0.0f;
+            bssrdf->N = N;
+            bssrdf->roughness = roughness;
+
+            /* setup bsdf */
+            sd->flag |= bssrdf_setup(sd, bssrdf, subsurface_method);
+          }
+        }
+      }
+#  else
+      /* diffuse */
+      if (diffuse_weight > CLOSURE_WEIGHT_CUTOFF) {
+        float3 diff_weight = weight * base_color * diffuse_weight;
+
+        PrincipledDiffuseBsdf *bsdf = (PrincipledDiffuseBsdf *)bsdf_alloc(
+            sd, sizeof(PrincipledDiffuseBsdf), diff_weight);
+
+        if (bsdf) {
+          bsdf->N = N;
+          bsdf->roughness = roughness;
+
+          /* setup bsdf */
+          sd->flag |= bsdf_principled_diffuse_setup(bsdf);
+        }
+      }
+#  endif
+
+      /* sheen */
+      if (diffuse_weight > CLOSURE_WEIGHT_CUTOFF && sheen > CLOSURE_WEIGHT_CUTOFF) {
+        float m_cdlum = linear_rgb_to_gray(kg, base_color);
+        float3 m_ctint = m_cdlum > 0.0f ?
+                             base_color / m_cdlum :
+                             make_float3(1.0f, 1.0f, 1.0f);  // normalize lum. to isolate hue+sat
+
+        /* color of the sheen component */
+        float3 sheen_color = make_float3(1.0f, 1.0f, 1.0f) * (1.0f - sheen_tint) +
+                             m_ctint * sheen_tint;
+
+        float3 sheen_weight = weight * sheen * sheen_color * diffuse_weight;
+
+        PrincipledSheenBsdf *bsdf = (PrincipledSheenBsdf *)bsdf_alloc(
+            sd, sizeof(PrincipledSheenBsdf), sheen_weight);
+
+        if (bsdf) {
+          bsdf->N = N;
+
+          /* setup bsdf */
+          sd->flag |= bsdf_principled_sheen_setup(bsdf);
+        }
+      }
+
+      /* specular reflection */
+#  ifdef __CAUSTICS_TRICKS__
+      if (kernel_data.integrator.caustics_reflective || (path_flag & PATH_RAY_DIFFUSE) == 0) {
+#  endif
+        if (specular_weight > CLOSURE_WEIGHT_CUTOFF &&
+            (specular > CLOSURE_WEIGHT_CUTOFF || metallic > CLOSURE_WEIGHT_CUTOFF)) {
+          float3 spec_weight = weight * specular_weight;
+
+          MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc(
+              sd, sizeof(MicrofacetBsdf), spec_weight);
+          MicrofacetExtra *extra = (bsdf != NULL) ? (MicrofacetExtra *)closure_alloc_extra(
+                                                        sd, sizeof(MicrofacetExtra)) :
+                                                    NULL;
+
+          if (bsdf && extra) {
+            bsdf->N = N;
+            bsdf->ior = (2.0f / (1.0f - safe_sqrtf(0.08f * specular))) - 1.0f;
+            bsdf->T = T;
+            bsdf->extra = extra;
+
+            float aspect = safe_sqrtf(1.0f - anisotropic * 0.9f);
+            float r2 = roughness * roughness;
+
+            bsdf->alpha_x = r2 / aspect;
+            bsdf->alpha_y = r2 * aspect;
+
+            float m_cdlum = 0.3f * base_color.x + 0.6f * base_color.y +
+                            0.1f * base_color.z;  // luminance approx.
+            float3 m_ctint = m_cdlum > 0.0f ?
+                                 base_color / m_cdlum :
+                                 make_float3(
+                                     0.0f, 0.0f, 0.0f);  // normalize lum. to isolate hue+sat
+            float3 tmp_col = make_float3(1.0f, 1.0f, 1.0f) * (1.0f - specular_tint) +
+                             m_ctint * specular_tint;
+
+            bsdf->extra->cspec0 = (specular * 0.08f * tmp_col) * (1.0f - metallic) +
+                                  base_color * metallic;
+            bsdf->extra->color = base_color;
+            bsdf->extra->clearcoat = 0.0f;
+
+            /* setup bsdf */
+            if (distribution == CLOSURE_BSDF_MICROFACET_GGX_GLASS_ID ||
+                roughness <= 0.075f) /* use single-scatter GGX */
+              sd->flag |= bsdf_microfacet_ggx_aniso_fresnel_setup(bsdf, sd);
+            else /* use multi-scatter GGX */
+              sd->flag |= bsdf_microfacet_multi_ggx_aniso_fresnel_setup(bsdf, sd);
+          }
+        }
+#  ifdef __CAUSTICS_TRICKS__
+      }
+#  endif
+
+      /* BSDF */
+#  ifdef __CAUSTICS_TRICKS__
+      if (kernel_data.integrator.caustics_reflective ||
+          kernel_data.integrator.caustics_refractive || (path_flag & PATH_RAY_DIFFUSE) == 0) {
+#  endif
+        if (final_transmission > CLOSURE_WEIGHT_CUTOFF) {
+          float3 glass_weight = weight * final_transmission;
+          float3 cspec0 = base_color * specular_tint +
+                          make_float3(1.0f, 1.0f, 1.0f) * (1.0f - specular_tint);
+
+          if (roughness <= 5e-2f ||
+              distribution == CLOSURE_BSDF_MICROFACET_GGX_GLASS_ID) { /* use single-scatter GGX */
+            float refl_roughness = roughness;
+
+            /* reflection */
+#  ifdef __CAUSTICS_TRICKS__
+            if (kernel_data.integrator.caustics_reflective || (path_flag & PATH_RAY_DIFFUSE) == 0)
+#  endif
+            {
+              MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc(
+                  sd, sizeof(MicrofacetBsdf), glass_weight * fresnel);
+              MicrofacetExtra *extra = (bsdf != NULL) ? (MicrofacetExtra *)closure_alloc_extra(
+                                                            sd, sizeof(MicrofacetExtra)) :
+                                                        NULL;
+
+              if (bsdf && extra) {
+                bsdf->N = N;
+                bsdf->T = make_float3(0.0f, 0.0f, 0.0f);
+                bsdf->extra = extra;
+
+                bsdf->alpha_x = refl_roughness * refl_roughness;
+                bsdf->alpha_y = refl_roughness * refl_roughness;
+                bsdf->ior = ior;
+
+                bsdf->extra->color = base_color;
+                bsdf->extra->cspec0 = cspec0;
+                bsdf->extra->clearcoat = 0.0f;
+
+                /* setup bsdf */
+                sd->flag |= bsdf_microfacet_ggx_fresnel_setup(bsdf, sd);
+              }
+            }
+
+            /* refraction */
+#  ifdef __CAUSTICS_TRICKS__
+            if (kernel_data.integrator.caustics_refractive || (path_flag & PATH_RAY_DIFFUSE) == 0)
+#  endif
+            {
+              MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc(
+                  sd, sizeof(MicrofacetBsdf), base_color * glass_weight * (1.0f - fresnel));
+              if (bsdf) {
+                bsdf->N = N;
+                bsdf->T = make_float3(0.0f, 0.0f, 0.0f);
+                bsdf->extra = NULL;
+
+                if (distribution == CLOSURE_BSDF_MICROFACET_GGX_GLASS_ID)
+                  transmission_roughness = 1.0f - (1.0f - refl_roughness) *
+                                                      (1.0f - transmission_roughness);
+                else
+                  transmission_roughness = refl_roughness;
+
+                bsdf->alpha_x = transmission_roughness * transmission_roughness;
+                bsdf->alpha_y = transmission_roughness * transmission_roughness;
+                bsdf->ior = ior;
+
+                /* setup bsdf */
+                sd->flag |= bsdf_microfacet_ggx_refraction_setup(bsdf);
+              }
+            }
+          }
+          else { /* use multi-scatter GGX */
+            MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc(
+                sd, sizeof(MicrofacetBsdf), glass_weight);
+            MicrofacetExtra *extra = (bsdf != NULL) ? (MicrofacetExtra *)closure_alloc_extra(
+                                                          sd, sizeof(MicrofacetExtra)) :
+                                                      NULL;
+
+            if (bsdf && extra) {
+              bsdf->N = N;
+              bsdf->extra = extra;
+              bsdf->T = make_float3(0.0f, 0.0f, 0.0f);
+
+              bsdf->alpha_x = roughness * roughness;
+              bsdf->alpha_y = roughness * roughness;
+              bsdf->ior = ior;
+
+              bsdf->extra->color = base_color;
+              bsdf->extra->cspec0 = cspec0;
+              bsdf->extra->clearcoat = 0.0f;
+
+              /* setup bsdf */
+              sd->flag |= bsdf_microfacet_multi_ggx_glass_fresnel_setup(bsdf, sd);
+            }
+          }
+        }
+#  ifdef __CAUSTICS_TRICKS__
+      }
+#  endif
+
+      /* clearcoat */
+#  ifdef __CAUSTICS_TRICKS__
+      if (kernel_data.integrator.caustics_reflective || (path_flag & PATH_RAY_DIFFUSE) == 0) {
+#  endif
+        if (clearcoat > CLOSURE_WEIGHT_CUTOFF) {
+          MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc(sd, sizeof(MicrofacetBsdf), weight);
+          MicrofacetExtra *extra = (bsdf != NULL) ? (MicrofacetExtra *)closure_alloc_extra(
+                                                        sd, sizeof(MicrofacetExtra)) :
+                                                    NULL;
+
+          if (bsdf && extra) {
+            bsdf->N = clearcoat_normal;
+            bsdf->T = make_float3(0.0f, 0.0f, 0.0f);
+            bsdf->ior = 1.5f;
+            bsdf->extra = extra;
+
+            bsdf->alpha_x = clearcoat_roughness * clearcoat_roughness;
+            bsdf->alpha_y = clearcoat_roughness * clearcoat_roughness;
+
+            bsdf->extra->color = make_float3(0.0f, 0.0f, 0.0f);
+            bsdf->extra->cspec0 = make_float3(0.04f, 0.04f, 0.04f);
+            bsdf->extra->clearcoat = clearcoat;
+
+            /* setup bsdf */
+            sd->flag |= bsdf_microfacet_ggx_clearcoat_setup(bsdf, sd);
+          }
+        }
+#  ifdef __CAUSTICS_TRICKS__
+      }
+#  endif
+
+      break;
+    }
+#endif /* __PRINCIPLED__ */
+    case CLOSURE_BSDF_DIFFUSE_ID: {
+      float3 weight = sd->svm_closure_weight * mix_weight;
+      OrenNayarBsdf *bsdf = (OrenNayarBsdf *)bsdf_alloc(sd, sizeof(OrenNayarBsdf), weight);
+
+      if (bsdf) {
+        bsdf->N = N;
+
+        float roughness = param1;
+
+        if (roughness == 0.0f) {
+          sd->flag |= bsdf_diffuse_setup((DiffuseBsdf *)bsdf);
+        }
+        else {
+          bsdf->roughness = roughness;
+          sd->flag |= bsdf_oren_nayar_setup(bsdf);
+        }
+      }
+      break;
+    }
+    case CLOSURE_BSDF_TRANSLUCENT_ID: {
+      float3 weight = sd->svm_closure_weight * mix_weight;
+      DiffuseBsdf *bsdf = (DiffuseBsdf *)bsdf_alloc(sd, sizeof(DiffuseBsdf), weight);
+
+      if (bsdf) {
+        bsdf->N = N;
+        sd->flag |= bsdf_translucent_setup(bsdf);
+      }
+      break;
+    }
+    case CLOSURE_BSDF_TRANSPARENT_ID: {
+      float3 weight = sd->svm_closure_weight * mix_weight;
+      bsdf_transparent_setup(sd, weight, path_flag);
+      break;
+    }
+    case CLOSURE_BSDF_REFLECTION_ID:
+    case CLOSURE_BSDF_MICROFACET_GGX_ID:
+    case CLOSURE_BSDF_MICROFACET_BECKMANN_ID:
+    case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID:
+    case CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID: {
 #ifdef __CAUSTICS_TRICKS__
-			if(kernel_data.integrator.caustics_reflective || (path_flag & PATH_RAY_DIFFUSE) == 0) {
+      if (!kernel_data.integrator.caustics_reflective && (path_flag & PATH_RAY_DIFFUSE))
+        break;
 #endif
-				if(specular_weight > CLOSURE_WEIGHT_CUTOFF && (specular > CLOSURE_WEIGHT_CUTOFF || metallic > CLOSURE_WEIGHT_CUTOFF)) {
-					float3 spec_weight = weight * specular_weight;
-
-					MicrofacetBsdf *bsdf = (MicrofacetBsdf*)bsdf_alloc(sd, sizeof(MicrofacetBsdf), spec_weight);
-					MicrofacetExtra *extra = (bsdf != NULL)
-					        ? (MicrofacetExtra*)closure_alloc_extra(sd, sizeof(MicrofacetExtra))
-					        : NULL;
-
-					if(bsdf && extra) {
-						bsdf->N = N;
-						bsdf->ior = (2.0f / (1.0f - safe_sqrtf(0.08f * specular))) - 1.0f;
-						bsdf->T = T;
-						bsdf->extra = extra;
-
-						float aspect = safe_sqrtf(1.0f - anisotropic * 0.9f);
-						float r2 = roughness * roughness;
-
-						bsdf->alpha_x = r2 / aspect;
-						bsdf->alpha_y = r2 * aspect;
-
-						float m_cdlum = 0.3f * base_color.x + 0.6f * base_color.y + 0.1f * base_color.z; // luminance approx.
-						float3 m_ctint = m_cdlum > 0.0f ? base_color / m_cdlum : make_float3(0.0f, 0.0f, 0.0f); // normalize lum. to isolate hue+sat
-						float3 tmp_col = make_float3(1.0f, 1.0f, 1.0f) * (1.0f - specular_tint) + m_ctint * specular_tint;
-
-						bsdf->extra->cspec0 = (specular * 0.08f * tmp_col) * (1.0f - metallic) + base_color * metallic;
-						bsdf->extra->color = base_color;
-						bsdf->extra->clearcoat = 0.0f;
-
-						/* setup bsdf */
-						if(distribution == CLOSURE_BSDF_MICROFACET_GGX_GLASS_ID || roughness <= 0.075f) /* use single-scatter GGX */
-							sd->flag |= bsdf_microfacet_ggx_aniso_fresnel_setup(bsdf, sd);
-						else  /* use multi-scatter GGX */
-							sd->flag |= bsdf_microfacet_multi_ggx_aniso_fresnel_setup(bsdf, sd);
-					}
-				}
+      float3 weight = sd->svm_closure_weight * mix_weight;
+      MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc(sd, sizeof(MicrofacetBsdf), weight);
+
+      if (!bsdf) {
+        break;
+      }
+
+      float roughness = sqr(param1);
+
+      bsdf->N = N;
+      bsdf->T = make_float3(0.0f, 0.0f, 0.0f);
+      bsdf->alpha_x = roughness;
+      bsdf->alpha_y = roughness;
+      bsdf->ior = 0.0f;
+      bsdf->extra = NULL;
+
+      /* setup bsdf */
+      if (type == CLOSURE_BSDF_REFLECTION_ID)
+        sd->flag |= bsdf_reflection_setup(bsdf);
+      else if (type == CLOSURE_BSDF_MICROFACET_BECKMANN_ID)
+        sd->flag |= bsdf_microfacet_beckmann_setup(bsdf);
+      else if (type == CLOSURE_BSDF_MICROFACET_GGX_ID)
+        sd->flag |= bsdf_microfacet_ggx_setup(bsdf);
+      else if (type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID) {
+        kernel_assert(stack_valid(data_node.z));
+        bsdf->extra = (MicrofacetExtra *)closure_alloc_extra(sd, sizeof(MicrofacetExtra));
+        if (bsdf->extra) {
+          bsdf->extra->color = stack_load_float3(stack, data_node.z);
+          bsdf->extra->cspec0 = make_float3(0.0f, 0.0f, 0.0f);
+          bsdf->extra->clearcoat = 0.0f;
+          sd->flag |= bsdf_microfacet_multi_ggx_setup(bsdf);
+        }
+      }
+      else {
+        sd->flag |= bsdf_ashikhmin_shirley_setup(bsdf);
+      }
+
+      break;
+    }
+    case CLOSURE_BSDF_REFRACTION_ID:
+    case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID:
+    case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID: {
 #ifdef __CAUSTICS_TRICKS__
-			}
+      if (!kernel_data.integrator.caustics_refractive && (path_flag & PATH_RAY_DIFFUSE))
+        break;
 #endif
-
-			/* BSDF */
+      float3 weight = sd->svm_closure_weight * mix_weight;
+      MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc(sd, sizeof(MicrofacetBsdf), weight);
+
+      if (bsdf) {
+        bsdf->N = N;
+        bsdf->T = make_float3(0.0f, 0.0f, 0.0f);
+        bsdf->extra = NULL;
+
+        float eta = fmaxf(param2, 1e-5f);
+        eta = (sd->flag & SD_BACKFACING) ? 1.0f / eta : eta;
+
+        /* setup bsdf */
+        if (type == CLOSURE_BSDF_REFRACTION_ID) {
+          bsdf->alpha_x = 0.0f;
+          bsdf->alpha_y = 0.0f;
+          bsdf->ior = eta;
+
+          sd->flag |= bsdf_refraction_setup(bsdf);
+        }
+        else {
+          float roughness = sqr(param1);
+          bsdf->alpha_x = roughness;
+          bsdf->alpha_y = roughness;
+          bsdf->ior = eta;
+
+          if (type == CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID)
+            sd->flag |= bsdf_microfacet_beckmann_refraction_setup(bsdf);
+          else
+            sd->flag |= bsdf_microfacet_ggx_refraction_setup(bsdf);
+        }
+      }
+
+      break;
+    }
+    case CLOSURE_BSDF_SHARP_GLASS_ID:
+    case CLOSURE_BSDF_MICROFACET_GGX_GLASS_ID:
+    case CLOSURE_BSDF_MICROFACET_BECKMANN_GLASS_ID: {
 #ifdef __CAUSTICS_TRICKS__
-			if(kernel_data.integrator.caustics_reflective || kernel_data.integrator.caustics_refractive || (path_flag & PATH_RAY_DIFFUSE) == 0) {
+      if (!kernel_data.integrator.caustics_reflective &&
+          !kernel_data.integrator.caustics_refractive && (path_flag & PATH_RAY_DIFFUSE)) {
+        break;
+      }
 #endif
-				if(final_transmission > CLOSURE_WEIGHT_CUTOFF) {
-					float3 glass_weight = weight * final_transmission;
-					float3 cspec0 = base_color * specular_tint + make_float3(1.0f, 1.0f, 1.0f) * (1.0f - specular_tint);
+      float3 weight = sd->svm_closure_weight * mix_weight;
 
-					if(roughness <= 5e-2f || distribution == CLOSURE_BSDF_MICROFACET_GGX_GLASS_ID) { /* use single-scatter GGX */
-						float refl_roughness = roughness;
+      /* index of refraction */
+      float eta = fmaxf(param2, 1e-5f);
+      eta = (sd->flag & SD_BACKFACING) ? 1.0f / eta : eta;
 
-						/* reflection */
-#ifdef __CAUSTICS_TRICKS__
-						if(kernel_data.integrator.caustics_reflective || (path_flag & PATH_RAY_DIFFUSE) == 0)
-#endif
-						{
-							MicrofacetBsdf *bsdf = (MicrofacetBsdf*)bsdf_alloc(sd, sizeof(MicrofacetBsdf), glass_weight*fresnel);
-							MicrofacetExtra *extra = (bsdf != NULL)
-							        ? (MicrofacetExtra*)closure_alloc_extra(sd, sizeof(MicrofacetExtra))
-							        : NULL;
-
-							if(bsdf && extra) {
-								bsdf->N = N;
-								bsdf->T = make_float3(0.0f, 0.0f, 0.0f);
-								bsdf->extra = extra;
-
-								bsdf->alpha_x = refl_roughness * refl_roughness;
-								bsdf->alpha_y = refl_roughness * refl_roughness;
-								bsdf->ior = ior;
-
-								bsdf->extra->color = base_color;
-								bsdf->extra->cspec0 = cspec0;
-								bsdf->extra->clearcoat = 0.0f;
-
-								/* setup bsdf */
-								sd->flag |= bsdf_microfacet_ggx_fresnel_setup(bsdf, sd);
-							}
-						}
-
-						/* refraction */
-#ifdef __CAUSTICS_TRICKS__
-						if(kernel_data.integrator.caustics_refractive || (path_flag & PATH_RAY_DIFFUSE) == 0)
-#endif
-						{
-							MicrofacetBsdf *bsdf = (MicrofacetBsdf*)bsdf_alloc(sd, sizeof(MicrofacetBsdf), base_color*glass_weight*(1.0f - fresnel));
-							if(bsdf) {
-								bsdf->N = N;
-								bsdf->T = make_float3(0.0f, 0.0f, 0.0f);
-								bsdf->extra = NULL;
-
-								if(distribution == CLOSURE_BSDF_MICROFACET_GGX_GLASS_ID)
-									transmission_roughness = 1.0f - (1.0f - refl_roughness) * (1.0f - transmission_roughness);
-								else
-									transmission_roughness = refl_roughness;
-
-								bsdf->alpha_x = transmission_roughness * transmission_roughness;
-								bsdf->alpha_y = transmission_roughness * transmission_roughness;
-								bsdf->ior = ior;
-
-								/* setup bsdf */
-								sd->flag |= bsdf_microfacet_ggx_refraction_setup(bsdf);
-							}
-						}
-					}
-					else { /* use multi-scatter GGX */
-						MicrofacetBsdf *bsdf = (MicrofacetBsdf*)bsdf_alloc(sd, sizeof(MicrofacetBsdf), glass_weight);
-						MicrofacetExtra *extra = (bsdf != NULL)
-						        ? (MicrofacetExtra*)closure_alloc_extra(sd, sizeof(MicrofacetExtra))
-						        : NULL;
-
-						if(bsdf && extra) {
-							bsdf->N = N;
-							bsdf->extra = extra;
-							bsdf->T = make_float3(0.0f, 0.0f, 0.0f);
-
-							bsdf->alpha_x = roughness * roughness;
-							bsdf->alpha_y = roughness * roughness;
-							bsdf->ior = ior;
-
-							bsdf->extra->color = base_color;
-							bsdf->extra->cspec0 = cspec0;
-							bsdf->extra->clearcoat = 0.0f;
-
-							/* setup bsdf */
-							sd->flag |= bsdf_microfacet_multi_ggx_glass_fresnel_setup(bsdf, sd);
-						}
-					}
-				}
-#ifdef __CAUSTICS_TRICKS__
-			}
-#endif
+      /* fresnel */
+      float cosNO = dot(N, sd->I);
+      float fresnel = fresnel_dielectric_cos(cosNO, eta);
+      float roughness = sqr(param1);
 
-			/* clearcoat */
+      /* reflection */
 #ifdef __CAUSTICS_TRICKS__
-			if(kernel_data.integrator.caustics_reflective || (path_flag & PATH_RAY_DIFFUSE) == 0) {
+      if (kernel_data.integrator.caustics_reflective || (path_flag & PATH_RAY_DIFFUSE) == 0)
 #endif
-				if(clearcoat > CLOSURE_WEIGHT_CUTOFF) {
-					MicrofacetBsdf *bsdf = (MicrofacetBsdf*)bsdf_alloc(sd, sizeof(MicrofacetBsdf), weight);
-					MicrofacetExtra *extra = (bsdf != NULL)
-					        ? (MicrofacetExtra*)closure_alloc_extra(sd, sizeof(MicrofacetExtra))
-					        : NULL;
-
-					if(bsdf && extra) {
-						bsdf->N = clearcoat_normal;
-						bsdf->T = make_float3(0.0f, 0.0f, 0.0f);
-						bsdf->ior = 1.5f;
-						bsdf->extra = extra;
-
-						bsdf->alpha_x = clearcoat_roughness * clearcoat_roughness;
-						bsdf->alpha_y = clearcoat_roughness * clearcoat_roughness;
-
-						bsdf->extra->color = make_float3(0.0f, 0.0f, 0.0f);
-						bsdf->extra->cspec0 = make_float3(0.04f, 0.04f, 0.04f);
-						bsdf->extra->clearcoat = clearcoat;
-
-						/* setup bsdf */
-						sd->flag |= bsdf_microfacet_ggx_clearcoat_setup(bsdf, sd);
-					}
-				}
+      {
+        MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc(
+            sd, sizeof(MicrofacetBsdf), weight * fresnel);
+
+        if (bsdf) {
+          bsdf->N = N;
+          bsdf->T = make_float3(0.0f, 0.0f, 0.0f);
+          bsdf->extra = NULL;
+          svm_node_glass_setup(sd, bsdf, type, eta, roughness, false);
+        }
+      }
+
+      /* refraction */
 #ifdef __CAUSTICS_TRICKS__
-			}
+      if (kernel_data.integrator.caustics_refractive || (path_flag & PATH_RAY_DIFFUSE) == 0)
 #endif
-
-			break;
-		}
-#endif  /* __PRINCIPLED__ */
-		case CLOSURE_BSDF_DIFFUSE_ID: {
-			float3 weight = sd->svm_closure_weight * mix_weight;
-			OrenNayarBsdf *bsdf = (OrenNayarBsdf*)bsdf_alloc(sd, sizeof(OrenNayarBsdf), weight);
-
-			if(bsdf) {
-				bsdf->N = N;
-
-				float roughness = param1;
-
-				if(roughness == 0.0f) {
-					sd->flag |= bsdf_diffuse_setup((DiffuseBsdf*)bsdf);
-				}
-				else {
-					bsdf->roughness = roughness;
-					sd->flag |= bsdf_oren_nayar_setup(bsdf);
-				}
-			}
-			break;
-		}
-		case CLOSURE_BSDF_TRANSLUCENT_ID: {
-			float3 weight = sd->svm_closure_weight * mix_weight;
-			DiffuseBsdf *bsdf = (DiffuseBsdf*)bsdf_alloc(sd, sizeof(DiffuseBsdf), weight);
-
-			if(bsdf) {
-				bsdf->N = N;
-				sd->flag |= bsdf_translucent_setup(bsdf);
-			}
-			break;
-		}
-		case CLOSURE_BSDF_TRANSPARENT_ID: {
-			float3 weight = sd->svm_closure_weight * mix_weight;
-			bsdf_transparent_setup(sd, weight, path_flag);
-			break;
-		}
-		case CLOSURE_BSDF_REFLECTION_ID:
-		case CLOSURE_BSDF_MICROFACET_GGX_ID:
-		case CLOSURE_BSDF_MICROFACET_BECKMANN_ID:
-		case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID:
-		case CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID: {
+      {
+        MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc(
+            sd, sizeof(MicrofacetBsdf), weight * (1.0f - fresnel));
+
+        if (bsdf) {
+          bsdf->N = N;
+          bsdf->T = make_float3(0.0f, 0.0f, 0.0f);
+          bsdf->extra = NULL;
+          svm_node_glass_setup(sd, bsdf, type, eta, roughness, true);
+        }
+      }
+
+      break;
+    }
+    case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID: {
 #ifdef __CAUSTICS_TRICKS__
-			if(!kernel_data.integrator.caustics_reflective && (path_flag & PATH_RAY_DIFFUSE))
-				break;
+      if (!kernel_data.integrator.caustics_reflective &&
+          !kernel_data.integrator.caustics_refractive && (path_flag & PATH_RAY_DIFFUSE))
+        break;
 #endif
-			float3 weight = sd->svm_closure_weight * mix_weight;
-			MicrofacetBsdf *bsdf = (MicrofacetBsdf*)bsdf_alloc(sd, sizeof(MicrofacetBsdf), weight);
-
-			if(!bsdf) {
-				break;
-			}
-
-			float roughness = sqr(param1);
-
-			bsdf->N = N;
-			bsdf->T = make_float3(0.0f, 0.0f, 0.0f);
-			bsdf->alpha_x = roughness;
-			bsdf->alpha_y = roughness;
-			bsdf->ior = 0.0f;
-			bsdf->extra = NULL;
-
-			/* setup bsdf */
-			if(type == CLOSURE_BSDF_REFLECTION_ID)
-				sd->flag |= bsdf_reflection_setup(bsdf);
-			else if(type == CLOSURE_BSDF_MICROFACET_BECKMANN_ID)
-				sd->flag |= bsdf_microfacet_beckmann_setup(bsdf);
-			else if(type == CLOSURE_BSDF_MICROFACET_GGX_ID)
-				sd->flag |= bsdf_microfacet_ggx_setup(bsdf);
-			else if(type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID) {
-				kernel_assert(stack_valid(data_node.z));
-				bsdf->extra = (MicrofacetExtra*)closure_alloc_extra(sd, sizeof(MicrofacetExtra));
-				if(bsdf->extra) {
-					bsdf->extra->color = stack_load_float3(stack, data_node.z);
-					bsdf->extra->cspec0 = make_float3(0.0f, 0.0f, 0.0f);
-					bsdf->extra->clearcoat = 0.0f;
-					sd->flag |= bsdf_microfacet_multi_ggx_setup(bsdf);
-				}
-			}
-			else {
-				sd->flag |= bsdf_ashikhmin_shirley_setup(bsdf);
-			}
-
-			break;
-		}
-		case CLOSURE_BSDF_REFRACTION_ID:
-		case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID:
-		case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID: {
+      float3 weight = sd->svm_closure_weight * mix_weight;
+      MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc(sd, sizeof(MicrofacetBsdf), weight);
+      if (!bsdf) {
+        break;
+      }
+
+      MicrofacetExtra *extra = (MicrofacetExtra *)closure_alloc_extra(sd, sizeof(MicrofacetExtra));
+      if (!extra) {
+        break;
+      }
+
+      bsdf->N = N;
+      bsdf->extra = extra;
+      bsdf->T = make_float3(0.0f, 0.0f, 0.0f);
+
+      float roughness = sqr(param1);
+      bsdf->alpha_x = roughness;
+      bsdf->alpha_y = roughness;
+      float eta = fmaxf(param2, 1e-5f);
+      bsdf->ior = (sd->flag & SD_BACKFACING) ? 1.0f / eta : eta;
+
+      kernel_assert(stack_valid(data_node.z));
+      bsdf->extra->color = stack_load_float3(stack, data_node.z);
+      bsdf->extra->cspec0 = make_float3(0.0f, 0.0f, 0.0f);
+      bsdf->extra->clearcoat = 0.0f;
+
+      /* setup bsdf */
+      sd->flag |= bsdf_microfacet_multi_ggx_glass_setup(bsdf);
+      break;
+    }
+    case CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID:
+    case CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID:
+    case CLOSURE_BSDF_MICROFACET_MULTI_GGX_ANISO_ID:
+    case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID: {
 #ifdef __CAUSTICS_TRICKS__
-			if(!kernel_data.integrator.caustics_refractive && (path_flag & PATH_RAY_DIFFUSE))
-				break;
+      if (!kernel_data.integrator.caustics_reflective && (path_flag & PATH_RAY_DIFFUSE))
+        break;
 #endif
-			float3 weight = sd->svm_closure_weight * mix_weight;
-			MicrofacetBsdf *bsdf = (MicrofacetBsdf*)bsdf_alloc(sd, sizeof(MicrofacetBsdf), weight);
-
-			if(bsdf) {
-				bsdf->N = N;
-				bsdf->T = make_float3(0.0f, 0.0f, 0.0f);
-				bsdf->extra = NULL;
-
-				float eta = fmaxf(param2, 1e-5f);
-				eta = (sd->flag & SD_BACKFACING)? 1.0f/eta: eta;
-
-				/* setup bsdf */
-				if(type == CLOSURE_BSDF_REFRACTION_ID) {
-					bsdf->alpha_x = 0.0f;
-					bsdf->alpha_y = 0.0f;
-					bsdf->ior = eta;
-
-					sd->flag |= bsdf_refraction_setup(bsdf);
-				}
-				else {
-					float roughness = sqr(param1);
-					bsdf->alpha_x = roughness;
-					bsdf->alpha_y = roughness;
-					bsdf->ior = eta;
-
-					if(type == CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID)
-						sd->flag |= bsdf_microfacet_beckmann_refraction_setup(bsdf);
-					else
-						sd->flag |= bsdf_microfacet_ggx_refraction_setup(bsdf);
-				}
-			}
-
-			break;
-		}
-		case CLOSURE_BSDF_SHARP_GLASS_ID:
-		case CLOSURE_BSDF_MICROFACET_GGX_GLASS_ID:
-		case CLOSURE_BSDF_MICROFACET_BECKMANN_GLASS_ID: {
+      float3 weight = sd->svm_closure_weight * mix_weight;
+      MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc(sd, sizeof(MicrofacetBsdf), weight);
+
+      if (bsdf) {
+        bsdf->N = N;
+        bsdf->extra = NULL;
+        bsdf->T = stack_load_float3(stack, data_node.y);
+
+        /* rotate tangent */
+        float rotation = stack_load_float(stack, data_node.z);
+
+        if (rotation != 0.0f)
+          bsdf->T = rotate_around_axis(bsdf->T, bsdf->N, rotation * M_2PI_F);
+
+        /* compute roughness */
+        float roughness = sqr(param1);
+        float anisotropy = clamp(param2, -0.99f, 0.99f);
+
+        if (anisotropy < 0.0f) {
+          bsdf->alpha_x = roughness / (1.0f + anisotropy);
+          bsdf->alpha_y = roughness * (1.0f + anisotropy);
+        }
+        else {
+          bsdf->alpha_x = roughness * (1.0f - anisotropy);
+          bsdf->alpha_y = roughness / (1.0f - anisotropy);
+        }
+
+        bsdf->ior = 0.0f;
+
+        if (type == CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID) {
+          sd->flag |= bsdf_microfacet_beckmann_aniso_setup(bsdf);
+        }
+        else if (type == CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID) {
+          sd->flag |= bsdf_microfacet_ggx_aniso_setup(bsdf);
+        }
+        else if (type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_ANISO_ID) {
+          kernel_assert(stack_valid(data_node.w));
+          bsdf->extra = (MicrofacetExtra *)closure_alloc_extra(sd, sizeof(MicrofacetExtra));
+          if (bsdf->extra) {
+            bsdf->extra->color = stack_load_float3(stack, data_node.w);
+            bsdf->extra->cspec0 = make_float3(0.0f, 0.0f, 0.0f);
+            bsdf->extra->clearcoat = 0.0f;
+            sd->flag |= bsdf_microfacet_multi_ggx_aniso_setup(bsdf);
+          }
+        }
+        else
+          sd->flag |= bsdf_ashikhmin_shirley_aniso_setup(bsdf);
+      }
+      break;
+    }
+    case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID: {
+      float3 weight = sd->svm_closure_weight * mix_weight;
+      VelvetBsdf *bsdf = (VelvetBsdf *)bsdf_alloc(sd, sizeof(VelvetBsdf), weight);
+
+      if (bsdf) {
+        bsdf->N = N;
+
+        bsdf->sigma = saturate(param1);
+        sd->flag |= bsdf_ashikhmin_velvet_setup(bsdf);
+      }
+      break;
+    }
+    case CLOSURE_BSDF_GLOSSY_TOON_ID:
 #ifdef __CAUSTICS_TRICKS__
-			if(!kernel_data.integrator.caustics_reflective &&
-			   !kernel_data.integrator.caustics_refractive && (path_flag & PATH_RAY_DIFFUSE))
-			{
-				break;
-			}
+      if (!kernel_data.integrator.caustics_reflective && (path_flag & PATH_RAY_DIFFUSE))
+        break;
+      ATTR_FALLTHROUGH;
 #endif
-			float3 weight = sd->svm_closure_weight * mix_weight;
-
-			/* index of refraction */
-			float eta = fmaxf(param2, 1e-5f);
-			eta = (sd->flag & SD_BACKFACING)? 1.0f/eta: eta;
-
-			/* fresnel */
-			float cosNO = dot(N, sd->I);
-			float fresnel = fresnel_dielectric_cos(cosNO, eta);
-			float roughness = sqr(param1);
-
-			/* reflection */
-#ifdef __CAUSTICS_TRICKS__
-			if(kernel_data.integrator.caustics_reflective || (path_flag & PATH_RAY_DIFFUSE) == 0)
-#endif
-			{
-				MicrofacetBsdf *bsdf = (MicrofacetBsdf*)bsdf_alloc(sd, sizeof(MicrofacetBsdf), weight*fresnel);
-
-				if(bsdf) {
-					bsdf->N = N;
-					bsdf->T = make_float3(0.0f, 0.0f, 0.0f);
-					bsdf->extra = NULL;
-					svm_node_glass_setup(sd, bsdf, type, eta, roughness, false);
-				}
-			}
-
-			/* refraction */
-#ifdef __CAUSTICS_TRICKS__
-			if(kernel_data.integrator.caustics_refractive || (path_flag & PATH_RAY_DIFFUSE) == 0)
-#endif
-			{
-				MicrofacetBsdf *bsdf = (MicrofacetBsdf*)bsdf_alloc(sd, sizeof(MicrofacetBsdf), weight*(1.0f - fresnel));
-
-				if(bsdf) {
-					bsdf->N = N;
-					bsdf->T = make_float3(0.0f, 0.0f, 0.0f);
-					bsdf->extra = NULL;
-					svm_node_glass_setup(sd, bsdf, type, eta, roughness, true);
-				}
-			}
-
-			break;
-		}
-		case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID: {
-#ifdef __CAUSTICS_TRICKS__
-			if(!kernel_data.integrator.caustics_reflective && !kernel_data.integrator.caustics_refractive && (path_flag & PATH_RAY_DIFFUSE))
-				break;
-#endif
-			float3 weight = sd->svm_closure_weight * mix_weight;
-			MicrofacetBsdf *bsdf = (MicrofacetBsdf*)bsdf_alloc(sd, sizeof(MicrofacetBsdf), weight);
-			if(!bsdf) {
-				break;
-			}
-
-			MicrofacetExtra *extra = (MicrofacetExtra*)closure_alloc_extra(sd, sizeof(MicrofacetExtra));
-			if(!extra) {
-				break;
-			}
-
-			bsdf->N = N;
-			bsdf->extra = extra;
-			bsdf->T = make_float3(0.0f, 0.0f, 0.0f);
-
-			float roughness = sqr(param1);
-			bsdf->alpha_x = roughness;
-			bsdf->alpha_y = roughness;
-			float eta = fmaxf(param2, 1e-5f);
-			bsdf->ior = (sd->flag & SD_BACKFACING)? 1.0f/eta: eta;
-
-			kernel_assert(stack_valid(data_node.z));
-			bsdf->extra->color = stack_load_float3(stack, data_node.z);
-			bsdf->extra->cspec0 = make_float3(0.0f, 0.0f, 0.0f);
-			bsdf->extra->clearcoat = 0.0f;
-
-			/* setup bsdf */
-			sd->flag |= bsdf_microfacet_multi_ggx_glass_setup(bsdf);
-			break;
-		}
-		case CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID:
-		case CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID:
-		case CLOSURE_BSDF_MICROFACET_MULTI_GGX_ANISO_ID:
-		case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID: {
-#ifdef __CAUSTICS_TRICKS__
-			if(!kernel_data.integrator.caustics_reflective && (path_flag & PATH_RAY_DIFFUSE))
-				break;
-#endif
-			float3 weight = sd->svm_closure_weight * mix_weight;
-			MicrofacetBsdf *bsdf = (MicrofacetBsdf*)bsdf_alloc(sd, sizeof(MicrofacetBsdf), weight);
-
-			if(bsdf) {
-				bsdf->N = N;
-				bsdf->extra = NULL;
-				bsdf->T = stack_load_float3(stack, data_node.y);
-
-				/* rotate tangent */
-				float rotation = stack_load_float(stack, data_node.z);
-
-				if(rotation != 0.0f)
-					bsdf->T = rotate_around_axis(bsdf->T, bsdf->N, rotation * M_2PI_F);
-
-				/* compute roughness */
-				float roughness = sqr(param1);
-				float anisotropy = clamp(param2, -0.99f, 0.99f);
-
-				if(anisotropy < 0.0f) {
-					bsdf->alpha_x = roughness/(1.0f + anisotropy);
-					bsdf->alpha_y = roughness*(1.0f + anisotropy);
-				}
-				else {
-					bsdf->alpha_x = roughness*(1.0f - anisotropy);
-					bsdf->alpha_y = roughness/(1.0f - anisotropy);
-				}
-
-				bsdf->ior = 0.0f;
-
-				if(type == CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID) {
-					sd->flag |= bsdf_microfacet_beckmann_aniso_setup(bsdf);
-				}
-				else if(type == CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID) {
-					sd->flag |= bsdf_microfacet_ggx_aniso_setup(bsdf);
-				}
-				else if(type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_ANISO_ID) {
-					kernel_assert(stack_valid(data_node.w));
-					bsdf->extra = (MicrofacetExtra*)closure_alloc_extra(sd, sizeof(MicrofacetExtra));
-					if(bsdf->extra) {
-						bsdf->extra->color = stack_load_float3(stack, data_node.w);
-						bsdf->extra->cspec0 = make_float3(0.0f, 0.0f, 0.0f);
-						bsdf->extra->clearcoat = 0.0f;
-						sd->flag |= bsdf_microfacet_multi_ggx_aniso_setup(bsdf);
-					}
-				}
-				else
-					sd->flag |= bsdf_ashikhmin_shirley_aniso_setup(bsdf);
-			}
-			break;
-		}
-		case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID: {
-			float3 weight = sd->svm_closure_weight * mix_weight;
-			VelvetBsdf *bsdf = (VelvetBsdf*)bsdf_alloc(sd, sizeof(VelvetBsdf), weight);
-
-			if(bsdf) {
-				bsdf->N = N;
-
-				bsdf->sigma = saturate(param1);
-				sd->flag |= bsdf_ashikhmin_velvet_setup(bsdf);
-			}
-			break;
-		}
-		case CLOSURE_BSDF_GLOSSY_TOON_ID:
-#ifdef __CAUSTICS_TRICKS__
-			if(!kernel_data.integrator.caustics_reflective && (path_flag & PATH_RAY_DIFFUSE))
-				break;
-			ATTR_FALLTHROUGH;
-#endif
-		case CLOSURE_BSDF_DIFFUSE_TOON_ID: {
-			float3 weight = sd->svm_closure_weight * mix_weight;
-			ToonBsdf *bsdf = (ToonBsdf*)bsdf_alloc(sd, sizeof(ToonBsdf), weight);
-
-			if(bsdf) {
-				bsdf->N = N;
-				bsdf->size = param1;
-				bsdf->smooth = param2;
-
-				if(type == CLOSURE_BSDF_DIFFUSE_TOON_ID)
-					sd->flag |= bsdf_diffuse_toon_setup(bsdf);
-				else
-					sd->flag |= bsdf_glossy_toon_setup(bsdf);
-			}
-			break;
-		}
+    case CLOSURE_BSDF_DIFFUSE_TOON_ID: {
+      float3 weight = sd->svm_closure_weight * mix_weight;
+      ToonBsdf *bsdf = (ToonBsdf *)bsdf_alloc(sd, sizeof(ToonBsdf), weight);
+
+      if (bsdf) {
+        bsdf->N = N;
+        bsdf->size = param1;
+        bsdf->smooth = param2;
+
+        if (type == CLOSURE_BSDF_DIFFUSE_TOON_ID)
+          sd->flag |= bsdf_diffuse_toon_setup(bsdf);
+        else
+          sd->flag |= bsdf_glossy_toon_setup(bsdf);
+      }
+      break;
+    }
 #ifdef __HAIR__
-		case CLOSURE_BSDF_HAIR_PRINCIPLED_ID: {
-			uint4 data_node2 = read_node(kg, offset);
-			uint4 data_node3 = read_node(kg, offset);
-			uint4 data_node4 = read_node(kg, offset);
-
-			float3 weight = sd->svm_closure_weight * mix_weight;
-
-			uint offset_ofs, ior_ofs, color_ofs, parametrization;
-			decode_node_uchar4(data_node.y, &offset_ofs, &ior_ofs, &color_ofs, &parametrization);
-			float alpha = stack_load_float_default(stack, offset_ofs, data_node.z);
-			float ior = stack_load_float_default(stack, ior_ofs, data_node.w);
-
-			uint coat_ofs, melanin_ofs, melanin_redness_ofs, absorption_coefficient_ofs;
-			decode_node_uchar4(data_node2.x, &coat_ofs, &melanin_ofs, &melanin_redness_ofs, &absorption_coefficient_ofs);
-
-			uint tint_ofs, random_ofs, random_color_ofs, random_roughness_ofs;
-			decode_node_uchar4(data_node3.x, &tint_ofs, &random_ofs, &random_color_ofs, &random_roughness_ofs);
-
-			const AttributeDescriptor attr_descr_random = find_attribute(kg, sd, data_node4.y);
-			float random = 0.0f;
-			if(attr_descr_random.offset != ATTR_STD_NOT_FOUND) {
-				random = primitive_surface_attribute_float(kg, sd, attr_descr_random, NULL, NULL);
-			}
-			else {
-				random = stack_load_float_default(stack, random_ofs, data_node3.y);
-			}
-
-
-			PrincipledHairBSDF *bsdf = (PrincipledHairBSDF*)bsdf_alloc(sd, sizeof(PrincipledHairBSDF), weight);
-			if(bsdf) {
-				PrincipledHairExtra *extra = (PrincipledHairExtra*)closure_alloc_extra(sd, sizeof(PrincipledHairExtra));
-
-				if(!extra)
-					break;
-
-				/* Random factors range: [-randomization/2, +randomization/2]. */
-				float random_roughness = stack_load_float_default(stack, random_roughness_ofs, data_node3.w);
-				float factor_random_roughness = 1.0f + 2.0f*(random - 0.5f)*random_roughness;
-				float roughness = param1 * factor_random_roughness;
-				float radial_roughness = param2 * factor_random_roughness;
-
-				/* Remap Coat value to [0, 100]% of Roughness. */
-				float coat = stack_load_float_default(stack, coat_ofs, data_node2.y);
-				float m0_roughness = 1.0f - clamp(coat, 0.0f, 1.0f);
-
-				bsdf->N = N;
-				bsdf->v = roughness;
-				bsdf->s = radial_roughness;
-				bsdf->m0_roughness = m0_roughness;
-				bsdf->alpha = alpha;
-				bsdf->eta = ior;
-				bsdf->extra = extra;
-
-				switch(parametrization) {
-					case NODE_PRINCIPLED_HAIR_DIRECT_ABSORPTION: {
-						float3 absorption_coefficient = stack_load_float3(stack, absorption_coefficient_ofs);
-						bsdf->sigma = absorption_coefficient;
-						break;
-					}
-					case NODE_PRINCIPLED_HAIR_PIGMENT_CONCENTRATION: {
-						float melanin = stack_load_float_default(stack, melanin_ofs, data_node2.z);
-						float melanin_redness = stack_load_float_default(stack, melanin_redness_ofs, data_node2.w);
-
-						/* Randomize melanin.  */
-						float random_color = stack_load_float_default(stack, random_color_ofs, data_node3.z);
-						random_color = clamp(random_color, 0.0f, 1.0f);
-						float factor_random_color = 1.0f + 2.0f * (random - 0.5f) * random_color;
-						melanin *= factor_random_color;
-
-						/* Map melanin 0..inf from more perceptually linear 0..1. */
-						melanin = -logf(fmaxf(1.0f - melanin, 0.0001f));
-
-						/* Benedikt Bitterli's melanin ratio remapping. */
-						float eumelanin = melanin * (1.0f - melanin_redness);
-						float pheomelanin = melanin * melanin_redness;
-						float3 melanin_sigma = sigma_from_concentration(eumelanin, pheomelanin);
-
-						/* Optional tint. */
-						float3 tint = stack_load_float3(stack, tint_ofs);
-						float3 tint_sigma = sigma_from_reflectance(tint, radial_roughness);
-
-						bsdf->sigma = melanin_sigma + tint_sigma;
-						break;
-					}
-					case NODE_PRINCIPLED_HAIR_REFLECTANCE: {
-						float3 color = stack_load_float3(stack, color_ofs);
-						bsdf->sigma = sigma_from_reflectance(color, radial_roughness);
-						break;
-					}
-					default: {
-						/* Fallback to brownish hair, same as defaults for melanin. */
-						kernel_assert(!"Invalid Principled Hair parametrization!");
-						bsdf->sigma = sigma_from_concentration(0.0f, 0.8054375f);
-						break;
-					}
-				}
-
-				sd->flag |= bsdf_principled_hair_setup(sd, bsdf);
-			}
-			break;
-		}
-		case CLOSURE_BSDF_HAIR_REFLECTION_ID:
-		case CLOSURE_BSDF_HAIR_TRANSMISSION_ID: {
-			float3 weight = sd->svm_closure_weight * mix_weight;
-
-			if(sd->flag & SD_BACKFACING && sd->type & PRIMITIVE_ALL_CURVE) {
-				/* todo: giving a fixed weight here will cause issues when
-				 * mixing multiple BSDFS. energy will not be conserved and
-				 * the throughput can blow up after multiple bounces. we
-				 * better figure out a way to skip backfaces from rays
-				 * spawned by transmission from the front */
-				bsdf_transparent_setup(sd, make_float3(1.0f, 1.0f, 1.0f), path_flag);
-			}
-			else {
-				HairBsdf *bsdf = (HairBsdf*)bsdf_alloc(sd, sizeof(HairBsdf), weight);
-
-				if(bsdf) {
-					bsdf->N = N;
-					bsdf->roughness1 = param1;
-					bsdf->roughness2 = param2;
-					bsdf->offset = -stack_load_float(stack, data_node.z);
-
-					if(stack_valid(data_node.y)) {
-						bsdf->T = normalize(stack_load_float3(stack, data_node.y));
-					}
-					else if(!(sd->type & PRIMITIVE_ALL_CURVE)) {
-						bsdf->T = normalize(sd->dPdv);
-						bsdf->offset = 0.0f;
-					}
-					else
-						bsdf->T = normalize(sd->dPdu);
-
-					if(type == CLOSURE_BSDF_HAIR_REFLECTION_ID) {
-						sd->flag |= bsdf_hair_reflection_setup(bsdf);
-					}
-					else {
-						sd->flag |= bsdf_hair_transmission_setup(bsdf);
-					}
-				}
-			}
-
-			break;
-		}
-#endif  /* __HAIR__ */
+    case CLOSURE_BSDF_HAIR_PRINCIPLED_ID: {
+      uint4 data_node2 = read_node(kg, offset);
+      uint4 data_node3 = read_node(kg, offset);
+      uint4 data_node4 = read_node(kg, offset);
+
+      float3 weight = sd->svm_closure_weight * mix_weight;
+
+      uint offset_ofs, ior_ofs, color_ofs, parametrization;
+      decode_node_uchar4(data_node.y, &offset_ofs, &ior_ofs, &color_ofs, &parametrization);
+      float alpha = stack_load_float_default(stack, offset_ofs, data_node.z);
+      float ior = stack_load_float_default(stack, ior_ofs, data_node.w);
+
+      uint coat_ofs, melanin_ofs, melanin_redness_ofs, absorption_coefficient_ofs;
+      decode_node_uchar4(data_node2.x,
+                         &coat_ofs,
+                         &melanin_ofs,
+                         &melanin_redness_ofs,
+                         &absorption_coefficient_ofs);
+
+      uint tint_ofs, random_ofs, random_color_ofs, random_roughness_ofs;
+      decode_node_uchar4(
+          data_node3.x, &tint_ofs, &random_ofs, &random_color_ofs, &random_roughness_ofs);
+
+      const AttributeDescriptor attr_descr_random = find_attribute(kg, sd, data_node4.y);
+      float random = 0.0f;
+      if (attr_descr_random.offset != ATTR_STD_NOT_FOUND) {
+        random = primitive_surface_attribute_float(kg, sd, attr_descr_random, NULL, NULL);
+      }
+      else {
+        random = stack_load_float_default(stack, random_ofs, data_node3.y);
+      }
+
+      PrincipledHairBSDF *bsdf = (PrincipledHairBSDF *)bsdf_alloc(
+          sd, sizeof(PrincipledHairBSDF), weight);
+      if (bsdf) {
+        PrincipledHairExtra *extra = (PrincipledHairExtra *)closure_alloc_extra(
+            sd, sizeof(PrincipledHairExtra));
+
+        if (!extra)
+          break;
+
+        /* Random factors range: [-randomization/2, +randomization/2]. */
+        float random_roughness = stack_load_float_default(
+            stack, random_roughness_ofs, data_node3.w);
+        float factor_random_roughness = 1.0f + 2.0f * (random - 0.5f) * random_roughness;
+        float roughness = param1 * factor_random_roughness;
+        float radial_roughness = param2 * factor_random_roughness;
+
+        /* Remap Coat value to [0, 100]% of Roughness. */
+        float coat = stack_load_float_default(stack, coat_ofs, data_node2.y);
+        float m0_roughness = 1.0f - clamp(coat, 0.0f, 1.0f);
+
+        bsdf->N = N;
+        bsdf->v = roughness;
+        bsdf->s = radial_roughness;
+        bsdf->m0_roughness = m0_roughness;
+        bsdf->alpha = alpha;
+        bsdf->eta = ior;
+        bsdf->extra = extra;
+
+        switch (parametrization) {
+          case NODE_PRINCIPLED_HAIR_DIRECT_ABSORPTION: {
+            float3 absorption_coefficient = stack_load_float3(stack, absorption_coefficient_ofs);
+            bsdf->sigma = absorption_coefficient;
+            break;
+          }
+          case NODE_PRINCIPLED_HAIR_PIGMENT_CONCENTRATION: {
+            float melanin = stack_load_float_default(stack, melanin_ofs, data_node2.z);
+            float melanin_redness = stack_load_float_default(
+                stack, melanin_redness_ofs, data_node2.w);
+
+            /* Randomize melanin.  */
+            float random_color = stack_load_float_default(stack, random_color_ofs, data_node3.z);
+            random_color = clamp(random_color, 0.0f, 1.0f);
+            float factor_random_color = 1.0f + 2.0f * (random - 0.5f) * random_color;
+            melanin *= factor_random_color;
+
+            /* Map melanin 0..inf from more perceptually linear 0..1. */
+            melanin = -logf(fmaxf(1.0f - melanin, 0.0001f));
+
+            /* Benedikt Bitterli's melanin ratio remapping. */
+            float eumelanin = melanin * (1.0f - melanin_redness);
+            float pheomelanin = melanin * melanin_redness;
+            float3 melanin_sigma = sigma_from_concentration(eumelanin, pheomelanin);
+
+            /* Optional tint. */
+            float3 tint = stack_load_float3(stack, tint_ofs);
+            float3 tint_sigma = sigma_from_reflectance(tint, radial_roughness);
+
+            bsdf->sigma = melanin_sigma + tint_sigma;
+            break;
+          }
+          case NODE_PRINCIPLED_HAIR_REFLECTANCE: {
+            float3 color = stack_load_float3(stack, color_ofs);
+            bsdf->sigma = sigma_from_reflectance(color, radial_roughness);
+            break;
+          }
+          default: {
+            /* Fallback to brownish hair, same as defaults for melanin. */
+            kernel_assert(!"Invalid Principled Hair parametrization!");
+            bsdf->sigma = sigma_from_concentration(0.0f, 0.8054375f);
+            break;
+          }
+        }
+
+        sd->flag |= bsdf_principled_hair_setup(sd, bsdf);
+      }
+      break;
+    }
+    case CLOSURE_BSDF_HAIR_REFLECTION_ID:
+    case CLOSURE_BSDF_HAIR_TRANSMISSION_ID: {
+      float3 weight = sd->svm_closure_weight * mix_weight;
+
+      if (sd->flag & SD_BACKFACING && sd->type & PRIMITIVE_ALL_CURVE) {
+        /* todo: giving a fixed weight here will cause issues when
+         * mixing multiple BSDFS. energy will not be conserved and
+         * the throughput can blow up after multiple bounces. we
+         * better figure out a way to skip backfaces from rays
+         * spawned by transmission from the front */
+        bsdf_transparent_setup(sd, make_float3(1.0f, 1.0f, 1.0f), path_flag);
+      }
+      else {
+        HairBsdf *bsdf = (HairBsdf *)bsdf_alloc(sd, sizeof(HairBsdf), weight);
+
+        if (bsdf) {
+          bsdf->N = N;
+          bsdf->roughness1 = param1;
+          bsdf->roughness2 = param2;
+          bsdf->offset = -stack_load_float(stack, data_node.z);
+
+          if (stack_valid(data_node.y)) {
+            bsdf->T = normalize(stack_load_float3(stack, data_node.y));
+          }
+          else if (!(sd->type & PRIMITIVE_ALL_CURVE)) {
+            bsdf->T = normalize(sd->dPdv);
+            bsdf->offset = 0.0f;
+          }
+          else
+            bsdf->T = normalize(sd->dPdu);
+
+          if (type == CLOSURE_BSDF_HAIR_REFLECTION_ID) {
+            sd->flag |= bsdf_hair_reflection_setup(bsdf);
+          }
+          else {
+            sd->flag |= bsdf_hair_transmission_setup(bsdf);
+          }
+        }
+      }
+
+      break;
+    }
+#endif /* __HAIR__ */
 
 #ifdef __SUBSURFACE__
-		case CLOSURE_BSSRDF_CUBIC_ID:
-		case CLOSURE_BSSRDF_GAUSSIAN_ID:
-		case CLOSURE_BSSRDF_BURLEY_ID:
-		case CLOSURE_BSSRDF_RANDOM_WALK_ID: {
-			float3 weight = sd->svm_closure_weight * mix_weight;
-			Bssrdf *bssrdf = bssrdf_alloc(sd, weight);
-
-			if(bssrdf) {
-				/* disable in case of diffuse ancestor, can't see it well then and
-				 * adds considerably noise due to probabilities of continuing path
-				 * getting lower and lower */
-				if(path_flag & PATH_RAY_DIFFUSE_ANCESTOR)
-					param1 = 0.0f;
-
-				bssrdf->radius = stack_load_float3(stack, data_node.z)*param1;
-				bssrdf->albedo = sd->svm_closure_weight;
-				bssrdf->texture_blur = param2;
-				bssrdf->sharpness = stack_load_float(stack, data_node.w);
-				bssrdf->N = N;
-				bssrdf->roughness = 0.0f;
-				sd->flag |= bssrdf_setup(sd, bssrdf, (ClosureType)type);
-			}
-
-			break;
-		}
+    case CLOSURE_BSSRDF_CUBIC_ID:
+    case CLOSURE_BSSRDF_GAUSSIAN_ID:
+    case CLOSURE_BSSRDF_BURLEY_ID:
+    case CLOSURE_BSSRDF_RANDOM_WALK_ID: {
+      float3 weight = sd->svm_closure_weight * mix_weight;
+      Bssrdf *bssrdf = bssrdf_alloc(sd, weight);
+
+      if (bssrdf) {
+        /* disable in case of diffuse ancestor, can't see it well then and
+         * adds considerably noise due to probabilities of continuing path
+         * getting lower and lower */
+        if (path_flag & PATH_RAY_DIFFUSE_ANCESTOR)
+          param1 = 0.0f;
+
+        bssrdf->radius = stack_load_float3(stack, data_node.z) * param1;
+        bssrdf->albedo = sd->svm_closure_weight;
+        bssrdf->texture_blur = param2;
+        bssrdf->sharpness = stack_load_float(stack, data_node.w);
+        bssrdf->N = N;
+        bssrdf->roughness = 0.0f;
+        sd->flag |= bssrdf_setup(sd, bssrdf, (ClosureType)type);
+      }
+
+      break;
+    }
 #endif
-		default:
-			break;
-	}
+    default:
+      break;
+  }
 }
 
-ccl_device void svm_node_closure_volume(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, ShaderType shader_type)
+ccl_device void svm_node_closure_volume(
+    KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, ShaderType shader_type)
 {
 #ifdef __VOLUME__
-	/* Only sum extinction for volumes, variable is shared with surface transparency. */
-	if(shader_type != SHADER_TYPE_VOLUME) {
-		return;
-	}
-
-	uint type, density_offset, anisotropy_offset;
-
-	uint mix_weight_offset;
-	decode_node_uchar4(node.y, &type, &density_offset, &anisotropy_offset, &mix_weight_offset);
-	float mix_weight = (stack_valid(mix_weight_offset)? stack_load_float(stack, mix_weight_offset): 1.0f);
-
-	if(mix_weight == 0.0f) {
-		return;
-	}
-
-	float density = (stack_valid(density_offset))? stack_load_float(stack, density_offset): __uint_as_float(node.z);
-	density = mix_weight * fmaxf(density, 0.0f);
-
-	/* Compute scattering coefficient. */
-	float3 weight = sd->svm_closure_weight;
-
-	if(type == CLOSURE_VOLUME_ABSORPTION_ID) {
-		weight = make_float3(1.0f, 1.0f, 1.0f) - weight;
-	}
-
-	weight *= density;
-
-	/* Add closure for volume scattering. */
-	if(type == CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID) {
-		HenyeyGreensteinVolume *volume = (HenyeyGreensteinVolume*)bsdf_alloc(sd, sizeof(HenyeyGreensteinVolume), weight);
-
-		if(volume) {
-			float anisotropy = (stack_valid(anisotropy_offset))? stack_load_float(stack, anisotropy_offset): __uint_as_float(node.w);
-			volume->g = anisotropy; /* g */
-			sd->flag |= volume_henyey_greenstein_setup(volume);
-		}
-	}
-
-	/* Sum total extinction weight. */
-	volume_extinction_setup(sd, weight);
+  /* Only sum extinction for volumes, variable is shared with surface transparency. */
+  if (shader_type != SHADER_TYPE_VOLUME) {
+    return;
+  }
+
+  uint type, density_offset, anisotropy_offset;
+
+  uint mix_weight_offset;
+  decode_node_uchar4(node.y, &type, &density_offset, &anisotropy_offset, &mix_weight_offset);
+  float mix_weight = (stack_valid(mix_weight_offset) ? stack_load_float(stack, mix_weight_offset) :
+                                                       1.0f);
+
+  if (mix_weight == 0.0f) {
+    return;
+  }
+
+  float density = (stack_valid(density_offset)) ? stack_load_float(stack, density_offset) :
+                                                  __uint_as_float(node.z);
+  density = mix_weight * fmaxf(density, 0.0f);
+
+  /* Compute scattering coefficient. */
+  float3 weight = sd->svm_closure_weight;
+
+  if (type == CLOSURE_VOLUME_ABSORPTION_ID) {
+    weight = make_float3(1.0f, 1.0f, 1.0f) - weight;
+  }
+
+  weight *= density;
+
+  /* Add closure for volume scattering. */
+  if (type == CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID) {
+    HenyeyGreensteinVolume *volume = (HenyeyGreensteinVolume *)bsdf_alloc(
+        sd, sizeof(HenyeyGreensteinVolume), weight);
+
+    if (volume) {
+      float anisotropy = (stack_valid(anisotropy_offset)) ?
+                             stack_load_float(stack, anisotropy_offset) :
+                             __uint_as_float(node.w);
+      volume->g = anisotropy; /* g */
+      sd->flag |= volume_henyey_greenstein_setup(volume);
+    }
+  }
+
+  /* Sum total extinction weight. */
+  volume_extinction_setup(sd, weight);
 #endif
 }
 
-ccl_device void svm_node_principled_volume(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, ShaderType shader_type, int path_flag, int *offset)
+ccl_device void svm_node_principled_volume(KernelGlobals *kg,
+                                           ShaderData *sd,
+                                           float *stack,
+                                           uint4 node,
+                                           ShaderType shader_type,
+                                           int path_flag,
+                                           int *offset)
 {
 #ifdef __VOLUME__
-	uint4 value_node = read_node(kg, offset);
-	uint4 attr_node = read_node(kg, offset);
-
-	/* Only sum extinction for volumes, variable is shared with surface transparency. */
-	if(shader_type != SHADER_TYPE_VOLUME) {
-		return;
-	}
-
-	uint density_offset, anisotropy_offset, absorption_color_offset, mix_weight_offset;
-	decode_node_uchar4(node.y, &density_offset, &anisotropy_offset, &absorption_color_offset, &mix_weight_offset);
-	float mix_weight = (stack_valid(mix_weight_offset)? stack_load_float(stack, mix_weight_offset): 1.0f);
-
-	if(mix_weight == 0.0f) {
-		return;
-	}
-
-	/* Compute density. */
-	float primitive_density = 1.0f;
-	float density = (stack_valid(density_offset))? stack_load_float(stack, density_offset): __uint_as_float(value_node.x);
-	density = mix_weight * fmaxf(density, 0.0f);
-
-	if(density > CLOSURE_WEIGHT_CUTOFF) {
-		/* Density and color attribute lookup if available. */
-		const AttributeDescriptor attr_density = find_attribute(kg, sd, attr_node.x);
-		if(attr_density.offset != ATTR_STD_NOT_FOUND) {
-			primitive_density = primitive_volume_attribute_float(kg, sd, attr_density);
-			density = fmaxf(density * primitive_density, 0.0f);
-		}
-	}
-
-	if(density > CLOSURE_WEIGHT_CUTOFF) {
-		/* Compute scattering color. */
-		float3 color = sd->svm_closure_weight;
-
-		const AttributeDescriptor attr_color = find_attribute(kg, sd, attr_node.y);
-		if(attr_color.offset != ATTR_STD_NOT_FOUND) {
-			color *= primitive_volume_attribute_float3(kg, sd, attr_color);
-		}
-
-		/* Add closure for volume scattering. */
-		HenyeyGreensteinVolume *volume = (HenyeyGreensteinVolume*)bsdf_alloc(sd, sizeof(HenyeyGreensteinVolume), color * density);
-		if(volume) {
-			float anisotropy = (stack_valid(anisotropy_offset))? stack_load_float(stack, anisotropy_offset): __uint_as_float(value_node.y);
-			volume->g = anisotropy;
-			sd->flag |= volume_henyey_greenstein_setup(volume);
-		}
-
-		/* Add extinction weight. */
-		float3 zero = make_float3(0.0f, 0.0f, 0.0f);
-		float3 one = make_float3(1.0f, 1.0f, 1.0f);
-		float3 absorption_color = max(sqrt(stack_load_float3(stack, absorption_color_offset)), zero);
-		float3 absorption = max(one - color, zero) * max(one - absorption_color, zero);
-		volume_extinction_setup(sd, (color + absorption) * density);
-	}
-
-	/* Compute emission. */
-	if(path_flag & PATH_RAY_SHADOW) {
-		/* Don't need emission for shadows. */
-		return;
-	}
-
-	uint emission_offset, emission_color_offset, blackbody_offset, temperature_offset;
-	decode_node_uchar4(node.z, &emission_offset, &emission_color_offset, &blackbody_offset, &temperature_offset);
-	float emission = (stack_valid(emission_offset))? stack_load_float(stack, emission_offset): __uint_as_float(value_node.z);
-	float blackbody = (stack_valid(blackbody_offset))? stack_load_float(stack, blackbody_offset): __uint_as_float(value_node.w);
-
-	if(emission > CLOSURE_WEIGHT_CUTOFF) {
-		float3 emission_color = stack_load_float3(stack, emission_color_offset);
-		emission_setup(sd, emission * emission_color);
-	}
-
-	if(blackbody > CLOSURE_WEIGHT_CUTOFF) {
-		float T = stack_load_float(stack, temperature_offset);
-
-		/* Add flame temperature from attribute if available. */
-		const AttributeDescriptor attr_temperature = find_attribute(kg, sd, attr_node.z);
-		if(attr_temperature.offset != ATTR_STD_NOT_FOUND) {
-			float temperature = primitive_volume_attribute_float(kg, sd, attr_temperature);
-			T *= fmaxf(temperature, 0.0f);
-		}
-
-		T = fmaxf(T, 0.0f);
-
-		/* Stefan-Boltzmann law. */
-		float T4 = sqr(sqr(T));
-		float sigma = 5.670373e-8f * 1e-6f / M_PI_F;
-		float intensity = sigma * mix(1.0f, T4, blackbody);
-
-		if(intensity > CLOSURE_WEIGHT_CUTOFF) {
-			float3 blackbody_tint = stack_load_float3(stack, node.w);
-			float3 bb = blackbody_tint * intensity * svm_math_blackbody_color(T);
-			emission_setup(sd, bb);
-		}
-	}
+  uint4 value_node = read_node(kg, offset);
+  uint4 attr_node = read_node(kg, offset);
+
+  /* Only sum extinction for volumes, variable is shared with surface transparency. */
+  if (shader_type != SHADER_TYPE_VOLUME) {
+    return;
+  }
+
+  uint density_offset, anisotropy_offset, absorption_color_offset, mix_weight_offset;
+  decode_node_uchar4(
+      node.y, &density_offset, &anisotropy_offset, &absorption_color_offset, &mix_weight_offset);
+  float mix_weight = (stack_valid(mix_weight_offset) ? stack_load_float(stack, mix_weight_offset) :
+                                                       1.0f);
+
+  if (mix_weight == 0.0f) {
+    return;
+  }
+
+  /* Compute density. */
+  float primitive_density = 1.0f;
+  float density = (stack_valid(density_offset)) ? stack_load_float(stack, density_offset) :
+                                                  __uint_as_float(value_node.x);
+  density = mix_weight * fmaxf(density, 0.0f);
+
+  if (density > CLOSURE_WEIGHT_CUTOFF) {
+    /* Density and color attribute lookup if available. */
+    const AttributeDescriptor attr_density = find_attribute(kg, sd, attr_node.x);
+    if (attr_density.offset != ATTR_STD_NOT_FOUND) {
+      primitive_density = primitive_volume_attribute_float(kg, sd, attr_density);
+      density = fmaxf(density * primitive_density, 0.0f);
+    }
+  }
+
+  if (density > CLOSURE_WEIGHT_CUTOFF) {
+    /* Compute scattering color. */
+    float3 color = sd->svm_closure_weight;
+
+    const AttributeDescriptor attr_color = find_attribute(kg, sd, attr_node.y);
+    if (attr_color.offset != ATTR_STD_NOT_FOUND) {
+      color *= primitive_volume_attribute_float3(kg, sd, attr_color);
+    }
+
+    /* Add closure for volume scattering. */
+    HenyeyGreensteinVolume *volume = (HenyeyGreensteinVolume *)bsdf_alloc(
+        sd, sizeof(HenyeyGreensteinVolume), color * density);
+    if (volume) {
+      float anisotropy = (stack_valid(anisotropy_offset)) ?
+                             stack_load_float(stack, anisotropy_offset) :
+                             __uint_as_float(value_node.y);
+      volume->g = anisotropy;
+      sd->flag |= volume_henyey_greenstein_setup(volume);
+    }
+
+    /* Add extinction weight. */
+    float3 zero = make_float3(0.0f, 0.0f, 0.0f);
+    float3 one = make_float3(1.0f, 1.0f, 1.0f);
+    float3 absorption_color = max(sqrt(stack_load_float3(stack, absorption_color_offset)), zero);
+    float3 absorption = max(one - color, zero) * max(one - absorption_color, zero);
+    volume_extinction_setup(sd, (color + absorption) * density);
+  }
+
+  /* Compute emission. */
+  if (path_flag & PATH_RAY_SHADOW) {
+    /* Don't need emission for shadows. */
+    return;
+  }
+
+  uint emission_offset, emission_color_offset, blackbody_offset, temperature_offset;
+  decode_node_uchar4(
+      node.z, &emission_offset, &emission_color_offset, &blackbody_offset, &temperature_offset);
+  float emission = (stack_valid(emission_offset)) ? stack_load_float(stack, emission_offset) :
+                                                    __uint_as_float(value_node.z);
+  float blackbody = (stack_valid(blackbody_offset)) ? stack_load_float(stack, blackbody_offset) :
+                                                      __uint_as_float(value_node.w);
+
+  if (emission > CLOSURE_WEIGHT_CUTOFF) {
+    float3 emission_color = stack_load_float3(stack, emission_color_offset);
+    emission_setup(sd, emission * emission_color);
+  }
+
+  if (blackbody > CLOSURE_WEIGHT_CUTOFF) {
+    float T = stack_load_float(stack, temperature_offset);
+
+    /* Add flame temperature from attribute if available. */
+    const AttributeDescriptor attr_temperature = find_attribute(kg, sd, attr_node.z);
+    if (attr_temperature.offset != ATTR_STD_NOT_FOUND) {
+      float temperature = primitive_volume_attribute_float(kg, sd, attr_temperature);
+      T *= fmaxf(temperature, 0.0f);
+    }
+
+    T = fmaxf(T, 0.0f);
+
+    /* Stefan-Boltzmann law. */
+    float T4 = sqr(sqr(T));
+    float sigma = 5.670373e-8f * 1e-6f / M_PI_F;
+    float intensity = sigma * mix(1.0f, T4, blackbody);
+
+    if (intensity > CLOSURE_WEIGHT_CUTOFF) {
+      float3 blackbody_tint = stack_load_float3(stack, node.w);
+      float3 bb = blackbody_tint * intensity * svm_math_blackbody_color(T);
+      emission_setup(sd, bb);
+    }
+  }
 #endif
 }
 
 ccl_device void svm_node_closure_emission(ShaderData *sd, float *stack, uint4 node)
 {
-	uint mix_weight_offset = node.y;
-	float3 weight = sd->svm_closure_weight;
+  uint mix_weight_offset = node.y;
+  float3 weight = sd->svm_closure_weight;
 
-	if(stack_valid(mix_weight_offset)) {
-		float mix_weight = stack_load_float(stack, mix_weight_offset);
+  if (stack_valid(mix_weight_offset)) {
+    float mix_weight = stack_load_float(stack, mix_weight_offset);
 
-		if(mix_weight == 0.0f)
-			return;
+    if (mix_weight == 0.0f)
+      return;
 
-		weight *= mix_weight;
-	}
+    weight *= mix_weight;
+  }
 
-	emission_setup(sd, weight);
+  emission_setup(sd, weight);
 }
 
 ccl_device void svm_node_closure_background(ShaderData *sd, float *stack, uint4 node)
 {
-	uint mix_weight_offset = node.y;
-	float3 weight = sd->svm_closure_weight;
+  uint mix_weight_offset = node.y;
+  float3 weight = sd->svm_closure_weight;
 
-	if(stack_valid(mix_weight_offset)) {
-		float mix_weight = stack_load_float(stack, mix_weight_offset);
+  if (stack_valid(mix_weight_offset)) {
+    float mix_weight = stack_load_float(stack, mix_weight_offset);
 
-		if(mix_weight == 0.0f)
-			return;
+    if (mix_weight == 0.0f)
+      return;
 
-		weight *= mix_weight;
-	}
+    weight *= mix_weight;
+  }
 
-	background_setup(sd, weight);
+  background_setup(sd, weight);
 }
 
 ccl_device void svm_node_closure_holdout(ShaderData *sd, float *stack, uint4 node)
 {
-	uint mix_weight_offset = node.y;
+  uint mix_weight_offset = node.y;
 
-	if(stack_valid(mix_weight_offset)) {
-		float mix_weight = stack_load_float(stack, mix_weight_offset);
+  if (stack_valid(mix_weight_offset)) {
+    float mix_weight = stack_load_float(stack, mix_weight_offset);
 
-		if(mix_weight == 0.0f)
-			return;
+    if (mix_weight == 0.0f)
+      return;
 
-		closure_alloc(sd, sizeof(ShaderClosure), CLOSURE_HOLDOUT_ID, sd->svm_closure_weight * mix_weight);
-	}
-	else
-		closure_alloc(sd, sizeof(ShaderClosure), CLOSURE_HOLDOUT_ID, sd->svm_closure_weight);
+    closure_alloc(
+        sd, sizeof(ShaderClosure), CLOSURE_HOLDOUT_ID, sd->svm_closure_weight * mix_weight);
+  }
+  else
+    closure_alloc(sd, sizeof(ShaderClosure), CLOSURE_HOLDOUT_ID, sd->svm_closure_weight);
 
-	sd->flag |= SD_HOLDOUT;
+  sd->flag |= SD_HOLDOUT;
 }
 
 /* Closure Nodes */
 
 ccl_device_inline void svm_node_closure_store_weight(ShaderData *sd, float3 weight)
 {
-	sd->svm_closure_weight = weight;
+  sd->svm_closure_weight = weight;
 }
 
 ccl_device void svm_node_closure_set_weight(ShaderData *sd, uint r, uint g, uint b)
 {
-	float3 weight = make_float3(__uint_as_float(r), __uint_as_float(g), __uint_as_float(b));
-	svm_node_closure_store_weight(sd, weight);
+  float3 weight = make_float3(__uint_as_float(r), __uint_as_float(g), __uint_as_float(b));
+  svm_node_closure_store_weight(sd, weight);
 }
 
 ccl_device void svm_node_closure_weight(ShaderData *sd, float *stack, uint weight_offset)
 {
-	float3 weight = stack_load_float3(stack, weight_offset);
+  float3 weight = stack_load_float3(stack, weight_offset);
 
-	svm_node_closure_store_weight(sd, weight);
+  svm_node_closure_store_weight(sd, weight);
 }
 
-ccl_device void svm_node_emission_weight(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
+ccl_device void svm_node_emission_weight(KernelGlobals *kg,
+                                         ShaderData *sd,
+                                         float *stack,
+                                         uint4 node)
 {
-	uint color_offset = node.y;
-	uint strength_offset = node.z;
+  uint color_offset = node.y;
+  uint strength_offset = node.z;
 
-	float strength = stack_load_float(stack, strength_offset);
-	float3 weight = stack_load_float3(stack, color_offset)*strength;
+  float strength = stack_load_float(stack, strength_offset);
+  float3 weight = stack_load_float3(stack, color_offset) * strength;
 
-	svm_node_closure_store_weight(sd, weight);
+  svm_node_closure_store_weight(sd, weight);
 }
 
 ccl_device void svm_node_mix_closure(ShaderData *sd, float *stack, uint4 node)
 {
-	/* fetch weight from blend input, previous mix closures,
-	 * and write to stack to be used by closure nodes later */
-	uint weight_offset, in_weight_offset, weight1_offset, weight2_offset;
-	decode_node_uchar4(node.y, &weight_offset, &in_weight_offset, &weight1_offset, &weight2_offset);
+  /* fetch weight from blend input, previous mix closures,
+   * and write to stack to be used by closure nodes later */
+  uint weight_offset, in_weight_offset, weight1_offset, weight2_offset;
+  decode_node_uchar4(node.y, &weight_offset, &in_weight_offset, &weight1_offset, &weight2_offset);
 
-	float weight = stack_load_float(stack, weight_offset);
-	weight = saturate(weight);
+  float weight = stack_load_float(stack, weight_offset);
+  weight = saturate(weight);
 
-	float in_weight = (stack_valid(in_weight_offset))? stack_load_float(stack, in_weight_offset): 1.0f;
+  float in_weight = (stack_valid(in_weight_offset)) ? stack_load_float(stack, in_weight_offset) :
+                                                      1.0f;
 
-	if(stack_valid(weight1_offset))
-		stack_store_float(stack, weight1_offset, in_weight*(1.0f - weight));
-	if(stack_valid(weight2_offset))
-		stack_store_float(stack, weight2_offset, in_weight*weight);
+  if (stack_valid(weight1_offset))
+    stack_store_float(stack, weight1_offset, in_weight * (1.0f - weight));
+  if (stack_valid(weight2_offset))
+    stack_store_float(stack, weight2_offset, in_weight * weight);
 }
 
 /* (Bump) normal */
 
-ccl_device void svm_node_set_normal(KernelGlobals *kg, ShaderData *sd, float *stack, uint in_direction, uint out_normal)
+ccl_device void svm_node_set_normal(
+    KernelGlobals *kg, ShaderData *sd, float *stack, uint in_direction, uint out_normal)
 {
-	float3 normal = stack_load_float3(stack, in_direction);
-	sd->N = normal;
-	stack_store_float3(stack, out_normal, normal);
+  float3 normal = stack_load_float3(stack, in_direction);
+  sd->N = normal;
+  stack_store_float3(stack, out_normal, normal);
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_color_util.h b/intern/cycles/kernel/svm/svm_color_util.h
index d5945f915c6..12b59d2616b 100644
--- a/intern/cycles/kernel/svm/svm_color_util.h
+++ b/intern/cycles/kernel/svm/svm_color_util.h
@@ -18,288 +18,310 @@ CCL_NAMESPACE_BEGIN
 
 ccl_device float3 svm_mix_blend(float t, float3 col1, float3 col2)
 {
-	return interp(col1, col2, t);
+  return interp(col1, col2, t);
 }
 
 ccl_device float3 svm_mix_add(float t, float3 col1, float3 col2)
 {
-	return interp(col1, col1 + col2, t);
+  return interp(col1, col1 + col2, t);
 }
 
 ccl_device float3 svm_mix_mul(float t, float3 col1, float3 col2)
 {
-	return interp(col1, col1 * col2, t);
+  return interp(col1, col1 * col2, t);
 }
 
 ccl_device float3 svm_mix_screen(float t, float3 col1, float3 col2)
 {
-	float tm = 1.0f - t;
-	float3 one = make_float3(1.0f, 1.0f, 1.0f);
-	float3 tm3 = make_float3(tm, tm, tm);
+  float tm = 1.0f - t;
+  float3 one = make_float3(1.0f, 1.0f, 1.0f);
+  float3 tm3 = make_float3(tm, tm, tm);
 
-	return one - (tm3 + t*(one - col2))*(one - col1);
+  return one - (tm3 + t * (one - col2)) * (one - col1);
 }
 
 ccl_device float3 svm_mix_overlay(float t, float3 col1, float3 col2)
 {
-	float tm = 1.0f - t;
+  float tm = 1.0f - t;
 
-	float3 outcol = col1;
+  float3 outcol = col1;
 
-	if(outcol.x < 0.5f)
-		outcol.x *= tm + 2.0f*t*col2.x;
-	else
-		outcol.x = 1.0f - (tm + 2.0f*t*(1.0f - col2.x))*(1.0f - outcol.x);
+  if (outcol.x < 0.5f)
+    outcol.x *= tm + 2.0f * t * col2.x;
+  else
+    outcol.x = 1.0f - (tm + 2.0f * t * (1.0f - col2.x)) * (1.0f - outcol.x);
 
-	if(outcol.y < 0.5f)
-		outcol.y *= tm + 2.0f*t*col2.y;
-	else
-		outcol.y = 1.0f - (tm + 2.0f*t*(1.0f - col2.y))*(1.0f - outcol.y);
+  if (outcol.y < 0.5f)
+    outcol.y *= tm + 2.0f * t * col2.y;
+  else
+    outcol.y = 1.0f - (tm + 2.0f * t * (1.0f - col2.y)) * (1.0f - outcol.y);
 
-	if(outcol.z < 0.5f)
-		outcol.z *= tm + 2.0f*t*col2.z;
-	else
-		outcol.z = 1.0f - (tm + 2.0f*t*(1.0f - col2.z))*(1.0f - outcol.z);
+  if (outcol.z < 0.5f)
+    outcol.z *= tm + 2.0f * t * col2.z;
+  else
+    outcol.z = 1.0f - (tm + 2.0f * t * (1.0f - col2.z)) * (1.0f - outcol.z);
 
-	return outcol;
+  return outcol;
 }
 
 ccl_device float3 svm_mix_sub(float t, float3 col1, float3 col2)
 {
-	return interp(col1, col1 - col2, t);
+  return interp(col1, col1 - col2, t);
 }
 
 ccl_device float3 svm_mix_div(float t, float3 col1, float3 col2)
 {
-	float tm = 1.0f - t;
+  float tm = 1.0f - t;
 
-	float3 outcol = col1;
+  float3 outcol = col1;
 
-	if(col2.x != 0.0f) outcol.x = tm*outcol.x + t*outcol.x/col2.x;
-	if(col2.y != 0.0f) outcol.y = tm*outcol.y + t*outcol.y/col2.y;
-	if(col2.z != 0.0f) outcol.z = tm*outcol.z + t*outcol.z/col2.z;
+  if (col2.x != 0.0f)
+    outcol.x = tm * outcol.x + t * outcol.x / col2.x;
+  if (col2.y != 0.0f)
+    outcol.y = tm * outcol.y + t * outcol.y / col2.y;
+  if (col2.z != 0.0f)
+    outcol.z = tm * outcol.z + t * outcol.z / col2.z;
 
-	return outcol;
+  return outcol;
 }
 
 ccl_device float3 svm_mix_diff(float t, float3 col1, float3 col2)
 {
-	return interp(col1, fabs(col1 - col2), t);
+  return interp(col1, fabs(col1 - col2), t);
 }
 
 ccl_device float3 svm_mix_dark(float t, float3 col1, float3 col2)
 {
-	return min(col1, col2)*t + col1*(1.0f - t);
+  return min(col1, col2) * t + col1 * (1.0f - t);
 }
 
 ccl_device float3 svm_mix_light(float t, float3 col1, float3 col2)
 {
-	return max(col1, col2*t);
+  return max(col1, col2 * t);
 }
 
 ccl_device float3 svm_mix_dodge(float t, float3 col1, float3 col2)
 {
-	float3 outcol = col1;
-
-	if(outcol.x != 0.0f) {
-		float tmp = 1.0f - t*col2.x;
-		if(tmp <= 0.0f)
-			outcol.x = 1.0f;
-		else if((tmp = outcol.x/tmp) > 1.0f)
-			outcol.x = 1.0f;
-		else
-			outcol.x = tmp;
-	}
-	if(outcol.y != 0.0f) {
-		float tmp = 1.0f - t*col2.y;
-		if(tmp <= 0.0f)
-			outcol.y = 1.0f;
-		else if((tmp = outcol.y/tmp) > 1.0f)
-			outcol.y = 1.0f;
-		else
-			outcol.y = tmp;
-	}
-	if(outcol.z != 0.0f) {
-		float tmp = 1.0f - t*col2.z;
-		if(tmp <= 0.0f)
-			outcol.z = 1.0f;
-		else if((tmp = outcol.z/tmp) > 1.0f)
-			outcol.z = 1.0f;
-		else
-			outcol.z = tmp;
-	}
-
-	return outcol;
+  float3 outcol = col1;
+
+  if (outcol.x != 0.0f) {
+    float tmp = 1.0f - t * col2.x;
+    if (tmp <= 0.0f)
+      outcol.x = 1.0f;
+    else if ((tmp = outcol.x / tmp) > 1.0f)
+      outcol.x = 1.0f;
+    else
+      outcol.x = tmp;
+  }
+  if (outcol.y != 0.0f) {
+    float tmp = 1.0f - t * col2.y;
+    if (tmp <= 0.0f)
+      outcol.y = 1.0f;
+    else if ((tmp = outcol.y / tmp) > 1.0f)
+      outcol.y = 1.0f;
+    else
+      outcol.y = tmp;
+  }
+  if (outcol.z != 0.0f) {
+    float tmp = 1.0f - t * col2.z;
+    if (tmp <= 0.0f)
+      outcol.z = 1.0f;
+    else if ((tmp = outcol.z / tmp) > 1.0f)
+      outcol.z = 1.0f;
+    else
+      outcol.z = tmp;
+  }
+
+  return outcol;
 }
 
 ccl_device float3 svm_mix_burn(float t, float3 col1, float3 col2)
 {
-	float tmp, tm = 1.0f - t;
-
-	float3 outcol = col1;
-
-	tmp = tm + t*col2.x;
-	if(tmp <= 0.0f)
-		outcol.x = 0.0f;
-	else if((tmp = (1.0f - (1.0f - outcol.x)/tmp)) < 0.0f)
-		outcol.x = 0.0f;
-	else if(tmp > 1.0f)
-		outcol.x = 1.0f;
-	else
-		outcol.x = tmp;
-
-	tmp = tm + t*col2.y;
-	if(tmp <= 0.0f)
-		outcol.y = 0.0f;
-	else if((tmp = (1.0f - (1.0f - outcol.y)/tmp)) < 0.0f)
-		outcol.y = 0.0f;
-	else if(tmp > 1.0f)
-		outcol.y = 1.0f;
-	else
-		outcol.y = tmp;
-
-	tmp = tm + t*col2.z;
-	if(tmp <= 0.0f)
-		outcol.z = 0.0f;
-	else if((tmp = (1.0f - (1.0f - outcol.z)/tmp)) < 0.0f)
-		outcol.z = 0.0f;
-	else if(tmp > 1.0f)
-		outcol.z = 1.0f;
-	else
-		outcol.z = tmp;
-
-	return outcol;
+  float tmp, tm = 1.0f - t;
+
+  float3 outcol = col1;
+
+  tmp = tm + t * col2.x;
+  if (tmp <= 0.0f)
+    outcol.x = 0.0f;
+  else if ((tmp = (1.0f - (1.0f - outcol.x) / tmp)) < 0.0f)
+    outcol.x = 0.0f;
+  else if (tmp > 1.0f)
+    outcol.x = 1.0f;
+  else
+    outcol.x = tmp;
+
+  tmp = tm + t * col2.y;
+  if (tmp <= 0.0f)
+    outcol.y = 0.0f;
+  else if ((tmp = (1.0f - (1.0f - outcol.y) / tmp)) < 0.0f)
+    outcol.y = 0.0f;
+  else if (tmp > 1.0f)
+    outcol.y = 1.0f;
+  else
+    outcol.y = tmp;
+
+  tmp = tm + t * col2.z;
+  if (tmp <= 0.0f)
+    outcol.z = 0.0f;
+  else if ((tmp = (1.0f - (1.0f - outcol.z) / tmp)) < 0.0f)
+    outcol.z = 0.0f;
+  else if (tmp > 1.0f)
+    outcol.z = 1.0f;
+  else
+    outcol.z = tmp;
+
+  return outcol;
 }
 
 ccl_device float3 svm_mix_hue(float t, float3 col1, float3 col2)
 {
-	float3 outcol = col1;
+  float3 outcol = col1;
 
-	float3 hsv2 = rgb_to_hsv(col2);
+  float3 hsv2 = rgb_to_hsv(col2);
 
-	if(hsv2.y != 0.0f) {
-		float3 hsv = rgb_to_hsv(outcol);
-		hsv.x = hsv2.x;
-		float3 tmp = hsv_to_rgb(hsv);
+  if (hsv2.y != 0.0f) {
+    float3 hsv = rgb_to_hsv(outcol);
+    hsv.x = hsv2.x;
+    float3 tmp = hsv_to_rgb(hsv);
 
-		outcol = interp(outcol, tmp, t);
-	}
+    outcol = interp(outcol, tmp, t);
+  }
 
-	return outcol;
+  return outcol;
 }
 
 ccl_device float3 svm_mix_sat(float t, float3 col1, float3 col2)
 {
-	float tm = 1.0f - t;
+  float tm = 1.0f - t;
 
-	float3 outcol = col1;
+  float3 outcol = col1;
 
-	float3 hsv = rgb_to_hsv(outcol);
+  float3 hsv = rgb_to_hsv(outcol);
 
-	if(hsv.y != 0.0f) {
-		float3 hsv2 = rgb_to_hsv(col2);
+  if (hsv.y != 0.0f) {
+    float3 hsv2 = rgb_to_hsv(col2);
 
-		hsv.y = tm*hsv.y + t*hsv2.y;
-		outcol = hsv_to_rgb(hsv);
-	}
+    hsv.y = tm * hsv.y + t * hsv2.y;
+    outcol = hsv_to_rgb(hsv);
+  }
 
-	return outcol;
+  return outcol;
 }
 
 ccl_device float3 svm_mix_val(float t, float3 col1, float3 col2)
 {
-	float tm = 1.0f - t;
+  float tm = 1.0f - t;
 
-	float3 hsv = rgb_to_hsv(col1);
-	float3 hsv2 = rgb_to_hsv(col2);
+  float3 hsv = rgb_to_hsv(col1);
+  float3 hsv2 = rgb_to_hsv(col2);
 
-	hsv.z = tm*hsv.z + t*hsv2.z;
+  hsv.z = tm * hsv.z + t * hsv2.z;
 
-	return hsv_to_rgb(hsv);
+  return hsv_to_rgb(hsv);
 }
 
 ccl_device float3 svm_mix_color(float t, float3 col1, float3 col2)
 {
-	float3 outcol = col1;
-	float3 hsv2 = rgb_to_hsv(col2);
+  float3 outcol = col1;
+  float3 hsv2 = rgb_to_hsv(col2);
 
-	if(hsv2.y != 0.0f) {
-		float3 hsv = rgb_to_hsv(outcol);
-		hsv.x = hsv2.x;
-		hsv.y = hsv2.y;
-		float3 tmp = hsv_to_rgb(hsv);
+  if (hsv2.y != 0.0f) {
+    float3 hsv = rgb_to_hsv(outcol);
+    hsv.x = hsv2.x;
+    hsv.y = hsv2.y;
+    float3 tmp = hsv_to_rgb(hsv);
 
-		outcol = interp(outcol, tmp, t);
-	}
+    outcol = interp(outcol, tmp, t);
+  }
 
-	return outcol;
+  return outcol;
 }
 
 ccl_device float3 svm_mix_soft(float t, float3 col1, float3 col2)
 {
-	float tm = 1.0f - t;
+  float tm = 1.0f - t;
 
-	float3 one = make_float3(1.0f, 1.0f, 1.0f);
-	float3 scr = one - (one - col2)*(one - col1);
+  float3 one = make_float3(1.0f, 1.0f, 1.0f);
+  float3 scr = one - (one - col2) * (one - col1);
 
-	return tm*col1 + t*((one - col1)*col2*col1 + col1*scr);
+  return tm * col1 + t * ((one - col1) * col2 * col1 + col1 * scr);
 }
 
 ccl_device float3 svm_mix_linear(float t, float3 col1, float3 col2)
 {
-	return col1 + t*(2.0f*col2 + make_float3(-1.0f, -1.0f, -1.0f));
+  return col1 + t * (2.0f * col2 + make_float3(-1.0f, -1.0f, -1.0f));
 }
 
 ccl_device float3 svm_mix_clamp(float3 col)
 {
-	float3 outcol = col;
+  float3 outcol = col;
 
-	outcol.x = saturate(col.x);
-	outcol.y = saturate(col.y);
-	outcol.z = saturate(col.z);
+  outcol.x = saturate(col.x);
+  outcol.y = saturate(col.y);
+  outcol.z = saturate(col.z);
 
-	return outcol;
+  return outcol;
 }
 
 ccl_device_noinline float3 svm_mix(NodeMix type, float fac, float3 c1, float3 c2)
 {
-	float t = saturate(fac);
-
-	switch(type) {
-		case NODE_MIX_BLEND: return svm_mix_blend(t, c1, c2);
-		case NODE_MIX_ADD: return svm_mix_add(t, c1, c2);
-		case NODE_MIX_MUL: return svm_mix_mul(t, c1, c2);
-		case NODE_MIX_SCREEN: return svm_mix_screen(t, c1, c2);
-		case NODE_MIX_OVERLAY: return svm_mix_overlay(t, c1, c2);
-		case NODE_MIX_SUB: return svm_mix_sub(t, c1, c2);
-		case NODE_MIX_DIV: return svm_mix_div(t, c1, c2);
-		case NODE_MIX_DIFF: return svm_mix_diff(t, c1, c2);
-		case NODE_MIX_DARK: return svm_mix_dark(t, c1, c2);
-		case NODE_MIX_LIGHT: return svm_mix_light(t, c1, c2);
-		case NODE_MIX_DODGE: return svm_mix_dodge(t, c1, c2);
-		case NODE_MIX_BURN: return svm_mix_burn(t, c1, c2);
-		case NODE_MIX_HUE: return svm_mix_hue(t, c1, c2);
-		case NODE_MIX_SAT: return svm_mix_sat(t, c1, c2);
-		case NODE_MIX_VAL: return svm_mix_val (t, c1, c2);
-		case NODE_MIX_COLOR: return svm_mix_color(t, c1, c2);
-		case NODE_MIX_SOFT: return svm_mix_soft(t, c1, c2);
-		case NODE_MIX_LINEAR: return svm_mix_linear(t, c1, c2);
-		case NODE_MIX_CLAMP: return svm_mix_clamp(c1);
-	}
-
-	return make_float3(0.0f, 0.0f, 0.0f);
+  float t = saturate(fac);
+
+  switch (type) {
+    case NODE_MIX_BLEND:
+      return svm_mix_blend(t, c1, c2);
+    case NODE_MIX_ADD:
+      return svm_mix_add(t, c1, c2);
+    case NODE_MIX_MUL:
+      return svm_mix_mul(t, c1, c2);
+    case NODE_MIX_SCREEN:
+      return svm_mix_screen(t, c1, c2);
+    case NODE_MIX_OVERLAY:
+      return svm_mix_overlay(t, c1, c2);
+    case NODE_MIX_SUB:
+      return svm_mix_sub(t, c1, c2);
+    case NODE_MIX_DIV:
+      return svm_mix_div(t, c1, c2);
+    case NODE_MIX_DIFF:
+      return svm_mix_diff(t, c1, c2);
+    case NODE_MIX_DARK:
+      return svm_mix_dark(t, c1, c2);
+    case NODE_MIX_LIGHT:
+      return svm_mix_light(t, c1, c2);
+    case NODE_MIX_DODGE:
+      return svm_mix_dodge(t, c1, c2);
+    case NODE_MIX_BURN:
+      return svm_mix_burn(t, c1, c2);
+    case NODE_MIX_HUE:
+      return svm_mix_hue(t, c1, c2);
+    case NODE_MIX_SAT:
+      return svm_mix_sat(t, c1, c2);
+    case NODE_MIX_VAL:
+      return svm_mix_val(t, c1, c2);
+    case NODE_MIX_COLOR:
+      return svm_mix_color(t, c1, c2);
+    case NODE_MIX_SOFT:
+      return svm_mix_soft(t, c1, c2);
+    case NODE_MIX_LINEAR:
+      return svm_mix_linear(t, c1, c2);
+    case NODE_MIX_CLAMP:
+      return svm_mix_clamp(c1);
+  }
+
+  return make_float3(0.0f, 0.0f, 0.0f);
 }
 
 ccl_device_inline float3 svm_brightness_contrast(float3 color, float brightness, float contrast)
 {
-	float a = 1.0f + contrast;
-	float b = brightness - contrast*0.5f;
+  float a = 1.0f + contrast;
+  float b = brightness - contrast * 0.5f;
 
-	color.x = max(a*color.x + b, 0.0f);
-	color.y = max(a*color.y + b, 0.0f);
-	color.z = max(a*color.z + b, 0.0f);
+  color.x = max(a * color.x + b, 0.0f);
+  color.y = max(a * color.y + b, 0.0f);
+  color.z = max(a * color.z + b, 0.0f);
 
-	return color;
+  return color;
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_convert.h b/intern/cycles/kernel/svm/svm_convert.h
index 63b1dc6865e..5df6c9fb755 100644
--- a/intern/cycles/kernel/svm/svm_convert.h
+++ b/intern/cycles/kernel/svm/svm_convert.h
@@ -18,54 +18,55 @@ CCL_NAMESPACE_BEGIN
 
 /* Conversion Nodes */
 
-ccl_device void svm_node_convert(KernelGlobals *kg, ShaderData *sd, float *stack, uint type, uint from, uint to)
+ccl_device void svm_node_convert(
+    KernelGlobals *kg, ShaderData *sd, float *stack, uint type, uint from, uint to)
 {
-	switch(type) {
-		case NODE_CONVERT_FI: {
-			float f = stack_load_float(stack, from);
-			stack_store_int(stack, to, float_to_int(f));
-			break;
-		}
-		case NODE_CONVERT_FV: {
-			float f = stack_load_float(stack, from);
-			stack_store_float3(stack, to, make_float3(f, f, f));
-			break;
-		}
-		case NODE_CONVERT_CF: {
-			float3 f = stack_load_float3(stack, from);
-			float g = linear_rgb_to_gray(kg, f);
-			stack_store_float(stack, to, g);
-			break;
-		}
-		case NODE_CONVERT_CI: {
-			float3 f = stack_load_float3(stack, from);
-			int i = (int)linear_rgb_to_gray(kg, f);
-			stack_store_int(stack, to, i);
-			break;
-		}
-		case NODE_CONVERT_VF: {
-			float3 f = stack_load_float3(stack, from);
-			float g = average(f);
-			stack_store_float(stack, to, g);
-			break;
-		}
-		case NODE_CONVERT_VI: {
-			float3 f = stack_load_float3(stack, from);
-			int i = (int)average(f);
-			stack_store_int(stack, to, i);
-			break;
-		}
-		case NODE_CONVERT_IF: {
-			float f = (float)stack_load_int(stack, from);
-			stack_store_float(stack, to, f);
-			break;
-		}
-		case NODE_CONVERT_IV: {
-			float f = (float)stack_load_int(stack, from);
-			stack_store_float3(stack, to, make_float3(f, f, f));
-			break;
-		}
-	}
+  switch (type) {
+    case NODE_CONVERT_FI: {
+      float f = stack_load_float(stack, from);
+      stack_store_int(stack, to, float_to_int(f));
+      break;
+    }
+    case NODE_CONVERT_FV: {
+      float f = stack_load_float(stack, from);
+      stack_store_float3(stack, to, make_float3(f, f, f));
+      break;
+    }
+    case NODE_CONVERT_CF: {
+      float3 f = stack_load_float3(stack, from);
+      float g = linear_rgb_to_gray(kg, f);
+      stack_store_float(stack, to, g);
+      break;
+    }
+    case NODE_CONVERT_CI: {
+      float3 f = stack_load_float3(stack, from);
+      int i = (int)linear_rgb_to_gray(kg, f);
+      stack_store_int(stack, to, i);
+      break;
+    }
+    case NODE_CONVERT_VF: {
+      float3 f = stack_load_float3(stack, from);
+      float g = average(f);
+      stack_store_float(stack, to, g);
+      break;
+    }
+    case NODE_CONVERT_VI: {
+      float3 f = stack_load_float3(stack, from);
+      int i = (int)average(f);
+      stack_store_int(stack, to, i);
+      break;
+    }
+    case NODE_CONVERT_IF: {
+      float f = (float)stack_load_int(stack, from);
+      stack_store_float(stack, to, f);
+      break;
+    }
+    case NODE_CONVERT_IV: {
+      float f = (float)stack_load_int(stack, from);
+      stack_store_float3(stack, to, make_float3(f, f, f));
+      break;
+    }
+  }
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_displace.h b/intern/cycles/kernel/svm/svm_displace.h
index a69c9fe81f9..f16664a684c 100644
--- a/intern/cycles/kernel/svm/svm_displace.h
+++ b/intern/cycles/kernel/svm/svm_displace.h
@@ -21,144 +21,149 @@ CCL_NAMESPACE_BEGIN
 ccl_device void svm_node_set_bump(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
 {
 #ifdef __RAY_DIFFERENTIALS__
-	/* get normal input */
-	uint normal_offset, scale_offset, invert, use_object_space;
-	decode_node_uchar4(node.y, &normal_offset, &scale_offset, &invert, &use_object_space);
+  /* get normal input */
+  uint normal_offset, scale_offset, invert, use_object_space;
+  decode_node_uchar4(node.y, &normal_offset, &scale_offset, &invert, &use_object_space);
 
-	float3 normal_in = stack_valid(normal_offset)? stack_load_float3(stack, normal_offset): sd->N;
+  float3 normal_in = stack_valid(normal_offset) ? stack_load_float3(stack, normal_offset) : sd->N;
 
-	float3 dPdx = sd->dP.dx;
-	float3 dPdy = sd->dP.dy;
+  float3 dPdx = sd->dP.dx;
+  float3 dPdy = sd->dP.dy;
 
-	if(use_object_space) {
-		object_inverse_normal_transform(kg, sd, &normal_in);
-		object_inverse_dir_transform(kg, sd, &dPdx);
-		object_inverse_dir_transform(kg, sd, &dPdy);
-	}
+  if (use_object_space) {
+    object_inverse_normal_transform(kg, sd, &normal_in);
+    object_inverse_dir_transform(kg, sd, &dPdx);
+    object_inverse_dir_transform(kg, sd, &dPdy);
+  }
 
-	/* get surface tangents from normal */
-	float3 Rx = cross(dPdy, normal_in);
-	float3 Ry = cross(normal_in, dPdx);
+  /* get surface tangents from normal */
+  float3 Rx = cross(dPdy, normal_in);
+  float3 Ry = cross(normal_in, dPdx);
 
-	/* get bump values */
-	uint c_offset, x_offset, y_offset, strength_offset;
-	decode_node_uchar4(node.z, &c_offset, &x_offset, &y_offset, &strength_offset);
+  /* get bump values */
+  uint c_offset, x_offset, y_offset, strength_offset;
+  decode_node_uchar4(node.z, &c_offset, &x_offset, &y_offset, &strength_offset);
 
-	float h_c = stack_load_float(stack, c_offset);
-	float h_x = stack_load_float(stack, x_offset);
-	float h_y = stack_load_float(stack, y_offset);
+  float h_c = stack_load_float(stack, c_offset);
+  float h_x = stack_load_float(stack, x_offset);
+  float h_y = stack_load_float(stack, y_offset);
 
-	/* compute surface gradient and determinant */
-	float det = dot(dPdx, Rx);
-	float3 surfgrad = (h_x - h_c)*Rx + (h_y - h_c)*Ry;
+  /* compute surface gradient and determinant */
+  float det = dot(dPdx, Rx);
+  float3 surfgrad = (h_x - h_c) * Rx + (h_y - h_c) * Ry;
 
-	float absdet = fabsf(det);
+  float absdet = fabsf(det);
 
-	float strength = stack_load_float(stack, strength_offset);
-	float scale = stack_load_float(stack, scale_offset);
+  float strength = stack_load_float(stack, strength_offset);
+  float scale = stack_load_float(stack, scale_offset);
 
-	if(invert)
-		scale *= -1.0f;
+  if (invert)
+    scale *= -1.0f;
 
-	strength = max(strength, 0.0f);
+  strength = max(strength, 0.0f);
 
-	/* compute and output perturbed normal */
-	float3 normal_out = safe_normalize(absdet*normal_in - scale*signf(det)*surfgrad);
-	if(is_zero(normal_out)) {
-		normal_out = normal_in;
-	}
-	else {
-		normal_out = normalize(strength*normal_out + (1.0f - strength)*normal_in);
-	}
+  /* compute and output perturbed normal */
+  float3 normal_out = safe_normalize(absdet * normal_in - scale * signf(det) * surfgrad);
+  if (is_zero(normal_out)) {
+    normal_out = normal_in;
+  }
+  else {
+    normal_out = normalize(strength * normal_out + (1.0f - strength) * normal_in);
+  }
 
-	if(use_object_space) {
-		object_normal_transform(kg, sd, &normal_out);
-	}
+  if (use_object_space) {
+    object_normal_transform(kg, sd, &normal_out);
+  }
 
-	normal_out = ensure_valid_reflection(sd->Ng, sd->I, normal_out);
+  normal_out = ensure_valid_reflection(sd->Ng, sd->I, normal_out);
 
-	stack_store_float3(stack, node.w, normal_out);
+  stack_store_float3(stack, node.w, normal_out);
 #endif
 }
 
 /* Displacement Node */
 
-ccl_device void svm_node_set_displacement(KernelGlobals *kg, ShaderData *sd, float *stack, uint fac_offset)
+ccl_device void svm_node_set_displacement(KernelGlobals *kg,
+                                          ShaderData *sd,
+                                          float *stack,
+                                          uint fac_offset)
 {
-	float3 dP = stack_load_float3(stack, fac_offset);
-	sd->P += dP;
+  float3 dP = stack_load_float3(stack, fac_offset);
+  sd->P += dP;
 }
 
 ccl_device void svm_node_displacement(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
 {
-	uint height_offset, midlevel_offset, scale_offset, normal_offset;
-	decode_node_uchar4(node.y, &height_offset, &midlevel_offset, &scale_offset, &normal_offset);
-
-	float height = stack_load_float(stack, height_offset);
-	float midlevel = stack_load_float(stack, midlevel_offset);
-	float scale = stack_load_float(stack, scale_offset);
-	float3 normal = stack_valid(normal_offset)? stack_load_float3(stack, normal_offset): sd->N;
-	uint space = node.w;
-
-	float3 dP = normal;
-
-	if(space == NODE_NORMAL_MAP_OBJECT) {
-		/* Object space. */
-		object_inverse_normal_transform(kg, sd, &dP);
-		dP *= (height - midlevel) * scale;
-		object_dir_transform(kg, sd, &dP);
-	}
-	else {
-		/* World space. */
-		dP *= (height - midlevel) * scale;
-	}
-
-	stack_store_float3(stack, node.z, dP);
+  uint height_offset, midlevel_offset, scale_offset, normal_offset;
+  decode_node_uchar4(node.y, &height_offset, &midlevel_offset, &scale_offset, &normal_offset);
+
+  float height = stack_load_float(stack, height_offset);
+  float midlevel = stack_load_float(stack, midlevel_offset);
+  float scale = stack_load_float(stack, scale_offset);
+  float3 normal = stack_valid(normal_offset) ? stack_load_float3(stack, normal_offset) : sd->N;
+  uint space = node.w;
+
+  float3 dP = normal;
+
+  if (space == NODE_NORMAL_MAP_OBJECT) {
+    /* Object space. */
+    object_inverse_normal_transform(kg, sd, &dP);
+    dP *= (height - midlevel) * scale;
+    object_dir_transform(kg, sd, &dP);
+  }
+  else {
+    /* World space. */
+    dP *= (height - midlevel) * scale;
+  }
+
+  stack_store_float3(stack, node.z, dP);
 }
 
-ccl_device void svm_node_vector_displacement(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset)
+ccl_device void svm_node_vector_displacement(
+    KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset)
 {
-	uint4 data_node = read_node(kg, offset);
-	uint space = data_node.x;
-
-	uint vector_offset, midlevel_offset,scale_offset, displacement_offset;
-	decode_node_uchar4(node.y, &vector_offset, &midlevel_offset, &scale_offset, &displacement_offset);
-
-	float3 vector = stack_load_float3(stack, vector_offset);
-	float midlevel = stack_load_float(stack, midlevel_offset);
-	float scale = stack_load_float(stack, scale_offset);
-	float3 dP = (vector - make_float3(midlevel, midlevel, midlevel)) * scale;
-
-	if(space == NODE_NORMAL_MAP_TANGENT) {
-		/* Tangent space. */
-		float3 normal = sd->N;
-		object_inverse_normal_transform(kg, sd, &normal);
-
-		const AttributeDescriptor attr = find_attribute(kg, sd, node.z);
-		float3 tangent;
-		if(attr.offset != ATTR_STD_NOT_FOUND) {
-			tangent = primitive_surface_attribute_float3(kg, sd, attr, NULL, NULL);
-		}
-		else {
-			tangent = normalize(sd->dPdu);
-		}
-
-		float3 bitangent = normalize(cross(normal, tangent));
-		const AttributeDescriptor attr_sign = find_attribute(kg, sd, node.w);
-		if(attr_sign.offset != ATTR_STD_NOT_FOUND) {
-			float sign = primitive_surface_attribute_float(kg, sd, attr_sign, NULL, NULL);
-			bitangent *= sign;
-		}
-
-		dP = tangent*dP.x + normal*dP.y + bitangent*dP.z;
-	}
-
-	if(space != NODE_NORMAL_MAP_WORLD) {
-		/* Tangent or object space. */
-		object_dir_transform(kg, sd, &dP);
-	}
-
-	stack_store_float3(stack, displacement_offset, dP);
+  uint4 data_node = read_node(kg, offset);
+  uint space = data_node.x;
+
+  uint vector_offset, midlevel_offset, scale_offset, displacement_offset;
+  decode_node_uchar4(
+      node.y, &vector_offset, &midlevel_offset, &scale_offset, &displacement_offset);
+
+  float3 vector = stack_load_float3(stack, vector_offset);
+  float midlevel = stack_load_float(stack, midlevel_offset);
+  float scale = stack_load_float(stack, scale_offset);
+  float3 dP = (vector - make_float3(midlevel, midlevel, midlevel)) * scale;
+
+  if (space == NODE_NORMAL_MAP_TANGENT) {
+    /* Tangent space. */
+    float3 normal = sd->N;
+    object_inverse_normal_transform(kg, sd, &normal);
+
+    const AttributeDescriptor attr = find_attribute(kg, sd, node.z);
+    float3 tangent;
+    if (attr.offset != ATTR_STD_NOT_FOUND) {
+      tangent = primitive_surface_attribute_float3(kg, sd, attr, NULL, NULL);
+    }
+    else {
+      tangent = normalize(sd->dPdu);
+    }
+
+    float3 bitangent = normalize(cross(normal, tangent));
+    const AttributeDescriptor attr_sign = find_attribute(kg, sd, node.w);
+    if (attr_sign.offset != ATTR_STD_NOT_FOUND) {
+      float sign = primitive_surface_attribute_float(kg, sd, attr_sign, NULL, NULL);
+      bitangent *= sign;
+    }
+
+    dP = tangent * dP.x + normal * dP.y + bitangent * dP.z;
+  }
+
+  if (space != NODE_NORMAL_MAP_WORLD) {
+    /* Tangent or object space. */
+    object_dir_transform(kg, sd, &dP);
+  }
+
+  stack_store_float3(stack, displacement_offset, dP);
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_fresnel.h b/intern/cycles/kernel/svm/svm_fresnel.h
index 99dda5fb170..03119991597 100644
--- a/intern/cycles/kernel/svm/svm_fresnel.h
+++ b/intern/cycles/kernel/svm/svm_fresnel.h
@@ -18,56 +18,60 @@ CCL_NAMESPACE_BEGIN
 
 /* Fresnel Node */
 
-ccl_device void svm_node_fresnel(ShaderData *sd, float *stack, uint ior_offset, uint ior_value, uint node)
+ccl_device void svm_node_fresnel(
+    ShaderData *sd, float *stack, uint ior_offset, uint ior_value, uint node)
 {
-	uint normal_offset, out_offset;
-	decode_node_uchar4(node, &normal_offset, &out_offset, NULL, NULL);
-	float eta = (stack_valid(ior_offset))? stack_load_float(stack, ior_offset): __uint_as_float(ior_value);
-	float3 normal_in = stack_valid(normal_offset)? stack_load_float3(stack, normal_offset): sd->N;
+  uint normal_offset, out_offset;
+  decode_node_uchar4(node, &normal_offset, &out_offset, NULL, NULL);
+  float eta = (stack_valid(ior_offset)) ? stack_load_float(stack, ior_offset) :
+                                          __uint_as_float(ior_value);
+  float3 normal_in = stack_valid(normal_offset) ? stack_load_float3(stack, normal_offset) : sd->N;
 
-	eta = fmaxf(eta, 1e-5f);
-	eta = (sd->flag & SD_BACKFACING)? 1.0f/eta: eta;
+  eta = fmaxf(eta, 1e-5f);
+  eta = (sd->flag & SD_BACKFACING) ? 1.0f / eta : eta;
 
-	float f = fresnel_dielectric_cos(dot(sd->I, normal_in), eta);
+  float f = fresnel_dielectric_cos(dot(sd->I, normal_in), eta);
 
-	stack_store_float(stack, out_offset, f);
+  stack_store_float(stack, out_offset, f);
 }
 
 /* Layer Weight Node */
 
 ccl_device void svm_node_layer_weight(ShaderData *sd, float *stack, uint4 node)
 {
-	uint blend_offset = node.y;
-	uint blend_value = node.z;
+  uint blend_offset = node.y;
+  uint blend_value = node.z;
 
-	uint type, normal_offset, out_offset;
-	decode_node_uchar4(node.w, &type, &normal_offset, &out_offset, NULL);
+  uint type, normal_offset, out_offset;
+  decode_node_uchar4(node.w, &type, &normal_offset, &out_offset, NULL);
 
-	float blend = (stack_valid(blend_offset))? stack_load_float(stack, blend_offset): __uint_as_float(blend_value);
-	float3 normal_in = (stack_valid(normal_offset))? stack_load_float3(stack, normal_offset): sd->N;
+  float blend = (stack_valid(blend_offset)) ? stack_load_float(stack, blend_offset) :
+                                              __uint_as_float(blend_value);
+  float3 normal_in = (stack_valid(normal_offset)) ? stack_load_float3(stack, normal_offset) :
+                                                    sd->N;
 
-	float f;
+  float f;
 
-	if(type == NODE_LAYER_WEIGHT_FRESNEL) {
-		float eta = fmaxf(1.0f - blend, 1e-5f);
-		eta = (sd->flag & SD_BACKFACING)? eta: 1.0f/eta;
+  if (type == NODE_LAYER_WEIGHT_FRESNEL) {
+    float eta = fmaxf(1.0f - blend, 1e-5f);
+    eta = (sd->flag & SD_BACKFACING) ? eta : 1.0f / eta;
 
-		f = fresnel_dielectric_cos(dot(sd->I, normal_in), eta);
-	}
-	else {
-		f = fabsf(dot(sd->I, normal_in));
+    f = fresnel_dielectric_cos(dot(sd->I, normal_in), eta);
+  }
+  else {
+    f = fabsf(dot(sd->I, normal_in));
 
-		if(blend != 0.5f) {
-			blend = clamp(blend, 0.0f, 1.0f-1e-5f);
-			blend = (blend < 0.5f)? 2.0f*blend: 0.5f/(1.0f - blend);
+    if (blend != 0.5f) {
+      blend = clamp(blend, 0.0f, 1.0f - 1e-5f);
+      blend = (blend < 0.5f) ? 2.0f * blend : 0.5f / (1.0f - blend);
 
-			f = powf(f, blend);
-		}
+      f = powf(f, blend);
+    }
 
-		f = 1.0f - f;
-	}
+    f = 1.0f - f;
+  }
 
-	stack_store_float(stack, out_offset, f);
+  stack_store_float(stack, out_offset, f);
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_gamma.h b/intern/cycles/kernel/svm/svm_gamma.h
index 171945a60bc..65eb08eb0eb 100644
--- a/intern/cycles/kernel/svm/svm_gamma.h
+++ b/intern/cycles/kernel/svm/svm_gamma.h
@@ -16,15 +16,16 @@
 
 CCL_NAMESPACE_BEGIN
 
-ccl_device void svm_node_gamma(ShaderData *sd, float *stack, uint in_gamma, uint in_color, uint out_color)
+ccl_device void svm_node_gamma(
+    ShaderData *sd, float *stack, uint in_gamma, uint in_color, uint out_color)
 {
-	float3 color = stack_load_float3(stack, in_color);
-	float gamma = stack_load_float(stack, in_gamma);
+  float3 color = stack_load_float3(stack, in_color);
+  float gamma = stack_load_float(stack, in_gamma);
 
-	color = svm_math_gamma_color(color, gamma);
+  color = svm_math_gamma_color(color, gamma);
 
-	if(stack_valid(out_color))
-		stack_store_float3(stack, out_color, color);
+  if (stack_valid(out_color))
+    stack_store_float3(stack, out_color, color);
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_geometry.h b/intern/cycles/kernel/svm/svm_geometry.h
index 05443772505..a9104643299 100644
--- a/intern/cycles/kernel/svm/svm_geometry.h
+++ b/intern/cycles/kernel/svm/svm_geometry.h
@@ -18,192 +18,217 @@ CCL_NAMESPACE_BEGIN
 
 /* Geometry Node */
 
-ccl_device_inline void svm_node_geometry(KernelGlobals *kg,
-                                         ShaderData *sd,
-                                         float *stack,
-                                         uint type,
-                                         uint out_offset)
+ccl_device_inline void svm_node_geometry(
+    KernelGlobals *kg, ShaderData *sd, float *stack, uint type, uint out_offset)
 {
-	float3 data;
-
-	switch(type) {
-		case NODE_GEOM_P: data = sd->P; break;
-		case NODE_GEOM_N: data = sd->N; break;
+  float3 data;
+
+  switch (type) {
+    case NODE_GEOM_P:
+      data = sd->P;
+      break;
+    case NODE_GEOM_N:
+      data = sd->N;
+      break;
 #ifdef __DPDU__
-		case NODE_GEOM_T: data = primitive_tangent(kg, sd); break;
+    case NODE_GEOM_T:
+      data = primitive_tangent(kg, sd);
+      break;
 #endif
-		case NODE_GEOM_I: data = sd->I; break;
-		case NODE_GEOM_Ng: data = sd->Ng; break;
+    case NODE_GEOM_I:
+      data = sd->I;
+      break;
+    case NODE_GEOM_Ng:
+      data = sd->Ng;
+      break;
 #ifdef __UV__
-		case NODE_GEOM_uv: data = make_float3(sd->u, sd->v, 0.0f); break;
+    case NODE_GEOM_uv:
+      data = make_float3(sd->u, sd->v, 0.0f);
+      break;
 #endif
-		default: data = make_float3(0.0f, 0.0f, 0.0f);
-	}
+    default:
+      data = make_float3(0.0f, 0.0f, 0.0f);
+  }
 
-	stack_store_float3(stack, out_offset, data);
+  stack_store_float3(stack, out_offset, data);
 }
 
-ccl_device void svm_node_geometry_bump_dx(KernelGlobals *kg, ShaderData *sd, float *stack, uint type, uint out_offset)
+ccl_device void svm_node_geometry_bump_dx(
+    KernelGlobals *kg, ShaderData *sd, float *stack, uint type, uint out_offset)
 {
 #ifdef __RAY_DIFFERENTIALS__
-	float3 data;
-
-	switch(type) {
-		case NODE_GEOM_P: data = sd->P + sd->dP.dx; break;
-		case NODE_GEOM_uv: data = make_float3(sd->u + sd->du.dx, sd->v + sd->dv.dx, 0.0f); break;
-		default: svm_node_geometry(kg, sd, stack, type, out_offset); return;
-	}
-
-	stack_store_float3(stack, out_offset, data);
+  float3 data;
+
+  switch (type) {
+    case NODE_GEOM_P:
+      data = sd->P + sd->dP.dx;
+      break;
+    case NODE_GEOM_uv:
+      data = make_float3(sd->u + sd->du.dx, sd->v + sd->dv.dx, 0.0f);
+      break;
+    default:
+      svm_node_geometry(kg, sd, stack, type, out_offset);
+      return;
+  }
+
+  stack_store_float3(stack, out_offset, data);
 #else
-	svm_node_geometry(kg, sd, stack, type, out_offset);
+  svm_node_geometry(kg, sd, stack, type, out_offset);
 #endif
 }
 
-ccl_device void svm_node_geometry_bump_dy(KernelGlobals *kg, ShaderData *sd, float *stack, uint type, uint out_offset)
+ccl_device void svm_node_geometry_bump_dy(
+    KernelGlobals *kg, ShaderData *sd, float *stack, uint type, uint out_offset)
 {
 #ifdef __RAY_DIFFERENTIALS__
-	float3 data;
-
-	switch(type) {
-		case NODE_GEOM_P: data = sd->P + sd->dP.dy; break;
-		case NODE_GEOM_uv: data = make_float3(sd->u + sd->du.dy, sd->v + sd->dv.dy, 0.0f); break;
-		default: svm_node_geometry(kg, sd, stack, type, out_offset); return;
-	}
-
-	stack_store_float3(stack, out_offset, data);
+  float3 data;
+
+  switch (type) {
+    case NODE_GEOM_P:
+      data = sd->P + sd->dP.dy;
+      break;
+    case NODE_GEOM_uv:
+      data = make_float3(sd->u + sd->du.dy, sd->v + sd->dv.dy, 0.0f);
+      break;
+    default:
+      svm_node_geometry(kg, sd, stack, type, out_offset);
+      return;
+  }
+
+  stack_store_float3(stack, out_offset, data);
 #else
-	svm_node_geometry(kg, sd, stack, type, out_offset);
+  svm_node_geometry(kg, sd, stack, type, out_offset);
 #endif
 }
 
 /* Object Info */
 
-ccl_device void svm_node_object_info(KernelGlobals *kg, ShaderData *sd, float *stack, uint type, uint out_offset)
+ccl_device void svm_node_object_info(
+    KernelGlobals *kg, ShaderData *sd, float *stack, uint type, uint out_offset)
 {
-	float data;
-
-	switch(type) {
-		case NODE_INFO_OB_LOCATION: {
-			stack_store_float3(stack, out_offset, object_location(kg, sd));
-			return;
-		}
-		case NODE_INFO_OB_INDEX: data = object_pass_id(kg, sd->object); break;
-		case NODE_INFO_MAT_INDEX: data = shader_pass_id(kg, sd); break;
-		case NODE_INFO_OB_RANDOM: {
-			if(sd->lamp != LAMP_NONE) {
-				data = lamp_random_number(kg, sd->lamp);
-			}
-			else {
-				data = object_random_number(kg, sd->object);
-			}
-			break;
-		}
-		default: data = 0.0f; break;
-	}
-
-	stack_store_float(stack, out_offset, data);
+  float data;
+
+  switch (type) {
+    case NODE_INFO_OB_LOCATION: {
+      stack_store_float3(stack, out_offset, object_location(kg, sd));
+      return;
+    }
+    case NODE_INFO_OB_INDEX:
+      data = object_pass_id(kg, sd->object);
+      break;
+    case NODE_INFO_MAT_INDEX:
+      data = shader_pass_id(kg, sd);
+      break;
+    case NODE_INFO_OB_RANDOM: {
+      if (sd->lamp != LAMP_NONE) {
+        data = lamp_random_number(kg, sd->lamp);
+      }
+      else {
+        data = object_random_number(kg, sd->object);
+      }
+      break;
+    }
+    default:
+      data = 0.0f;
+      break;
+  }
+
+  stack_store_float(stack, out_offset, data);
 }
 
 /* Particle Info */
 
-ccl_device void svm_node_particle_info(KernelGlobals *kg,
-                                       ShaderData *sd,
-                                       float *stack,
-                                       uint type,
-                                       uint out_offset)
+ccl_device void svm_node_particle_info(
+    KernelGlobals *kg, ShaderData *sd, float *stack, uint type, uint out_offset)
 {
-	switch(type) {
-		case NODE_INFO_PAR_INDEX: {
-			int particle_id = object_particle_id(kg, sd->object);
-			stack_store_float(stack, out_offset, particle_index(kg, particle_id));
-			break;
-		}
-		case NODE_INFO_PAR_RANDOM: {
-			int particle_id = object_particle_id(kg, sd->object);
-			float random = hash_int_01(particle_index(kg, particle_id));
-			stack_store_float(stack, out_offset, random);
-			break;
-		}
-		case NODE_INFO_PAR_AGE: {
-			int particle_id = object_particle_id(kg, sd->object);
-			stack_store_float(stack, out_offset, particle_age(kg, particle_id));
-			break;
-		}
-		case NODE_INFO_PAR_LIFETIME: {
-			int particle_id = object_particle_id(kg, sd->object);
-			stack_store_float(stack, out_offset, particle_lifetime(kg, particle_id));
-			break;
-		}
-		case NODE_INFO_PAR_LOCATION: {
-			int particle_id = object_particle_id(kg, sd->object);
-			stack_store_float3(stack, out_offset, particle_location(kg, particle_id));
-			break;
-		}
-#if 0	/* XXX float4 currently not supported in SVM stack */
-		case NODE_INFO_PAR_ROTATION: {
-			int particle_id = object_particle_id(kg, sd->object);
-			stack_store_float4(stack, out_offset, particle_rotation(kg, particle_id));
-			break;
-		}
+  switch (type) {
+    case NODE_INFO_PAR_INDEX: {
+      int particle_id = object_particle_id(kg, sd->object);
+      stack_store_float(stack, out_offset, particle_index(kg, particle_id));
+      break;
+    }
+    case NODE_INFO_PAR_RANDOM: {
+      int particle_id = object_particle_id(kg, sd->object);
+      float random = hash_int_01(particle_index(kg, particle_id));
+      stack_store_float(stack, out_offset, random);
+      break;
+    }
+    case NODE_INFO_PAR_AGE: {
+      int particle_id = object_particle_id(kg, sd->object);
+      stack_store_float(stack, out_offset, particle_age(kg, particle_id));
+      break;
+    }
+    case NODE_INFO_PAR_LIFETIME: {
+      int particle_id = object_particle_id(kg, sd->object);
+      stack_store_float(stack, out_offset, particle_lifetime(kg, particle_id));
+      break;
+    }
+    case NODE_INFO_PAR_LOCATION: {
+      int particle_id = object_particle_id(kg, sd->object);
+      stack_store_float3(stack, out_offset, particle_location(kg, particle_id));
+      break;
+    }
+#if 0 /* XXX float4 currently not supported in SVM stack */
+    case NODE_INFO_PAR_ROTATION: {
+      int particle_id = object_particle_id(kg, sd->object);
+      stack_store_float4(stack, out_offset, particle_rotation(kg, particle_id));
+      break;
+    }
 #endif
-		case NODE_INFO_PAR_SIZE: {
-			int particle_id = object_particle_id(kg, sd->object);
-			stack_store_float(stack, out_offset, particle_size(kg, particle_id));
-			break;
-		}
-		case NODE_INFO_PAR_VELOCITY: {
-			int particle_id = object_particle_id(kg, sd->object);
-			stack_store_float3(stack, out_offset, particle_velocity(kg, particle_id));
-			break;
-		}
-		case NODE_INFO_PAR_ANGULAR_VELOCITY: {
-			int particle_id = object_particle_id(kg, sd->object);
-			stack_store_float3(stack, out_offset, particle_angular_velocity(kg, particle_id));
-			break;
-		}
-	}
+    case NODE_INFO_PAR_SIZE: {
+      int particle_id = object_particle_id(kg, sd->object);
+      stack_store_float(stack, out_offset, particle_size(kg, particle_id));
+      break;
+    }
+    case NODE_INFO_PAR_VELOCITY: {
+      int particle_id = object_particle_id(kg, sd->object);
+      stack_store_float3(stack, out_offset, particle_velocity(kg, particle_id));
+      break;
+    }
+    case NODE_INFO_PAR_ANGULAR_VELOCITY: {
+      int particle_id = object_particle_id(kg, sd->object);
+      stack_store_float3(stack, out_offset, particle_angular_velocity(kg, particle_id));
+      break;
+    }
+  }
 }
 
 #ifdef __HAIR__
 
 /* Hair Info */
 
-ccl_device void svm_node_hair_info(KernelGlobals *kg,
-                                   ShaderData *sd,
-                                   float *stack,
-                                   uint type,
-                                   uint out_offset)
+ccl_device void svm_node_hair_info(
+    KernelGlobals *kg, ShaderData *sd, float *stack, uint type, uint out_offset)
 {
-	float data;
-	float3 data3;
-
-	switch(type) {
-		case NODE_INFO_CURVE_IS_STRAND: {
-			data = (sd->type & PRIMITIVE_ALL_CURVE) != 0;
-			stack_store_float(stack, out_offset, data);
-			break;
-		}
-		case NODE_INFO_CURVE_INTERCEPT:
-			break; /* handled as attribute */
-		case NODE_INFO_CURVE_RANDOM:
-			break; /* handled as attribute */
-		case NODE_INFO_CURVE_THICKNESS: {
-			data = curve_thickness(kg, sd);
-			stack_store_float(stack, out_offset, data);
-			break;
-		}
-		/*case NODE_INFO_CURVE_FADE: {
-			data = sd->curve_transparency;
-			stack_store_float(stack, out_offset, data);
-			break;
-		}*/
-		case NODE_INFO_CURVE_TANGENT_NORMAL: {
-			data3 = curve_tangent_normal(kg, sd);
-			stack_store_float3(stack, out_offset, data3);
-			break;
-		}
-	}
+  float data;
+  float3 data3;
+
+  switch (type) {
+    case NODE_INFO_CURVE_IS_STRAND: {
+      data = (sd->type & PRIMITIVE_ALL_CURVE) != 0;
+      stack_store_float(stack, out_offset, data);
+      break;
+    }
+    case NODE_INFO_CURVE_INTERCEPT:
+      break; /* handled as attribute */
+    case NODE_INFO_CURVE_RANDOM:
+      break; /* handled as attribute */
+    case NODE_INFO_CURVE_THICKNESS: {
+      data = curve_thickness(kg, sd);
+      stack_store_float(stack, out_offset, data);
+      break;
+    }
+    /*case NODE_INFO_CURVE_FADE: {
+      data = sd->curve_transparency;
+      stack_store_float(stack, out_offset, data);
+      break;
+    }*/
+    case NODE_INFO_CURVE_TANGENT_NORMAL: {
+      data3 = curve_tangent_normal(kg, sd);
+      stack_store_float3(stack, out_offset, data3);
+      break;
+    }
+  }
 }
 #endif
 
diff --git a/intern/cycles/kernel/svm/svm_gradient.h b/intern/cycles/kernel/svm/svm_gradient.h
index 177e0506dee..c315564fbc2 100644
--- a/intern/cycles/kernel/svm/svm_gradient.h
+++ b/intern/cycles/kernel/svm/svm_gradient.h
@@ -20,61 +20,61 @@ CCL_NAMESPACE_BEGIN
 
 ccl_device float svm_gradient(float3 p, NodeGradientType type)
 {
-	float x, y, z;
+  float x, y, z;
 
-	x = p.x;
-	y = p.y;
-	z = p.z;
+  x = p.x;
+  y = p.y;
+  z = p.z;
 
-	if(type == NODE_BLEND_LINEAR) {
-		return x;
-	}
-	else if(type == NODE_BLEND_QUADRATIC) {
-		float r = fmaxf(x, 0.0f);
-		return r*r;
-	}
-	else if(type == NODE_BLEND_EASING) {
-		float r = fminf(fmaxf(x, 0.0f), 1.0f);
-		float t = r*r;
+  if (type == NODE_BLEND_LINEAR) {
+    return x;
+  }
+  else if (type == NODE_BLEND_QUADRATIC) {
+    float r = fmaxf(x, 0.0f);
+    return r * r;
+  }
+  else if (type == NODE_BLEND_EASING) {
+    float r = fminf(fmaxf(x, 0.0f), 1.0f);
+    float t = r * r;
 
-		return (3.0f*t - 2.0f*t*r);
-	}
-	else if(type == NODE_BLEND_DIAGONAL) {
-		return (x + y) * 0.5f;
-	}
-	else if(type == NODE_BLEND_RADIAL) {
-		return atan2f(y, x) / M_2PI_F + 0.5f;
-	}
-	else {
-		/* Bias a little bit for the case where p is a unit length vector,
-		 * to get exactly zero instead of a small random value depending
-		 * on float precision. */
-		float r = fmaxf(0.999999f - sqrtf(x*x + y*y + z*z), 0.0f);
+    return (3.0f * t - 2.0f * t * r);
+  }
+  else if (type == NODE_BLEND_DIAGONAL) {
+    return (x + y) * 0.5f;
+  }
+  else if (type == NODE_BLEND_RADIAL) {
+    return atan2f(y, x) / M_2PI_F + 0.5f;
+  }
+  else {
+    /* Bias a little bit for the case where p is a unit length vector,
+     * to get exactly zero instead of a small random value depending
+     * on float precision. */
+    float r = fmaxf(0.999999f - sqrtf(x * x + y * y + z * z), 0.0f);
 
-		if(type == NODE_BLEND_QUADRATIC_SPHERE)
-			return r*r;
-		else if(type == NODE_BLEND_SPHERICAL)
-			return r;
-	}
+    if (type == NODE_BLEND_QUADRATIC_SPHERE)
+      return r * r;
+    else if (type == NODE_BLEND_SPHERICAL)
+      return r;
+  }
 
-	return 0.0f;
+  return 0.0f;
 }
 
 ccl_device void svm_node_tex_gradient(ShaderData *sd, float *stack, uint4 node)
 {
-	uint type, co_offset, color_offset, fac_offset;
+  uint type, co_offset, color_offset, fac_offset;
 
-	decode_node_uchar4(node.y, &type, &co_offset, &fac_offset, &color_offset);
+  decode_node_uchar4(node.y, &type, &co_offset, &fac_offset, &color_offset);
 
-	float3 co = stack_load_float3(stack, co_offset);
+  float3 co = stack_load_float3(stack, co_offset);
 
-	float f = svm_gradient(co, (NodeGradientType)type);
-	f = saturate(f);
+  float f = svm_gradient(co, (NodeGradientType)type);
+  f = saturate(f);
 
-	if(stack_valid(fac_offset))
-		stack_store_float(stack, fac_offset, f);
-	if(stack_valid(color_offset))
-		stack_store_float3(stack, color_offset, make_float3(f, f, f));
+  if (stack_valid(fac_offset))
+    stack_store_float(stack, fac_offset, f);
+  if (stack_valid(color_offset))
+    stack_store_float3(stack, color_offset, make_float3(f, f, f));
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_hsv.h b/intern/cycles/kernel/svm/svm_hsv.h
index 6f3efa639e2..72379fba870 100644
--- a/intern/cycles/kernel/svm/svm_hsv.h
+++ b/intern/cycles/kernel/svm/svm_hsv.h
@@ -19,43 +19,44 @@
 
 CCL_NAMESPACE_BEGIN
 
-ccl_device void svm_node_hsv(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset)
+ccl_device void svm_node_hsv(
+    KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset)
 {
-	uint in_color_offset, fac_offset, out_color_offset;
-	uint hue_offset, sat_offset, val_offset;
-	decode_node_uchar4(node.y, &in_color_offset, &fac_offset, &out_color_offset, NULL);
-	decode_node_uchar4(node.z, &hue_offset, &sat_offset, &val_offset, NULL);
+  uint in_color_offset, fac_offset, out_color_offset;
+  uint hue_offset, sat_offset, val_offset;
+  decode_node_uchar4(node.y, &in_color_offset, &fac_offset, &out_color_offset, NULL);
+  decode_node_uchar4(node.z, &hue_offset, &sat_offset, &val_offset, NULL);
 
-	float fac = stack_load_float(stack, fac_offset);
-	float3 in_color = stack_load_float3(stack, in_color_offset);
-	float3 color = in_color;
+  float fac = stack_load_float(stack, fac_offset);
+  float3 in_color = stack_load_float3(stack, in_color_offset);
+  float3 color = in_color;
 
-	float hue = stack_load_float(stack, hue_offset);
-	float sat = stack_load_float(stack, sat_offset);
-	float val = stack_load_float(stack, val_offset);
+  float hue = stack_load_float(stack, hue_offset);
+  float sat = stack_load_float(stack, sat_offset);
+  float val = stack_load_float(stack, val_offset);
 
-	color = rgb_to_hsv(color);
+  color = rgb_to_hsv(color);
 
-	/* remember: fmod doesn't work for negative numbers here */
-	color.x = fmodf(color.x + hue + 0.5f, 1.0f);
-	color.y = saturate(color.y * sat);
-	color.z *= val;
+  /* remember: fmod doesn't work for negative numbers here */
+  color.x = fmodf(color.x + hue + 0.5f, 1.0f);
+  color.y = saturate(color.y * sat);
+  color.z *= val;
 
-	color = hsv_to_rgb(color);
+  color = hsv_to_rgb(color);
 
-	color.x = fac*color.x + (1.0f - fac)*in_color.x;
-	color.y = fac*color.y + (1.0f - fac)*in_color.y;
-	color.z = fac*color.z + (1.0f - fac)*in_color.z;
+  color.x = fac * color.x + (1.0f - fac) * in_color.x;
+  color.y = fac * color.y + (1.0f - fac) * in_color.y;
+  color.z = fac * color.z + (1.0f - fac) * in_color.z;
 
-	/* Clamp color to prevent negative values caused by oversaturation. */
-	color.x = max(color.x, 0.0f);
-	color.y = max(color.y, 0.0f);
-	color.z = max(color.z, 0.0f);
+  /* Clamp color to prevent negative values caused by oversaturation. */
+  color.x = max(color.x, 0.0f);
+  color.y = max(color.y, 0.0f);
+  color.z = max(color.z, 0.0f);
 
-	if(stack_valid(out_color_offset))
-		stack_store_float3(stack, out_color_offset, color);
+  if (stack_valid(out_color_offset))
+    stack_store_float3(stack, out_color_offset, color);
 }
 
 CCL_NAMESPACE_END
 
-#endif  /* __SVM_HSV_H__ */
+#endif /* __SVM_HSV_H__ */
diff --git a/intern/cycles/kernel/svm/svm_ies.h b/intern/cycles/kernel/svm/svm_ies.h
index 6130c3348b0..9434c0c5505 100644
--- a/intern/cycles/kernel/svm/svm_ies.h
+++ b/intern/cycles/kernel/svm/svm_ies.h
@@ -18,93 +18,102 @@ CCL_NAMESPACE_BEGIN
 
 /* IES Light */
 
-ccl_device_inline float interpolate_ies_vertical(KernelGlobals *kg, int ofs, int v, int v_num, float v_frac, int h)
+ccl_device_inline float interpolate_ies_vertical(
+    KernelGlobals *kg, int ofs, int v, int v_num, float v_frac, int h)
 {
-	/* Since lookups are performed in spherical coordinates, clamping the coordinates at the low end of v
-	 * (corresponding to the north pole) would result in artifacts.
-	 * The proper way of dealing with this would be to lookup the corresponding value on the other side of the pole,
-	 * but since the horizontal coordinates might be nonuniform, this would require yet another interpolation.
-	 * Therefore, the assumtion is made that the light is going to be symmetrical, which means that we can just take
-	 * the corresponding value at the current horizontal coordinate. */
-
-#define IES_LOOKUP(v) kernel_tex_fetch(__ies, ofs+h*v_num+(v))
-	/* If v is zero, assume symmetry and read at v=1 instead of v=-1. */
-	float a = IES_LOOKUP((v == 0)? 1 : v-1);
-	float b = IES_LOOKUP(v);
-	float c = IES_LOOKUP(v+1);
-	float d = IES_LOOKUP(min(v+2, v_num-1));
+  /* Since lookups are performed in spherical coordinates, clamping the coordinates at the low end of v
+   * (corresponding to the north pole) would result in artifacts.
+   * The proper way of dealing with this would be to look up the corresponding value on the other side of the pole,
+   * but since the horizontal coordinates might be nonuniform, this would require yet another interpolation.
+   * Therefore, the assumption is made that the light is going to be symmetrical, which means that we can just take
+   * the corresponding value at the current horizontal coordinate. */
+
+#define IES_LOOKUP(v) kernel_tex_fetch(__ies, ofs + h * v_num + (v))
+  /* If v is zero, assume symmetry and read at v=1 instead of v=-1. */
+  float a = IES_LOOKUP((v == 0) ? 1 : v - 1);
+  float b = IES_LOOKUP(v);
+  float c = IES_LOOKUP(v + 1);
+  float d = IES_LOOKUP(min(v + 2, v_num - 1));
 #undef IES_LOOKUP
 
-	return cubic_interp(a, b, c, d, v_frac);
+  return cubic_interp(a, b, c, d, v_frac);
 }
 
-ccl_device_inline float kernel_ies_interp(KernelGlobals *kg, int slot, float h_angle, float v_angle)
+ccl_device_inline float kernel_ies_interp(KernelGlobals *kg,
+                                          int slot,
+                                          float h_angle,
+                                          float v_angle)
 {
-	/* Find offset of the IES data in the table. */
-	int ofs = __float_as_int(kernel_tex_fetch(__ies, slot));
-	if(ofs == -1) {
-		return 100.0f;
-	}
-
-	int h_num = __float_as_int(kernel_tex_fetch(__ies, ofs++));
-	int v_num = __float_as_int(kernel_tex_fetch(__ies, ofs++));
-
-#define IES_LOOKUP_ANGLE_H(h) kernel_tex_fetch(__ies, ofs+(h))
-#define IES_LOOKUP_ANGLE_V(v) kernel_tex_fetch(__ies, ofs+h_num+(v))
-
-	/* Check whether the angle is within the bounds of the IES texture. */
-	if(v_angle >= IES_LOOKUP_ANGLE_V(v_num-1)) {
-		return 0.0f;
-	}
-	kernel_assert(v_angle >= IES_LOOKUP_ANGLE_V(0));
-	kernel_assert(h_angle >= IES_LOOKUP_ANGLE_H(0));
-	kernel_assert(h_angle <= IES_LOOKUP_ANGLE_H(h_num-1));
-
-	/* Lookup the angles to find the table position. */
-	int h_i, v_i;
-	/* TODO(lukas): Consider using bisection. Probably not worth it for the vast majority of IES files. */
-	for(h_i = 0; IES_LOOKUP_ANGLE_H(h_i+1) < h_angle; h_i++);
-	for(v_i = 0; IES_LOOKUP_ANGLE_V(v_i+1) < v_angle; v_i++);
-
-	float h_frac = inverse_lerp(IES_LOOKUP_ANGLE_H(h_i), IES_LOOKUP_ANGLE_H(h_i+1), h_angle);
-	float v_frac = inverse_lerp(IES_LOOKUP_ANGLE_V(v_i), IES_LOOKUP_ANGLE_V(v_i+1), v_angle);
+  /* Find offset of the IES data in the table. */
+  int ofs = __float_as_int(kernel_tex_fetch(__ies, slot));
+  if (ofs == -1) {
+    return 100.0f;
+  }
+
+  int h_num = __float_as_int(kernel_tex_fetch(__ies, ofs++));
+  int v_num = __float_as_int(kernel_tex_fetch(__ies, ofs++));
+
+#define IES_LOOKUP_ANGLE_H(h) kernel_tex_fetch(__ies, ofs + (h))
+#define IES_LOOKUP_ANGLE_V(v) kernel_tex_fetch(__ies, ofs + h_num + (v))
+
+  /* Check whether the angle is within the bounds of the IES texture. */
+  if (v_angle >= IES_LOOKUP_ANGLE_V(v_num - 1)) {
+    return 0.0f;
+  }
+  kernel_assert(v_angle >= IES_LOOKUP_ANGLE_V(0));
+  kernel_assert(h_angle >= IES_LOOKUP_ANGLE_H(0));
+  kernel_assert(h_angle <= IES_LOOKUP_ANGLE_H(h_num - 1));
+
+  /* Lookup the angles to find the table position. */
+  int h_i, v_i;
+  /* TODO(lukas): Consider using bisection. Probably not worth it for the vast majority of IES files. */
+  for (h_i = 0; IES_LOOKUP_ANGLE_H(h_i + 1) < h_angle; h_i++)
+    ;
+  for (v_i = 0; IES_LOOKUP_ANGLE_V(v_i + 1) < v_angle; v_i++)
+    ;
+
+  float h_frac = inverse_lerp(IES_LOOKUP_ANGLE_H(h_i), IES_LOOKUP_ANGLE_H(h_i + 1), h_angle);
+  float v_frac = inverse_lerp(IES_LOOKUP_ANGLE_V(v_i), IES_LOOKUP_ANGLE_V(v_i + 1), v_angle);
 
 #undef IES_LOOKUP_ANGLE_H
 #undef IES_LOOKUP_ANGLE_V
 
-	/* Skip forward to the actual intensity data. */
-	ofs += h_num+v_num;
-
-	/* Perform cubic interpolation along the horizontal coordinate to get the intensity value.
-	 * If h_i is zero, just wrap around since the horizontal angles always go over the full circle.
-	 * However, the last entry (360°) equals the first one, so we need to wrap around to the one before that. */
-	float a = interpolate_ies_vertical(kg, ofs, v_i, v_num, v_frac, (h_i == 0)? h_num-2 : h_i-1);
-	float b = interpolate_ies_vertical(kg, ofs, v_i, v_num, v_frac, h_i);
-	float c = interpolate_ies_vertical(kg, ofs, v_i, v_num, v_frac, h_i+1);
-	/* Same logic here, wrap around to the second element if necessary. */
-	float d = interpolate_ies_vertical(kg, ofs, v_i, v_num, v_frac, (h_i+2 == h_num)? 1 : h_i+2);
-
-	/* Cubic interpolation can result in negative values, so get rid of them. */
-	return max(cubic_interp(a, b, c, d, h_frac), 0.0f);
+  /* Skip forward to the actual intensity data. */
+  ofs += h_num + v_num;
+
+  /* Perform cubic interpolation along the horizontal coordinate to get the intensity value.
+   * If h_i is zero, just wrap around since the horizontal angles always go over the full circle.
+   * However, the last entry (360°) equals the first one, so we need to wrap around to the one before that. */
+  float a = interpolate_ies_vertical(
+      kg, ofs, v_i, v_num, v_frac, (h_i == 0) ? h_num - 2 : h_i - 1);
+  float b = interpolate_ies_vertical(kg, ofs, v_i, v_num, v_frac, h_i);
+  float c = interpolate_ies_vertical(kg, ofs, v_i, v_num, v_frac, h_i + 1);
+  /* Same logic here, wrap around to the second element if necessary. */
+  float d = interpolate_ies_vertical(
+      kg, ofs, v_i, v_num, v_frac, (h_i + 2 == h_num) ? 1 : h_i + 2);
+
+  /* Cubic interpolation can result in negative values, so get rid of them. */
+  return max(cubic_interp(a, b, c, d, h_frac), 0.0f);
 }
 
-ccl_device void svm_node_ies(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset)
+ccl_device void svm_node_ies(
+    KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset)
 {
-	uint vector_offset, strength_offset, fac_offset, dummy, slot = node.z;
-	decode_node_uchar4(node.y, &strength_offset, &vector_offset, &fac_offset, &dummy);
+  uint vector_offset, strength_offset, fac_offset, dummy, slot = node.z;
+  decode_node_uchar4(node.y, &strength_offset, &vector_offset, &fac_offset, &dummy);
 
-	float3 vector = stack_load_float3(stack, vector_offset);
-	float strength = stack_load_float_default(stack, strength_offset, node.w);
+  float3 vector = stack_load_float3(stack, vector_offset);
+  float strength = stack_load_float_default(stack, strength_offset, node.w);
 
-	vector = normalize(vector);
-	float v_angle = safe_acosf(-vector.z);
-	float h_angle = atan2f(vector.x, vector.y) + M_PI_F;
+  vector = normalize(vector);
+  float v_angle = safe_acosf(-vector.z);
+  float h_angle = atan2f(vector.x, vector.y) + M_PI_F;
 
-	float fac = strength * kernel_ies_interp(kg, slot, h_angle, v_angle);
+  float fac = strength * kernel_ies_interp(kg, slot, h_angle, v_angle);
 
-	if(stack_valid(fac_offset)) {
-		stack_store_float(stack, fac_offset, fac);
-	}
+  if (stack_valid(fac_offset)) {
+    stack_store_float(stack, fac_offset, fac);
+  }
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_image.h b/intern/cycles/kernel/svm/svm_image.h
index 81ee79c984e..ee4b8b6e50c 100644
--- a/intern/cycles/kernel/svm/svm_image.h
+++ b/intern/cycles/kernel/svm/svm_image.h
@@ -16,190 +16,192 @@
 
 CCL_NAMESPACE_BEGIN
 
-ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y, uint srgb, uint use_alpha)
+ccl_device float4
+svm_image_texture(KernelGlobals *kg, int id, float x, float y, uint srgb, uint use_alpha)
 {
-	float4 r = kernel_tex_image_interp(kg, id, x, y);
-	const float alpha = r.w;
-
-	if(use_alpha && alpha != 1.0f && alpha != 0.0f) {
-		r /= alpha;
-		const int texture_type = kernel_tex_type(id);
-		if(texture_type == IMAGE_DATA_TYPE_BYTE4 ||
-		   texture_type == IMAGE_DATA_TYPE_BYTE)
-		{
-			r = min(r, make_float4(1.0f, 1.0f, 1.0f, 1.0f));
-		}
-		r.w = alpha;
-	}
-
-	if(srgb) {
-		/* TODO(lukas): Implement proper conversion for image textures. */
-		r = color_srgb_to_linear_v4(r);
-	}
-
-	return r;
+  float4 r = kernel_tex_image_interp(kg, id, x, y);
+  const float alpha = r.w;
+
+  if (use_alpha && alpha != 1.0f && alpha != 0.0f) {
+    r /= alpha;
+    const int texture_type = kernel_tex_type(id);
+    if (texture_type == IMAGE_DATA_TYPE_BYTE4 || texture_type == IMAGE_DATA_TYPE_BYTE) {
+      r = min(r, make_float4(1.0f, 1.0f, 1.0f, 1.0f));
+    }
+    r.w = alpha;
+  }
+
+  if (srgb) {
+    /* TODO(lukas): Implement proper conversion for image textures. */
+    r = color_srgb_to_linear_v4(r);
+  }
+
+  return r;
 }
 
 /* Remap coordnate from 0..1 box to -1..-1 */
 ccl_device_inline float3 texco_remap_square(float3 co)
 {
-	return (co - make_float3(0.5f, 0.5f, 0.5f)) * 2.0f;
+  return (co - make_float3(0.5f, 0.5f, 0.5f)) * 2.0f;
 }
 
 ccl_device void svm_node_tex_image(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
 {
-	uint id = node.y;
-	uint co_offset, out_offset, alpha_offset, srgb;
-
-	decode_node_uchar4(node.z, &co_offset, &out_offset, &alpha_offset, &srgb);
-
-	float3 co = stack_load_float3(stack, co_offset);
-	float2 tex_co;
-	uint use_alpha = stack_valid(alpha_offset);
-	if(node.w == NODE_IMAGE_PROJ_SPHERE) {
-		co = texco_remap_square(co);
-		tex_co = map_to_sphere(co);
-	}
-	else if(node.w == NODE_IMAGE_PROJ_TUBE) {
-		co = texco_remap_square(co);
-		tex_co = map_to_tube(co);
-	}
-	else {
-		tex_co = make_float2(co.x, co.y);
-	}
-	float4 f = svm_image_texture(kg, id, tex_co.x, tex_co.y, srgb, use_alpha);
-
-	if(stack_valid(out_offset))
-		stack_store_float3(stack, out_offset, make_float3(f.x, f.y, f.z));
-	if(stack_valid(alpha_offset))
-		stack_store_float(stack, alpha_offset, f.w);
+  uint id = node.y;
+  uint co_offset, out_offset, alpha_offset, srgb;
+
+  decode_node_uchar4(node.z, &co_offset, &out_offset, &alpha_offset, &srgb);
+
+  float3 co = stack_load_float3(stack, co_offset);
+  float2 tex_co;
+  uint use_alpha = stack_valid(alpha_offset);
+  if (node.w == NODE_IMAGE_PROJ_SPHERE) {
+    co = texco_remap_square(co);
+    tex_co = map_to_sphere(co);
+  }
+  else if (node.w == NODE_IMAGE_PROJ_TUBE) {
+    co = texco_remap_square(co);
+    tex_co = map_to_tube(co);
+  }
+  else {
+    tex_co = make_float2(co.x, co.y);
+  }
+  float4 f = svm_image_texture(kg, id, tex_co.x, tex_co.y, srgb, use_alpha);
+
+  if (stack_valid(out_offset))
+    stack_store_float3(stack, out_offset, make_float3(f.x, f.y, f.z));
+  if (stack_valid(alpha_offset))
+    stack_store_float(stack, alpha_offset, f.w);
 }
 
 ccl_device void svm_node_tex_image_box(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
 {
-	/* get object space normal */
-	float3 N = sd->N;
-
-	N = sd->N;
-	object_inverse_normal_transform(kg, sd, &N);
-
-	/* project from direction vector to barycentric coordinates in triangles */
-	float3 signed_N = N;
-
-	N.x = fabsf(N.x);
-	N.y = fabsf(N.y);
-	N.z = fabsf(N.z);
-
-	N /= (N.x + N.y + N.z);
-
-	/* basic idea is to think of this as a triangle, each corner representing
-	 * one of the 3 faces of the cube. in the corners we have single textures,
-	 * in between we blend between two textures, and in the middle we a blend
-	 * between three textures.
-	 *
-	 * the Nxyz values are the barycentric coordinates in an equilateral
-	 * triangle, which in case of blending, in the middle has a smaller
-	 * equilateral triangle where 3 textures blend. this divides things into
-	 * 7 zones, with an if() test for each zone */
-
-	float3 weight = make_float3(0.0f, 0.0f, 0.0f);
-	float blend = __int_as_float(node.w);
-	float limit = 0.5f*(1.0f + blend);
-
-	/* first test for corners with single texture */
-	if(N.x > limit*(N.x + N.y) && N.x > limit*(N.x + N.z)) {
-		weight.x = 1.0f;
-	}
-	else if(N.y > limit*(N.x + N.y) && N.y > limit*(N.y + N.z)) {
-		weight.y = 1.0f;
-	}
-	else if(N.z > limit*(N.x + N.z) && N.z > limit*(N.y + N.z)) {
-		weight.z = 1.0f;
-	}
-	else if(blend > 0.0f) {
-		/* in case of blending, test for mixes between two textures */
-		if(N.z < (1.0f - limit)*(N.y + N.x)) {
-			weight.x = N.x/(N.x + N.y);
-			weight.x = saturate((weight.x - 0.5f*(1.0f - blend))/blend);
-			weight.y = 1.0f - weight.x;
-		}
-		else if(N.x < (1.0f - limit)*(N.y + N.z)) {
-			weight.y = N.y/(N.y + N.z);
-			weight.y = saturate((weight.y - 0.5f*(1.0f - blend))/blend);
-			weight.z = 1.0f - weight.y;
-		}
-		else if(N.y < (1.0f - limit)*(N.x + N.z)) {
-			weight.x = N.x/(N.x + N.z);
-			weight.x = saturate((weight.x - 0.5f*(1.0f - blend))/blend);
-			weight.z = 1.0f - weight.x;
-		}
-		else {
-			/* last case, we have a mix between three */
-			weight.x = ((2.0f - limit)*N.x + (limit - 1.0f))/(2.0f*limit - 1.0f);
-			weight.y = ((2.0f - limit)*N.y + (limit - 1.0f))/(2.0f*limit - 1.0f);
-			weight.z = ((2.0f - limit)*N.z + (limit - 1.0f))/(2.0f*limit - 1.0f);
-		}
-	}
-	else {
-		/* Desperate mode, no valid choice anyway, fallback to one side.*/
-		weight.x = 1.0f;
-	}
-
-	/* now fetch textures */
-	uint co_offset, out_offset, alpha_offset, srgb;
-	decode_node_uchar4(node.z, &co_offset, &out_offset, &alpha_offset, &srgb);
-
-	float3 co = stack_load_float3(stack, co_offset);
-	uint id = node.y;
-
-	float4 f = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-	uint use_alpha = stack_valid(alpha_offset);
-
-	/* Map so that no textures are flipped, rotation is somewhat arbitrary. */
-	if(weight.x > 0.0f) {
-		float2 uv = make_float2((signed_N.x < 0.0f)? 1.0f - co.y: co.y, co.z);
-		f += weight.x*svm_image_texture(kg, id, uv.x, uv.y, srgb, use_alpha);
-	}
-	if(weight.y > 0.0f) {
-		float2 uv = make_float2((signed_N.y > 0.0f)? 1.0f - co.x: co.x, co.z);
-		f += weight.y*svm_image_texture(kg, id, uv.x, uv.y, srgb, use_alpha);
-	}
-	if(weight.z > 0.0f) {
-		float2 uv = make_float2((signed_N.z > 0.0f)? 1.0f - co.y: co.y, co.x);
-		f += weight.z*svm_image_texture(kg, id, uv.x, uv.y, srgb, use_alpha);
-	}
-
-	if(stack_valid(out_offset))
-		stack_store_float3(stack, out_offset, make_float3(f.x, f.y, f.z));
-	if(stack_valid(alpha_offset))
-		stack_store_float(stack, alpha_offset, f.w);
+  /* get object space normal */
+  float3 N = sd->N;
+
+  N = sd->N;
+  object_inverse_normal_transform(kg, sd, &N);
+
+  /* project from direction vector to barycentric coordinates in triangles */
+  float3 signed_N = N;
+
+  N.x = fabsf(N.x);
+  N.y = fabsf(N.y);
+  N.z = fabsf(N.z);
+
+  N /= (N.x + N.y + N.z);
+
+  /* basic idea is to think of this as a triangle, each corner representing
+   * one of the 3 faces of the cube. in the corners we have single textures,
+   * in between we blend between two textures, and in the middle we blend
+   * between three textures.
+   *
+   * the Nxyz values are the barycentric coordinates in an equilateral
+   * triangle, which in case of blending, in the middle has a smaller
+   * equilateral triangle where 3 textures blend. this divides things into
+   * 7 zones, with an if() test for each zone */
+
+  float3 weight = make_float3(0.0f, 0.0f, 0.0f);
+  float blend = __int_as_float(node.w);
+  float limit = 0.5f * (1.0f + blend);
+
+  /* first test for corners with single texture */
+  if (N.x > limit * (N.x + N.y) && N.x > limit * (N.x + N.z)) {
+    weight.x = 1.0f;
+  }
+  else if (N.y > limit * (N.x + N.y) && N.y > limit * (N.y + N.z)) {
+    weight.y = 1.0f;
+  }
+  else if (N.z > limit * (N.x + N.z) && N.z > limit * (N.y + N.z)) {
+    weight.z = 1.0f;
+  }
+  else if (blend > 0.0f) {
+    /* in case of blending, test for mixes between two textures */
+    if (N.z < (1.0f - limit) * (N.y + N.x)) {
+      weight.x = N.x / (N.x + N.y);
+      weight.x = saturate((weight.x - 0.5f * (1.0f - blend)) / blend);
+      weight.y = 1.0f - weight.x;
+    }
+    else if (N.x < (1.0f - limit) * (N.y + N.z)) {
+      weight.y = N.y / (N.y + N.z);
+      weight.y = saturate((weight.y - 0.5f * (1.0f - blend)) / blend);
+      weight.z = 1.0f - weight.y;
+    }
+    else if (N.y < (1.0f - limit) * (N.x + N.z)) {
+      weight.x = N.x / (N.x + N.z);
+      weight.x = saturate((weight.x - 0.5f * (1.0f - blend)) / blend);
+      weight.z = 1.0f - weight.x;
+    }
+    else {
+      /* last case, we have a mix between three */
+      weight.x = ((2.0f - limit) * N.x + (limit - 1.0f)) / (2.0f * limit - 1.0f);
+      weight.y = ((2.0f - limit) * N.y + (limit - 1.0f)) / (2.0f * limit - 1.0f);
+      weight.z = ((2.0f - limit) * N.z + (limit - 1.0f)) / (2.0f * limit - 1.0f);
+    }
+  }
+  else {
+    /* Desperate mode, no valid choice anyway, fall back to one side. */
+    weight.x = 1.0f;
+  }
+
+  /* now fetch textures */
+  uint co_offset, out_offset, alpha_offset, srgb;
+  decode_node_uchar4(node.z, &co_offset, &out_offset, &alpha_offset, &srgb);
+
+  float3 co = stack_load_float3(stack, co_offset);
+  uint id = node.y;
+
+  float4 f = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+  uint use_alpha = stack_valid(alpha_offset);
+
+  /* Map so that no textures are flipped, rotation is somewhat arbitrary. */
+  if (weight.x > 0.0f) {
+    float2 uv = make_float2((signed_N.x < 0.0f) ? 1.0f - co.y : co.y, co.z);
+    f += weight.x * svm_image_texture(kg, id, uv.x, uv.y, srgb, use_alpha);
+  }
+  if (weight.y > 0.0f) {
+    float2 uv = make_float2((signed_N.y > 0.0f) ? 1.0f - co.x : co.x, co.z);
+    f += weight.y * svm_image_texture(kg, id, uv.x, uv.y, srgb, use_alpha);
+  }
+  if (weight.z > 0.0f) {
+    float2 uv = make_float2((signed_N.z > 0.0f) ? 1.0f - co.y : co.y, co.x);
+    f += weight.z * svm_image_texture(kg, id, uv.x, uv.y, srgb, use_alpha);
+  }
+
+  if (stack_valid(out_offset))
+    stack_store_float3(stack, out_offset, make_float3(f.x, f.y, f.z));
+  if (stack_valid(alpha_offset))
+    stack_store_float(stack, alpha_offset, f.w);
 }
 
-ccl_device void svm_node_tex_environment(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
+ccl_device void svm_node_tex_environment(KernelGlobals *kg,
+                                         ShaderData *sd,
+                                         float *stack,
+                                         uint4 node)
 {
-	uint id = node.y;
-	uint co_offset, out_offset, alpha_offset, srgb;
-	uint projection = node.w;
+  uint id = node.y;
+  uint co_offset, out_offset, alpha_offset, srgb;
+  uint projection = node.w;
 
-	decode_node_uchar4(node.z, &co_offset, &out_offset, &alpha_offset, &srgb);
+  decode_node_uchar4(node.z, &co_offset, &out_offset, &alpha_offset, &srgb);
 
-	float3 co = stack_load_float3(stack, co_offset);
-	float2 uv;
+  float3 co = stack_load_float3(stack, co_offset);
+  float2 uv;
 
-	co = safe_normalize(co);
+  co = safe_normalize(co);
 
-	if(projection == 0)
-		uv = direction_to_equirectangular(co);
-	else
-		uv = direction_to_mirrorball(co);
+  if (projection == 0)
+    uv = direction_to_equirectangular(co);
+  else
+    uv = direction_to_mirrorball(co);
 
-	uint use_alpha = stack_valid(alpha_offset);
-	float4 f = svm_image_texture(kg, id, uv.x, uv.y, srgb, use_alpha);
+  uint use_alpha = stack_valid(alpha_offset);
+  float4 f = svm_image_texture(kg, id, uv.x, uv.y, srgb, use_alpha);
 
-	if(stack_valid(out_offset))
-		stack_store_float3(stack, out_offset, make_float3(f.x, f.y, f.z));
-	if(stack_valid(alpha_offset))
-		stack_store_float(stack, alpha_offset, f.w);
+  if (stack_valid(out_offset))
+    stack_store_float3(stack, out_offset, make_float3(f.x, f.y, f.z));
+  if (stack_valid(alpha_offset))
+    stack_store_float(stack, alpha_offset, f.w);
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_invert.h b/intern/cycles/kernel/svm/svm_invert.h
index 57cc4281101..02024742b13 100644
--- a/intern/cycles/kernel/svm/svm_invert.h
+++ b/intern/cycles/kernel/svm/svm_invert.h
@@ -18,20 +18,21 @@ CCL_NAMESPACE_BEGIN
 
 ccl_device float invert(float color, float factor)
 {
-	return factor*(1.0f - color) + (1.0f - factor) * color;
+  return factor * (1.0f - color) + (1.0f - factor) * color;
 }
 
-ccl_device void svm_node_invert(ShaderData *sd, float *stack, uint in_fac, uint in_color, uint out_color)
+ccl_device void svm_node_invert(
+    ShaderData *sd, float *stack, uint in_fac, uint in_color, uint out_color)
 {
-	float factor = stack_load_float(stack, in_fac);
-	float3 color = stack_load_float3(stack, in_color);
+  float factor = stack_load_float(stack, in_fac);
+  float3 color = stack_load_float3(stack, in_color);
 
-	color.x = invert(color.x, factor);
-	color.y = invert(color.y, factor);
-	color.z = invert(color.z, factor);
+  color.x = invert(color.x, factor);
+  color.y = invert(color.y, factor);
+  color.z = invert(color.z, factor);
 
-	if(stack_valid(out_color))
-		stack_store_float3(stack, out_color, color);
+  if (stack_valid(out_color))
+    stack_store_float3(stack, out_color, color);
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_light_path.h b/intern/cycles/kernel/svm/svm_light_path.h
index dd4390057cf..65a9a284a17 100644
--- a/intern/cycles/kernel/svm/svm_light_path.h
+++ b/intern/cycles/kernel/svm/svm_light_path.h
@@ -18,59 +18,99 @@ CCL_NAMESPACE_BEGIN
 
 /* Light Path Node */
 
-ccl_device void svm_node_light_path(ShaderData *sd, ccl_addr_space PathState *state, float *stack, uint type, uint out_offset, int path_flag)
+ccl_device void svm_node_light_path(ShaderData *sd,
+                                    ccl_addr_space PathState *state,
+                                    float *stack,
+                                    uint type,
+                                    uint out_offset,
+                                    int path_flag)
 {
-	float info = 0.0f;
+  float info = 0.0f;
 
-	switch(type) {
-		case NODE_LP_camera: info = (path_flag & PATH_RAY_CAMERA)? 1.0f: 0.0f; break;
-		case NODE_LP_shadow: info = (path_flag & PATH_RAY_SHADOW)? 1.0f: 0.0f; break;
-		case NODE_LP_diffuse: info = (path_flag & PATH_RAY_DIFFUSE)? 1.0f: 0.0f; break;
-		case NODE_LP_glossy: info = (path_flag & PATH_RAY_GLOSSY)? 1.0f: 0.0f; break;
-		case NODE_LP_singular: info = (path_flag & PATH_RAY_SINGULAR)? 1.0f: 0.0f; break;
-		case NODE_LP_reflection: info = (path_flag & PATH_RAY_REFLECT)? 1.0f: 0.0f; break;
-		case NODE_LP_transmission: info = (path_flag & PATH_RAY_TRANSMIT)? 1.0f: 0.0f; break;
-		case NODE_LP_volume_scatter: info = (path_flag & PATH_RAY_VOLUME_SCATTER)? 1.0f: 0.0f; break;
-		case NODE_LP_backfacing: info = (sd->flag & SD_BACKFACING)? 1.0f: 0.0f; break;
-		case NODE_LP_ray_length: info = sd->ray_length; break;
-		case NODE_LP_ray_depth: info = (float)state->bounce; break;
-		case NODE_LP_ray_diffuse: info = (float)state->diffuse_bounce; break;
-		case NODE_LP_ray_glossy: info = (float)state->glossy_bounce; break;
-		case NODE_LP_ray_transparent: info = (float)state->transparent_bounce; break;
-		case NODE_LP_ray_transmission: info = (float)state->transmission_bounce; break;
-	}
+  switch (type) {
+    case NODE_LP_camera:
+      info = (path_flag & PATH_RAY_CAMERA) ? 1.0f : 0.0f;
+      break;
+    case NODE_LP_shadow:
+      info = (path_flag & PATH_RAY_SHADOW) ? 1.0f : 0.0f;
+      break;
+    case NODE_LP_diffuse:
+      info = (path_flag & PATH_RAY_DIFFUSE) ? 1.0f : 0.0f;
+      break;
+    case NODE_LP_glossy:
+      info = (path_flag & PATH_RAY_GLOSSY) ? 1.0f : 0.0f;
+      break;
+    case NODE_LP_singular:
+      info = (path_flag & PATH_RAY_SINGULAR) ? 1.0f : 0.0f;
+      break;
+    case NODE_LP_reflection:
+      info = (path_flag & PATH_RAY_REFLECT) ? 1.0f : 0.0f;
+      break;
+    case NODE_LP_transmission:
+      info = (path_flag & PATH_RAY_TRANSMIT) ? 1.0f : 0.0f;
+      break;
+    case NODE_LP_volume_scatter:
+      info = (path_flag & PATH_RAY_VOLUME_SCATTER) ? 1.0f : 0.0f;
+      break;
+    case NODE_LP_backfacing:
+      info = (sd->flag & SD_BACKFACING) ? 1.0f : 0.0f;
+      break;
+    case NODE_LP_ray_length:
+      info = sd->ray_length;
+      break;
+    case NODE_LP_ray_depth:
+      info = (float)state->bounce;
+      break;
+    case NODE_LP_ray_diffuse:
+      info = (float)state->diffuse_bounce;
+      break;
+    case NODE_LP_ray_glossy:
+      info = (float)state->glossy_bounce;
+      break;
+    case NODE_LP_ray_transparent:
+      info = (float)state->transparent_bounce;
+      break;
+    case NODE_LP_ray_transmission:
+      info = (float)state->transmission_bounce;
+      break;
+  }
 
-	stack_store_float(stack, out_offset, info);
+  stack_store_float(stack, out_offset, info);
 }
 
 /* Light Falloff Node */
 
 ccl_device void svm_node_light_falloff(ShaderData *sd, float *stack, uint4 node)
 {
-	uint strength_offset, out_offset, smooth_offset;
+  uint strength_offset, out_offset, smooth_offset;
 
-	decode_node_uchar4(node.z, &strength_offset, &smooth_offset, &out_offset, NULL);
+  decode_node_uchar4(node.z, &strength_offset, &smooth_offset, &out_offset, NULL);
 
-	float strength = stack_load_float(stack, strength_offset);
-	uint type = node.y;
+  float strength = stack_load_float(stack, strength_offset);
+  uint type = node.y;
 
-	switch(type) {
-		case NODE_LIGHT_FALLOFF_QUADRATIC: break;
-		case NODE_LIGHT_FALLOFF_LINEAR: strength *= sd->ray_length; break;
-		case NODE_LIGHT_FALLOFF_CONSTANT: strength *= sd->ray_length*sd->ray_length; break;
-	}
+  switch (type) {
+    case NODE_LIGHT_FALLOFF_QUADRATIC:
+      break;
+    case NODE_LIGHT_FALLOFF_LINEAR:
+      strength *= sd->ray_length;
+      break;
+    case NODE_LIGHT_FALLOFF_CONSTANT:
+      strength *= sd->ray_length * sd->ray_length;
+      break;
+  }
 
-	float smooth = stack_load_float(stack, smooth_offset);
+  float smooth = stack_load_float(stack, smooth_offset);
 
-	if(smooth > 0.0f) {
-		float squared = sd->ray_length*sd->ray_length;
-		/* Distant lamps set the ray length to FLT_MAX, which causes squared to overflow. */
-		if(isfinite(squared)) {
-			strength *= squared/(smooth + squared);
-		}
-	}
+  if (smooth > 0.0f) {
+    float squared = sd->ray_length * sd->ray_length;
+    /* Distant lamps set the ray length to FLT_MAX, which causes squared to overflow. */
+    if (isfinite(squared)) {
+      strength *= squared / (smooth + squared);
+    }
+  }
 
-	stack_store_float(stack, out_offset, strength);
+  stack_store_float(stack, out_offset, strength);
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_magic.h b/intern/cycles/kernel/svm/svm_magic.h
index 6afaff37acd..115d2e2fe4b 100644
--- a/intern/cycles/kernel/svm/svm_magic.h
+++ b/intern/cycles/kernel/svm/svm_magic.h
@@ -20,92 +20,93 @@ CCL_NAMESPACE_BEGIN
 
 ccl_device_noinline float3 svm_magic(float3 p, int n, float distortion)
 {
-	float x = sinf((p.x + p.y + p.z)*5.0f);
-	float y = cosf((-p.x + p.y - p.z)*5.0f);
-	float z = -cosf((-p.x - p.y + p.z)*5.0f);
-
-	if(n > 0) {
-		x *= distortion;
-		y *= distortion;
-		z *= distortion;
-		y = -cosf(x-y+z);
-		y *= distortion;
-
-		if(n > 1) {
-			x = cosf(x-y-z);
-			x *= distortion;
-
-			if(n > 2) {
-				z = sinf(-x-y-z);
-				z *= distortion;
-
-				if(n > 3) {
-					x = -cosf(-x+y-z);
-					x *= distortion;
-
-					if(n > 4) {
-						y = -sinf(-x+y+z);
-						y *= distortion;
-
-						if(n > 5) {
-							y = -cosf(-x+y+z);
-							y *= distortion;
-
-							if(n > 6) {
-								x = cosf(x+y+z);
-								x *= distortion;
-
-								if(n > 7) {
-									z = sinf(x+y-z);
-									z *= distortion;
-
-									if(n > 8) {
-										x = -cosf(-x-y+z);
-										x *= distortion;
-
-										if(n > 9) {
-											y = -sinf(x-y+z);
-											y *= distortion;
-										}
-									}
-								}
-							}
-						}
-					}
-				}
-			}
-		}
-	}
-
-	if(distortion != 0.0f) {
-		distortion *= 2.0f;
-		x /= distortion;
-		y /= distortion;
-		z /= distortion;
-	}
-
-	return make_float3(0.5f - x, 0.5f - y, 0.5f - z);
+  float x = sinf((p.x + p.y + p.z) * 5.0f);
+  float y = cosf((-p.x + p.y - p.z) * 5.0f);
+  float z = -cosf((-p.x - p.y + p.z) * 5.0f);
+
+  if (n > 0) {
+    x *= distortion;
+    y *= distortion;
+    z *= distortion;
+    y = -cosf(x - y + z);
+    y *= distortion;
+
+    if (n > 1) {
+      x = cosf(x - y - z);
+      x *= distortion;
+
+      if (n > 2) {
+        z = sinf(-x - y - z);
+        z *= distortion;
+
+        if (n > 3) {
+          x = -cosf(-x + y - z);
+          x *= distortion;
+
+          if (n > 4) {
+            y = -sinf(-x + y + z);
+            y *= distortion;
+
+            if (n > 5) {
+              y = -cosf(-x + y + z);
+              y *= distortion;
+
+              if (n > 6) {
+                x = cosf(x + y + z);
+                x *= distortion;
+
+                if (n > 7) {
+                  z = sinf(x + y - z);
+                  z *= distortion;
+
+                  if (n > 8) {
+                    x = -cosf(-x - y + z);
+                    x *= distortion;
+
+                    if (n > 9) {
+                      y = -sinf(x - y + z);
+                      y *= distortion;
+                    }
+                  }
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+
+  if (distortion != 0.0f) {
+    distortion *= 2.0f;
+    x /= distortion;
+    y /= distortion;
+    z /= distortion;
+  }
+
+  return make_float3(0.5f - x, 0.5f - y, 0.5f - z);
 }
 
-ccl_device void svm_node_tex_magic(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset)
+ccl_device void svm_node_tex_magic(
+    KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset)
 {
-	uint depth;
-	uint scale_offset, distortion_offset, co_offset, fac_offset, color_offset;
+  uint depth;
+  uint scale_offset, distortion_offset, co_offset, fac_offset, color_offset;
 
-	decode_node_uchar4(node.y, &depth, &color_offset, &fac_offset, NULL);
-	decode_node_uchar4(node.z, &co_offset, &scale_offset, &distortion_offset, NULL);
+  decode_node_uchar4(node.y, &depth, &color_offset, &fac_offset, NULL);
+  decode_node_uchar4(node.z, &co_offset, &scale_offset, &distortion_offset, NULL);
 
-	uint4 node2 = read_node(kg, offset);
-	float3 co = stack_load_float3(stack, co_offset);
-	float scale = stack_load_float_default(stack, scale_offset, node2.x);
-	float distortion = stack_load_float_default(stack, distortion_offset, node2.y);
+  uint4 node2 = read_node(kg, offset);
+  float3 co = stack_load_float3(stack, co_offset);
+  float scale = stack_load_float_default(stack, scale_offset, node2.x);
+  float distortion = stack_load_float_default(stack, distortion_offset, node2.y);
 
-	float3 color = svm_magic(co*scale, depth, distortion);
+  float3 color = svm_magic(co * scale, depth, distortion);
 
-	if(stack_valid(fac_offset))
-		stack_store_float(stack, fac_offset, average(color));
-	if(stack_valid(color_offset))
-		stack_store_float3(stack, color_offset, color);
+  if (stack_valid(fac_offset))
+    stack_store_float(stack, fac_offset, average(color));
+  if (stack_valid(color_offset))
+    stack_store_float3(stack, color_offset, color);
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_mapping.h b/intern/cycles/kernel/svm/svm_mapping.h
index 86181283821..998a29912d4 100644
--- a/intern/cycles/kernel/svm/svm_mapping.h
+++ b/intern/cycles/kernel/svm/svm_mapping.h
@@ -18,28 +18,30 @@ CCL_NAMESPACE_BEGIN
 
 /* Mapping Node */
 
-ccl_device void svm_node_mapping(KernelGlobals *kg, ShaderData *sd, float *stack, uint vec_offset, uint out_offset, int *offset)
+ccl_device void svm_node_mapping(
+    KernelGlobals *kg, ShaderData *sd, float *stack, uint vec_offset, uint out_offset, int *offset)
 {
-	float3 v = stack_load_float3(stack, vec_offset);
+  float3 v = stack_load_float3(stack, vec_offset);
 
-	Transform tfm;
-	tfm.x = read_node_float(kg, offset);
-	tfm.y = read_node_float(kg, offset);
-	tfm.z = read_node_float(kg, offset);
+  Transform tfm;
+  tfm.x = read_node_float(kg, offset);
+  tfm.y = read_node_float(kg, offset);
+  tfm.z = read_node_float(kg, offset);
 
-	float3 r = transform_point(&tfm, v);
-	stack_store_float3(stack, out_offset, r);
+  float3 r = transform_point(&tfm, v);
+  stack_store_float3(stack, out_offset, r);
 }
 
-ccl_device void svm_node_min_max(KernelGlobals *kg, ShaderData *sd, float *stack, uint vec_offset, uint out_offset, int *offset)
+ccl_device void svm_node_min_max(
+    KernelGlobals *kg, ShaderData *sd, float *stack, uint vec_offset, uint out_offset, int *offset)
 {
-	float3 v = stack_load_float3(stack, vec_offset);
+  float3 v = stack_load_float3(stack, vec_offset);
 
-	float3 mn = float4_to_float3(read_node_float(kg, offset));
-	float3 mx = float4_to_float3(read_node_float(kg, offset));
+  float3 mn = float4_to_float3(read_node_float(kg, offset));
+  float3 mx = float4_to_float3(read_node_float(kg, offset));
 
-	float3 r = min(max(mn, v), mx);
-	stack_store_float3(stack, out_offset, r);
+  float3 r = min(max(mn, v), mx);
+  stack_store_float3(stack, out_offset, r);
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_math.h b/intern/cycles/kernel/svm/svm_math.h
index c9a838361cd..5920913825b 100644
--- a/intern/cycles/kernel/svm/svm_math.h
+++ b/intern/cycles/kernel/svm/svm_math.h
@@ -18,32 +18,46 @@ CCL_NAMESPACE_BEGIN
 
 /* Nodes */
 
-ccl_device void svm_node_math(KernelGlobals *kg, ShaderData *sd, float *stack, uint itype, uint f1_offset, uint f2_offset, int *offset)
+ccl_device void svm_node_math(KernelGlobals *kg,
+                              ShaderData *sd,
+                              float *stack,
+                              uint itype,
+                              uint f1_offset,
+                              uint f2_offset,
+                              int *offset)
 {
-	NodeMath type = (NodeMath)itype;
-	float f1 = stack_load_float(stack, f1_offset);
-	float f2 = stack_load_float(stack, f2_offset);
-	float f = svm_math(type, f1, f2);
+  NodeMath type = (NodeMath)itype;
+  float f1 = stack_load_float(stack, f1_offset);
+  float f2 = stack_load_float(stack, f2_offset);
+  float f = svm_math(type, f1, f2);
 
-	uint4 node1 = read_node(kg, offset);
+  uint4 node1 = read_node(kg, offset);
 
-	stack_store_float(stack, node1.y, f);
+  stack_store_float(stack, node1.y, f);
 }
 
-ccl_device void svm_node_vector_math(KernelGlobals *kg, ShaderData *sd, float *stack, uint itype, uint v1_offset, uint v2_offset, int *offset)
+ccl_device void svm_node_vector_math(KernelGlobals *kg,
+                                     ShaderData *sd,
+                                     float *stack,
+                                     uint itype,
+                                     uint v1_offset,
+                                     uint v2_offset,
+                                     int *offset)
 {
-	NodeVectorMath type = (NodeVectorMath)itype;
-	float3 v1 = stack_load_float3(stack, v1_offset);
-	float3 v2 = stack_load_float3(stack, v2_offset);
-	float f;
-	float3 v;
+  NodeVectorMath type = (NodeVectorMath)itype;
+  float3 v1 = stack_load_float3(stack, v1_offset);
+  float3 v2 = stack_load_float3(stack, v2_offset);
+  float f;
+  float3 v;
 
-	svm_vector_math(&f, &v, type, v1, v2);
+  svm_vector_math(&f, &v, type, v1, v2);
 
-	uint4 node1 = read_node(kg, offset);
+  uint4 node1 = read_node(kg, offset);
 
-	if(stack_valid(node1.y)) stack_store_float(stack, node1.y, f);
-	if(stack_valid(node1.z)) stack_store_float3(stack, node1.z, v);
+  if (stack_valid(node1.y))
+    stack_store_float(stack, node1.y, f);
+  if (stack_valid(node1.z))
+    stack_store_float3(stack, node1.z, v);
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_math_util.h b/intern/cycles/kernel/svm/svm_math_util.h
index 669b174e4a3..e3544515f1b 100644
--- a/intern/cycles/kernel/svm/svm_math_util.h
+++ b/intern/cycles/kernel/svm/svm_math_util.h
@@ -18,96 +18,97 @@ CCL_NAMESPACE_BEGIN
 
 ccl_device float average_fac(float3 v)
 {
-	return (fabsf(v.x) + fabsf(v.y) + fabsf(v.z))/3.0f;
+  return (fabsf(v.x) + fabsf(v.y) + fabsf(v.z)) / 3.0f;
 }
 
-ccl_device void svm_vector_math(float *Fac, float3 *Vector, NodeVectorMath type, float3 Vector1, float3 Vector2)
+ccl_device void svm_vector_math(
+    float *Fac, float3 *Vector, NodeVectorMath type, float3 Vector1, float3 Vector2)
 {
-	if(type == NODE_VECTOR_MATH_ADD) {
-		*Vector = Vector1 + Vector2;
-		*Fac = average_fac(*Vector);
-	}
-	else if(type == NODE_VECTOR_MATH_SUBTRACT) {
-		*Vector = Vector1 - Vector2;
-		*Fac = average_fac(*Vector);
-	}
-	else if(type == NODE_VECTOR_MATH_AVERAGE) {
-		*Vector = safe_normalize_len(Vector1 + Vector2, Fac);
-	}
-	else if(type == NODE_VECTOR_MATH_DOT_PRODUCT) {
-		*Fac = dot(Vector1, Vector2);
-		*Vector = make_float3(0.0f, 0.0f, 0.0f);
-	}
-	else if(type == NODE_VECTOR_MATH_CROSS_PRODUCT) {
-		*Vector = safe_normalize_len(cross(Vector1, Vector2), Fac);
-	}
-	else if(type == NODE_VECTOR_MATH_NORMALIZE) {
-		*Vector = safe_normalize_len(Vector1, Fac);
-	}
-	else {
-		*Fac = 0.0f;
-		*Vector = make_float3(0.0f, 0.0f, 0.0f);
-	}
+  if (type == NODE_VECTOR_MATH_ADD) {
+    *Vector = Vector1 + Vector2;
+    *Fac = average_fac(*Vector);
+  }
+  else if (type == NODE_VECTOR_MATH_SUBTRACT) {
+    *Vector = Vector1 - Vector2;
+    *Fac = average_fac(*Vector);
+  }
+  else if (type == NODE_VECTOR_MATH_AVERAGE) {
+    *Vector = safe_normalize_len(Vector1 + Vector2, Fac);
+  }
+  else if (type == NODE_VECTOR_MATH_DOT_PRODUCT) {
+    *Fac = dot(Vector1, Vector2);
+    *Vector = make_float3(0.0f, 0.0f, 0.0f);
+  }
+  else if (type == NODE_VECTOR_MATH_CROSS_PRODUCT) {
+    *Vector = safe_normalize_len(cross(Vector1, Vector2), Fac);
+  }
+  else if (type == NODE_VECTOR_MATH_NORMALIZE) {
+    *Vector = safe_normalize_len(Vector1, Fac);
+  }
+  else {
+    *Fac = 0.0f;
+    *Vector = make_float3(0.0f, 0.0f, 0.0f);
+  }
 }
 
 ccl_device float svm_math(NodeMath type, float Fac1, float Fac2)
 {
-	float Fac;
-
-	if(type == NODE_MATH_ADD)
-		Fac = Fac1 + Fac2;
-	else if(type == NODE_MATH_SUBTRACT)
-		Fac = Fac1 - Fac2;
-	else if(type == NODE_MATH_MULTIPLY)
-		Fac = Fac1*Fac2;
-	else if(type == NODE_MATH_DIVIDE)
-		Fac = safe_divide(Fac1, Fac2);
-	else if(type == NODE_MATH_SINE)
-		Fac = sinf(Fac1);
-	else if(type == NODE_MATH_COSINE)
-		Fac = cosf(Fac1);
-	else if(type == NODE_MATH_TANGENT)
-		Fac = tanf(Fac1);
-	else if(type == NODE_MATH_ARCSINE)
-		Fac = safe_asinf(Fac1);
-	else if(type == NODE_MATH_ARCCOSINE)
-		Fac = safe_acosf(Fac1);
-	else if(type == NODE_MATH_ARCTANGENT)
-		Fac = atanf(Fac1);
-	else if(type == NODE_MATH_POWER)
-		Fac = safe_powf(Fac1, Fac2);
-	else if(type == NODE_MATH_LOGARITHM)
-		Fac = safe_logf(Fac1, Fac2);
-	else if(type == NODE_MATH_MINIMUM)
-		Fac = fminf(Fac1, Fac2);
-	else if(type == NODE_MATH_MAXIMUM)
-		Fac = fmaxf(Fac1, Fac2);
-	else if(type == NODE_MATH_ROUND)
-		Fac = floorf(Fac1 + 0.5f);
-	else if(type == NODE_MATH_LESS_THAN)
-		Fac = Fac1 < Fac2;
-	else if(type == NODE_MATH_GREATER_THAN)
-		Fac = Fac1 > Fac2;
-	else if(type == NODE_MATH_MODULO)
-		Fac = safe_modulo(Fac1, Fac2);
-	else if(type == NODE_MATH_ABSOLUTE)
-		Fac = fabsf(Fac1);
-	else if(type == NODE_MATH_ARCTAN2)
-		Fac = atan2f(Fac1, Fac2);
-	else if(type == NODE_MATH_FLOOR)
-		Fac = floorf(Fac1);
-	else if(type == NODE_MATH_CEIL)
-		Fac = ceilf(Fac1);
-	else if(type == NODE_MATH_FRACT)
-		Fac = Fac1 - floorf(Fac1);
-	else if(type == NODE_MATH_SQRT)
-		Fac = safe_sqrtf(Fac1);
-	else if(type == NODE_MATH_CLAMP)
-		Fac = saturate(Fac1);
-	else
-		Fac = 0.0f;
-
-	return Fac;
+  float Fac;
+
+  if (type == NODE_MATH_ADD)
+    Fac = Fac1 + Fac2;
+  else if (type == NODE_MATH_SUBTRACT)
+    Fac = Fac1 - Fac2;
+  else if (type == NODE_MATH_MULTIPLY)
+    Fac = Fac1 * Fac2;
+  else if (type == NODE_MATH_DIVIDE)
+    Fac = safe_divide(Fac1, Fac2);
+  else if (type == NODE_MATH_SINE)
+    Fac = sinf(Fac1);
+  else if (type == NODE_MATH_COSINE)
+    Fac = cosf(Fac1);
+  else if (type == NODE_MATH_TANGENT)
+    Fac = tanf(Fac1);
+  else if (type == NODE_MATH_ARCSINE)
+    Fac = safe_asinf(Fac1);
+  else if (type == NODE_MATH_ARCCOSINE)
+    Fac = safe_acosf(Fac1);
+  else if (type == NODE_MATH_ARCTANGENT)
+    Fac = atanf(Fac1);
+  else if (type == NODE_MATH_POWER)
+    Fac = safe_powf(Fac1, Fac2);
+  else if (type == NODE_MATH_LOGARITHM)
+    Fac = safe_logf(Fac1, Fac2);
+  else if (type == NODE_MATH_MINIMUM)
+    Fac = fminf(Fac1, Fac2);
+  else if (type == NODE_MATH_MAXIMUM)
+    Fac = fmaxf(Fac1, Fac2);
+  else if (type == NODE_MATH_ROUND)
+    Fac = floorf(Fac1 + 0.5f);
+  else if (type == NODE_MATH_LESS_THAN)
+    Fac = Fac1 < Fac2;
+  else if (type == NODE_MATH_GREATER_THAN)
+    Fac = Fac1 > Fac2;
+  else if (type == NODE_MATH_MODULO)
+    Fac = safe_modulo(Fac1, Fac2);
+  else if (type == NODE_MATH_ABSOLUTE)
+    Fac = fabsf(Fac1);
+  else if (type == NODE_MATH_ARCTAN2)
+    Fac = atan2f(Fac1, Fac2);
+  else if (type == NODE_MATH_FLOOR)
+    Fac = floorf(Fac1);
+  else if (type == NODE_MATH_CEIL)
+    Fac = ceilf(Fac1);
+  else if (type == NODE_MATH_FRACT)
+    Fac = Fac1 - floorf(Fac1);
+  else if (type == NODE_MATH_SQRT)
+    Fac = safe_sqrtf(Fac1);
+  else if (type == NODE_MATH_CLAMP)
+    Fac = saturate(Fac1);
+  else
+    Fac = 0.0f;
+
+  return Fac;
 }
 
 /* Calculate color in range 800..12000 using an approximation
@@ -117,74 +118,72 @@ ccl_device float svm_math(NodeMath type, float Fac1, float Fac2)
  */
 
 ccl_static_constant float blackbody_table_r[6][3] = {
-	{  2.52432244e+03f, -1.06185848e-03f, 3.11067539e+00f },
-	{  3.37763626e+03f, -4.34581697e-04f, 1.64843306e+00f },
-	{  4.10671449e+03f, -8.61949938e-05f, 6.41423749e-01f },
-	{  4.66849800e+03f,  2.85655028e-05f, 1.29075375e-01f },
-	{  4.60124770e+03f,  2.89727618e-05f, 1.48001316e-01f },
-	{  3.78765709e+03f,  9.36026367e-06f, 3.98995841e-01f },
+    {2.52432244e+03f, -1.06185848e-03f, 3.11067539e+00f},
+    {3.37763626e+03f, -4.34581697e-04f, 1.64843306e+00f},
+    {4.10671449e+03f, -8.61949938e-05f, 6.41423749e-01f},
+    {4.66849800e+03f, 2.85655028e-05f, 1.29075375e-01f},
+    {4.60124770e+03f, 2.89727618e-05f, 1.48001316e-01f},
+    {3.78765709e+03f, 9.36026367e-06f, 3.98995841e-01f},
 };
 
 ccl_static_constant float blackbody_table_g[6][3] = {
-	{ -7.50343014e+02f,  3.15679613e-04f, 4.73464526e-01f },
-	{ -1.00402363e+03f,  1.29189794e-04f, 9.08181524e-01f },
-	{ -1.22075471e+03f,  2.56245413e-05f, 1.20753416e+00f },
-	{ -1.42546105e+03f, -4.01730887e-05f, 1.44002695e+00f },
-	{ -1.18134453e+03f, -2.18913373e-05f, 1.30656109e+00f },
-	{ -5.00279505e+02f, -4.59745390e-06f, 1.09090465e+00f },
+    {-7.50343014e+02f, 3.15679613e-04f, 4.73464526e-01f},
+    {-1.00402363e+03f, 1.29189794e-04f, 9.08181524e-01f},
+    {-1.22075471e+03f, 2.56245413e-05f, 1.20753416e+00f},
+    {-1.42546105e+03f, -4.01730887e-05f, 1.44002695e+00f},
+    {-1.18134453e+03f, -2.18913373e-05f, 1.30656109e+00f},
+    {-5.00279505e+02f, -4.59745390e-06f, 1.09090465e+00f},
 };
 
 ccl_static_constant float blackbody_table_b[6][4] = {
-	{ 0.0f, 0.0f, 0.0f, 0.0f }, /* zeros should be optimized by compiler */
-	{ 0.0f, 0.0f, 0.0f, 0.0f },
-	{ 0.0f, 0.0f, 0.0f, 0.0f },
-	{ -2.02524603e-11f,  1.79435860e-07f, -2.60561875e-04f, -1.41761141e-02f },
-	{ -2.22463426e-13f, -1.55078698e-08f,  3.81675160e-04f, -7.30646033e-01f },
-	{  6.72595954e-13f, -2.73059993e-08f,  4.24068546e-04f, -7.52204323e-01f },
+    {0.0f, 0.0f, 0.0f, 0.0f}, /* zeros should be optimized by compiler */
+    {0.0f, 0.0f, 0.0f, 0.0f},
+    {0.0f, 0.0f, 0.0f, 0.0f},
+    {-2.02524603e-11f, 1.79435860e-07f, -2.60561875e-04f, -1.41761141e-02f},
+    {-2.22463426e-13f, -1.55078698e-08f, 3.81675160e-04f, -7.30646033e-01f},
+    {6.72595954e-13f, -2.73059993e-08f, 4.24068546e-04f, -7.52204323e-01f},
 };
 
-
 ccl_device float3 svm_math_blackbody_color(float t)
 {
-	/* TODO(lukas): Reimplement in XYZ. */
-
-	if(t >= 12000.0f) {
-		return make_float3(0.826270103f, 0.994478524f, 1.56626022f);
-	}
-	else if(t < 965.0f) {
-		/* For 800 <= t < 965 color does not change in OSL implementation, so keep color the same */
-		return make_float3(4.70366907f, 0.0f, 0.0f);
-	}
-
-	int i = (t >= 6365.0f)? 5:
-		(t >= 3315.0f)? 4:
-		(t >= 1902.0f)? 3:
-		(t >= 1449.0f)? 2:
-		(t >= 1167.0f)? 1: 0;
-
-	ccl_constant float *r = blackbody_table_r[i];
-	ccl_constant float *g = blackbody_table_g[i];
-	ccl_constant float *b = blackbody_table_b[i];
-
-	const float t_inv = 1.0f / t;
-	return make_float3(r[0] * t_inv + r[1] * t + r[2],
-	                   g[0] * t_inv + g[1] * t + g[2],
-	                   ((b[0] * t + b[1]) * t + b[2]) * t + b[3]);
+  /* TODO(lukas): Reimplement in XYZ. */
+
+  if (t >= 12000.0f) {
+    return make_float3(0.826270103f, 0.994478524f, 1.56626022f);
+  }
+  else if (t < 965.0f) {
+    /* For 800 <= t < 965 color does not change in OSL implementation, so keep color the same */
+    return make_float3(4.70366907f, 0.0f, 0.0f);
+  }
+
+  int i = (t >= 6365.0f) ?
+              5 :
+              (t >= 3315.0f) ? 4 :
+                               (t >= 1902.0f) ? 3 : (t >= 1449.0f) ? 2 : (t >= 1167.0f) ? 1 : 0;
+
+  ccl_constant float *r = blackbody_table_r[i];
+  ccl_constant float *g = blackbody_table_g[i];
+  ccl_constant float *b = blackbody_table_b[i];
+
+  const float t_inv = 1.0f / t;
+  return make_float3(r[0] * t_inv + r[1] * t + r[2],
+                     g[0] * t_inv + g[1] * t + g[2],
+                     ((b[0] * t + b[1]) * t + b[2]) * t + b[3]);
 }
 
 ccl_device_inline float3 svm_math_gamma_color(float3 color, float gamma)
 {
-	if(gamma == 0.0f)
-		return make_float3(1.0f, 1.0f, 1.0f);
+  if (gamma == 0.0f)
+    return make_float3(1.0f, 1.0f, 1.0f);
 
-	if(color.x > 0.0f)
-		color.x = powf(color.x, gamma);
-	if(color.y > 0.0f)
-		color.y = powf(color.y, gamma);
-	if(color.z > 0.0f)
-		color.z = powf(color.z, gamma);
+  if (color.x > 0.0f)
+    color.x = powf(color.x, gamma);
+  if (color.y > 0.0f)
+    color.y = powf(color.y, gamma);
+  if (color.z > 0.0f)
+    color.z = powf(color.z, gamma);
 
-	return color;
+  return color;
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_mix.h b/intern/cycles/kernel/svm/svm_mix.h
index 903a4dacebf..15114bfd5e4 100644
--- a/intern/cycles/kernel/svm/svm_mix.h
+++ b/intern/cycles/kernel/svm/svm_mix.h
@@ -18,17 +18,23 @@ CCL_NAMESPACE_BEGIN
 
 /* Node */
 
-ccl_device void svm_node_mix(KernelGlobals *kg, ShaderData *sd, float *stack, uint fac_offset, uint c1_offset, uint c2_offset, int *offset)
+ccl_device void svm_node_mix(KernelGlobals *kg,
+                             ShaderData *sd,
+                             float *stack,
+                             uint fac_offset,
+                             uint c1_offset,
+                             uint c2_offset,
+                             int *offset)
 {
-	/* read extra data */
-	uint4 node1 = read_node(kg, offset);
+  /* read extra data */
+  uint4 node1 = read_node(kg, offset);
 
-	float fac = stack_load_float(stack, fac_offset);
-	float3 c1 = stack_load_float3(stack, c1_offset);
-	float3 c2 = stack_load_float3(stack, c2_offset);
-	float3 result = svm_mix((NodeMix)node1.y, fac, c1, c2);
+  float fac = stack_load_float(stack, fac_offset);
+  float3 c1 = stack_load_float3(stack, c1_offset);
+  float3 c2 = stack_load_float3(stack, c2_offset);
+  float3 result = svm_mix((NodeMix)node1.y, fac, c1, c2);
 
-	stack_store_float3(stack, node1.z, result);
+  stack_store_float3(stack, node1.z, result);
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_musgrave.h b/intern/cycles/kernel/svm/svm_musgrave.h
index 5d9e12628ca..67fb5ca6241 100644
--- a/intern/cycles/kernel/svm/svm_musgrave.h
+++ b/intern/cycles/kernel/svm/svm_musgrave.h
@@ -27,23 +27,23 @@ CCL_NAMESPACE_BEGIN
 
 ccl_device_noinline float noise_musgrave_fBm(float3 p, float H, float lacunarity, float octaves)
 {
-	float rmd;
-	float value = 0.0f;
-	float pwr = 1.0f;
-	float pwHL = powf(lacunarity, -H);
-	int i;
-
-	for(i = 0; i < float_to_int(octaves); i++) {
-		value += snoise(p) * pwr;
-		pwr *= pwHL;
-		p *= lacunarity;
-	}
-
-	rmd = octaves - floorf(octaves);
-	if(rmd != 0.0f)
-		value += rmd * snoise(p) * pwr;
-
-	return value;
+  float rmd;
+  float value = 0.0f;
+  float pwr = 1.0f;
+  float pwHL = powf(lacunarity, -H);
+  int i;
+
+  for (i = 0; i < float_to_int(octaves); i++) {
+    value += snoise(p) * pwr;
+    pwr *= pwHL;
+    p *= lacunarity;
+  }
+
+  rmd = octaves - floorf(octaves);
+  if (rmd != 0.0f)
+    value += rmd * snoise(p) * pwr;
+
+  return value;
 }
 
 /* Musgrave Multifractal
@@ -53,25 +53,28 @@ ccl_device_noinline float noise_musgrave_fBm(float3 p, float H, float lacunarity
  * octaves: number of frequencies in the fBm
  */
 
-ccl_device_noinline float noise_musgrave_multi_fractal(float3 p, float H, float lacunarity, float octaves)
+ccl_device_noinline float noise_musgrave_multi_fractal(float3 p,
+                                                       float H,
+                                                       float lacunarity,
+                                                       float octaves)
 {
-	float rmd;
-	float value = 1.0f;
-	float pwr = 1.0f;
-	float pwHL = powf(lacunarity, -H);
-	int i;
-
-	for(i = 0; i < float_to_int(octaves); i++) {
-		value *= (pwr * snoise(p) + 1.0f);
-		pwr *= pwHL;
-		p *= lacunarity;
-	}
-
-	rmd = octaves - floorf(octaves);
-	if(rmd != 0.0f)
-		value *= (rmd * pwr * snoise(p) + 1.0f); /* correct? */
-
-	return value;
+  float rmd;
+  float value = 1.0f;
+  float pwr = 1.0f;
+  float pwHL = powf(lacunarity, -H);
+  int i;
+
+  for (i = 0; i < float_to_int(octaves); i++) {
+    value *= (pwr * snoise(p) + 1.0f);
+    pwr *= pwHL;
+    p *= lacunarity;
+  }
+
+  rmd = octaves - floorf(octaves);
+  if (rmd != 0.0f)
+    value *= (rmd * pwr * snoise(p) + 1.0f); /* correct? */
+
+  return value;
 }
 
 /* Musgrave Heterogeneous Terrain
@@ -82,31 +85,32 @@ ccl_device_noinline float noise_musgrave_multi_fractal(float3 p, float H, float
  * offset: raises the terrain from `sea level'
  */
 
-ccl_device_noinline float noise_musgrave_hetero_terrain(float3 p, float H, float lacunarity, float octaves, float offset)
+ccl_device_noinline float noise_musgrave_hetero_terrain(
+    float3 p, float H, float lacunarity, float octaves, float offset)
 {
-	float value, increment, rmd;
-	float pwHL = powf(lacunarity, -H);
-	float pwr = pwHL;
-	int i;
-
-	/* first unscaled octave of function; later octaves are scaled */
-	value = offset + snoise(p);
-	p *= lacunarity;
-
-	for(i = 1; i < float_to_int(octaves); i++) {
-		increment = (snoise(p) + offset) * pwr * value;
-		value += increment;
-		pwr *= pwHL;
-		p *= lacunarity;
-	}
-
-	rmd = octaves - floorf(octaves);
-	if(rmd != 0.0f) {
-		increment = (snoise(p) + offset) * pwr * value;
-		value += rmd * increment;
-	}
-
-	return value;
+  float value, increment, rmd;
+  float pwHL = powf(lacunarity, -H);
+  float pwr = pwHL;
+  int i;
+
+  /* first unscaled octave of function; later octaves are scaled */
+  value = offset + snoise(p);
+  p *= lacunarity;
+
+  for (i = 1; i < float_to_int(octaves); i++) {
+    increment = (snoise(p) + offset) * pwr * value;
+    value += increment;
+    pwr *= pwHL;
+    p *= lacunarity;
+  }
+
+  rmd = octaves - floorf(octaves);
+  if (rmd != 0.0f) {
+    increment = (snoise(p) + offset) * pwr * value;
+    value += rmd * increment;
+  }
+
+  return value;
 }
 
 /* Hybrid Additive/Multiplicative Multifractal Terrain
@@ -117,33 +121,34 @@ ccl_device_noinline float noise_musgrave_hetero_terrain(float3 p, float H, float
  * offset: raises the terrain from `sea level'
  */
 
-ccl_device_noinline float noise_musgrave_hybrid_multi_fractal(float3 p, float H, float lacunarity, float octaves, float offset, float gain)
+ccl_device_noinline float noise_musgrave_hybrid_multi_fractal(
+    float3 p, float H, float lacunarity, float octaves, float offset, float gain)
 {
-	float result, signal, weight, rmd;
-	float pwHL = powf(lacunarity, -H);
-	float pwr = pwHL;
-	int i;
-
-	result = snoise(p) + offset;
-	weight = gain * result;
-	p *= lacunarity;
-
-	for(i = 1; (weight > 0.001f) && (i < float_to_int(octaves)); i++) {
-		if(weight > 1.0f)
-			weight = 1.0f;
-
-		signal = (snoise(p) + offset) * pwr;
-		pwr *= pwHL;
-		result += weight * signal;
-		weight *= gain * signal;
-		p *= lacunarity;
-	}
-
-	rmd = octaves - floorf(octaves);
-	if(rmd != 0.0f)
-		result += rmd * ((snoise(p) + offset) * pwr);
-
-	return result;
+  float result, signal, weight, rmd;
+  float pwHL = powf(lacunarity, -H);
+  float pwr = pwHL;
+  int i;
+
+  result = snoise(p) + offset;
+  weight = gain * result;
+  p *= lacunarity;
+
+  for (i = 1; (weight > 0.001f) && (i < float_to_int(octaves)); i++) {
+    if (weight > 1.0f)
+      weight = 1.0f;
+
+    signal = (snoise(p) + offset) * pwr;
+    pwr *= pwHL;
+    result += weight * signal;
+    weight *= gain * signal;
+    p *= lacunarity;
+  }
+
+  rmd = octaves - floorf(octaves);
+  if (rmd != 0.0f)
+    result += rmd * ((snoise(p) + offset) * pwr);
+
+  return result;
 }
 
 /* Ridged Multifractal Terrain
@@ -154,81 +159,93 @@ ccl_device_noinline float noise_musgrave_hybrid_multi_fractal(float3 p, float H,
  * offset: raises the terrain from `sea level'
  */
 
-ccl_device_noinline float noise_musgrave_ridged_multi_fractal(float3 p, float H, float lacunarity, float octaves, float offset, float gain)
+ccl_device_noinline float noise_musgrave_ridged_multi_fractal(
+    float3 p, float H, float lacunarity, float octaves, float offset, float gain)
 {
-	float result, signal, weight;
-	float pwHL = powf(lacunarity, -H);
-	float pwr = pwHL;
-	int i;
-
-	signal = offset - fabsf(snoise(p));
-	signal *= signal;
-	result = signal;
-	weight = 1.0f;
-
-	for(i = 1; i < float_to_int(octaves); i++) {
-		p *= lacunarity;
-		weight = saturate(signal * gain);
-		signal = offset - fabsf(snoise(p));
-		signal *= signal;
-		signal *= weight;
-		result += signal * pwr;
-		pwr *= pwHL;
-	}
-
-	return result;
+  float result, signal, weight;
+  float pwHL = powf(lacunarity, -H);
+  float pwr = pwHL;
+  int i;
+
+  signal = offset - fabsf(snoise(p));
+  signal *= signal;
+  result = signal;
+  weight = 1.0f;
+
+  for (i = 1; i < float_to_int(octaves); i++) {
+    p *= lacunarity;
+    weight = saturate(signal * gain);
+    signal = offset - fabsf(snoise(p));
+    signal *= signal;
+    signal *= weight;
+    result += signal * pwr;
+    pwr *= pwHL;
+  }
+
+  return result;
 }
 
 /* Shader */
 
-ccl_device float svm_musgrave(NodeMusgraveType type, float dimension, float lacunarity, float octaves, float offset, float intensity, float gain, float3 p)
+ccl_device float svm_musgrave(NodeMusgraveType type,
+                              float dimension,
+                              float lacunarity,
+                              float octaves,
+                              float offset,
+                              float intensity,
+                              float gain,
+                              float3 p)
 {
-	if(type == NODE_MUSGRAVE_MULTIFRACTAL)
-		return intensity*noise_musgrave_multi_fractal(p, dimension, lacunarity, octaves);
-	else if(type == NODE_MUSGRAVE_FBM)
-		return intensity*noise_musgrave_fBm(p, dimension, lacunarity, octaves);
-	else if(type == NODE_MUSGRAVE_HYBRID_MULTIFRACTAL)
-		return intensity*noise_musgrave_hybrid_multi_fractal(p, dimension, lacunarity, octaves, offset, gain);
-	else if(type == NODE_MUSGRAVE_RIDGED_MULTIFRACTAL)
-		return intensity*noise_musgrave_ridged_multi_fractal(p, dimension, lacunarity, octaves, offset, gain);
-	else if(type == NODE_MUSGRAVE_HETERO_TERRAIN)
-		return intensity*noise_musgrave_hetero_terrain(p, dimension, lacunarity, octaves, offset);
-
-	return 0.0f;
+  if (type == NODE_MUSGRAVE_MULTIFRACTAL)
+    return intensity * noise_musgrave_multi_fractal(p, dimension, lacunarity, octaves);
+  else if (type == NODE_MUSGRAVE_FBM)
+    return intensity * noise_musgrave_fBm(p, dimension, lacunarity, octaves);
+  else if (type == NODE_MUSGRAVE_HYBRID_MULTIFRACTAL)
+    return intensity *
+           noise_musgrave_hybrid_multi_fractal(p, dimension, lacunarity, octaves, offset, gain);
+  else if (type == NODE_MUSGRAVE_RIDGED_MULTIFRACTAL)
+    return intensity *
+           noise_musgrave_ridged_multi_fractal(p, dimension, lacunarity, octaves, offset, gain);
+  else if (type == NODE_MUSGRAVE_HETERO_TERRAIN)
+    return intensity * noise_musgrave_hetero_terrain(p, dimension, lacunarity, octaves, offset);
+
+  return 0.0f;
 }
 
-ccl_device void svm_node_tex_musgrave(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset)
+ccl_device void svm_node_tex_musgrave(
+    KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset)
 {
-	uint4 node2 = read_node(kg, offset);
-	uint4 node3 = read_node(kg, offset);
-
-	uint type, co_offset, color_offset, fac_offset;
-	uint dimension_offset, lacunarity_offset, detail_offset, offset_offset;
-	uint gain_offset, scale_offset;
-
-	decode_node_uchar4(node.y, &type, &co_offset, &color_offset, &fac_offset);
-	decode_node_uchar4(node.z, &dimension_offset, &lacunarity_offset, &detail_offset, &offset_offset);
-	decode_node_uchar4(node.w, &gain_offset, &scale_offset, NULL, NULL);
-
-	float3 co = stack_load_float3(stack, co_offset);
-	float dimension = stack_load_float_default(stack, dimension_offset, node2.x);
-	float lacunarity = stack_load_float_default(stack, lacunarity_offset, node2.y);
-	float detail = stack_load_float_default(stack, detail_offset, node2.z);
-	float foffset = stack_load_float_default(stack, offset_offset, node2.w);
-	float gain = stack_load_float_default(stack, gain_offset, node3.x);
-	float scale = stack_load_float_default(stack, scale_offset, node3.y);
-
-	dimension = fmaxf(dimension, 1e-5f);
-	detail = clamp(detail, 0.0f, 16.0f);
-	lacunarity = fmaxf(lacunarity, 1e-5f);
-
-	float f = svm_musgrave((NodeMusgraveType)type,
-		dimension, lacunarity, detail, foffset, 1.0f, gain, co*scale);
-
-	if(stack_valid(fac_offset))
-		stack_store_float(stack, fac_offset, f);
-	if(stack_valid(color_offset))
-		stack_store_float3(stack, color_offset, make_float3(f, f, f));
+  uint4 node2 = read_node(kg, offset);
+  uint4 node3 = read_node(kg, offset);
+
+  uint type, co_offset, color_offset, fac_offset;
+  uint dimension_offset, lacunarity_offset, detail_offset, offset_offset;
+  uint gain_offset, scale_offset;
+
+  decode_node_uchar4(node.y, &type, &co_offset, &color_offset, &fac_offset);
+  decode_node_uchar4(
+      node.z, &dimension_offset, &lacunarity_offset, &detail_offset, &offset_offset);
+  decode_node_uchar4(node.w, &gain_offset, &scale_offset, NULL, NULL);
+
+  float3 co = stack_load_float3(stack, co_offset);
+  float dimension = stack_load_float_default(stack, dimension_offset, node2.x);
+  float lacunarity = stack_load_float_default(stack, lacunarity_offset, node2.y);
+  float detail = stack_load_float_default(stack, detail_offset, node2.z);
+  float foffset = stack_load_float_default(stack, offset_offset, node2.w);
+  float gain = stack_load_float_default(stack, gain_offset, node3.x);
+  float scale = stack_load_float_default(stack, scale_offset, node3.y);
+
+  dimension = fmaxf(dimension, 1e-5f);
+  detail = clamp(detail, 0.0f, 16.0f);
+  lacunarity = fmaxf(lacunarity, 1e-5f);
+
+  float f = svm_musgrave(
+      (NodeMusgraveType)type, dimension, lacunarity, detail, foffset, 1.0f, gain, co * scale);
+
+  if (stack_valid(fac_offset))
+    stack_store_float(stack, fac_offset, f);
+  if (stack_valid(color_offset))
+    stack_store_float3(stack, color_offset, make_float3(f, f, f));
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_noise.h b/intern/cycles/kernel/svm/svm_noise.h
index 8c425ecf326..322579ccfe3 100644
--- a/intern/cycles/kernel/svm/svm_noise.h
+++ b/intern/cycles/kernel/svm/svm_noise.h
@@ -33,280 +33,302 @@
 CCL_NAMESPACE_BEGIN
 
 #ifdef __KERNEL_SSE2__
-ccl_device_inline ssei quick_floor_sse(const ssef& x)
+ccl_device_inline ssei quick_floor_sse(const ssef &x)
 {
-	ssei b = truncatei(x);
-	ssei isneg = cast((x < ssef(0.0f)).m128);
-	return b + isneg; // unsaturated add 0xffffffff is the same as subtract -1
+  ssei b = truncatei(x);
+  ssei isneg = cast((x < ssef(0.0f)).m128);
+  return b + isneg;  // unsaturated add 0xffffffff is the same as subtract -1
 }
 #endif
 
 ccl_device uint hash(uint kx, uint ky, uint kz)
 {
-	// define some handy macros
-#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
-#define final(a,b,c) \
-{ \
-	c ^= b; c -= rot(b,14); \
-	a ^= c; a -= rot(c,11); \
-	b ^= a; b -= rot(a,25); \
-	c ^= b; c -= rot(b,16); \
-	a ^= c; a -= rot(c,4);  \
-	b ^= a; b -= rot(a,14); \
-	c ^= b; c -= rot(b,24); \
-}
-	// now hash the data!
-	uint a, b, c, len = 3;
-	a = b = c = 0xdeadbeef + (len << 2) + 13;
-
-	c += kz;
-	b += ky;
-	a += kx;
-	final(a, b, c);
-
-	return c;
-	// macros not needed anymore
+  // define some handy macros
+#define rot(x, k) (((x) << (k)) | ((x) >> (32 - (k))))
+#define final(a, b, c) \
+  { \
+    c ^= b; \
+    c -= rot(b, 14); \
+    a ^= c; \
+    a -= rot(c, 11); \
+    b ^= a; \
+    b -= rot(a, 25); \
+    c ^= b; \
+    c -= rot(b, 16); \
+    a ^= c; \
+    a -= rot(c, 4); \
+    b ^= a; \
+    b -= rot(a, 14); \
+    c ^= b; \
+    c -= rot(b, 24); \
+  }
+  // now hash the data!
+  uint a, b, c, len = 3;
+  a = b = c = 0xdeadbeef + (len << 2) + 13;
+
+  c += kz;
+  b += ky;
+  a += kx;
+  final(a, b, c);
+
+  return c;
+  // macros not needed anymore
 #undef rot
 #undef final
 }
 
 #ifdef __KERNEL_SSE2__
-ccl_device_inline ssei hash_sse(const ssei& kx, const ssei& ky, const ssei& kz)
+ccl_device_inline ssei hash_sse(const ssei &kx, const ssei &ky, const ssei &kz)
 {
-#  define rot(x,k) (((x)<<(k)) | (srl(x, 32-(k))))
-#  define xor_rot(a, b, c) do {a = a^b; a = a - rot(b, c);} while(0)
-
-	uint len = 3;
-	ssei magic = ssei(0xdeadbeef + (len << 2) + 13);
-	ssei a = magic + kx;
-	ssei b = magic + ky;
-	ssei c = magic + kz;
-
-	xor_rot(c, b, 14);
-	xor_rot(a, c, 11);
-	xor_rot(b, a, 25);
-	xor_rot(c, b, 16);
-	xor_rot(a, c, 4);
-	xor_rot(b, a, 14);
-	xor_rot(c, b, 24);
-
-	return c;
+#  define rot(x, k) (((x) << (k)) | (srl(x, 32 - (k))))
+#  define xor_rot(a, b, c) \
+    do { \
+      a = a ^ b; \
+      a = a - rot(b, c); \
+    } while (0)
+
+  uint len = 3;
+  ssei magic = ssei(0xdeadbeef + (len << 2) + 13);
+  ssei a = magic + kx;
+  ssei b = magic + ky;
+  ssei c = magic + kz;
+
+  xor_rot(c, b, 14);
+  xor_rot(a, c, 11);
+  xor_rot(b, a, 25);
+  xor_rot(c, b, 16);
+  xor_rot(a, c, 4);
+  xor_rot(b, a, 14);
+  xor_rot(c, b, 24);
+
+  return c;
 #  undef rot
 #  undef xor_rot
 }
 #endif
 
-#if 0 // unused
+#if 0  // unused
 ccl_device int imod(int a, int b)
 {
-	a %= b;
-	return a < 0 ? a + b : a;
+  a %= b;
+  return a < 0 ? a + b : a;
 }
 
 ccl_device uint phash(int kx, int ky, int kz, int3 p)
 {
-	return hash(imod(kx, p.x), imod(ky, p.y), imod(kz, p.z));
+  return hash(imod(kx, p.x), imod(ky, p.y), imod(kz, p.z));
 }
 #endif
 
 #ifndef __KERNEL_SSE2__
-ccl_device float floorfrac(float x, int* i)
+ccl_device float floorfrac(float x, int *i)
 {
-	*i = quick_floor_to_int(x);
-	return x - *i;
+  *i = quick_floor_to_int(x);
+  return x - *i;
 }
 #else
-ccl_device_inline ssef floorfrac_sse(const ssef& x, ssei *i)
+ccl_device_inline ssef floorfrac_sse(const ssef &x, ssei *i)
 {
-	*i = quick_floor_sse(x);
-	return x - ssef(*i);
+  *i = quick_floor_sse(x);
+  return x - ssef(*i);
 }
 #endif
 
 #ifndef __KERNEL_SSE2__
 ccl_device float fade(float t)
 {
-	return t * t * t * (t * (t * 6.0f - 15.0f) + 10.0f);
+  return t * t * t * (t * (t * 6.0f - 15.0f) + 10.0f);
 }
 #else
 ccl_device_inline ssef fade_sse(const ssef *t)
 {
-	ssef a = madd(*t, ssef(6.0f), ssef(-15.0f));
-	ssef b = madd(*t, a, ssef(10.0f));
-	return ((*t) * (*t)) * ((*t) * b);
+  ssef a = madd(*t, ssef(6.0f), ssef(-15.0f));
+  ssef b = madd(*t, a, ssef(10.0f));
+  return ((*t) * (*t)) * ((*t) * b);
 }
 #endif
 
 #ifndef __KERNEL_SSE2__
 ccl_device float nerp(float t, float a, float b)
 {
-	return (1.0f - t) * a + t * b;
+  return (1.0f - t) * a + t * b;
 }
 #else
-ccl_device_inline ssef nerp_sse(const ssef& t, const ssef& a, const ssef& b)
+ccl_device_inline ssef nerp_sse(const ssef &t, const ssef &a, const ssef &b)
 {
-	ssef x1 = (ssef(1.0f) - t) * a;
-	return madd(t, b, x1);
+  ssef x1 = (ssef(1.0f) - t) * a;
+  return madd(t, b, x1);
 }
 #endif
 
 #ifndef __KERNEL_SSE2__
 ccl_device float grad(int hash, float x, float y, float z)
 {
-	// use vectors pointing to the edges of the cube
-	int h = hash & 15;
-	float u = h<8 ? x : y;
-	float vt = ((h == 12) | (h == 14)) ? x : z;
-	float v = h < 4 ? y : vt;
-	return ((h&1) ? -u : u) + ((h&2) ? -v : v);
+  // use vectors pointing to the edges of the cube
+  int h = hash & 15;
+  float u = h < 8 ? x : y;
+  float vt = ((h == 12) | (h == 14)) ? x : z;
+  float v = h < 4 ? y : vt;
+  return ((h & 1) ? -u : u) + ((h & 2) ? -v : v);
 }
 #else
-ccl_device_inline ssef grad_sse(const ssei& hash, const ssef& x, const ssef& y, const ssef& z)
+ccl_device_inline ssef grad_sse(const ssei &hash, const ssef &x, const ssef &y, const ssef &z)
 {
-	ssei c1 = ssei(1);
-	ssei c2 = ssei(2);
+  ssei c1 = ssei(1);
+  ssei c2 = ssei(2);
 
-	ssei h = hash & ssei(15);                             // h = hash & 15
+  ssei h = hash & ssei(15);  // h = hash & 15
 
-	sseb case_ux = h < ssei(8);                           // 0xffffffff if h < 8 else 0
+  sseb case_ux = h < ssei(8);  // 0xffffffff if h < 8 else 0
 
-	ssef u = select(case_ux, x, y);                       // u = h<8 ? x : y
+  ssef u = select(case_ux, x, y);  // u = h<8 ? x : y
 
-	sseb case_vy = h < ssei(4);                           // 0xffffffff if h < 4 else 0
+  sseb case_vy = h < ssei(4);  // 0xffffffff if h < 4 else 0
 
-	sseb case_h12 = h == ssei(12);                        // 0xffffffff if h == 12 else 0
-	sseb case_h14 = h == ssei(14);                        // 0xffffffff if h == 14 else 0
+  sseb case_h12 = h == ssei(12);  // 0xffffffff if h == 12 else 0
+  sseb case_h14 = h == ssei(14);  // 0xffffffff if h == 14 else 0
 
-	sseb case_vx = case_h12 | case_h14;                   // 0xffffffff if h == 12 or h == 14 else 0
+  sseb case_vx = case_h12 | case_h14;  // 0xffffffff if h == 12 or h == 14 else 0
 
-	ssef v = select(case_vy, y, select(case_vx, x, z));   // v = h<4 ? y : h == 12 || h == 14 ? x : z
+  ssef v = select(case_vy, y, select(case_vx, x, z));  // v = h<4 ? y : h == 12 || h == 14 ? x : z
 
-	ssei case_uneg = (h & c1) << 31;                      // 1<<31 if h&1 else 0
-	ssef case_uneg_mask = cast(case_uneg);                // -0.0 if h&1 else +0.0
-	ssef ru = u ^ case_uneg_mask;                         // -u if h&1 else u (copy float sign)
+  ssei case_uneg = (h & c1) << 31;        // 1<<31 if h&1 else 0
+  ssef case_uneg_mask = cast(case_uneg);  // -0.0 if h&1 else +0.0
+  ssef ru = u ^ case_uneg_mask;           // -u if h&1 else u (copy float sign)
 
-	ssei case_vneg = (h & c2) << 30;                      // 2<<30 if h&2 else 0
-	ssef case_vneg_mask = cast(case_vneg);                // -0.0 if h&2 else +0.0
-	ssef rv = v ^ case_vneg_mask;                         // -v if h&2 else v (copy float sign)
+  ssei case_vneg = (h & c2) << 30;        // 2<<30 if h&2 else 0
+  ssef case_vneg_mask = cast(case_vneg);  // -0.0 if h&2 else +0.0
+  ssef rv = v ^ case_vneg_mask;           // -v if h&2 else v (copy float sign)
 
-	ssef r = ru + rv;                                     // ((h&1) ? -u : u) + ((h&2) ? -v : v)
-	return r;
+  ssef r = ru + rv;  // ((h&1) ? -u : u) + ((h&2) ? -v : v)
+  return r;
 }
 #endif
 
 #ifndef __KERNEL_SSE2__
 ccl_device float scale3(float result)
 {
-	return 0.9820f * result;
+  return 0.9820f * result;
 }
 #else
-ccl_device_inline ssef scale3_sse(const ssef& result)
+ccl_device_inline ssef scale3_sse(const ssef &result)
 {
-	return ssef(0.9820f) * result;
+  return ssef(0.9820f) * result;
 }
 #endif
 
 #ifndef __KERNEL_SSE2__
 ccl_device_noinline float perlin(float x, float y, float z)
 {
-	int X; float fx = floorfrac(x, &X);
-	int Y; float fy = floorfrac(y, &Y);
-	int Z; float fz = floorfrac(z, &Z);
-
-	float u = fade(fx);
-	float v = fade(fy);
-	float w = fade(fz);
-
-	float result;
-
-	result = nerp (w, nerp (v, nerp (u, grad (hash (X  , Y  , Z  ), fx	 , fy	 , fz	  ),
-										grad (hash (X+1, Y  , Z  ), fx-1.0f, fy	 , fz	  )),
-							   nerp (u, grad (hash (X  , Y+1, Z  ), fx	 , fy-1.0f, fz	  ),
-										grad (hash (X+1, Y+1, Z  ), fx-1.0f, fy-1.0f, fz	  ))),
-					  nerp (v, nerp (u, grad (hash (X  , Y  , Z+1), fx	 , fy	 , fz-1.0f ),
-										grad (hash (X+1, Y  , Z+1), fx-1.0f, fy	 , fz-1.0f )),
-							   nerp (u, grad (hash (X  , Y+1, Z+1), fx	 , fy-1.0f, fz-1.0f ),
-										grad (hash (X+1, Y+1, Z+1), fx-1.0f, fy-1.0f, fz-1.0f ))));
-	float r = scale3(result);
-
-	/* can happen for big coordinates, things even out to 0.0 then anyway */
-	return (isfinite(r))? r: 0.0f;
+  int X;
+  float fx = floorfrac(x, &X);
+  int Y;
+  float fy = floorfrac(y, &Y);
+  int Z;
+  float fz = floorfrac(z, &Z);
+
+  float u = fade(fx);
+  float v = fade(fy);
+  float w = fade(fz);
+
+  float result;
+
+  result = nerp(
+      w,
+      nerp(v,
+           nerp(u, grad(hash(X, Y, Z), fx, fy, fz), grad(hash(X + 1, Y, Z), fx - 1.0f, fy, fz)),
+           nerp(u,
+                grad(hash(X, Y + 1, Z), fx, fy - 1.0f, fz),
+                grad(hash(X + 1, Y + 1, Z), fx - 1.0f, fy - 1.0f, fz))),
+      nerp(v,
+           nerp(u,
+                grad(hash(X, Y, Z + 1), fx, fy, fz - 1.0f),
+                grad(hash(X + 1, Y, Z + 1), fx - 1.0f, fy, fz - 1.0f)),
+           nerp(u,
+                grad(hash(X, Y + 1, Z + 1), fx, fy - 1.0f, fz - 1.0f),
+                grad(hash(X + 1, Y + 1, Z + 1), fx - 1.0f, fy - 1.0f, fz - 1.0f))));
+  float r = scale3(result);
+
+  /* can happen for big coordinates, things even out to 0.0 then anyway */
+  return (isfinite(r)) ? r : 0.0f;
 }
 #else
 ccl_device_noinline float perlin(float x, float y, float z)
 {
-	ssef xyz = ssef(x, y, z, 0.0f);
-	ssei XYZ;
+  ssef xyz = ssef(x, y, z, 0.0f);
+  ssei XYZ;
 
-	ssef fxyz = floorfrac_sse(xyz, &XYZ);
+  ssef fxyz = floorfrac_sse(xyz, &XYZ);
 
-	ssef uvw = fade_sse(&fxyz);
-	ssef u = shuffle<0>(uvw), v = shuffle<1>(uvw), w = shuffle<2>(uvw);
+  ssef uvw = fade_sse(&fxyz);
+  ssef u = shuffle<0>(uvw), v = shuffle<1>(uvw), w = shuffle<2>(uvw);
 
-	ssei XYZ_ofc = XYZ + ssei(1);
-	ssei vdy = shuffle<1, 1, 1, 1>(XYZ, XYZ_ofc);                      // +0, +0, +1, +1
-	ssei vdz = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(XYZ, XYZ_ofc)); // +0, +1, +0, +1
+  ssei XYZ_ofc = XYZ + ssei(1);
+  ssei vdy = shuffle<1, 1, 1, 1>(XYZ, XYZ_ofc);                       // +0, +0, +1, +1
+  ssei vdz = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(XYZ, XYZ_ofc));  // +0, +1, +0, +1
 
-	ssei h1 = hash_sse(shuffle<0>(XYZ),     vdy, vdz);               // hash directions 000, 001, 010, 011
-	ssei h2 = hash_sse(shuffle<0>(XYZ_ofc), vdy, vdz);               // hash directions 100, 101, 110, 111
+  ssei h1 = hash_sse(shuffle<0>(XYZ), vdy, vdz);      // hash directions 000, 001, 010, 011
+  ssei h2 = hash_sse(shuffle<0>(XYZ_ofc), vdy, vdz);  // hash directions 100, 101, 110, 111
 
-	ssef fxyz_ofc = fxyz - ssef(1.0f);
-	ssef vfy = shuffle<1, 1, 1, 1>(fxyz, fxyz_ofc);
-	ssef vfz = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(fxyz, fxyz_ofc));
+  ssef fxyz_ofc = fxyz - ssef(1.0f);
+  ssef vfy = shuffle<1, 1, 1, 1>(fxyz, fxyz_ofc);
+  ssef vfz = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(fxyz, fxyz_ofc));
 
-	ssef g1 = grad_sse(h1, shuffle<0>(fxyz),     vfy, vfz);
-	ssef g2 = grad_sse(h2, shuffle<0>(fxyz_ofc), vfy, vfz);
-	ssef n1 = nerp_sse(u, g1, g2);
+  ssef g1 = grad_sse(h1, shuffle<0>(fxyz), vfy, vfz);
+  ssef g2 = grad_sse(h2, shuffle<0>(fxyz_ofc), vfy, vfz);
+  ssef n1 = nerp_sse(u, g1, g2);
 
-	ssef n1_half = shuffle<2, 3, 2, 3>(n1);      // extract 2 floats to a separate vector
-	ssef n2 = nerp_sse(v, n1, n1_half);          // process nerp([a, b, _, _], [c, d, _, _]) -> [a', b', _, _]
+  ssef n1_half = shuffle<2, 3, 2, 3>(n1);  // extract 2 floats to a separate vector
+  ssef n2 = nerp_sse(
+      v, n1, n1_half);  // process nerp([a, b, _, _], [c, d, _, _]) -> [a', b', _, _]
 
-	ssef n2_second = shuffle<1>(n2);           // extract b to a separate vector
-	ssef result = nerp_sse(w, n2, n2_second);    // process nerp([a', _, _, _], [b', _, _, _]) -> [a'', _, _, _]
+  ssef n2_second = shuffle<1>(n2);  // extract b to a separate vector
+  ssef result = nerp_sse(
+      w, n2, n2_second);  // process nerp([a', _, _, _], [b', _, _, _]) -> [a'', _, _, _]
 
-	ssef r = scale3_sse(result);
+  ssef r = scale3_sse(result);
 
-	ssef infmask = cast(ssei(0x7f800000));
-	ssef rinfmask = ((r & infmask) == infmask).m128; // 0xffffffff if r is inf/-inf/nan else 0
-	ssef rfinite = andnot(rinfmask, r);              // 0 if r is inf/-inf/nan else r
-	return extract<0>(rfinite);
+  ssef infmask = cast(ssei(0x7f800000));
+  ssef rinfmask = ((r & infmask) == infmask).m128;  // 0xffffffff if r is inf/-inf/nan else 0
+  ssef rfinite = andnot(rinfmask, r);               // 0 if r is inf/-inf/nan else r
+  return extract<0>(rfinite);
 }
 #endif
 
 /* perlin noise in range 0..1 */
 ccl_device float noise(float3 p)
 {
-	float r = perlin(p.x, p.y, p.z);
-	return 0.5f*r + 0.5f;
+  float r = perlin(p.x, p.y, p.z);
+  return 0.5f * r + 0.5f;
 }
 
 /* perlin noise in range -1..1 */
 ccl_device float snoise(float3 p)
 {
-	return perlin(p.x, p.y, p.z);
+  return perlin(p.x, p.y, p.z);
 }
 
 /* cell noise */
 ccl_device float cellnoise(float3 p)
 {
-	int3 ip = quick_floor_to_int3(p);
-	return bits_to_01(hash(ip.x, ip.y, ip.z));
+  int3 ip = quick_floor_to_int3(p);
+  return bits_to_01(hash(ip.x, ip.y, ip.z));
 }
 
 ccl_device float3 cellnoise3(float3 p)
 {
-	int3 ip = quick_floor_to_int3(p);
+  int3 ip = quick_floor_to_int3(p);
 #ifndef __KERNEL_SSE__
-	float r = bits_to_01(hash(ip.x, ip.y, ip.z));
-	float g = bits_to_01(hash(ip.y, ip.x, ip.z));
-	float b = bits_to_01(hash(ip.y, ip.z, ip.x));
-	return make_float3(r, g, b);
+  float r = bits_to_01(hash(ip.x, ip.y, ip.z));
+  float g = bits_to_01(hash(ip.y, ip.x, ip.z));
+  float b = bits_to_01(hash(ip.y, ip.z, ip.x));
+  return make_float3(r, g, b);
 #else
-	ssei ip_yxz = shuffle<1, 0, 2, 3>(ssei(ip.m128));
-	ssei ip_xyy = shuffle<0, 1, 1, 3>(ssei(ip.m128));
-	ssei ip_zzx = shuffle<2, 2, 0, 3>(ssei(ip.m128));
-	ssei bits = hash_sse(ip_xyy, ip_yxz, ip_zzx);
-	return float3(uint32_to_float(bits) * ssef(1.0f/(float)0xFFFFFFFF));
+  ssei ip_yxz = shuffle<1, 0, 2, 3>(ssei(ip.m128));
+  ssei ip_xyy = shuffle<0, 1, 1, 3>(ssei(ip.m128));
+  ssei ip_zzx = shuffle<2, 2, 0, 3>(ssei(ip.m128));
+  ssei bits = hash_sse(ip_xyy, ip_yxz, ip_zzx);
+  return float3(uint32_to_float(bits) * ssef(1.0f / (float)0xFFFFFFFF));
 #endif
 }
 
diff --git a/intern/cycles/kernel/svm/svm_noisetex.h b/intern/cycles/kernel/svm/svm_noisetex.h
index c02940f96d6..3324e86fcd8 100644
--- a/intern/cycles/kernel/svm/svm_noisetex.h
+++ b/intern/cycles/kernel/svm/svm_noisetex.h
@@ -18,42 +18,43 @@ CCL_NAMESPACE_BEGIN
 
 /* Noise */
 
-ccl_device void svm_node_tex_noise(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset)
+ccl_device void svm_node_tex_noise(
+    KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset)
 {
-	uint co_offset, scale_offset, detail_offset, distortion_offset, fac_offset, color_offset;
+  uint co_offset, scale_offset, detail_offset, distortion_offset, fac_offset, color_offset;
 
-	decode_node_uchar4(node.y, &co_offset, &scale_offset, &detail_offset, &distortion_offset);
-	decode_node_uchar4(node.z, &color_offset, &fac_offset, NULL, NULL);
+  decode_node_uchar4(node.y, &co_offset, &scale_offset, &detail_offset, &distortion_offset);
+  decode_node_uchar4(node.z, &color_offset, &fac_offset, NULL, NULL);
 
-	uint4 node2 = read_node(kg, offset);
+  uint4 node2 = read_node(kg, offset);
 
-	float scale = stack_load_float_default(stack, scale_offset, node2.x);
-	float detail = stack_load_float_default(stack, detail_offset, node2.y);
-	float distortion = stack_load_float_default(stack, distortion_offset, node2.z);
-	float3 p = stack_load_float3(stack, co_offset) * scale;
-	int hard = 0;
+  float scale = stack_load_float_default(stack, scale_offset, node2.x);
+  float detail = stack_load_float_default(stack, detail_offset, node2.y);
+  float distortion = stack_load_float_default(stack, distortion_offset, node2.z);
+  float3 p = stack_load_float3(stack, co_offset) * scale;
+  int hard = 0;
 
-	if(distortion != 0.0f) {
-		float3 r, offset = make_float3(13.5f, 13.5f, 13.5f);
+  if (distortion != 0.0f) {
+    float3 r, offset = make_float3(13.5f, 13.5f, 13.5f);
 
-		r.x = noise(p + offset) * distortion;
-		r.y = noise(p) * distortion;
-		r.z = noise(p - offset) * distortion;
+    r.x = noise(p + offset) * distortion;
+    r.y = noise(p) * distortion;
+    r.z = noise(p - offset) * distortion;
 
-		p += r;
-	}
+    p += r;
+  }
 
-	float f = noise_turbulence(p, detail, hard);
+  float f = noise_turbulence(p, detail, hard);
 
-	if(stack_valid(fac_offset)) {
-		stack_store_float(stack, fac_offset, f);
-	}
-	if(stack_valid(color_offset)) {
-		float3 color = make_float3(f,
-			noise_turbulence(make_float3(p.y, p.x, p.z), detail, hard),
-			noise_turbulence(make_float3(p.y, p.z, p.x), detail, hard));
-		stack_store_float3(stack, color_offset, color);
-	}
+  if (stack_valid(fac_offset)) {
+    stack_store_float(stack, fac_offset, f);
+  }
+  if (stack_valid(color_offset)) {
+    float3 color = make_float3(f,
+                               noise_turbulence(make_float3(p.y, p.x, p.z), detail, hard),
+                               noise_turbulence(make_float3(p.y, p.z, p.x), detail, hard));
+    stack_store_float3(stack, color_offset, color);
+  }
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_normal.h b/intern/cycles/kernel/svm/svm_normal.h
index fe46d79fe15..4cd3eab0ed2 100644
--- a/intern/cycles/kernel/svm/svm_normal.h
+++ b/intern/cycles/kernel/svm/svm_normal.h
@@ -16,23 +16,29 @@
 
 CCL_NAMESPACE_BEGIN
 
-ccl_device void svm_node_normal(KernelGlobals *kg, ShaderData *sd, float *stack, uint in_normal_offset, uint out_normal_offset, uint out_dot_offset, int *offset)
+ccl_device void svm_node_normal(KernelGlobals *kg,
+                                ShaderData *sd,
+                                float *stack,
+                                uint in_normal_offset,
+                                uint out_normal_offset,
+                                uint out_dot_offset,
+                                int *offset)
 {
-	/* read extra data */
-	uint4 node1 = read_node(kg, offset);
-	float3 normal = stack_load_float3(stack, in_normal_offset);
+  /* read extra data */
+  uint4 node1 = read_node(kg, offset);
+  float3 normal = stack_load_float3(stack, in_normal_offset);
 
-	float3 direction;
-	direction.x = __int_as_float(node1.x);
-	direction.y = __int_as_float(node1.y);
-	direction.z = __int_as_float(node1.z);
-	direction = normalize(direction);
+  float3 direction;
+  direction.x = __int_as_float(node1.x);
+  direction.y = __int_as_float(node1.y);
+  direction.z = __int_as_float(node1.z);
+  direction = normalize(direction);
 
-	if(stack_valid(out_normal_offset))
-		stack_store_float3(stack, out_normal_offset, direction);
+  if (stack_valid(out_normal_offset))
+    stack_store_float3(stack, out_normal_offset, direction);
 
-	if(stack_valid(out_dot_offset))
-		stack_store_float(stack, out_dot_offset, dot(direction, normalize(normal)));
+  if (stack_valid(out_dot_offset))
+    stack_store_float(stack, out_dot_offset, dot(direction, normalize(normal)));
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_ramp.h b/intern/cycles/kernel/svm/svm_ramp.h
index 6f39391057e..6084ee35a1f 100644
--- a/intern/cycles/kernel/svm/svm_ramp.h
+++ b/intern/cycles/kernel/svm/svm_ramp.h
@@ -21,91 +21,84 @@ CCL_NAMESPACE_BEGIN
 
 /* NOTE: svm_ramp.h, svm_ramp_util.h and node_ramp_util.h must stay consistent */
 
-ccl_device_inline float4 rgb_ramp_lookup(KernelGlobals *kg,
-                                         int offset,
-                                         float f,
-                                         bool interpolate,
-                                         bool extrapolate,
-                                         int table_size)
+ccl_device_inline float4 rgb_ramp_lookup(
+    KernelGlobals *kg, int offset, float f, bool interpolate, bool extrapolate, int table_size)
 {
-	if((f < 0.0f || f > 1.0f) && extrapolate) {
-		float4 t0, dy;
-		if(f < 0.0f) {
-			t0 = fetch_node_float(kg, offset);
-			dy = t0 - fetch_node_float(kg, offset + 1);
-			f = -f;
-		}
-		else {
-			t0 = fetch_node_float(kg, offset + table_size - 1);
-			dy = t0 - fetch_node_float(kg, offset + table_size - 2);
-			f = f - 1.0f;
-		}
-		return t0 + dy * f * (table_size-1);
-	}
-
-	f = saturate(f)*(table_size-1);
-
-	/* clamp int as well in case of NaN */
-	int i = clamp(float_to_int(f), 0, table_size-1);
-	float t = f - (float)i;
-
-	float4 a = fetch_node_float(kg, offset+i);
-
-	if(interpolate && t > 0.0f)
-		a = (1.0f - t)*a + t*fetch_node_float(kg, offset+i+1);
-
-	return a;
+  if ((f < 0.0f || f > 1.0f) && extrapolate) {
+    float4 t0, dy;
+    if (f < 0.0f) {
+      t0 = fetch_node_float(kg, offset);
+      dy = t0 - fetch_node_float(kg, offset + 1);
+      f = -f;
+    }
+    else {
+      t0 = fetch_node_float(kg, offset + table_size - 1);
+      dy = t0 - fetch_node_float(kg, offset + table_size - 2);
+      f = f - 1.0f;
+    }
+    return t0 + dy * f * (table_size - 1);
+  }
+
+  f = saturate(f) * (table_size - 1);
+
+  /* clamp int as well in case of NaN */
+  int i = clamp(float_to_int(f), 0, table_size - 1);
+  float t = f - (float)i;
+
+  float4 a = fetch_node_float(kg, offset + i);
+
+  if (interpolate && t > 0.0f)
+    a = (1.0f - t) * a + t * fetch_node_float(kg, offset + i + 1);
+
+  return a;
 }
 
-ccl_device void svm_node_rgb_ramp(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset)
+ccl_device void svm_node_rgb_ramp(
+    KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset)
 {
-	uint fac_offset, color_offset, alpha_offset;
-	uint interpolate = node.z;
+  uint fac_offset, color_offset, alpha_offset;
+  uint interpolate = node.z;
 
-	decode_node_uchar4(node.y, &fac_offset, &color_offset, &alpha_offset, NULL);
+  decode_node_uchar4(node.y, &fac_offset, &color_offset, &alpha_offset, NULL);
 
-	uint table_size = read_node(kg, offset).x;
+  uint table_size = read_node(kg, offset).x;
 
-	float fac = stack_load_float(stack, fac_offset);
-	float4 color = rgb_ramp_lookup(kg, *offset, fac, interpolate, false, table_size);
+  float fac = stack_load_float(stack, fac_offset);
+  float4 color = rgb_ramp_lookup(kg, *offset, fac, interpolate, false, table_size);
 
-	if(stack_valid(color_offset))
-		stack_store_float3(stack, color_offset, float4_to_float3(color));
-	if(stack_valid(alpha_offset))
-		stack_store_float(stack, alpha_offset, color.w);
+  if (stack_valid(color_offset))
+    stack_store_float3(stack, color_offset, float4_to_float3(color));
+  if (stack_valid(alpha_offset))
+    stack_store_float(stack, alpha_offset, color.w);
 
-	*offset += table_size;
+  *offset += table_size;
 }
 
-ccl_device void svm_node_curves(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset)
+ccl_device void svm_node_curves(
+    KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset)
 {
-	uint fac_offset, color_offset, out_offset;
-	decode_node_uchar4(node.y,
-	                   &fac_offset,
-	                   &color_offset,
-	                   &out_offset,
-	                   NULL);
+  uint fac_offset, color_offset, out_offset;
+  decode_node_uchar4(node.y, &fac_offset, &color_offset, &out_offset, NULL);
 
-	uint table_size = read_node(kg, offset).x;
+  uint table_size = read_node(kg, offset).x;
 
-	float fac = stack_load_float(stack, fac_offset);
-	float3 color = stack_load_float3(stack, color_offset);
+  float fac = stack_load_float(stack, fac_offset);
+  float3 color = stack_load_float3(stack, color_offset);
 
-	const float min_x = __int_as_float(node.z),
-	            max_x = __int_as_float(node.w);
-	const float range_x = max_x - min_x;
-	const float3 relpos = (color - make_float3(min_x, min_x, min_x)) / range_x;
+  const float min_x = __int_as_float(node.z), max_x = __int_as_float(node.w);
+  const float range_x = max_x - min_x;
+  const float3 relpos = (color - make_float3(min_x, min_x, min_x)) / range_x;
 
-	float r = rgb_ramp_lookup(kg, *offset, relpos.x, true, true, table_size).x;
-	float g = rgb_ramp_lookup(kg, *offset, relpos.y, true, true, table_size).y;
-	float b = rgb_ramp_lookup(kg, *offset, relpos.z, true, true, table_size).z;
+  float r = rgb_ramp_lookup(kg, *offset, relpos.x, true, true, table_size).x;
+  float g = rgb_ramp_lookup(kg, *offset, relpos.y, true, true, table_size).y;
+  float b = rgb_ramp_lookup(kg, *offset, relpos.z, true, true, table_size).z;
 
-	color = (1.0f - fac)*color + fac*make_float3(r, g, b);
-	stack_store_float3(stack, out_offset, color);
+  color = (1.0f - fac) * color + fac * make_float3(r, g, b);
+  stack_store_float3(stack, out_offset, color);
 
-	*offset += table_size;
+  *offset += table_size;
 }
 
 CCL_NAMESPACE_END
 
-#endif  /* __SVM_RAMP_H__ */
+#endif /* __SVM_RAMP_H__ */
diff --git a/intern/cycles/kernel/svm/svm_ramp_util.h b/intern/cycles/kernel/svm/svm_ramp_util.h
index 847108ff1c2..202596c1fe3 100644
--- a/intern/cycles/kernel/svm/svm_ramp_util.h
+++ b/intern/cycles/kernel/svm/svm_ramp_util.h
@@ -21,78 +21,70 @@ CCL_NAMESPACE_BEGIN
 
 /* NOTE: svm_ramp.h, svm_ramp_util.h and node_ramp_util.h must stay consistent */
 
-ccl_device_inline float3 rgb_ramp_lookup(const float3 *ramp,
-                                         float f,
-                                         bool interpolate,
-                                         bool extrapolate,
-                                         int table_size)
+ccl_device_inline float3
+rgb_ramp_lookup(const float3 *ramp, float f, bool interpolate, bool extrapolate, int table_size)
 {
-	if((f < 0.0f || f > 1.0f) && extrapolate) {
-		float3 t0, dy;
-		if(f < 0.0f) {
-			t0 = ramp[0];
-			dy = t0 - ramp[1],
-			f = -f;
-		}
-		else {
-			t0 = ramp[table_size - 1];
-			dy = t0 - ramp[table_size - 2];
-			f = f - 1.0f;
-		}
-		return t0 + dy * f * (table_size - 1);
-	}
-
-	f = clamp(f, 0.0f, 1.0f) * (table_size - 1);
-
-	/* clamp int as well in case of NaN */
-	int i = clamp(float_to_int(f), 0, table_size-1);
-	float t = f - (float)i;
-
-	float3 result = ramp[i];
-
-	if(interpolate && t > 0.0f) {
-		result = (1.0f - t) * result + t * ramp[i + 1];
-	}
-
-	return result;
+  if ((f < 0.0f || f > 1.0f) && extrapolate) {
+    float3 t0, dy;
+    if (f < 0.0f) {
+      t0 = ramp[0];
+      dy = t0 - ramp[1], f = -f;
+    }
+    else {
+      t0 = ramp[table_size - 1];
+      dy = t0 - ramp[table_size - 2];
+      f = f - 1.0f;
+    }
+    return t0 + dy * f * (table_size - 1);
+  }
+
+  f = clamp(f, 0.0f, 1.0f) * (table_size - 1);
+
+  /* clamp int as well in case of NaN */
+  int i = clamp(float_to_int(f), 0, table_size - 1);
+  float t = f - (float)i;
+
+  float3 result = ramp[i];
+
+  if (interpolate && t > 0.0f) {
+    result = (1.0f - t) * result + t * ramp[i + 1];
+  }
+
+  return result;
 }
 
-ccl_device float float_ramp_lookup(const float *ramp,
-                                   float f,
-                                   bool interpolate,
-                                   bool extrapolate,
-                                   int table_size)
+ccl_device float float_ramp_lookup(
+    const float *ramp, float f, bool interpolate, bool extrapolate, int table_size)
 {
-	if((f < 0.0f || f > 1.0f) && extrapolate) {
-		float t0, dy;
-		if(f < 0.0f) {
-			t0 = ramp[0];
-			dy = t0 - ramp[1],
-			f = -f;
-		}
-		else {
-			t0 = ramp[table_size - 1];
-			dy = t0 - ramp[table_size - 2];
-			f = f - 1.0f;
-		}
-		return t0 + dy * f * (table_size - 1);
-	}
-
-	f = clamp(f, 0.0f, 1.0f) * (table_size - 1);
-
-	/* clamp int as well in case of NaN */
-	int i = clamp(float_to_int(f), 0, table_size-1);
-	float t = f - (float)i;
-
-	float result = ramp[i];
-
-	if(interpolate && t > 0.0f) {
-		result = (1.0f - t) * result + t * ramp[i + 1];
-	}
-
-	return result;
+  if ((f < 0.0f || f > 1.0f) && extrapolate) {
+    float t0, dy;
+    if (f < 0.0f) {
+      t0 = ramp[0];
+      dy = t0 - ramp[1], f = -f;
+    }
+    else {
+      t0 = ramp[table_size - 1];
+      dy = t0 - ramp[table_size - 2];
+      f = f - 1.0f;
+    }
+    return t0 + dy * f * (table_size - 1);
+  }
+
+  f = clamp(f, 0.0f, 1.0f) * (table_size - 1);
+
+  /* clamp int as well in case of NaN */
+  int i = clamp(float_to_int(f), 0, table_size - 1);
+  float t = f - (float)i;
+
+  float result = ramp[i];
+
+  if (interpolate && t > 0.0f) {
+    result = (1.0f - t) * result + t * ramp[i + 1];
+  }
+
+  return result;
 }
 
 CCL_NAMESPACE_END
 
-#endif  /* __SVM_RAMP_UTIL_H__ */
+#endif /* __SVM_RAMP_UTIL_H__ */
diff --git a/intern/cycles/kernel/svm/svm_sepcomb_hsv.h b/intern/cycles/kernel/svm/svm_sepcomb_hsv.h
index 1096aed2d97..f501252062e 100644
--- a/intern/cycles/kernel/svm/svm_sepcomb_hsv.h
+++ b/intern/cycles/kernel/svm/svm_sepcomb_hsv.h
@@ -16,38 +16,50 @@
 
 CCL_NAMESPACE_BEGIN
 
-ccl_device void svm_node_combine_hsv(KernelGlobals *kg, ShaderData *sd, float *stack, uint hue_in, uint saturation_in, uint value_in, int *offset)
+ccl_device void svm_node_combine_hsv(KernelGlobals *kg,
+                                     ShaderData *sd,
+                                     float *stack,
+                                     uint hue_in,
+                                     uint saturation_in,
+                                     uint value_in,
+                                     int *offset)
 {
-	uint4 node1 = read_node(kg, offset);
-	uint color_out = node1.y;
+  uint4 node1 = read_node(kg, offset);
+  uint color_out = node1.y;
 
-	float hue = stack_load_float(stack, hue_in);
-	float saturation = stack_load_float(stack, saturation_in);
-	float value = stack_load_float(stack, value_in);
+  float hue = stack_load_float(stack, hue_in);
+  float saturation = stack_load_float(stack, saturation_in);
+  float value = stack_load_float(stack, value_in);
 
-	/* Combine, and convert back to RGB */
-	float3 color = hsv_to_rgb(make_float3(hue, saturation, value));
+  /* Combine, and convert back to RGB */
+  float3 color = hsv_to_rgb(make_float3(hue, saturation, value));
 
-	if(stack_valid(color_out))
-		stack_store_float3(stack, color_out, color);
+  if (stack_valid(color_out))
+    stack_store_float3(stack, color_out, color);
 }
 
-ccl_device void svm_node_separate_hsv(KernelGlobals *kg, ShaderData *sd, float *stack, uint color_in, uint hue_out, uint saturation_out, int *offset)
+ccl_device void svm_node_separate_hsv(KernelGlobals *kg,
+                                      ShaderData *sd,
+                                      float *stack,
+                                      uint color_in,
+                                      uint hue_out,
+                                      uint saturation_out,
+                                      int *offset)
 {
-	uint4 node1 = read_node(kg, offset);
-	uint value_out = node1.y;
+  uint4 node1 = read_node(kg, offset);
+  uint value_out = node1.y;
 
-	float3 color = stack_load_float3(stack, color_in);
+  float3 color = stack_load_float3(stack, color_in);
 
-	/* Convert to HSV */
-	color = rgb_to_hsv(color);
+  /* Convert to HSV */
+  color = rgb_to_hsv(color);
 
-	if(stack_valid(hue_out))
-		stack_store_float(stack, hue_out, color.x);
-	if(stack_valid(saturation_out))
-		stack_store_float(stack, saturation_out, color.y);
-	if(stack_valid(value_out))
-		stack_store_float(stack, value_out, color.z);
+  if (stack_valid(hue_out))
+    stack_store_float(stack, hue_out, color.x);
+  if (stack_valid(saturation_out))
+    stack_store_float(stack, saturation_out, color.y);
+  if (stack_valid(value_out))
+    stack_store_float(stack, value_out, color.z);
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_sepcomb_vector.h b/intern/cycles/kernel/svm/svm_sepcomb_vector.h
index 0d85c0d6f1d..cbf77f1e640 100644
--- a/intern/cycles/kernel/svm/svm_sepcomb_vector.h
+++ b/intern/cycles/kernel/svm/svm_sepcomb_vector.h
@@ -18,26 +18,28 @@ CCL_NAMESPACE_BEGIN
 
 /* Vector combine / separate, used for the RGB and XYZ nodes */
 
-ccl_device void svm_node_combine_vector(ShaderData *sd, float *stack, uint in_offset, uint vector_index, uint out_offset)
+ccl_device void svm_node_combine_vector(
+    ShaderData *sd, float *stack, uint in_offset, uint vector_index, uint out_offset)
 {
-	float vector = stack_load_float(stack, in_offset);
+  float vector = stack_load_float(stack, in_offset);
 
-	if(stack_valid(out_offset))
-		stack_store_float(stack, out_offset+vector_index, vector);
+  if (stack_valid(out_offset))
+    stack_store_float(stack, out_offset + vector_index, vector);
 }
 
-ccl_device void svm_node_separate_vector(ShaderData *sd, float *stack, uint ivector_offset, uint vector_index, uint out_offset)
+ccl_device void svm_node_separate_vector(
+    ShaderData *sd, float *stack, uint ivector_offset, uint vector_index, uint out_offset)
 {
-	float3 vector = stack_load_float3(stack, ivector_offset);
+  float3 vector = stack_load_float3(stack, ivector_offset);
 
-	if(stack_valid(out_offset)) {
-		if(vector_index == 0)
-			stack_store_float(stack, out_offset, vector.x);
-		else if(vector_index == 1)
-			stack_store_float(stack, out_offset, vector.y);
-		else
-			stack_store_float(stack, out_offset, vector.z);
-	}
+  if (stack_valid(out_offset)) {
+    if (vector_index == 0)
+      stack_store_float(stack, out_offset, vector.x);
+    else if (vector_index == 1)
+      stack_store_float(stack, out_offset, vector.y);
+    else
+      stack_store_float(stack, out_offset, vector.z);
+  }
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_sky.h b/intern/cycles/kernel/svm/svm_sky.h
index 092f6e045d6..50fe0c8232f 100644
--- a/intern/cycles/kernel/svm/svm_sky.h
+++ b/intern/cycles/kernel/svm/svm_sky.h
@@ -20,8 +20,8 @@ CCL_NAMESPACE_BEGIN
 
 ccl_device float sky_angle_between(float thetav, float phiv, float theta, float phi)
 {
-	float cospsi = sinf(thetav)*sinf(theta)*cosf(phi - phiv) + cosf(thetav)*cosf(theta);
-	return safe_acosf(cospsi);
+  float cospsi = sinf(thetav) * sinf(theta) * cosf(phi - phiv) + cosf(thetav) * cosf(theta);
+  return safe_acosf(cospsi);
 }
 
 /*
@@ -30,36 +30,43 @@ ccl_device float sky_angle_between(float thetav, float phiv, float theta, float
  */
 ccl_device float sky_perez_function(float *lam, float theta, float gamma)
 {
-	float ctheta = cosf(theta);
-	float cgamma = cosf(gamma);
+  float ctheta = cosf(theta);
+  float cgamma = cosf(gamma);
 
-	return (1.0f + lam[0]*expf(lam[1]/ctheta)) * (1.0f + lam[2]*expf(lam[3]*gamma)  + lam[4]*cgamma*cgamma);
+  return (1.0f + lam[0] * expf(lam[1] / ctheta)) *
+         (1.0f + lam[2] * expf(lam[3] * gamma) + lam[4] * cgamma * cgamma);
 }
 
-ccl_device float3 sky_radiance_old(KernelGlobals *kg, float3 dir,
-                                 float sunphi, float suntheta,
-                                 float radiance_x, float radiance_y, float radiance_z,
-                                 float *config_x, float *config_y, float *config_z)
+ccl_device float3 sky_radiance_old(KernelGlobals *kg,
+                                   float3 dir,
+                                   float sunphi,
+                                   float suntheta,
+                                   float radiance_x,
+                                   float radiance_y,
+                                   float radiance_z,
+                                   float *config_x,
+                                   float *config_y,
+                                   float *config_z)
 {
-	/* convert vector to spherical coordinates */
-	float2 spherical = direction_to_spherical(dir);
-	float theta = spherical.x;
-	float phi = spherical.y;
+  /* convert vector to spherical coordinates */
+  float2 spherical = direction_to_spherical(dir);
+  float theta = spherical.x;
+  float phi = spherical.y;
 
-	/* angle between sun direction and dir */
-	float gamma = sky_angle_between(theta, phi, suntheta, sunphi);
+  /* angle between sun direction and dir */
+  float gamma = sky_angle_between(theta, phi, suntheta, sunphi);
 
-	/* clamp theta to horizon */
-	theta = min(theta, M_PI_2_F - 0.001f);
+  /* clamp theta to horizon */
+  theta = min(theta, M_PI_2_F - 0.001f);
 
-	/* compute xyY color space values */
-	float x = radiance_y * sky_perez_function(config_y, theta, gamma);
-	float y = radiance_z * sky_perez_function(config_z, theta, gamma);
-	float Y = radiance_x * sky_perez_function(config_x, theta, gamma);
+  /* compute xyY color space values */
+  float x = radiance_y * sky_perez_function(config_y, theta, gamma);
+  float y = radiance_z * sky_perez_function(config_z, theta, gamma);
+  float Y = radiance_x * sky_perez_function(config_x, theta, gamma);
 
-	/* convert to RGB */
-	float3 xyz = xyY_to_xyz(x, y, Y);
-	return xyz_to_rgb(kg, xyz);
+  /* convert to RGB */
+  float3 xyz = xyY_to_xyz(x, y, Y);
+  return xyz_to_rgb(kg, xyz);
 }
 
 /*
@@ -68,118 +75,142 @@ ccl_device float3 sky_radiance_old(KernelGlobals *kg, float3 dir,
  */
 ccl_device float sky_radiance_internal(float *configuration, float theta, float gamma)
 {
-	float ctheta = cosf(theta);
-	float cgamma = cosf(gamma);
-
-	float expM = expf(configuration[4] * gamma);
-	float rayM = cgamma * cgamma;
-	float mieM = (1.0f + rayM) / powf((1.0f + configuration[8]*configuration[8] - 2.0f*configuration[8]*cgamma), 1.5f);
-	float zenith = sqrtf(ctheta);
-
-	return (1.0f + configuration[0] * expf(configuration[1] / (ctheta + 0.01f))) *
-		(configuration[2] + configuration[3] * expM + configuration[5] * rayM + configuration[6] * mieM + configuration[7] * zenith);
+  float ctheta = cosf(theta);
+  float cgamma = cosf(gamma);
+
+  float expM = expf(configuration[4] * gamma);
+  float rayM = cgamma * cgamma;
+  float mieM = (1.0f + rayM) / powf((1.0f + configuration[8] * configuration[8] -
+                                     2.0f * configuration[8] * cgamma),
+                                    1.5f);
+  float zenith = sqrtf(ctheta);
+
+  return (1.0f + configuration[0] * expf(configuration[1] / (ctheta + 0.01f))) *
+         (configuration[2] + configuration[3] * expM + configuration[5] * rayM +
+          configuration[6] * mieM + configuration[7] * zenith);
 }
 
-ccl_device float3 sky_radiance_new(KernelGlobals *kg, float3 dir,
-                                 float sunphi, float suntheta,
-                                 float radiance_x, float radiance_y, float radiance_z,
-                                 float *config_x, float *config_y, float *config_z)
+ccl_device float3 sky_radiance_new(KernelGlobals *kg,
+                                   float3 dir,
+                                   float sunphi,
+                                   float suntheta,
+                                   float radiance_x,
+                                   float radiance_y,
+                                   float radiance_z,
+                                   float *config_x,
+                                   float *config_y,
+                                   float *config_z)
 {
-	/* convert vector to spherical coordinates */
-	float2 spherical = direction_to_spherical(dir);
-	float theta = spherical.x;
-	float phi = spherical.y;
+  /* convert vector to spherical coordinates */
+  float2 spherical = direction_to_spherical(dir);
+  float theta = spherical.x;
+  float phi = spherical.y;
 
-	/* angle between sun direction and dir */
-	float gamma = sky_angle_between(theta, phi, suntheta, sunphi);
+  /* angle between sun direction and dir */
+  float gamma = sky_angle_between(theta, phi, suntheta, sunphi);
 
-	/* clamp theta to horizon */
-	theta = min(theta, M_PI_2_F - 0.001f);
+  /* clamp theta to horizon */
+  theta = min(theta, M_PI_2_F - 0.001f);
 
-	/* compute xyz color space values */
-	float x = sky_radiance_internal(config_x, theta, gamma) * radiance_x;
-	float y = sky_radiance_internal(config_y, theta, gamma) * radiance_y;
-	float z = sky_radiance_internal(config_z, theta, gamma) * radiance_z;
+  /* compute xyz color space values */
+  float x = sky_radiance_internal(config_x, theta, gamma) * radiance_x;
+  float y = sky_radiance_internal(config_y, theta, gamma) * radiance_y;
+  float z = sky_radiance_internal(config_z, theta, gamma) * radiance_z;
 
-	/* convert to RGB and adjust strength */
-	return xyz_to_rgb(kg, make_float3(x, y, z)) * (M_2PI_F/683);
+  /* convert to RGB and adjust strength */
+  return xyz_to_rgb(kg, make_float3(x, y, z)) * (M_2PI_F / 683);
 }
 
-ccl_device void svm_node_tex_sky(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset)
+ccl_device void svm_node_tex_sky(
+    KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset)
 {
-	/* Define variables */
-	float sunphi, suntheta, radiance_x, radiance_y, radiance_z;
-	float config_x[9], config_y[9], config_z[9];
-
-	/* Load data */
-	uint dir_offset = node.y;
-	uint out_offset = node.z;
-	int sky_model = node.w;
-
-	float4 data = read_node_float(kg, offset);
-	sunphi = data.x;
-	suntheta = data.y;
-	radiance_x = data.z;
-	radiance_y = data.w;
-
-	data = read_node_float(kg, offset);
-	radiance_z = data.x;
-	config_x[0] = data.y;
-	config_x[1] = data.z;
-	config_x[2] = data.w;
-
-	data = read_node_float(kg, offset);
-	config_x[3] = data.x;
-	config_x[4] = data.y;
-	config_x[5] = data.z;
-	config_x[6] = data.w;
-
-	data = read_node_float(kg, offset);
-	config_x[7] = data.x;
-	config_x[8] = data.y;
-	config_y[0] = data.z;
-	config_y[1] = data.w;
-
-	data = read_node_float(kg, offset);
-	config_y[2] = data.x;
-	config_y[3] = data.y;
-	config_y[4] = data.z;
-	config_y[5] = data.w;
-
-	data = read_node_float(kg, offset);
-	config_y[6] = data.x;
-	config_y[7] = data.y;
-	config_y[8] = data.z;
-	config_z[0] = data.w;
-
-	data = read_node_float(kg, offset);
-	config_z[1] = data.x;
-	config_z[2] = data.y;
-	config_z[3] = data.z;
-	config_z[4] = data.w;
-
-	data = read_node_float(kg, offset);
-	config_z[5] = data.x;
-	config_z[6] = data.y;
-	config_z[7] = data.z;
-	config_z[8] = data.w;
-
-	float3 dir = stack_load_float3(stack, dir_offset);
-	float3 f;
-
-	/* Compute Sky */
-	if(sky_model == 0) {
-		f = sky_radiance_old(kg, dir, sunphi, suntheta,
-	                             radiance_x, radiance_y, radiance_z,
-	                             config_x, config_y, config_z);
-	}
-	else {
-		f = sky_radiance_new(kg, dir, sunphi, suntheta,
-	                             radiance_x, radiance_y, radiance_z,
-	                             config_x, config_y, config_z);
-	}
-
-	stack_store_float3(stack, out_offset, f);
+  /* Define variables */
+  float sunphi, suntheta, radiance_x, radiance_y, radiance_z;
+  float config_x[9], config_y[9], config_z[9];
+
+  /* Load data */
+  uint dir_offset = node.y;
+  uint out_offset = node.z;
+  int sky_model = node.w;
+
+  float4 data = read_node_float(kg, offset);
+  sunphi = data.x;
+  suntheta = data.y;
+  radiance_x = data.z;
+  radiance_y = data.w;
+
+  data = read_node_float(kg, offset);
+  radiance_z = data.x;
+  config_x[0] = data.y;
+  config_x[1] = data.z;
+  config_x[2] = data.w;
+
+  data = read_node_float(kg, offset);
+  config_x[3] = data.x;
+  config_x[4] = data.y;
+  config_x[5] = data.z;
+  config_x[6] = data.w;
+
+  data = read_node_float(kg, offset);
+  config_x[7] = data.x;
+  config_x[8] = data.y;
+  config_y[0] = data.z;
+  config_y[1] = data.w;
+
+  data = read_node_float(kg, offset);
+  config_y[2] = data.x;
+  config_y[3] = data.y;
+  config_y[4] = data.z;
+  config_y[5] = data.w;
+
+  data = read_node_float(kg, offset);
+  config_y[6] = data.x;
+  config_y[7] = data.y;
+  config_y[8] = data.z;
+  config_z[0] = data.w;
+
+  data = read_node_float(kg, offset);
+  config_z[1] = data.x;
+  config_z[2] = data.y;
+  config_z[3] = data.z;
+  config_z[4] = data.w;
+
+  data = read_node_float(kg, offset);
+  config_z[5] = data.x;
+  config_z[6] = data.y;
+  config_z[7] = data.z;
+  config_z[8] = data.w;
+
+  float3 dir = stack_load_float3(stack, dir_offset);
+  float3 f;
+
+  /* Compute Sky */
+  if (sky_model == 0) {
+    f = sky_radiance_old(kg,
+                         dir,
+                         sunphi,
+                         suntheta,
+                         radiance_x,
+                         radiance_y,
+                         radiance_z,
+                         config_x,
+                         config_y,
+                         config_z);
+  }
+  else {
+    f = sky_radiance_new(kg,
+                         dir,
+                         sunphi,
+                         suntheta,
+                         radiance_x,
+                         radiance_y,
+                         radiance_z,
+                         config_x,
+                         config_y,
+                         config_z);
+  }
+
+  stack_store_float3(stack, out_offset, f);
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_tex_coord.h b/intern/cycles/kernel/svm/svm_tex_coord.h
index fe61292d0b0..1fb3e20f9e0 100644
--- a/intern/cycles/kernel/svm/svm_tex_coord.h
+++ b/intern/cycles/kernel/svm/svm_tex_coord.h
@@ -18,390 +18,381 @@ CCL_NAMESPACE_BEGIN
 
 /* Texture Coordinate Node */
 
-ccl_device void svm_node_tex_coord(KernelGlobals *kg,
-                                   ShaderData *sd,
-                                   int path_flag,
-                                   float *stack,
-                                   uint4 node,
-                                   int *offset)
+ccl_device void svm_node_tex_coord(
+    KernelGlobals *kg, ShaderData *sd, int path_flag, float *stack, uint4 node, int *offset)
 {
-	float3 data;
-	uint type = node.y;
-	uint out_offset = node.z;
-
-	switch(type) {
-		case NODE_TEXCO_OBJECT: {
-			data = sd->P;
-			if(node.w == 0) {
-				if(sd->object != OBJECT_NONE) {
-					object_inverse_position_transform(kg, sd, &data);
-				}
-			}
-			else {
-				Transform tfm;
-				tfm.x = read_node_float(kg, offset);
-				tfm.y = read_node_float(kg, offset);
-				tfm.z = read_node_float(kg, offset);
-				data = transform_point(&tfm, data);
-			}
-			break;
-		}
-		case NODE_TEXCO_NORMAL: {
-			data = sd->N;
-			object_inverse_normal_transform(kg, sd, &data);
-			break;
-		}
-		case NODE_TEXCO_CAMERA: {
-			Transform tfm = kernel_data.cam.worldtocamera;
-
-			if(sd->object != OBJECT_NONE)
-				data = transform_point(&tfm, sd->P);
-			else
-				data = transform_point(&tfm, sd->P + camera_position(kg));
-			break;
-		}
-		case NODE_TEXCO_WINDOW: {
-			if((path_flag & PATH_RAY_CAMERA) && sd->object == OBJECT_NONE && kernel_data.cam.type == CAMERA_ORTHOGRAPHIC)
-				data = camera_world_to_ndc(kg, sd, sd->ray_P);
-			else
-				data = camera_world_to_ndc(kg, sd, sd->P);
-			data.z = 0.0f;
-			break;
-		}
-		case NODE_TEXCO_REFLECTION: {
-			if(sd->object != OBJECT_NONE)
-				data = 2.0f*dot(sd->N, sd->I)*sd->N - sd->I;
-			else
-				data = sd->I;
-			break;
-		}
-		case NODE_TEXCO_DUPLI_GENERATED: {
-			data = object_dupli_generated(kg, sd->object);
-			break;
-		}
-		case NODE_TEXCO_DUPLI_UV: {
-			data = object_dupli_uv(kg, sd->object);
-			break;
-		}
-		case NODE_TEXCO_VOLUME_GENERATED: {
-			data = sd->P;
+  float3 data;
+  uint type = node.y; /* selects one of the NODE_TEXCO_* cases below */
+  uint out_offset = node.z; /* stack offset the resulting float3 is stored at */
+
+  switch (type) {
+    case NODE_TEXCO_OBJECT: {
+      data = sd->P;
+      if (node.w == 0) { /* no transform stored with the node: use sd->object */
+        if (sd->object != OBJECT_NONE) {
+          object_inverse_position_transform(kg, sd, &data);
+        }
+      }
+      else {
+        Transform tfm; /* filled from the next three node reads */
+        tfm.x = read_node_float(kg, offset);
+        tfm.y = read_node_float(kg, offset);
+        tfm.z = read_node_float(kg, offset);
+        data = transform_point(&tfm, data);
+      }
+      break;
+    }
+    case NODE_TEXCO_NORMAL: {
+      data = sd->N;
+      object_inverse_normal_transform(kg, sd, &data);
+      break;
+    }
+    case NODE_TEXCO_CAMERA: {
+      Transform tfm = kernel_data.cam.worldtocamera;
+
+      if (sd->object != OBJECT_NONE)
+        data = transform_point(&tfm, sd->P);
+      else /* no object: sd->P is offset by the camera position first */
+        data = transform_point(&tfm, sd->P + camera_position(kg));
+      break;
+    }
+    case NODE_TEXCO_WINDOW: {
+      if ((path_flag & PATH_RAY_CAMERA) && sd->object == OBJECT_NONE &&
+          kernel_data.cam.type == CAMERA_ORTHOGRAPHIC)
+        data = camera_world_to_ndc(kg, sd, sd->ray_P); /* sd->ray_P instead of sd->P */
+      else
+        data = camera_world_to_ndc(kg, sd, sd->P);
+      data.z = 0.0f; /* only x and y are kept */
+      break;
+    }
+    case NODE_TEXCO_REFLECTION: {
+      if (sd->object != OBJECT_NONE)
+        data = 2.0f * dot(sd->N, sd->I) * sd->N - sd->I; /* sd->I mirrored about unit sd->N */
+      else
+        data = sd->I;
+      break;
+    }
+    case NODE_TEXCO_DUPLI_GENERATED: {
+      data = object_dupli_generated(kg, sd->object);
+      break;
+    }
+    case NODE_TEXCO_DUPLI_UV: {
+      data = object_dupli_uv(kg, sd->object);
+      break;
+    }
+    case NODE_TEXCO_VOLUME_GENERATED: {
+      data = sd->P; /* used unchanged unless the __VOLUME__ path remaps it */
 
 #ifdef __VOLUME__
-			if(sd->object != OBJECT_NONE)
-				data = volume_normalized_position(kg, sd, data);
+      if (sd->object != OBJECT_NONE)
+        data = volume_normalized_position(kg, sd, data);
 #endif
-			break;
-		}
-	}
+      break;
+    }
+  }
 
-	stack_store_float3(stack, out_offset, data);
+  stack_store_float3(stack, out_offset, data);
 }
 
-ccl_device void svm_node_tex_coord_bump_dx(KernelGlobals *kg,
-                                           ShaderData *sd,
-                                           int path_flag,
-                                           float *stack,
-                                           uint4 node,
-                                           int *offset)
+ccl_device void svm_node_tex_coord_bump_dx(
+    KernelGlobals *kg, ShaderData *sd, int path_flag, float *stack, uint4 node, int *offset)
 {
 #ifdef __RAY_DIFFERENTIALS__
-	float3 data;
-	uint type = node.y;
-	uint out_offset = node.z;
-
-	switch(type) {
-		case NODE_TEXCO_OBJECT: {
-			data = sd->P + sd->dP.dx;
-			if(node.w == 0) {
-				if(sd->object != OBJECT_NONE) {
-					object_inverse_position_transform(kg, sd, &data);
-				}
-			}
-			else {
-				Transform tfm;
-				tfm.x = read_node_float(kg, offset);
-				tfm.y = read_node_float(kg, offset);
-				tfm.z = read_node_float(kg, offset);
-				data = transform_point(&tfm, data);
-			}
-			break;
-		}
-		case NODE_TEXCO_NORMAL: {
-			data = sd->N;
-			object_inverse_normal_transform(kg, sd, &data);
-			break;
-		}
-		case NODE_TEXCO_CAMERA: {
-			Transform tfm = kernel_data.cam.worldtocamera;
-
-			if(sd->object != OBJECT_NONE)
-				data = transform_point(&tfm, sd->P + sd->dP.dx);
-			else
-				data = transform_point(&tfm, sd->P + sd->dP.dx + camera_position(kg));
-			break;
-		}
-		case NODE_TEXCO_WINDOW: {
-			if((path_flag & PATH_RAY_CAMERA) && sd->object == OBJECT_NONE && kernel_data.cam.type == CAMERA_ORTHOGRAPHIC)
-				data = camera_world_to_ndc(kg, sd, sd->ray_P + sd->ray_dP.dx);
-			else
-				data = camera_world_to_ndc(kg, sd, sd->P + sd->dP.dx);
-			data.z = 0.0f;
-			break;
-		}
-		case NODE_TEXCO_REFLECTION: {
-			if(sd->object != OBJECT_NONE)
-				data = 2.0f*dot(sd->N, sd->I)*sd->N - sd->I;
-			else
-				data = sd->I;
-			break;
-		}
-		case NODE_TEXCO_DUPLI_GENERATED: {
-			data = object_dupli_generated(kg, sd->object);
-			break;
-		}
-		case NODE_TEXCO_DUPLI_UV: {
-			data = object_dupli_uv(kg, sd->object);
-			break;
-		}
-		case NODE_TEXCO_VOLUME_GENERATED: {
-			data = sd->P + sd->dP.dx;
-
-#ifdef __VOLUME__
-			if(sd->object != OBJECT_NONE)
-				data = volume_normalized_position(kg, sd, data);
-#endif
-			break;
-		}
-	}
-
-	stack_store_float3(stack, out_offset, data);
+  float3 data;
+  uint type = node.y; /* selects one of the NODE_TEXCO_* cases below */
+  uint out_offset = node.z; /* stack offset the resulting float3 is stored at */
+
+  switch (type) {
+    case NODE_TEXCO_OBJECT: {
+      data = sd->P + sd->dP.dx; /* position shifted by the x differential */
+      if (node.w == 0) { /* no transform stored with the node: use sd->object */
+        if (sd->object != OBJECT_NONE) {
+          object_inverse_position_transform(kg, sd, &data);
+        }
+      }
+      else {
+        Transform tfm; /* filled from the next three node reads */
+        tfm.x = read_node_float(kg, offset);
+        tfm.y = read_node_float(kg, offset);
+        tfm.z = read_node_float(kg, offset);
+        data = transform_point(&tfm, data);
+      }
+      break;
+    }
+    case NODE_TEXCO_NORMAL: {
+      data = sd->N; /* no differential is applied to the normal */
+      object_inverse_normal_transform(kg, sd, &data);
+      break;
+    }
+    case NODE_TEXCO_CAMERA: {
+      Transform tfm = kernel_data.cam.worldtocamera;
+
+      if (sd->object != OBJECT_NONE)
+        data = transform_point(&tfm, sd->P + sd->dP.dx);
+      else /* no object: the camera position is added as well */
+        data = transform_point(&tfm, sd->P + sd->dP.dx + camera_position(kg));
+      break;
+    }
+    case NODE_TEXCO_WINDOW: {
+      if ((path_flag & PATH_RAY_CAMERA) && sd->object == OBJECT_NONE &&
+          kernel_data.cam.type == CAMERA_ORTHOGRAPHIC)
+        data = camera_world_to_ndc(kg, sd, sd->ray_P + sd->ray_dP.dx);
+      else
+        data = camera_world_to_ndc(kg, sd, sd->P + sd->dP.dx);
+      data.z = 0.0f; /* only x and y are kept */
+      break;
+    }
+    case NODE_TEXCO_REFLECTION: {
+      if (sd->object != OBJECT_NONE)
+        data = 2.0f * dot(sd->N, sd->I) * sd->N - sd->I; /* no differential applied here */
+      else
+        data = sd->I;
+      break;
+    }
+    case NODE_TEXCO_DUPLI_GENERATED: {
+      data = object_dupli_generated(kg, sd->object);
+      break;
+    }
+    case NODE_TEXCO_DUPLI_UV: {
+      data = object_dupli_uv(kg, sd->object);
+      break;
+    }
+    case NODE_TEXCO_VOLUME_GENERATED: {
+      data = sd->P + sd->dP.dx; /* position shifted by the x differential */
+
+#  ifdef __VOLUME__
+      if (sd->object != OBJECT_NONE)
+        data = volume_normalized_position(kg, sd, data);
+#  endif
+      break;
+    }
+  }
+
+  stack_store_float3(stack, out_offset, data);
 #else
-	svm_node_tex_coord(kg, sd, path_flag, stack, node, offset);
+  svm_node_tex_coord(kg, sd, path_flag, stack, node, offset);
 #endif
 }
 
-ccl_device void svm_node_tex_coord_bump_dy(KernelGlobals *kg,
-                                           ShaderData *sd,
-                                           int path_flag,
-                                           float *stack,
-                                           uint4 node,
-                                           int *offset)
+ccl_device void svm_node_tex_coord_bump_dy(
+    KernelGlobals *kg, ShaderData *sd, int path_flag, float *stack, uint4 node, int *offset)
 {
 #ifdef __RAY_DIFFERENTIALS__
-	float3 data;
-	uint type = node.y;
-	uint out_offset = node.z;
-
-	switch(type) {
-		case NODE_TEXCO_OBJECT: {
-			data = sd->P + sd->dP.dy;
-			if(node.w == 0) {
-				if(sd->object != OBJECT_NONE) {
-					object_inverse_position_transform(kg, sd, &data);
-				}
-			}
-			else {
-				Transform tfm;
-				tfm.x = read_node_float(kg, offset);
-				tfm.y = read_node_float(kg, offset);
-				tfm.z = read_node_float(kg, offset);
-				data = transform_point(&tfm, data);
-			}
-			break;
-		}
-		case NODE_TEXCO_NORMAL: {
-			data = sd->N;
-			object_inverse_normal_transform(kg, sd, &data);
-			break;
-		}
-		case NODE_TEXCO_CAMERA: {
-			Transform tfm = kernel_data.cam.worldtocamera;
-
-			if(sd->object != OBJECT_NONE)
-				data = transform_point(&tfm, sd->P + sd->dP.dy);
-			else
-				data = transform_point(&tfm, sd->P + sd->dP.dy + camera_position(kg));
-			break;
-		}
-		case NODE_TEXCO_WINDOW: {
-			if((path_flag & PATH_RAY_CAMERA) && sd->object == OBJECT_NONE && kernel_data.cam.type == CAMERA_ORTHOGRAPHIC)
-				data = camera_world_to_ndc(kg, sd, sd->ray_P + sd->ray_dP.dy);
-			else
-				data = camera_world_to_ndc(kg, sd, sd->P + sd->dP.dy);
-			data.z = 0.0f;
-			break;
-		}
-		case NODE_TEXCO_REFLECTION: {
-			if(sd->object != OBJECT_NONE)
-				data = 2.0f*dot(sd->N, sd->I)*sd->N - sd->I;
-			else
-				data = sd->I;
-			break;
-		}
-		case NODE_TEXCO_DUPLI_GENERATED: {
-			data = object_dupli_generated(kg, sd->object);
-			break;
-		}
-		case NODE_TEXCO_DUPLI_UV: {
-			data = object_dupli_uv(kg, sd->object);
-			break;
-		}
-		case NODE_TEXCO_VOLUME_GENERATED: {
-			data = sd->P + sd->dP.dy;
-
-#ifdef __VOLUME__
-			if(sd->object != OBJECT_NONE)
-				data = volume_normalized_position(kg, sd, data);
-#endif
-			break;
-		}
-	}
-
-	stack_store_float3(stack, out_offset, data);
+  float3 data;
+  uint type = node.y; /* selects one of the NODE_TEXCO_* cases below */
+  uint out_offset = node.z; /* stack offset the resulting float3 is stored at */
+
+  switch (type) {
+    case NODE_TEXCO_OBJECT: {
+      data = sd->P + sd->dP.dy; /* position shifted by the y differential */
+      if (node.w == 0) { /* no transform stored with the node: use sd->object */
+        if (sd->object != OBJECT_NONE) {
+          object_inverse_position_transform(kg, sd, &data);
+        }
+      }
+      else {
+        Transform tfm; /* filled from the next three node reads */
+        tfm.x = read_node_float(kg, offset);
+        tfm.y = read_node_float(kg, offset);
+        tfm.z = read_node_float(kg, offset);
+        data = transform_point(&tfm, data);
+      }
+      break;
+    }
+    case NODE_TEXCO_NORMAL: {
+      data = sd->N; /* no differential is applied to the normal */
+      object_inverse_normal_transform(kg, sd, &data);
+      break;
+    }
+    case NODE_TEXCO_CAMERA: {
+      Transform tfm = kernel_data.cam.worldtocamera;
+
+      if (sd->object != OBJECT_NONE)
+        data = transform_point(&tfm, sd->P + sd->dP.dy);
+      else /* no object: the camera position is added as well */
+        data = transform_point(&tfm, sd->P + sd->dP.dy + camera_position(kg));
+      break;
+    }
+    case NODE_TEXCO_WINDOW: {
+      if ((path_flag & PATH_RAY_CAMERA) && sd->object == OBJECT_NONE &&
+          kernel_data.cam.type == CAMERA_ORTHOGRAPHIC)
+        data = camera_world_to_ndc(kg, sd, sd->ray_P + sd->ray_dP.dy);
+      else
+        data = camera_world_to_ndc(kg, sd, sd->P + sd->dP.dy);
+      data.z = 0.0f; /* only x and y are kept */
+      break;
+    }
+    case NODE_TEXCO_REFLECTION: {
+      if (sd->object != OBJECT_NONE)
+        data = 2.0f * dot(sd->N, sd->I) * sd->N - sd->I; /* no differential applied here */
+      else
+        data = sd->I;
+      break;
+    }
+    case NODE_TEXCO_DUPLI_GENERATED: {
+      data = object_dupli_generated(kg, sd->object);
+      break;
+    }
+    case NODE_TEXCO_DUPLI_UV: {
+      data = object_dupli_uv(kg, sd->object);
+      break;
+    }
+    case NODE_TEXCO_VOLUME_GENERATED: {
+      data = sd->P + sd->dP.dy; /* position shifted by the y differential */
+
+#  ifdef __VOLUME__
+      if (sd->object != OBJECT_NONE)
+        data = volume_normalized_position(kg, sd, data);
+#  endif
+      break;
+    }
+  }
+
+  stack_store_float3(stack, out_offset, data);
 #else
-	svm_node_tex_coord(kg, sd, path_flag, stack, node, offset);
+  svm_node_tex_coord(kg, sd, path_flag, stack, node, offset);
 #endif
 }
 
 ccl_device void svm_node_normal_map(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
 {
-	uint color_offset, strength_offset, normal_offset, space;
-	decode_node_uchar4(node.y, &color_offset, &strength_offset, &normal_offset, &space);
-
-	float3 color = stack_load_float3(stack, color_offset);
-	color = 2.0f*make_float3(color.x - 0.5f, color.y - 0.5f, color.z - 0.5f);
-
-	bool is_backfacing = (sd->flag & SD_BACKFACING) != 0;
-	float3 N;
-
-	if(space == NODE_NORMAL_MAP_TANGENT) {
-		/* tangent space */
-		if(sd->object == OBJECT_NONE) {
-			stack_store_float3(stack, normal_offset, make_float3(0.0f, 0.0f, 0.0f));
-			return;
-		}
-
-		/* first try to get tangent attribute */
-		const AttributeDescriptor attr = find_attribute(kg, sd, node.z);
-		const AttributeDescriptor attr_sign = find_attribute(kg, sd, node.w);
-		const AttributeDescriptor attr_normal = find_attribute(kg, sd, ATTR_STD_VERTEX_NORMAL);
-
-		if(attr.offset == ATTR_STD_NOT_FOUND || attr_sign.offset == ATTR_STD_NOT_FOUND || attr_normal.offset == ATTR_STD_NOT_FOUND) {
-			stack_store_float3(stack, normal_offset, make_float3(0.0f, 0.0f, 0.0f));
-			return;
-		}
-
-		/* get _unnormalized_ interpolated normal and tangent */
-		float3 tangent = primitive_surface_attribute_float3(kg, sd, attr, NULL, NULL);
-		float sign = primitive_surface_attribute_float(kg, sd, attr_sign, NULL, NULL);
-		float3 normal;
-
-		if(sd->shader & SHADER_SMOOTH_NORMAL) {
-			normal = primitive_surface_attribute_float3(kg, sd, attr_normal, NULL, NULL);
-		}
-		else {
-			normal = sd->Ng;
-
-			/* the normal is already inverted, which is too soon for the math here */
-			if(is_backfacing) {
-				normal = -normal;
-			}
-
-			object_inverse_normal_transform(kg, sd, &normal);
-		}
-
-		/* apply normal map */
-		float3 B = sign * cross(normal, tangent);
-		N = safe_normalize(color.x * tangent + color.y * B + color.z * normal);
-
-		/* transform to world space */
-		object_normal_transform(kg, sd, &N);
-	}
-	else {
-		/* strange blender convention */
-		if(space == NODE_NORMAL_MAP_BLENDER_OBJECT || space == NODE_NORMAL_MAP_BLENDER_WORLD) {
-			color.y = -color.y;
-			color.z = -color.z;
-		}
-
-		/* object, world space */
-		N = color;
-
-		if(space == NODE_NORMAL_MAP_OBJECT || space == NODE_NORMAL_MAP_BLENDER_OBJECT)
-			object_normal_transform(kg, sd, &N);
-		else
-			N = safe_normalize(N);
-	}
-
-	/* invert normal for backfacing polygons */
-	if(is_backfacing) {
-		N = -N;
-	}
-
-	float strength = stack_load_float(stack, strength_offset);
-
-	if(strength != 1.0f) {
-		strength = max(strength, 0.0f);
-		N = safe_normalize(sd->N + (N - sd->N)*strength);
-	}
-
-	N = ensure_valid_reflection(sd->Ng, sd->I, N);
-
-	if(is_zero(N)) {
-		N = sd->N;
-	}
-
-	stack_store_float3(stack, normal_offset, N);
+  uint color_offset, strength_offset, normal_offset, space;
+  decode_node_uchar4(node.y, &color_offset, &strength_offset, &normal_offset, &space);
+
+  float3 color = stack_load_float3(stack, color_offset);
+  color = 2.0f * make_float3(color.x - 0.5f, color.y - 0.5f, color.z - 0.5f); /* c -> 2c - 1 */
+
+  bool is_backfacing = (sd->flag & SD_BACKFACING) != 0;
+  float3 N;
+
+  if (space == NODE_NORMAL_MAP_TANGENT) {
+    /* Tangent space: without an object the output is a zero vector. */
+    if (sd->object == OBJECT_NONE) {
+      stack_store_float3(stack, normal_offset, make_float3(0.0f, 0.0f, 0.0f));
+      return;
+    }
+
+    /* Look up the tangent (node.z), its sign (node.w) and the vertex normal attributes. */
+    const AttributeDescriptor attr = find_attribute(kg, sd, node.z);
+    const AttributeDescriptor attr_sign = find_attribute(kg, sd, node.w);
+    const AttributeDescriptor attr_normal = find_attribute(kg, sd, ATTR_STD_VERTEX_NORMAL);
+
+    if (attr.offset == ATTR_STD_NOT_FOUND || attr_sign.offset == ATTR_STD_NOT_FOUND ||
+        attr_normal.offset == ATTR_STD_NOT_FOUND) {
+      stack_store_float3(stack, normal_offset, make_float3(0.0f, 0.0f, 0.0f));
+      return; /* any attribute missing: the output is a zero vector */
+    }
+
+    /* Get _unnormalized_ interpolated normal and tangent. */
+    float3 tangent = primitive_surface_attribute_float3(kg, sd, attr, NULL, NULL);
+    float sign = primitive_surface_attribute_float(kg, sd, attr_sign, NULL, NULL);
+    float3 normal;
+
+    if (sd->shader & SHADER_SMOOTH_NORMAL) {
+      normal = primitive_surface_attribute_float3(kg, sd, attr_normal, NULL, NULL);
+    }
+    else {
+      normal = sd->Ng;
+
+      /* The normal is already inverted, which is too soon for the math here. */
+      if (is_backfacing) {
+        normal = -normal;
+      }
+
+      object_inverse_normal_transform(kg, sd, &normal);
+    }
+
+    /* Apply normal map: B is the signed cross product of normal and tangent. */
+    float3 B = sign * cross(normal, tangent);
+    N = safe_normalize(color.x * tangent + color.y * B + color.z * normal);
+
+    /* Transform to world space. */
+    object_normal_transform(kg, sd, &N);
+  }
+  else {
+    /* Strange Blender convention: Y and Z are negated for the BLENDER_* spaces. */
+    if (space == NODE_NORMAL_MAP_BLENDER_OBJECT || space == NODE_NORMAL_MAP_BLENDER_WORLD) {
+      color.y = -color.y;
+      color.z = -color.z;
+    }
+
+    /* Object, world space: the remapped color is used directly as the normal. */
+    N = color;
+
+    if (space == NODE_NORMAL_MAP_OBJECT || space == NODE_NORMAL_MAP_BLENDER_OBJECT)
+      object_normal_transform(kg, sd, &N);
+    else
+      N = safe_normalize(N);
+  }
+
+  /* Invert normal for backfacing polygons. */
+  if (is_backfacing) {
+    N = -N;
+  }
+
+  float strength = stack_load_float(stack, strength_offset);
+
+  if (strength != 1.0f) {
+    strength = max(strength, 0.0f); /* negative strengths are treated as 0 */
+    N = safe_normalize(sd->N + (N - sd->N) * strength); /* move from sd->N towards N */
+  }
+
+  N = ensure_valid_reflection(sd->Ng, sd->I, N);
+
+  if (is_zero(N)) { /* zero result: fall back to sd->N */
+    N = sd->N;
+  }
+
+  stack_store_float3(stack, normal_offset, N);
 }
 
 ccl_device void svm_node_tangent(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
 {
-	uint tangent_offset, direction_type, axis;
-	decode_node_uchar4(node.y, &tangent_offset, &direction_type, &axis, NULL);
-
-	float3 tangent;
-	float3 attribute_value;
-	const AttributeDescriptor desc = find_attribute(kg, sd, node.z);
-	if (desc.offset != ATTR_STD_NOT_FOUND) {
-		if(desc.type == NODE_ATTR_FLOAT2) {
-			float2 value = primitive_surface_attribute_float2(kg, sd, desc, NULL, NULL);
-			attribute_value.x = value.x;
-			attribute_value.y = value.y;
-			attribute_value.z = 0.0f;
-		}
-		else {
-			attribute_value = primitive_surface_attribute_float3(kg, sd, desc, NULL, NULL);
-		}
-	}
-
-
-	if(direction_type == NODE_TANGENT_UVMAP) {
-		/* UV map */
-		if(desc.offset == ATTR_STD_NOT_FOUND)
-			tangent = make_float3(0.0f, 0.0f, 0.0f);
-		else
-			tangent = attribute_value;
-	}
-	else {
-		/* radial */
-		float3 generated;
-
-		if(desc.offset == ATTR_STD_NOT_FOUND)
-			generated = sd->P;
-		else
-			generated = attribute_value;
-
-		if(axis == NODE_TANGENT_AXIS_X)
-			tangent = make_float3(0.0f, -(generated.z - 0.5f), (generated.y - 0.5f));
-		else if(axis == NODE_TANGENT_AXIS_Y)
-			tangent = make_float3(-(generated.z - 0.5f), 0.0f, (generated.x - 0.5f));
-		else
-			tangent = make_float3(-(generated.y - 0.5f), (generated.x - 0.5f), 0.0f);
-	}
-
-	object_normal_transform(kg, sd, &tangent);
-	tangent = cross(sd->N, normalize(cross(tangent, sd->N)));
-	stack_store_float3(stack, tangent_offset, tangent);
+  uint tangent_offset, direction_type, axis;
+  decode_node_uchar4(node.y, &tangent_offset, &direction_type, &axis, NULL);
+
+  float3 tangent;
+  float3 attribute_value; /* left unset when the attribute is not found */
+  const AttributeDescriptor desc = find_attribute(kg, sd, node.z);
+  if (desc.offset != ATTR_STD_NOT_FOUND) {
+    if (desc.type == NODE_ATTR_FLOAT2) {
+      float2 value = primitive_surface_attribute_float2(kg, sd, desc, NULL, NULL);
+      attribute_value.x = value.x;
+      attribute_value.y = value.y;
+      attribute_value.z = 0.0f; /* float2 attributes are padded with z = 0 */
+    }
+    else {
+      attribute_value = primitive_surface_attribute_float3(kg, sd, desc, NULL, NULL);
+    }
+  }
+
+  if (direction_type == NODE_TANGENT_UVMAP) {
+    /* UV map: the attribute itself is the tangent, or zero when it is missing. */
+    if (desc.offset == ATTR_STD_NOT_FOUND)
+      tangent = make_float3(0.0f, 0.0f, 0.0f);
+    else
+      tangent = attribute_value;
+  }
+  else {
+    /* Radial: built from a coordinate offset by 0.5, around the chosen axis. */
+    float3 generated;
+
+    if (desc.offset == ATTR_STD_NOT_FOUND)
+      generated = sd->P; /* attribute missing: sd->P is used instead */
+    else
+      generated = attribute_value;
+
+    if (axis == NODE_TANGENT_AXIS_X)
+      tangent = make_float3(0.0f, -(generated.z - 0.5f), (generated.y - 0.5f));
+    else if (axis == NODE_TANGENT_AXIS_Y)
+      tangent = make_float3(-(generated.z - 0.5f), 0.0f, (generated.x - 0.5f));
+    else /* any other axis value */
+      tangent = make_float3(-(generated.y - 0.5f), (generated.x - 0.5f), 0.0f);
+  }
+
+  object_normal_transform(kg, sd, &tangent);
+  tangent = cross(sd->N, normalize(cross(tangent, sd->N))); /* perpendicular to sd->N */
+  stack_store_float3(stack, tangent_offset, tangent);
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_texture.h b/intern/cycles/kernel/svm/svm_texture.h
index 57729817bdc..290aa85c831 100644
--- a/intern/cycles/kernel/svm/svm_texture.h
+++ b/intern/cycles/kernel/svm/svm_texture.h
@@ -20,44 +20,44 @@ CCL_NAMESPACE_BEGIN
 
 ccl_device_noinline float noise_turbulence(float3 p, float octaves, int hard)
 {
-	float fscale = 1.0f;
-	float amp = 1.0f;
-	float sum = 0.0f;
-	int i, n;
+  float fscale = 1.0f;
+  float amp = 1.0f;
+  float sum = 0.0f;
+  int i, n;
 
-	octaves = clamp(octaves, 0.0f, 16.0f);
-	n = float_to_int(octaves);
+  octaves = clamp(octaves, 0.0f, 16.0f);
+  n = float_to_int(octaves);
 
-	for(i = 0; i <= n; i++) {
-		float t = noise(fscale*p);
+  for (i = 0; i <= n; i++) {
+    float t = noise(fscale * p);
 
-		if(hard)
-			t = fabsf(2.0f*t - 1.0f);
+    if (hard)
+      t = fabsf(2.0f * t - 1.0f);
 
-		sum += t*amp;
-		amp *= 0.5f;
-		fscale *= 2.0f;
-	}
+    sum += t * amp;
+    amp *= 0.5f;
+    fscale *= 2.0f;
+  }
 
-	float rmd = octaves - floorf(octaves);
+  float rmd = octaves - floorf(octaves);
 
-	if(rmd != 0.0f) {
-		float t = noise(fscale*p);
+  if (rmd != 0.0f) {
+    float t = noise(fscale * p);
 
-		if(hard)
-			t = fabsf(2.0f*t - 1.0f);
+    if (hard)
+      t = fabsf(2.0f * t - 1.0f);
 
-		float sum2 = sum + t*amp;
+    float sum2 = sum + t * amp;
 
-		sum *= ((float)(1 << n)/(float)((1 << (n+1)) - 1));
-		sum2 *= ((float)(1 << (n+1))/(float)((1 << (n+2)) - 1));
+    sum *= ((float)(1 << n) / (float)((1 << (n + 1)) - 1));
+    sum2 *= ((float)(1 << (n + 1)) / (float)((1 << (n + 2)) - 1));
 
-		return (1.0f - rmd)*sum + rmd*sum2;
-	}
-	else {
-		sum *= ((float)(1 << n)/(float)((1 << (n+1)) - 1));
-		return sum;
-	}
+    return (1.0f - rmd) * sum + rmd * sum2; /* blend by the fractional part of octaves */
+  }
+  else { /* rmd == 0: octaves has no fractional part */
+    sum *= ((float)(1 << n) / (float)((1 << (n + 1)) - 1)); /* i.e. sum / (2 - 2^-n) */
+    return sum;
+  }
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_types.h b/intern/cycles/kernel/svm/svm_types.h
index 8b15d7bf9f4..d31e4f93696 100644
--- a/intern/cycles/kernel/svm/svm_types.h
+++ b/intern/cycles/kernel/svm/svm_types.h
@@ -38,498 +38,505 @@ CCL_NAMESPACE_BEGIN
  *
  * Lower the number of group more often the node is used.
  */
-#define NODE_GROUP_LEVEL_0    0
-#define NODE_GROUP_LEVEL_1    1
-#define NODE_GROUP_LEVEL_2    2
-#define NODE_GROUP_LEVEL_3    3
-#define NODE_GROUP_LEVEL_MAX  NODE_GROUP_LEVEL_3
-
-#define NODE_FEATURE_VOLUME     (1 << 0)
-#define NODE_FEATURE_HAIR       (1 << 1)
-#define NODE_FEATURE_BUMP       (1 << 2)
+#define NODE_GROUP_LEVEL_0 0
+#define NODE_GROUP_LEVEL_1 1
+#define NODE_GROUP_LEVEL_2 2
+#define NODE_GROUP_LEVEL_3 3
+#define NODE_GROUP_LEVEL_MAX NODE_GROUP_LEVEL_3
+
+#define NODE_FEATURE_VOLUME (1 << 0)
+#define NODE_FEATURE_HAIR (1 << 1)
+#define NODE_FEATURE_BUMP (1 << 2)
 #define NODE_FEATURE_BUMP_STATE (1 << 3)
 /* TODO(sergey): Consider using something like ((uint)(-1)).
  * Need to check carefully operand types around usage of this
  * define first.
  */
-#define NODE_FEATURE_ALL        (NODE_FEATURE_VOLUME|NODE_FEATURE_HAIR|NODE_FEATURE_BUMP|NODE_FEATURE_BUMP_STATE)
+#define NODE_FEATURE_ALL \
+  (NODE_FEATURE_VOLUME | NODE_FEATURE_HAIR | NODE_FEATURE_BUMP | NODE_FEATURE_BUMP_STATE)
 
 typedef enum ShaderNodeType {
-	NODE_END = 0,
-	NODE_CLOSURE_BSDF,
-	NODE_CLOSURE_EMISSION,
-	NODE_CLOSURE_BACKGROUND,
-	NODE_CLOSURE_SET_WEIGHT,
-	NODE_CLOSURE_WEIGHT,
-	NODE_MIX_CLOSURE,
-	NODE_JUMP_IF_ZERO,
-	NODE_JUMP_IF_ONE,
-	NODE_TEX_IMAGE,
-	NODE_TEX_IMAGE_BOX,
-	NODE_TEX_SKY,
-	NODE_GEOMETRY,
-	NODE_GEOMETRY_DUPLI,
-	NODE_LIGHT_PATH,
-	NODE_VALUE_F,
-	NODE_VALUE_V,
-	NODE_MIX,
-	NODE_ATTR,
-	NODE_CONVERT,
-	NODE_FRESNEL,
-	NODE_WIREFRAME,
-	NODE_WAVELENGTH,
-	NODE_BLACKBODY,
-	NODE_EMISSION_WEIGHT,
-	NODE_TEX_GRADIENT,
-	NODE_TEX_VORONOI,
-	NODE_TEX_MUSGRAVE,
-	NODE_TEX_WAVE,
-	NODE_TEX_MAGIC,
-	NODE_TEX_NOISE,
-	NODE_SHADER_JUMP,
-	NODE_SET_DISPLACEMENT,
-	NODE_GEOMETRY_BUMP_DX,
-	NODE_GEOMETRY_BUMP_DY,
-	NODE_SET_BUMP,
-	NODE_MATH,
-	NODE_VECTOR_MATH,
-	NODE_VECTOR_TRANSFORM,
-	NODE_MAPPING,
-	NODE_TEX_COORD,
-	NODE_TEX_COORD_BUMP_DX,
-	NODE_TEX_COORD_BUMP_DY,
-	NODE_ATTR_BUMP_DX,
-	NODE_ATTR_BUMP_DY,
-	NODE_TEX_ENVIRONMENT,
-	NODE_CLOSURE_HOLDOUT,
-	NODE_LAYER_WEIGHT,
-	NODE_CLOSURE_VOLUME,
-	NODE_SEPARATE_VECTOR,
-	NODE_COMBINE_VECTOR,
-	NODE_SEPARATE_HSV,
-	NODE_COMBINE_HSV,
-	NODE_HSV,
-	NODE_CAMERA,
-	NODE_INVERT,
-	NODE_NORMAL,
-	NODE_GAMMA,
-	NODE_TEX_CHECKER,
-	NODE_BRIGHTCONTRAST,
-	NODE_RGB_RAMP,
-	NODE_RGB_CURVES,
-	NODE_VECTOR_CURVES,
-	NODE_MIN_MAX,
-	NODE_LIGHT_FALLOFF,
-	NODE_OBJECT_INFO,
-	NODE_PARTICLE_INFO,
-	NODE_TEX_BRICK,
-	NODE_CLOSURE_SET_NORMAL,
-	NODE_AMBIENT_OCCLUSION,
-	NODE_TANGENT,
-	NODE_NORMAL_MAP,
-	NODE_HAIR_INFO,
-	NODE_UVMAP,
-	NODE_TEX_VOXEL,
-	NODE_ENTER_BUMP_EVAL,
-	NODE_LEAVE_BUMP_EVAL,
-	NODE_BEVEL,
-	NODE_DISPLACEMENT,
-	NODE_VECTOR_DISPLACEMENT,
-	NODE_PRINCIPLED_VOLUME,
-	NODE_IES,
+  NODE_END = 0,
+  NODE_CLOSURE_BSDF,
+  NODE_CLOSURE_EMISSION,
+  NODE_CLOSURE_BACKGROUND,
+  NODE_CLOSURE_SET_WEIGHT,
+  NODE_CLOSURE_WEIGHT,
+  NODE_MIX_CLOSURE,
+  NODE_JUMP_IF_ZERO,
+  NODE_JUMP_IF_ONE,
+  NODE_TEX_IMAGE,
+  NODE_TEX_IMAGE_BOX,
+  NODE_TEX_SKY,
+  NODE_GEOMETRY,
+  NODE_GEOMETRY_DUPLI,
+  NODE_LIGHT_PATH,
+  NODE_VALUE_F,
+  NODE_VALUE_V,
+  NODE_MIX,
+  NODE_ATTR,
+  NODE_CONVERT,
+  NODE_FRESNEL,
+  NODE_WIREFRAME,
+  NODE_WAVELENGTH,
+  NODE_BLACKBODY,
+  NODE_EMISSION_WEIGHT,
+  NODE_TEX_GRADIENT,
+  NODE_TEX_VORONOI,
+  NODE_TEX_MUSGRAVE,
+  NODE_TEX_WAVE,
+  NODE_TEX_MAGIC,
+  NODE_TEX_NOISE,
+  NODE_SHADER_JUMP,
+  NODE_SET_DISPLACEMENT,
+  NODE_GEOMETRY_BUMP_DX,
+  NODE_GEOMETRY_BUMP_DY,
+  NODE_SET_BUMP,
+  NODE_MATH,
+  NODE_VECTOR_MATH,
+  NODE_VECTOR_TRANSFORM,
+  NODE_MAPPING,
+  NODE_TEX_COORD,
+  NODE_TEX_COORD_BUMP_DX,
+  NODE_TEX_COORD_BUMP_DY,
+  NODE_ATTR_BUMP_DX,
+  NODE_ATTR_BUMP_DY,
+  NODE_TEX_ENVIRONMENT,
+  NODE_CLOSURE_HOLDOUT,
+  NODE_LAYER_WEIGHT,
+  NODE_CLOSURE_VOLUME,
+  NODE_SEPARATE_VECTOR,
+  NODE_COMBINE_VECTOR,
+  NODE_SEPARATE_HSV,
+  NODE_COMBINE_HSV,
+  NODE_HSV,
+  NODE_CAMERA,
+  NODE_INVERT,
+  NODE_NORMAL,
+  NODE_GAMMA,
+  NODE_TEX_CHECKER,
+  NODE_BRIGHTCONTRAST,
+  NODE_RGB_RAMP,
+  NODE_RGB_CURVES,
+  NODE_VECTOR_CURVES,
+  NODE_MIN_MAX,
+  NODE_LIGHT_FALLOFF,
+  NODE_OBJECT_INFO,
+  NODE_PARTICLE_INFO,
+  NODE_TEX_BRICK,
+  NODE_CLOSURE_SET_NORMAL,
+  NODE_AMBIENT_OCCLUSION,
+  NODE_TANGENT,
+  NODE_NORMAL_MAP,
+  NODE_HAIR_INFO,
+  NODE_UVMAP,
+  NODE_TEX_VOXEL,
+  NODE_ENTER_BUMP_EVAL,
+  NODE_LEAVE_BUMP_EVAL,
+  NODE_BEVEL,
+  NODE_DISPLACEMENT,
+  NODE_VECTOR_DISPLACEMENT,
+  NODE_PRINCIPLED_VOLUME,
+  NODE_IES,
 } ShaderNodeType;
 
 typedef enum NodeAttributeType {
-	NODE_ATTR_FLOAT = 0,
-	NODE_ATTR_FLOAT2,
-	NODE_ATTR_FLOAT3,
-	NODE_ATTR_MATRIX
+  NODE_ATTR_FLOAT = 0,
+  NODE_ATTR_FLOAT2,
+  NODE_ATTR_FLOAT3,
+  NODE_ATTR_MATRIX
 } NodeAttributeType;
 
 typedef enum NodeGeometry {
-	NODE_GEOM_P = 0,
-	NODE_GEOM_N,
-	NODE_GEOM_T,
-	NODE_GEOM_I,
-	NODE_GEOM_Ng,
-	NODE_GEOM_uv
+  NODE_GEOM_P = 0,
+  NODE_GEOM_N,
+  NODE_GEOM_T,
+  NODE_GEOM_I,
+  NODE_GEOM_Ng,
+  NODE_GEOM_uv
 } NodeGeometry;
 
 typedef enum NodeObjectInfo {
-	NODE_INFO_OB_LOCATION,
-	NODE_INFO_OB_INDEX,
-	NODE_INFO_MAT_INDEX,
-	NODE_INFO_OB_RANDOM
+  NODE_INFO_OB_LOCATION,
+  NODE_INFO_OB_INDEX,
+  NODE_INFO_MAT_INDEX,
+  NODE_INFO_OB_RANDOM
 } NodeObjectInfo;
 
 typedef enum NodeParticleInfo {
-	NODE_INFO_PAR_INDEX,
-	NODE_INFO_PAR_RANDOM,
-	NODE_INFO_PAR_AGE,
-	NODE_INFO_PAR_LIFETIME,
-	NODE_INFO_PAR_LOCATION,
-	NODE_INFO_PAR_ROTATION,
-	NODE_INFO_PAR_SIZE,
-	NODE_INFO_PAR_VELOCITY,
-	NODE_INFO_PAR_ANGULAR_VELOCITY
+  NODE_INFO_PAR_INDEX,
+  NODE_INFO_PAR_RANDOM,
+  NODE_INFO_PAR_AGE,
+  NODE_INFO_PAR_LIFETIME,
+  NODE_INFO_PAR_LOCATION,
+  NODE_INFO_PAR_ROTATION,
+  NODE_INFO_PAR_SIZE,
+  NODE_INFO_PAR_VELOCITY,
+  NODE_INFO_PAR_ANGULAR_VELOCITY
 } NodeParticleInfo;
 
 typedef enum NodeHairInfo {
-	NODE_INFO_CURVE_IS_STRAND,
-	NODE_INFO_CURVE_INTERCEPT,
-	NODE_INFO_CURVE_THICKNESS,
-	/*fade for minimum hair width transpency*/
-	/*NODE_INFO_CURVE_FADE,*/
-	NODE_INFO_CURVE_TANGENT_NORMAL,
-	NODE_INFO_CURVE_RANDOM,
+  NODE_INFO_CURVE_IS_STRAND,
+  NODE_INFO_CURVE_INTERCEPT,
+  NODE_INFO_CURVE_THICKNESS,
+  /* Fade for minimum hair width transparency. */
+  /*NODE_INFO_CURVE_FADE,*/
+  NODE_INFO_CURVE_TANGENT_NORMAL,
+  NODE_INFO_CURVE_RANDOM,
 } NodeHairInfo;
 
 typedef enum NodeLightPath {
-	NODE_LP_camera = 0,
-	NODE_LP_shadow,
-	NODE_LP_diffuse,
-	NODE_LP_glossy,
-	NODE_LP_singular,
-	NODE_LP_reflection,
-	NODE_LP_transmission,
-	NODE_LP_volume_scatter,
-	NODE_LP_backfacing,
-	NODE_LP_ray_length,
-	NODE_LP_ray_depth,
-	NODE_LP_ray_diffuse,
-	NODE_LP_ray_glossy,
-	NODE_LP_ray_transparent,
-	NODE_LP_ray_transmission,
+  NODE_LP_camera = 0,
+  NODE_LP_shadow,
+  NODE_LP_diffuse,
+  NODE_LP_glossy,
+  NODE_LP_singular,
+  NODE_LP_reflection,
+  NODE_LP_transmission,
+  NODE_LP_volume_scatter,
+  NODE_LP_backfacing,
+  NODE_LP_ray_length,
+  NODE_LP_ray_depth,
+  NODE_LP_ray_diffuse,
+  NODE_LP_ray_glossy,
+  NODE_LP_ray_transparent,
+  NODE_LP_ray_transmission,
 } NodeLightPath;
 
 typedef enum NodeLightFalloff {
-	NODE_LIGHT_FALLOFF_QUADRATIC,
-	NODE_LIGHT_FALLOFF_LINEAR,
-	NODE_LIGHT_FALLOFF_CONSTANT
+  NODE_LIGHT_FALLOFF_QUADRATIC,
+  NODE_LIGHT_FALLOFF_LINEAR,
+  NODE_LIGHT_FALLOFF_CONSTANT
 } NodeLightFalloff;
 
 typedef enum NodeTexCoord {
-	NODE_TEXCO_NORMAL,
-	NODE_TEXCO_OBJECT,
-	NODE_TEXCO_CAMERA,
-	NODE_TEXCO_WINDOW,
-	NODE_TEXCO_REFLECTION,
-	NODE_TEXCO_DUPLI_GENERATED,
-	NODE_TEXCO_DUPLI_UV,
-	NODE_TEXCO_VOLUME_GENERATED
+  NODE_TEXCO_NORMAL,
+  NODE_TEXCO_OBJECT,
+  NODE_TEXCO_CAMERA,
+  NODE_TEXCO_WINDOW,
+  NODE_TEXCO_REFLECTION,
+  NODE_TEXCO_DUPLI_GENERATED,
+  NODE_TEXCO_DUPLI_UV,
+  NODE_TEXCO_VOLUME_GENERATED
 } NodeTexCoord;
 
 typedef enum NodeMix {
-	NODE_MIX_BLEND = 0,
-	NODE_MIX_ADD,
-	NODE_MIX_MUL,
-	NODE_MIX_SUB,
-	NODE_MIX_SCREEN,
-	NODE_MIX_DIV,
-	NODE_MIX_DIFF,
-	NODE_MIX_DARK,
-	NODE_MIX_LIGHT,
-	NODE_MIX_OVERLAY,
-	NODE_MIX_DODGE,
-	NODE_MIX_BURN,
-	NODE_MIX_HUE,
-	NODE_MIX_SAT,
-	NODE_MIX_VAL,
-	NODE_MIX_COLOR,
-	NODE_MIX_SOFT,
-	NODE_MIX_LINEAR,
-	NODE_MIX_CLAMP /* used for the clamp UI option */
+  NODE_MIX_BLEND = 0,
+  NODE_MIX_ADD,
+  NODE_MIX_MUL,
+  NODE_MIX_SUB,
+  NODE_MIX_SCREEN,
+  NODE_MIX_DIV,
+  NODE_MIX_DIFF,
+  NODE_MIX_DARK,
+  NODE_MIX_LIGHT,
+  NODE_MIX_OVERLAY,
+  NODE_MIX_DODGE,
+  NODE_MIX_BURN,
+  NODE_MIX_HUE,
+  NODE_MIX_SAT,
+  NODE_MIX_VAL,
+  NODE_MIX_COLOR,
+  NODE_MIX_SOFT,
+  NODE_MIX_LINEAR,
+  NODE_MIX_CLAMP /* used for the clamp UI option */
 } NodeMix;
 
 typedef enum NodeMath {
-	NODE_MATH_ADD,
-	NODE_MATH_SUBTRACT,
-	NODE_MATH_MULTIPLY,
-	NODE_MATH_DIVIDE,
-	NODE_MATH_SINE,
-	NODE_MATH_COSINE,
-	NODE_MATH_TANGENT,
-	NODE_MATH_ARCSINE,
-	NODE_MATH_ARCCOSINE,
-	NODE_MATH_ARCTANGENT,
-	NODE_MATH_POWER,
-	NODE_MATH_LOGARITHM,
-	NODE_MATH_MINIMUM,
-	NODE_MATH_MAXIMUM,
-	NODE_MATH_ROUND,
-	NODE_MATH_LESS_THAN,
-	NODE_MATH_GREATER_THAN,
-	NODE_MATH_MODULO,
-	NODE_MATH_ABSOLUTE,
-	NODE_MATH_ARCTAN2,
-	NODE_MATH_FLOOR,
-	NODE_MATH_CEIL,
-	NODE_MATH_FRACT,
-	NODE_MATH_SQRT,
-	NODE_MATH_CLAMP /* used for the clamp UI option */
+  NODE_MATH_ADD,
+  NODE_MATH_SUBTRACT,
+  NODE_MATH_MULTIPLY,
+  NODE_MATH_DIVIDE,
+  NODE_MATH_SINE,
+  NODE_MATH_COSINE,
+  NODE_MATH_TANGENT,
+  NODE_MATH_ARCSINE,
+  NODE_MATH_ARCCOSINE,
+  NODE_MATH_ARCTANGENT,
+  NODE_MATH_POWER,
+  NODE_MATH_LOGARITHM,
+  NODE_MATH_MINIMUM,
+  NODE_MATH_MAXIMUM,
+  NODE_MATH_ROUND,
+  NODE_MATH_LESS_THAN,
+  NODE_MATH_GREATER_THAN,
+  NODE_MATH_MODULO,
+  NODE_MATH_ABSOLUTE,
+  NODE_MATH_ARCTAN2,
+  NODE_MATH_FLOOR,
+  NODE_MATH_CEIL,
+  NODE_MATH_FRACT,
+  NODE_MATH_SQRT,
+  NODE_MATH_CLAMP /* used for the clamp UI option */
 } NodeMath;
 
 typedef enum NodeVectorMath {
-	NODE_VECTOR_MATH_ADD,
-	NODE_VECTOR_MATH_SUBTRACT,
-	NODE_VECTOR_MATH_AVERAGE,
-	NODE_VECTOR_MATH_DOT_PRODUCT,
-	NODE_VECTOR_MATH_CROSS_PRODUCT,
-	NODE_VECTOR_MATH_NORMALIZE
+  NODE_VECTOR_MATH_ADD,
+  NODE_VECTOR_MATH_SUBTRACT,
+  NODE_VECTOR_MATH_AVERAGE,
+  NODE_VECTOR_MATH_DOT_PRODUCT,
+  NODE_VECTOR_MATH_CROSS_PRODUCT,
+  NODE_VECTOR_MATH_NORMALIZE
 } NodeVectorMath;
 
 typedef enum NodeVectorTransformType {
-	NODE_VECTOR_TRANSFORM_TYPE_VECTOR,
-	NODE_VECTOR_TRANSFORM_TYPE_POINT,
-	NODE_VECTOR_TRANSFORM_TYPE_NORMAL
+  NODE_VECTOR_TRANSFORM_TYPE_VECTOR,
+  NODE_VECTOR_TRANSFORM_TYPE_POINT,
+  NODE_VECTOR_TRANSFORM_TYPE_NORMAL
 } NodeVectorTransformType;
 
 typedef enum NodeVectorTransformConvertSpace {
-	NODE_VECTOR_TRANSFORM_CONVERT_SPACE_WORLD,
-	NODE_VECTOR_TRANSFORM_CONVERT_SPACE_OBJECT,
-	NODE_VECTOR_TRANSFORM_CONVERT_SPACE_CAMERA
+  NODE_VECTOR_TRANSFORM_CONVERT_SPACE_WORLD,
+  NODE_VECTOR_TRANSFORM_CONVERT_SPACE_OBJECT,
+  NODE_VECTOR_TRANSFORM_CONVERT_SPACE_CAMERA
 } NodeVectorTransformConvertSpace;
 
 typedef enum NodeConvert {
-	NODE_CONVERT_FV,
-	NODE_CONVERT_FI,
-	NODE_CONVERT_CF,
-	NODE_CONVERT_CI,
-	NODE_CONVERT_VF,
-	NODE_CONVERT_VI,
-	NODE_CONVERT_IF,
-	NODE_CONVERT_IV
+  NODE_CONVERT_FV,
+  NODE_CONVERT_FI,
+  NODE_CONVERT_CF,
+  NODE_CONVERT_CI,
+  NODE_CONVERT_VF,
+  NODE_CONVERT_VI,
+  NODE_CONVERT_IF,
+  NODE_CONVERT_IV
 } NodeConvert;
 
 typedef enum NodeMusgraveType {
-	NODE_MUSGRAVE_MULTIFRACTAL,
-	NODE_MUSGRAVE_FBM,
-	NODE_MUSGRAVE_HYBRID_MULTIFRACTAL,
-	NODE_MUSGRAVE_RIDGED_MULTIFRACTAL,
-	NODE_MUSGRAVE_HETERO_TERRAIN
+  NODE_MUSGRAVE_MULTIFRACTAL,
+  NODE_MUSGRAVE_FBM,
+  NODE_MUSGRAVE_HYBRID_MULTIFRACTAL,
+  NODE_MUSGRAVE_RIDGED_MULTIFRACTAL,
+  NODE_MUSGRAVE_HETERO_TERRAIN
 } NodeMusgraveType;
 
-typedef enum NodeWaveType {
-	NODE_WAVE_BANDS,
-	NODE_WAVE_RINGS
-} NodeWaveType;
+typedef enum NodeWaveType { NODE_WAVE_BANDS, NODE_WAVE_RINGS } NodeWaveType;
 
 typedef enum NodeWaveProfiles {
-	NODE_WAVE_PROFILE_SIN,
-	NODE_WAVE_PROFILE_SAW,
+  NODE_WAVE_PROFILE_SIN,
+  NODE_WAVE_PROFILE_SAW,
 } NodeWaveProfile;
 
-typedef enum NodeSkyType {
-	NODE_SKY_OLD,
-	NODE_SKY_NEW
-} NodeSkyType;
+typedef enum NodeSkyType { NODE_SKY_OLD, NODE_SKY_NEW } NodeSkyType;
 
 typedef enum NodeGradientType {
-	NODE_BLEND_LINEAR,
-	NODE_BLEND_QUADRATIC,
-	NODE_BLEND_EASING,
-	NODE_BLEND_DIAGONAL,
-	NODE_BLEND_RADIAL,
-	NODE_BLEND_QUADRATIC_SPHERE,
-	NODE_BLEND_SPHERICAL
+  NODE_BLEND_LINEAR,
+  NODE_BLEND_QUADRATIC,
+  NODE_BLEND_EASING,
+  NODE_BLEND_DIAGONAL,
+  NODE_BLEND_RADIAL,
+  NODE_BLEND_QUADRATIC_SPHERE,
+  NODE_BLEND_SPHERICAL
 } NodeGradientType;
 
 typedef enum NodeVoronoiColoring {
-	NODE_VORONOI_INTENSITY,
-	NODE_VORONOI_CELLS
+  NODE_VORONOI_INTENSITY,
+  NODE_VORONOI_CELLS
 } NodeVoronoiColoring;
 
 typedef enum NodeVoronoiDistanceMetric {
-	NODE_VORONOI_DISTANCE,
-	NODE_VORONOI_MANHATTAN,
-	NODE_VORONOI_CHEBYCHEV,
-	NODE_VORONOI_MINKOWSKI
+  NODE_VORONOI_DISTANCE,
+  NODE_VORONOI_MANHATTAN,
+  NODE_VORONOI_CHEBYCHEV,
+  NODE_VORONOI_MINKOWSKI
 } NodeVoronoiDistanceMetric;
 
 typedef enum NodeVoronoiFeature {
-	NODE_VORONOI_F1,
-	NODE_VORONOI_F2,
-	NODE_VORONOI_F3,
-	NODE_VORONOI_F4,
-	NODE_VORONOI_F2F1
+  NODE_VORONOI_F1,
+  NODE_VORONOI_F2,
+  NODE_VORONOI_F3,
+  NODE_VORONOI_F4,
+  NODE_VORONOI_F2F1
 } NodeVoronoiFeature;
 
 typedef enum NodeBlendWeightType {
-	NODE_LAYER_WEIGHT_FRESNEL,
-	NODE_LAYER_WEIGHT_FACING
+  NODE_LAYER_WEIGHT_FRESNEL,
+  NODE_LAYER_WEIGHT_FACING
 } NodeBlendWeightType;
 
 typedef enum NodeTangentDirectionType {
-	NODE_TANGENT_RADIAL,
-	NODE_TANGENT_UVMAP
+  NODE_TANGENT_RADIAL,
+  NODE_TANGENT_UVMAP
 } NodeTangentDirectionType;
 
 typedef enum NodeTangentAxis {
-	NODE_TANGENT_AXIS_X,
-	NODE_TANGENT_AXIS_Y,
-	NODE_TANGENT_AXIS_Z
+  NODE_TANGENT_AXIS_X,
+  NODE_TANGENT_AXIS_Y,
+  NODE_TANGENT_AXIS_Z
 } NodeTangentAxis;
 
 typedef enum NodeNormalMapSpace {
-	NODE_NORMAL_MAP_TANGENT,
-	NODE_NORMAL_MAP_OBJECT,
-	NODE_NORMAL_MAP_WORLD,
-	NODE_NORMAL_MAP_BLENDER_OBJECT,
-	NODE_NORMAL_MAP_BLENDER_WORLD,
+  NODE_NORMAL_MAP_TANGENT,
+  NODE_NORMAL_MAP_OBJECT,
+  NODE_NORMAL_MAP_WORLD,
+  NODE_NORMAL_MAP_BLENDER_OBJECT,
+  NODE_NORMAL_MAP_BLENDER_WORLD,
 } NodeNormalMapSpace;
 
 typedef enum NodeImageColorSpace {
-	NODE_COLOR_SPACE_NONE  = 0,
-	NODE_COLOR_SPACE_COLOR = 1,
+  NODE_COLOR_SPACE_NONE = 0,
+  NODE_COLOR_SPACE_COLOR = 1,
 } NodeImageColorSpace;
 
 typedef enum NodeImageProjection {
-	NODE_IMAGE_PROJ_FLAT   = 0,
-	NODE_IMAGE_PROJ_BOX    = 1,
-	NODE_IMAGE_PROJ_SPHERE = 2,
-	NODE_IMAGE_PROJ_TUBE   = 3,
+  NODE_IMAGE_PROJ_FLAT = 0,
+  NODE_IMAGE_PROJ_BOX = 1,
+  NODE_IMAGE_PROJ_SPHERE = 2,
+  NODE_IMAGE_PROJ_TUBE = 3,
 } NodeImageProjection;
 
 typedef enum NodeEnvironmentProjection {
-	NODE_ENVIRONMENT_EQUIRECTANGULAR = 0,
-	NODE_ENVIRONMENT_MIRROR_BALL = 1,
+  NODE_ENVIRONMENT_EQUIRECTANGULAR = 0,
+  NODE_ENVIRONMENT_MIRROR_BALL = 1,
 } NodeEnvironmentProjection;
 
 typedef enum NodeBumpOffset {
-	NODE_BUMP_OFFSET_CENTER,
-	NODE_BUMP_OFFSET_DX,
-	NODE_BUMP_OFFSET_DY,
+  NODE_BUMP_OFFSET_CENTER,
+  NODE_BUMP_OFFSET_DX,
+  NODE_BUMP_OFFSET_DY,
 } NodeBumpOffset;
 
 typedef enum NodeTexVoxelSpace {
-	NODE_TEX_VOXEL_SPACE_OBJECT = 0,
-	NODE_TEX_VOXEL_SPACE_WORLD  = 1,
+  NODE_TEX_VOXEL_SPACE_OBJECT = 0,
+  NODE_TEX_VOXEL_SPACE_WORLD = 1,
 } NodeTexVoxelSpace;
 
 typedef enum NodeAO {
-	NODE_AO_ONLY_LOCAL = (1 << 0),
-	NODE_AO_INSIDE = (1 << 1),
-	NODE_AO_GLOBAL_RADIUS = (1 << 2),
+  NODE_AO_ONLY_LOCAL = (1 << 0),
+  NODE_AO_INSIDE = (1 << 1),
+  NODE_AO_GLOBAL_RADIUS = (1 << 2),
 } NodeAO;
 
 typedef enum ShaderType {
-	SHADER_TYPE_SURFACE,
-	SHADER_TYPE_VOLUME,
-	SHADER_TYPE_DISPLACEMENT,
-	SHADER_TYPE_BUMP,
+  SHADER_TYPE_SURFACE,
+  SHADER_TYPE_VOLUME,
+  SHADER_TYPE_DISPLACEMENT,
+  SHADER_TYPE_BUMP,
 } ShaderType;
 
 typedef enum NodePrincipledHairParametrization {
-	NODE_PRINCIPLED_HAIR_REFLECTANCE = 0,
-	NODE_PRINCIPLED_HAIR_PIGMENT_CONCENTRATION = 1,
-	NODE_PRINCIPLED_HAIR_DIRECT_ABSORPTION = 2,
-	NODE_PRINCIPLED_HAIR_NUM,
+  NODE_PRINCIPLED_HAIR_REFLECTANCE = 0,
+  NODE_PRINCIPLED_HAIR_PIGMENT_CONCENTRATION = 1,
+  NODE_PRINCIPLED_HAIR_DIRECT_ABSORPTION = 2,
+  NODE_PRINCIPLED_HAIR_NUM,
 } NodePrincipledHairParametrization;
 
 /* Closure */
 
 typedef enum ClosureType {
-	/* Special type, flags generic node as a non-BSDF. */
-	CLOSURE_NONE_ID,
-
-	CLOSURE_BSDF_ID,
-
-	/* Diffuse */
-	CLOSURE_BSDF_DIFFUSE_ID,
-	CLOSURE_BSDF_OREN_NAYAR_ID,
-	CLOSURE_BSDF_DIFFUSE_RAMP_ID,
-	CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID,
-	CLOSURE_BSDF_PRINCIPLED_SHEEN_ID,
-	CLOSURE_BSDF_DIFFUSE_TOON_ID,
-
-	/* Glossy */
-	CLOSURE_BSDF_REFLECTION_ID,
-	CLOSURE_BSDF_MICROFACET_GGX_ID,
-	CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID,
-	CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID,
-	CLOSURE_BSDF_MICROFACET_BECKMANN_ID,
-	CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID,
-	CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID,
-	CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID,
-	CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID,
-	CLOSURE_BSDF_MICROFACET_GGX_ANISO_FRESNEL_ID,
-	CLOSURE_BSDF_MICROFACET_MULTI_GGX_ANISO_ID,
-	CLOSURE_BSDF_MICROFACET_MULTI_GGX_ANISO_FRESNEL_ID,
-	CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID,
-	CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID,
-	CLOSURE_BSDF_ASHIKHMIN_VELVET_ID,
-	CLOSURE_BSDF_PHONG_RAMP_ID,
-	CLOSURE_BSDF_GLOSSY_TOON_ID,
-	CLOSURE_BSDF_HAIR_REFLECTION_ID,
-
-	/* Transmission */
-	CLOSURE_BSDF_TRANSLUCENT_ID,
-	CLOSURE_BSDF_REFRACTION_ID,
-	CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID,
-	CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID,
-	CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID,
-	CLOSURE_BSDF_MICROFACET_BECKMANN_GLASS_ID,
-	CLOSURE_BSDF_MICROFACET_GGX_GLASS_ID,
-	CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID,
-	CLOSURE_BSDF_SHARP_GLASS_ID,
-	CLOSURE_BSDF_HAIR_PRINCIPLED_ID,
-	CLOSURE_BSDF_HAIR_TRANSMISSION_ID,
-
-	/* Special cases */
-	CLOSURE_BSDF_BSSRDF_ID,
-	CLOSURE_BSDF_BSSRDF_PRINCIPLED_ID,
-	CLOSURE_BSDF_TRANSPARENT_ID,
-
-	/* BSSRDF */
-	CLOSURE_BSSRDF_CUBIC_ID,
-	CLOSURE_BSSRDF_GAUSSIAN_ID,
-	CLOSURE_BSSRDF_PRINCIPLED_ID,
-	CLOSURE_BSSRDF_BURLEY_ID,
-	CLOSURE_BSSRDF_RANDOM_WALK_ID,
-	CLOSURE_BSSRDF_PRINCIPLED_RANDOM_WALK_ID,
-
-	/* Other */
-	CLOSURE_HOLDOUT_ID,
-
-	/* Volume */
-	CLOSURE_VOLUME_ID,
-	CLOSURE_VOLUME_ABSORPTION_ID,
-	CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID,
-
-	CLOSURE_BSDF_PRINCIPLED_ID,
-
-	NBUILTIN_CLOSURES
+  /* Special type, flags generic node as a non-BSDF. */
+  CLOSURE_NONE_ID,
+
+  CLOSURE_BSDF_ID,
+
+  /* Diffuse */
+  CLOSURE_BSDF_DIFFUSE_ID,
+  CLOSURE_BSDF_OREN_NAYAR_ID,
+  CLOSURE_BSDF_DIFFUSE_RAMP_ID,
+  CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID,
+  CLOSURE_BSDF_PRINCIPLED_SHEEN_ID,
+  CLOSURE_BSDF_DIFFUSE_TOON_ID,
+
+  /* Glossy */
+  CLOSURE_BSDF_REFLECTION_ID,
+  CLOSURE_BSDF_MICROFACET_GGX_ID,
+  CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID,
+  CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID,
+  CLOSURE_BSDF_MICROFACET_BECKMANN_ID,
+  CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID,
+  CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID,
+  CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID,
+  CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID,
+  CLOSURE_BSDF_MICROFACET_GGX_ANISO_FRESNEL_ID,
+  CLOSURE_BSDF_MICROFACET_MULTI_GGX_ANISO_ID,
+  CLOSURE_BSDF_MICROFACET_MULTI_GGX_ANISO_FRESNEL_ID,
+  CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID,
+  CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID,
+  CLOSURE_BSDF_ASHIKHMIN_VELVET_ID,
+  CLOSURE_BSDF_PHONG_RAMP_ID,
+  CLOSURE_BSDF_GLOSSY_TOON_ID,
+  CLOSURE_BSDF_HAIR_REFLECTION_ID,
+
+  /* Transmission */
+  CLOSURE_BSDF_TRANSLUCENT_ID,
+  CLOSURE_BSDF_REFRACTION_ID,
+  CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID,
+  CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID,
+  CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID,
+  CLOSURE_BSDF_MICROFACET_BECKMANN_GLASS_ID,
+  CLOSURE_BSDF_MICROFACET_GGX_GLASS_ID,
+  CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID,
+  CLOSURE_BSDF_SHARP_GLASS_ID,
+  CLOSURE_BSDF_HAIR_PRINCIPLED_ID,
+  CLOSURE_BSDF_HAIR_TRANSMISSION_ID,
+
+  /* Special cases */
+  CLOSURE_BSDF_BSSRDF_ID,
+  CLOSURE_BSDF_BSSRDF_PRINCIPLED_ID,
+  CLOSURE_BSDF_TRANSPARENT_ID,
+
+  /* BSSRDF */
+  CLOSURE_BSSRDF_CUBIC_ID,
+  CLOSURE_BSSRDF_GAUSSIAN_ID,
+  CLOSURE_BSSRDF_PRINCIPLED_ID,
+  CLOSURE_BSSRDF_BURLEY_ID,
+  CLOSURE_BSSRDF_RANDOM_WALK_ID,
+  CLOSURE_BSSRDF_PRINCIPLED_RANDOM_WALK_ID,
+
+  /* Other */
+  CLOSURE_HOLDOUT_ID,
+
+  /* Volume */
+  CLOSURE_VOLUME_ID,
+  CLOSURE_VOLUME_ABSORPTION_ID,
+  CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID,
+
+  CLOSURE_BSDF_PRINCIPLED_ID,
+
+  NBUILTIN_CLOSURES
 } ClosureType;
 
 /* watch this, being lazy with memory usage */
 #define CLOSURE_IS_BSDF(type) (type <= CLOSURE_BSDF_TRANSPARENT_ID)
-#define CLOSURE_IS_BSDF_DIFFUSE(type) (type >= CLOSURE_BSDF_DIFFUSE_ID && type <= CLOSURE_BSDF_DIFFUSE_TOON_ID)
-#define CLOSURE_IS_BSDF_GLOSSY(type) ((type >= CLOSURE_BSDF_REFLECTION_ID && type <= CLOSURE_BSDF_HAIR_REFLECTION_ID )|| (type == CLOSURE_BSDF_HAIR_PRINCIPLED_ID))
-#define CLOSURE_IS_BSDF_TRANSMISSION(type) (type >= CLOSURE_BSDF_TRANSLUCENT_ID && type <= CLOSURE_BSDF_HAIR_TRANSMISSION_ID)
-#define CLOSURE_IS_BSDF_BSSRDF(type) (type == CLOSURE_BSDF_BSSRDF_ID || type == CLOSURE_BSDF_BSSRDF_PRINCIPLED_ID)
-#define CLOSURE_IS_BSDF_SINGULAR(type) (type == CLOSURE_BSDF_REFLECTION_ID || \
-                                        type == CLOSURE_BSDF_REFRACTION_ID || \
-                                        type == CLOSURE_BSDF_TRANSPARENT_ID)
+#define CLOSURE_IS_BSDF_DIFFUSE(type) \
+  (type >= CLOSURE_BSDF_DIFFUSE_ID && type <= CLOSURE_BSDF_DIFFUSE_TOON_ID)
+#define CLOSURE_IS_BSDF_GLOSSY(type) \
+  ((type >= CLOSURE_BSDF_REFLECTION_ID && type <= CLOSURE_BSDF_HAIR_REFLECTION_ID) || \
+   (type == CLOSURE_BSDF_HAIR_PRINCIPLED_ID))
+#define CLOSURE_IS_BSDF_TRANSMISSION(type) \
+  (type >= CLOSURE_BSDF_TRANSLUCENT_ID && type <= CLOSURE_BSDF_HAIR_TRANSMISSION_ID)
+#define CLOSURE_IS_BSDF_BSSRDF(type) \
+  (type == CLOSURE_BSDF_BSSRDF_ID || type == CLOSURE_BSDF_BSSRDF_PRINCIPLED_ID)
+#define CLOSURE_IS_BSDF_SINGULAR(type) \
+  (type == CLOSURE_BSDF_REFLECTION_ID || type == CLOSURE_BSDF_REFRACTION_ID || \
+   type == CLOSURE_BSDF_TRANSPARENT_ID)
 #define CLOSURE_IS_BSDF_TRANSPARENT(type) (type == CLOSURE_BSDF_TRANSPARENT_ID)
-#define CLOSURE_IS_BSDF_MULTISCATTER(type) (type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID ||\
-                                            type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_ANISO_ID || \
-                                            type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID)
-#define CLOSURE_IS_BSDF_MICROFACET(type) ((type >= CLOSURE_BSDF_MICROFACET_GGX_ID && type <= CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID) ||\
-                                          (type >= CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID && type <= CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID) ||\
-                                          (type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID))
+#define CLOSURE_IS_BSDF_MULTISCATTER(type) \
+  (type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID || \
+   type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_ANISO_ID || \
+   type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID)
+#define CLOSURE_IS_BSDF_MICROFACET(type) \
+  ((type >= CLOSURE_BSDF_MICROFACET_GGX_ID && type <= CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID) || \
+   (type >= CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID && \
+    type <= CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID) || \
+   (type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID))
 #define CLOSURE_IS_BSDF_OR_BSSRDF(type) (type <= CLOSURE_BSSRDF_PRINCIPLED_RANDOM_WALK_ID)
-#define CLOSURE_IS_BSSRDF(type) (type >= CLOSURE_BSSRDF_CUBIC_ID && type <= CLOSURE_BSSRDF_PRINCIPLED_RANDOM_WALK_ID)
-#define CLOSURE_IS_DISK_BSSRDF(type) (type >= CLOSURE_BSSRDF_CUBIC_ID && type <= CLOSURE_BSSRDF_BURLEY_ID)
-#define CLOSURE_IS_VOLUME(type) (type >= CLOSURE_VOLUME_ID && type <= CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID)
+#define CLOSURE_IS_BSSRDF(type) \
+  (type >= CLOSURE_BSSRDF_CUBIC_ID && type <= CLOSURE_BSSRDF_PRINCIPLED_RANDOM_WALK_ID)
+#define CLOSURE_IS_DISK_BSSRDF(type) \
+  (type >= CLOSURE_BSSRDF_CUBIC_ID && type <= CLOSURE_BSSRDF_BURLEY_ID)
+#define CLOSURE_IS_VOLUME(type) \
+  (type >= CLOSURE_VOLUME_ID && type <= CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID)
 #define CLOSURE_IS_VOLUME_SCATTER(type) (type == CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID)
 #define CLOSURE_IS_VOLUME_ABSORPTION(type) (type == CLOSURE_VOLUME_ABSORPTION_ID)
 #define CLOSURE_IS_HOLDOUT(type) (type == CLOSURE_HOLDOUT_ID)
 #define CLOSURE_IS_PHASE(type) (type == CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID)
-#define CLOSURE_IS_GLASS(type) (type >= CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID && type <= CLOSURE_BSDF_SHARP_GLASS_ID)
+#define CLOSURE_IS_GLASS(type) \
+  (type >= CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID && type <= CLOSURE_BSDF_SHARP_GLASS_ID)
 #define CLOSURE_IS_PRINCIPLED(type) (type == CLOSURE_BSDF_PRINCIPLED_ID)
 
 #define CLOSURE_WEIGHT_CUTOFF 1e-5f
 
 CCL_NAMESPACE_END
 
-#endif  /*  __SVM_TYPES_H__ */
+#endif /*  __SVM_TYPES_H__ */
diff --git a/intern/cycles/kernel/svm/svm_value.h b/intern/cycles/kernel/svm/svm_value.h
index 062aee2956e..5b76f2c8832 100644
--- a/intern/cycles/kernel/svm/svm_value.h
+++ b/intern/cycles/kernel/svm/svm_value.h
@@ -18,18 +18,21 @@ CCL_NAMESPACE_BEGIN
 
 /* Value Nodes */
 
-ccl_device void svm_node_value_f(KernelGlobals *kg, ShaderData *sd, float *stack, uint ivalue, uint out_offset)
+ccl_device void svm_node_value_f(
+    KernelGlobals *kg, ShaderData *sd, float *stack, uint ivalue, uint out_offset)
 {
-	stack_store_float(stack, out_offset, __uint_as_float(ivalue));
+  stack_store_float(stack, out_offset, __uint_as_float(ivalue));
 }
 
-ccl_device void svm_node_value_v(KernelGlobals *kg, ShaderData *sd, float *stack, uint out_offset, int *offset)
+ccl_device void svm_node_value_v(
+    KernelGlobals *kg, ShaderData *sd, float *stack, uint out_offset, int *offset)
 {
-	/* read extra data */
-	uint4 node1 = read_node(kg, offset);
-	float3 p = make_float3(__uint_as_float(node1.y), __uint_as_float(node1.z), __uint_as_float(node1.w));
+  /* read extra data */
+  uint4 node1 = read_node(kg, offset);
+  float3 p = make_float3(
+      __uint_as_float(node1.y), __uint_as_float(node1.z), __uint_as_float(node1.w));
 
-	stack_store_float3(stack, out_offset, p);
+  stack_store_float3(stack, out_offset, p);
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_vector_transform.h b/intern/cycles/kernel/svm/svm_vector_transform.h
index f6ec36ba41f..7ec0f07f2e4 100644
--- a/intern/cycles/kernel/svm/svm_vector_transform.h
+++ b/intern/cycles/kernel/svm/svm_vector_transform.h
@@ -18,83 +18,90 @@ CCL_NAMESPACE_BEGIN
 
 /* Vector Transform */
 
-ccl_device void svm_node_vector_transform(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
+ccl_device void svm_node_vector_transform(KernelGlobals *kg,
+                                          ShaderData *sd,
+                                          float *stack,
+                                          uint4 node)
 {
-	uint itype, ifrom, ito;
-	uint vector_in, vector_out;
+  uint itype, ifrom, ito;
+  uint vector_in, vector_out;
 
-	decode_node_uchar4(node.y, &itype, &ifrom, &ito, NULL);
-	decode_node_uchar4(node.z, &vector_in, &vector_out, NULL, NULL);
+  decode_node_uchar4(node.y, &itype, &ifrom, &ito, NULL);
+  decode_node_uchar4(node.z, &vector_in, &vector_out, NULL, NULL);
 
-	float3 in = stack_load_float3(stack, vector_in);
+  float3 in = stack_load_float3(stack, vector_in);
 
-	NodeVectorTransformType type = (NodeVectorTransformType)itype;
-	NodeVectorTransformConvertSpace from = (NodeVectorTransformConvertSpace)ifrom;
-	NodeVectorTransformConvertSpace to = (NodeVectorTransformConvertSpace)ito;
+  NodeVectorTransformType type = (NodeVectorTransformType)itype;
+  NodeVectorTransformConvertSpace from = (NodeVectorTransformConvertSpace)ifrom;
+  NodeVectorTransformConvertSpace to = (NodeVectorTransformConvertSpace)ito;
 
-	Transform tfm;
-	bool is_object = (sd->object != OBJECT_NONE);
-	bool is_direction = (type == NODE_VECTOR_TRANSFORM_TYPE_VECTOR || type == NODE_VECTOR_TRANSFORM_TYPE_NORMAL);
+  Transform tfm;
+  bool is_object = (sd->object != OBJECT_NONE);
+  bool is_direction = (type == NODE_VECTOR_TRANSFORM_TYPE_VECTOR ||
+                       type == NODE_VECTOR_TRANSFORM_TYPE_NORMAL);
 
-	/* From world */
-	if(from == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_WORLD) {
-		if(to == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_CAMERA) {
-			tfm = kernel_data.cam.worldtocamera;
-			if(is_direction)
-				in = transform_direction(&tfm, in);
-			else
-				in = transform_point(&tfm, in);
-		}
-		else if(to == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_OBJECT && is_object) {
-			if(is_direction)
-				object_inverse_dir_transform(kg, sd, &in);
-			else
-				object_inverse_position_transform(kg, sd, &in);
-		}
-	}
+  /* From world */
+  if (from == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_WORLD) {
+    if (to == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_CAMERA) {
+      tfm = kernel_data.cam.worldtocamera;
+      if (is_direction)
+        in = transform_direction(&tfm, in);
+      else
+        in = transform_point(&tfm, in);
+    }
+    else if (to == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_OBJECT && is_object) {
+      if (is_direction)
+        object_inverse_dir_transform(kg, sd, &in);
+      else
+        object_inverse_position_transform(kg, sd, &in);
+    }
+  }
 
-	/* From camera */
-	else if(from == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_CAMERA) {
-		if(to == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_WORLD || to == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_OBJECT) {
-			tfm = kernel_data.cam.cameratoworld;
-			if(is_direction)
-				in = transform_direction(&tfm, in);
-			else
-				in = transform_point(&tfm, in);
-		}
-		if(to == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_OBJECT && is_object) {
-			if(is_direction)
-				object_inverse_dir_transform(kg, sd, &in);
-			else
-				object_inverse_position_transform(kg, sd, &in);
-		}
-	}
+  /* From camera */
+  else if (from == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_CAMERA) {
+    if (to == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_WORLD ||
+        to == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_OBJECT) {
+      tfm = kernel_data.cam.cameratoworld;
+      if (is_direction)
+        in = transform_direction(&tfm, in);
+      else
+        in = transform_point(&tfm, in);
+    }
+    if (to == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_OBJECT && is_object) {
+      if (is_direction)
+        object_inverse_dir_transform(kg, sd, &in);
+      else
+        object_inverse_position_transform(kg, sd, &in);
+    }
+  }
 
-	/* From object */
-	else if(from == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_OBJECT) {
-		if((to == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_WORLD || to == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_CAMERA) && is_object) {
-			if(is_direction)
-				object_dir_transform(kg, sd, &in);
-			else
-				object_position_transform(kg, sd, &in);
-		}
-		if(to == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_CAMERA) {
-			tfm = kernel_data.cam.worldtocamera;
-			if(is_direction)
-				in = transform_direction(&tfm, in);
-			else
-				in = transform_point(&tfm, in);
-		}
-	}
+  /* From object */
+  else if (from == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_OBJECT) {
+    if ((to == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_WORLD ||
+         to == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_CAMERA) &&
+        is_object) {
+      if (is_direction)
+        object_dir_transform(kg, sd, &in);
+      else
+        object_position_transform(kg, sd, &in);
+    }
+    if (to == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_CAMERA) {
+      tfm = kernel_data.cam.worldtocamera;
+      if (is_direction)
+        in = transform_direction(&tfm, in);
+      else
+        in = transform_point(&tfm, in);
+    }
+  }
 
-	/* Normalize Normal */
-	if(type == NODE_VECTOR_TRANSFORM_TYPE_NORMAL)
-		in = normalize(in);
+  /* Normalize Normal */
+  if (type == NODE_VECTOR_TRANSFORM_TYPE_NORMAL)
+    in = normalize(in);
 
-	/* Output */
-	if(stack_valid(vector_out)) {
-		stack_store_float3(stack, vector_out, in);
-	}
+  /* Output */
+  if (stack_valid(vector_out)) {
+    stack_store_float3(stack, vector_out, in);
+  }
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_voronoi.h b/intern/cycles/kernel/svm/svm_voronoi.h
index d661df54ead..c311aefaf38 100644
--- a/intern/cycles/kernel/svm/svm_voronoi.h
+++ b/intern/cycles/kernel/svm/svm_voronoi.h
@@ -18,143 +18,167 @@ CCL_NAMESPACE_BEGIN
 
 /* Voronoi */
 
-ccl_device void voronoi_neighbors(float3 p, NodeVoronoiDistanceMetric distance, float e, float da[4], float3 pa[4])
+ccl_device void voronoi_neighbors(
+    float3 p, NodeVoronoiDistanceMetric distance, float e, float da[4], float3 pa[4])
 {
-	/* Compute the distance to and the position of the closest neighbors to p.
-	 *
-	 * The neighbors are randomly placed, 1 each in a 3x3x3 grid (Worley pattern).
-	 * The distances and points are returned in ascending order, i.e. da[0] and pa[0] will
-	 * contain the distance to the closest point and its coordinates respectively.
-	 */
-
-	da[0] = 1e10f;
-	da[1] = 1e10f;
-	da[2] = 1e10f;
-	da[3] = 1e10f;
-
-	pa[0] = make_float3(0.0f, 0.0f, 0.0f);
-	pa[1] = make_float3(0.0f, 0.0f, 0.0f);
-	pa[2] = make_float3(0.0f, 0.0f, 0.0f);
-	pa[3] = make_float3(0.0f, 0.0f, 0.0f);
-
-	int3 xyzi = quick_floor_to_int3(p);
-
-	for(int xx = -1; xx <= 1; xx++) {
-		for(int yy = -1; yy <= 1; yy++) {
-			for(int zz = -1; zz <= 1; zz++) {
-				int3 ip = xyzi + make_int3(xx, yy, zz);
-				float3 fp = make_float3(ip.x, ip.y, ip.z);
-				float3 vp = fp + cellnoise3(fp);
-
-				float d;
-				switch(distance) {
-					case NODE_VORONOI_DISTANCE:
-						d = len_squared(p - vp);
-						break;
-					case NODE_VORONOI_MANHATTAN:
-						d = reduce_add(fabs(vp - p));
-						break;
-					case NODE_VORONOI_CHEBYCHEV:
-						d = max3(fabs(vp - p));
-						break;
-					case NODE_VORONOI_MINKOWSKI: {
-						float3 n = fabs(vp - p);
-						if(e == 0.5f) {
-							d = sqr(reduce_add(sqrt(n)));
-						}
-						else {
-							d = powf(reduce_add(pow3(n, e)), 1.0f/e);
-						}
-						break;
-					}
-				}
-
-				/* To keep the shortest four distances and associated points we have to keep them in sorted order. */
-				if(d < da[0]) {
-					da[3] = da[2];
-					da[2] = da[1];
-					da[1] = da[0];
-					da[0] = d;
-
-					pa[3] = pa[2];
-					pa[2] = pa[1];
-					pa[1] = pa[0];
-					pa[0] = vp;
-				}
-				else if(d < da[1]) {
-					da[3] = da[2];
-					da[2] = da[1];
-					da[1] = d;
-
-					pa[3] = pa[2];
-					pa[2] = pa[1];
-					pa[1] = vp;
-				}
-				else if(d < da[2]) {
-					da[3] = da[2];
-					da[2] = d;
-
-					pa[3] = pa[2];
-					pa[2] = vp;
-				}
-				else if(d < da[3]) {
-					da[3] = d;
-					pa[3] = vp;
-				}
-			}
-		}
-	}
+  /* Compute the distance to and the position of the closest neighbors to p.
+   *
+   * The neighbors are randomly placed, 1 each in a 3x3x3 grid (Worley pattern).
+   * The distances and points are returned in ascending order, i.e. da[0] and pa[0] will
+   * contain the distance to the closest point and its coordinates respectively.
+   */
+
+  da[0] = 1e10f;
+  da[1] = 1e10f;
+  da[2] = 1e10f;
+  da[3] = 1e10f;
+
+  pa[0] = make_float3(0.0f, 0.0f, 0.0f);
+  pa[1] = make_float3(0.0f, 0.0f, 0.0f);
+  pa[2] = make_float3(0.0f, 0.0f, 0.0f);
+  pa[3] = make_float3(0.0f, 0.0f, 0.0f);
+
+  int3 xyzi = quick_floor_to_int3(p);
+
+  for (int xx = -1; xx <= 1; xx++) {
+    for (int yy = -1; yy <= 1; yy++) {
+      for (int zz = -1; zz <= 1; zz++) {
+        int3 ip = xyzi + make_int3(xx, yy, zz);
+        float3 fp = make_float3(ip.x, ip.y, ip.z);
+        float3 vp = fp + cellnoise3(fp);
+
+        float d;
+        switch (distance) {
+          case NODE_VORONOI_DISTANCE:
+            d = len_squared(p - vp);
+            break;
+          case NODE_VORONOI_MANHATTAN:
+            d = reduce_add(fabs(vp - p));
+            break;
+          case NODE_VORONOI_CHEBYCHEV:
+            d = max3(fabs(vp - p));
+            break;
+          case NODE_VORONOI_MINKOWSKI: {
+            float3 n = fabs(vp - p);
+            if (e == 0.5f) {
+              d = sqr(reduce_add(sqrt(n)));
+            }
+            else {
+              d = powf(reduce_add(pow3(n, e)), 1.0f / e);
+            }
+            break;
+          }
+        }
+
+        /* To keep the shortest four distances and associated points we have to keep them in sorted order. */
+        if (d < da[0]) {
+          da[3] = da[2];
+          da[2] = da[1];
+          da[1] = da[0];
+          da[0] = d;
+
+          pa[3] = pa[2];
+          pa[2] = pa[1];
+          pa[1] = pa[0];
+          pa[0] = vp;
+        }
+        else if (d < da[1]) {
+          da[3] = da[2];
+          da[2] = da[1];
+          da[1] = d;
+
+          pa[3] = pa[2];
+          pa[2] = pa[1];
+          pa[1] = vp;
+        }
+        else if (d < da[2]) {
+          da[3] = da[2];
+          da[2] = d;
+
+          pa[3] = pa[2];
+          pa[2] = vp;
+        }
+        else if (d < da[3]) {
+          da[3] = d;
+          pa[3] = vp;
+        }
+      }
+    }
+  }
 }
 
-ccl_device void svm_node_tex_voronoi(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset)
+ccl_device void svm_node_tex_voronoi(
+    KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset)
 {
-	uint4 node2 = read_node(kg, offset);
-
-	uint co_offset, coloring, distance, feature;
-	uint scale_offset, e_offset, fac_offset, color_offset;
-
-	decode_node_uchar4(node.y, &co_offset, &coloring, &distance, &feature);
-	decode_node_uchar4(node.z, &scale_offset, &e_offset, &fac_offset, &color_offset);
-
-	float3 co = stack_load_float3(stack, co_offset);
-	float scale = stack_load_float_default(stack, scale_offset, node2.x);
-	float exponent = stack_load_float_default(stack, e_offset, node2.y);
-
-	float dist[4];
-	float3 neighbor[4];
-	voronoi_neighbors(co*scale, (NodeVoronoiDistanceMetric)distance, exponent, dist, neighbor);
-
-	float3 color;
-	float fac;
-	if(coloring == NODE_VORONOI_INTENSITY) {
-		switch(feature) {
-			case NODE_VORONOI_F1: fac = dist[0]; break;
-			case NODE_VORONOI_F2: fac = dist[1]; break;
-			case NODE_VORONOI_F3: fac = dist[2]; break;
-			case NODE_VORONOI_F4: fac = dist[3]; break;
-			case NODE_VORONOI_F2F1: fac = dist[1] - dist[0]; break;
-		}
-
-		color = make_float3(fac, fac, fac);
-	}
-	else {
-		 /* NODE_VORONOI_CELLS */
-		switch(feature) {
-			case NODE_VORONOI_F1: color = neighbor[0]; break;
-			case NODE_VORONOI_F2: color = neighbor[1]; break;
-			case NODE_VORONOI_F3: color = neighbor[2]; break;
-			case NODE_VORONOI_F4: color = neighbor[3]; break;
-			/* Usefulness of this vector is questionable. Note F2 >= F1 but the
-			 * individual vector components might not be. */
-			case NODE_VORONOI_F2F1: color = fabs(neighbor[1] - neighbor[0]); break;
-		}
-
-		color = cellnoise3(color);
-		fac = average(color);
-	}
-
-	if(stack_valid(fac_offset)) stack_store_float(stack, fac_offset, fac);
-	if(stack_valid(color_offset)) stack_store_float3(stack, color_offset, color);
+  uint4 node2 = read_node(kg, offset);
+
+  uint co_offset, coloring, distance, feature;
+  uint scale_offset, e_offset, fac_offset, color_offset;
+
+  decode_node_uchar4(node.y, &co_offset, &coloring, &distance, &feature);
+  decode_node_uchar4(node.z, &scale_offset, &e_offset, &fac_offset, &color_offset);
+
+  float3 co = stack_load_float3(stack, co_offset);
+  float scale = stack_load_float_default(stack, scale_offset, node2.x);
+  float exponent = stack_load_float_default(stack, e_offset, node2.y);
+
+  float dist[4];
+  float3 neighbor[4];
+  voronoi_neighbors(co * scale, (NodeVoronoiDistanceMetric)distance, exponent, dist, neighbor);
+
+  float3 color;
+  float fac;
+  if (coloring == NODE_VORONOI_INTENSITY) {
+    switch (feature) {
+      case NODE_VORONOI_F1:
+        fac = dist[0];
+        break;
+      case NODE_VORONOI_F2:
+        fac = dist[1];
+        break;
+      case NODE_VORONOI_F3:
+        fac = dist[2];
+        break;
+      case NODE_VORONOI_F4:
+        fac = dist[3];
+        break;
+      case NODE_VORONOI_F2F1:
+        fac = dist[1] - dist[0];
+        break;
+    }
+
+    color = make_float3(fac, fac, fac);
+  }
+  else {
+    /* NODE_VORONOI_CELLS */
+    switch (feature) {
+      case NODE_VORONOI_F1:
+        color = neighbor[0];
+        break;
+      case NODE_VORONOI_F2:
+        color = neighbor[1];
+        break;
+      case NODE_VORONOI_F3:
+        color = neighbor[2];
+        break;
+      case NODE_VORONOI_F4:
+        color = neighbor[3];
+        break;
+      /* Usefulness of this vector is questionable. Note F2 >= F1 but the
+       * individual vector components might not be. */
+      case NODE_VORONOI_F2F1:
+        color = fabs(neighbor[1] - neighbor[0]);
+        break;
+    }
+
+    color = cellnoise3(color);
+    fac = average(color);
+  }
+
+  if (stack_valid(fac_offset))
+    stack_store_float(stack, fac_offset, fac);
+  if (stack_valid(color_offset))
+    stack_store_float3(stack, color_offset, color);
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_voxel.h b/intern/cycles/kernel/svm/svm_voxel.h
index 43b433683e0..26d8cc71d3b 100644
--- a/intern/cycles/kernel/svm/svm_voxel.h
+++ b/intern/cycles/kernel/svm/svm_voxel.h
@@ -19,37 +19,34 @@ CCL_NAMESPACE_BEGIN
 /* TODO(sergey): Think of making it more generic volume-type attribute
  * sampler.
  */
-ccl_device void svm_node_tex_voxel(KernelGlobals *kg,
-                                   ShaderData *sd,
-                                   float *stack,
-                                   uint4 node,
-                                   int *offset)
+ccl_device void svm_node_tex_voxel(
+    KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset)
 {
-	uint co_offset, density_out_offset, color_out_offset, space;
-	decode_node_uchar4(node.z, &co_offset, &density_out_offset, &color_out_offset, &space);
+  uint co_offset, density_out_offset, color_out_offset, space;
+  decode_node_uchar4(node.z, &co_offset, &density_out_offset, &color_out_offset, &space);
 #ifdef __VOLUME__
-	int id = node.y;
-	float3 co = stack_load_float3(stack, co_offset);
-	if(space == NODE_TEX_VOXEL_SPACE_OBJECT) {
-		co = volume_normalized_position(kg, sd, co);
-	}
-	else {
-		kernel_assert(space == NODE_TEX_VOXEL_SPACE_WORLD);
-		Transform tfm;
-		tfm.x = read_node_float(kg, offset);
-		tfm.y = read_node_float(kg, offset);
-		tfm.z = read_node_float(kg, offset);
-		co = transform_point(&tfm, co);
-	}
+  int id = node.y;
+  float3 co = stack_load_float3(stack, co_offset);
+  if (space == NODE_TEX_VOXEL_SPACE_OBJECT) {
+    co = volume_normalized_position(kg, sd, co);
+  }
+  else {
+    kernel_assert(space == NODE_TEX_VOXEL_SPACE_WORLD);
+    Transform tfm;
+    tfm.x = read_node_float(kg, offset);
+    tfm.y = read_node_float(kg, offset);
+    tfm.z = read_node_float(kg, offset);
+    co = transform_point(&tfm, co);
+  }
 
-	float4 r = kernel_tex_image_interp_3d(kg, id, co.x, co.y, co.z, INTERPOLATION_NONE);
+  float4 r = kernel_tex_image_interp_3d(kg, id, co.x, co.y, co.z, INTERPOLATION_NONE);
 #else
-	float4 r = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+  float4 r = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
 #endif
-	if(stack_valid(density_out_offset))
-		stack_store_float(stack, density_out_offset, r.w);
-	if(stack_valid(color_out_offset))
-		stack_store_float3(stack, color_out_offset, make_float3(r.x, r.y, r.z));
+  if (stack_valid(density_out_offset))
+    stack_store_float(stack, density_out_offset, r.w);
+  if (stack_valid(color_out_offset))
+    stack_store_float3(stack, color_out_offset, make_float3(r.x, r.y, r.z));
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_wave.h b/intern/cycles/kernel/svm/svm_wave.h
index 80b63dc80cd..003ad7dc63a 100644
--- a/intern/cycles/kernel/svm/svm_wave.h
+++ b/intern/cycles/kernel/svm/svm_wave.h
@@ -18,48 +18,58 @@ CCL_NAMESPACE_BEGIN
 
 /* Wave */
 
-ccl_device_noinline float svm_wave(NodeWaveType type, NodeWaveProfile profile, float3 p, float detail, float distortion, float dscale)
+ccl_device_noinline float svm_wave(NodeWaveType type,
+                                   NodeWaveProfile profile,
+                                   float3 p,
+                                   float detail,
+                                   float distortion,
+                                   float dscale)
 {
-	float n;
+  float n;
 
-	if(type == NODE_WAVE_BANDS)
-		n = (p.x + p.y + p.z) * 10.0f;
-	else  /* NODE_WAVE_RINGS */
-		n = len(p) * 20.0f;
+  if (type == NODE_WAVE_BANDS)
+    n = (p.x + p.y + p.z) * 10.0f;
+  else /* NODE_WAVE_RINGS */
+    n = len(p) * 20.0f;
 
-	if(distortion != 0.0f)
-		n += distortion * noise_turbulence(p*dscale, detail, 0);
+  if (distortion != 0.0f)
+    n += distortion * noise_turbulence(p * dscale, detail, 0);
 
-	if(profile == NODE_WAVE_PROFILE_SIN) {
-		return 0.5f + 0.5f * sinf(n);
-	}
-	else { /* NODE_WAVE_PROFILE_SAW */
-		n /= M_2PI_F;
-		n -= (int) n;
-		return (n < 0.0f)? n + 1.0f: n;
-	}
+  if (profile == NODE_WAVE_PROFILE_SIN) {
+    return 0.5f + 0.5f * sinf(n);
+  }
+  else { /* NODE_WAVE_PROFILE_SAW */
+    n /= M_2PI_F;
+    n -= (int)n;
+    return (n < 0.0f) ? n + 1.0f : n;
+  }
 }
 
-ccl_device void svm_node_tex_wave(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset)
+ccl_device void svm_node_tex_wave(
+    KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset)
 {
-	uint4 node2 = read_node(kg, offset);
+  uint4 node2 = read_node(kg, offset);
 
-	uint type;
-	uint co_offset, scale_offset, detail_offset, dscale_offset, distortion_offset, color_offset, fac_offset;
+  uint type;
+  uint co_offset, scale_offset, detail_offset, dscale_offset, distortion_offset, color_offset,
+      fac_offset;
 
-	decode_node_uchar4(node.y, &type, &color_offset, &fac_offset, &dscale_offset);
-	decode_node_uchar4(node.z, &co_offset, &scale_offset, &detail_offset, &distortion_offset);
+  decode_node_uchar4(node.y, &type, &color_offset, &fac_offset, &dscale_offset);
+  decode_node_uchar4(node.z, &co_offset, &scale_offset, &detail_offset, &distortion_offset);
 
-	float3 co = stack_load_float3(stack, co_offset);
-	float scale = stack_load_float_default(stack, scale_offset, node2.x);
-	float detail = stack_load_float_default(stack, detail_offset, node2.y);
-	float distortion = stack_load_float_default(stack, distortion_offset, node2.z);
-	float dscale = stack_load_float_default(stack, dscale_offset, node2.w);
+  float3 co = stack_load_float3(stack, co_offset);
+  float scale = stack_load_float_default(stack, scale_offset, node2.x);
+  float detail = stack_load_float_default(stack, detail_offset, node2.y);
+  float distortion = stack_load_float_default(stack, distortion_offset, node2.z);
+  float dscale = stack_load_float_default(stack, dscale_offset, node2.w);
 
-	float f = svm_wave((NodeWaveType)type, (NodeWaveProfile)node.w, co*scale, detail, distortion, dscale);
+  float f = svm_wave(
+      (NodeWaveType)type, (NodeWaveProfile)node.w, co * scale, detail, distortion, dscale);
 
-	if(stack_valid(fac_offset)) stack_store_float(stack, fac_offset, f);
-	if(stack_valid(color_offset)) stack_store_float3(stack, color_offset, make_float3(f, f, f));
+  if (stack_valid(fac_offset))
+    stack_store_float(stack, fac_offset, f);
+  if (stack_valid(color_offset))
+    stack_store_float3(stack, color_offset, make_float3(f, f, f));
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_wavelength.h b/intern/cycles/kernel/svm/svm_wavelength.h
index e935fd20690..d6144802559 100644
--- a/intern/cycles/kernel/svm/svm_wavelength.h
+++ b/intern/cycles/kernel/svm/svm_wavelength.h
@@ -10,13 +10,13 @@
  * modification, are permitted provided that the following conditions are
  * met:
  * * Redistributions of source code must retain the above copyright
- *	 notice, this list of conditions and the following disclaimer.
+ *   notice, this list of conditions and the following disclaimer.
  * * Redistributions in binary form must reproduce the above copyright
- *	 notice, this list of conditions and the following disclaimer in the
- *	 documentation and/or other materials provided with the distribution.
+ *   notice, this list of conditions and the following disclaimer in the
+ *   documentation and/or other materials provided with the distribution.
  * * Neither the name of Sony Pictures Imageworks nor the names of its
- *	 contributors may be used to endorse or promote products derived from
- *	 this software without specific prior written permission.
+ *   contributors may be used to endorse or promote products derived from
+ *   this software without specific prior written permission.
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -35,64 +35,64 @@ CCL_NAMESPACE_BEGIN
 /* Wavelength to RGB */
 
 // CIE colour matching functions xBar, yBar, and zBar for
-//	 wavelengths from 380 through 780 nanometers, every 5
-//	 nanometers.  For a wavelength lambda in this range:
-//		  cie_colour_match[(lambda - 380) / 5][0] = xBar
-//		  cie_colour_match[(lambda - 380) / 5][1] = yBar
-//		  cie_colour_match[(lambda - 380) / 5][2] = zBar
+//   wavelengths from 380 through 780 nanometers, every 5
+//   nanometers.  For a wavelength lambda in this range:
+//        cie_colour_match[(lambda - 380) / 5][0] = xBar
+//        cie_colour_match[(lambda - 380) / 5][1] = yBar
+//        cie_colour_match[(lambda - 380) / 5][2] = zBar
 ccl_static_constant float cie_colour_match[81][3] = {
-	{0.0014f,0.0000f,0.0065f}, {0.0022f,0.0001f,0.0105f}, {0.0042f,0.0001f,0.0201f},
-	{0.0076f,0.0002f,0.0362f}, {0.0143f,0.0004f,0.0679f}, {0.0232f,0.0006f,0.1102f},
-	{0.0435f,0.0012f,0.2074f}, {0.0776f,0.0022f,0.3713f}, {0.1344f,0.0040f,0.6456f},
-	{0.2148f,0.0073f,1.0391f}, {0.2839f,0.0116f,1.3856f}, {0.3285f,0.0168f,1.6230f},
-	{0.3483f,0.0230f,1.7471f}, {0.3481f,0.0298f,1.7826f}, {0.3362f,0.0380f,1.7721f},
-	{0.3187f,0.0480f,1.7441f}, {0.2908f,0.0600f,1.6692f}, {0.2511f,0.0739f,1.5281f},
-	{0.1954f,0.0910f,1.2876f}, {0.1421f,0.1126f,1.0419f}, {0.0956f,0.1390f,0.8130f},
-	{0.0580f,0.1693f,0.6162f}, {0.0320f,0.2080f,0.4652f}, {0.0147f,0.2586f,0.3533f},
-	{0.0049f,0.3230f,0.2720f}, {0.0024f,0.4073f,0.2123f}, {0.0093f,0.5030f,0.1582f},
-	{0.0291f,0.6082f,0.1117f}, {0.0633f,0.7100f,0.0782f}, {0.1096f,0.7932f,0.0573f},
-	{0.1655f,0.8620f,0.0422f}, {0.2257f,0.9149f,0.0298f}, {0.2904f,0.9540f,0.0203f},
-	{0.3597f,0.9803f,0.0134f}, {0.4334f,0.9950f,0.0087f}, {0.5121f,1.0000f,0.0057f},
-	{0.5945f,0.9950f,0.0039f}, {0.6784f,0.9786f,0.0027f}, {0.7621f,0.9520f,0.0021f},
-	{0.8425f,0.9154f,0.0018f}, {0.9163f,0.8700f,0.0017f}, {0.9786f,0.8163f,0.0014f},
-	{1.0263f,0.7570f,0.0011f}, {1.0567f,0.6949f,0.0010f}, {1.0622f,0.6310f,0.0008f},
-	{1.0456f,0.5668f,0.0006f}, {1.0026f,0.5030f,0.0003f}, {0.9384f,0.4412f,0.0002f},
-	{0.8544f,0.3810f,0.0002f}, {0.7514f,0.3210f,0.0001f}, {0.6424f,0.2650f,0.0000f},
-	{0.5419f,0.2170f,0.0000f}, {0.4479f,0.1750f,0.0000f}, {0.3608f,0.1382f,0.0000f},
-	{0.2835f,0.1070f,0.0000f}, {0.2187f,0.0816f,0.0000f}, {0.1649f,0.0610f,0.0000f},
-	{0.1212f,0.0446f,0.0000f}, {0.0874f,0.0320f,0.0000f}, {0.0636f,0.0232f,0.0000f},
-	{0.0468f,0.0170f,0.0000f}, {0.0329f,0.0119f,0.0000f}, {0.0227f,0.0082f,0.0000f},
-	{0.0158f,0.0057f,0.0000f}, {0.0114f,0.0041f,0.0000f}, {0.0081f,0.0029f,0.0000f},
-	{0.0058f,0.0021f,0.0000f}, {0.0041f,0.0015f,0.0000f}, {0.0029f,0.0010f,0.0000f},
-	{0.0020f,0.0007f,0.0000f}, {0.0014f,0.0005f,0.0000f}, {0.0010f,0.0004f,0.0000f},
-	{0.0007f,0.0002f,0.0000f}, {0.0005f,0.0002f,0.0000f}, {0.0003f,0.0001f,0.0000f},
-	{0.0002f,0.0001f,0.0000f}, {0.0002f,0.0001f,0.0000f}, {0.0001f,0.0000f,0.0000f},
-	{0.0001f,0.0000f,0.0000f}, {0.0001f,0.0000f,0.0000f}, {0.0000f,0.0000f,0.0000f}
-};
+    {0.0014f, 0.0000f, 0.0065f}, {0.0022f, 0.0001f, 0.0105f}, {0.0042f, 0.0001f, 0.0201f},
+    {0.0076f, 0.0002f, 0.0362f}, {0.0143f, 0.0004f, 0.0679f}, {0.0232f, 0.0006f, 0.1102f},
+    {0.0435f, 0.0012f, 0.2074f}, {0.0776f, 0.0022f, 0.3713f}, {0.1344f, 0.0040f, 0.6456f},
+    {0.2148f, 0.0073f, 1.0391f}, {0.2839f, 0.0116f, 1.3856f}, {0.3285f, 0.0168f, 1.6230f},
+    {0.3483f, 0.0230f, 1.7471f}, {0.3481f, 0.0298f, 1.7826f}, {0.3362f, 0.0380f, 1.7721f},
+    {0.3187f, 0.0480f, 1.7441f}, {0.2908f, 0.0600f, 1.6692f}, {0.2511f, 0.0739f, 1.5281f},
+    {0.1954f, 0.0910f, 1.2876f}, {0.1421f, 0.1126f, 1.0419f}, {0.0956f, 0.1390f, 0.8130f},
+    {0.0580f, 0.1693f, 0.6162f}, {0.0320f, 0.2080f, 0.4652f}, {0.0147f, 0.2586f, 0.3533f},
+    {0.0049f, 0.3230f, 0.2720f}, {0.0024f, 0.4073f, 0.2123f}, {0.0093f, 0.5030f, 0.1582f},
+    {0.0291f, 0.6082f, 0.1117f}, {0.0633f, 0.7100f, 0.0782f}, {0.1096f, 0.7932f, 0.0573f},
+    {0.1655f, 0.8620f, 0.0422f}, {0.2257f, 0.9149f, 0.0298f}, {0.2904f, 0.9540f, 0.0203f},
+    {0.3597f, 0.9803f, 0.0134f}, {0.4334f, 0.9950f, 0.0087f}, {0.5121f, 1.0000f, 0.0057f},
+    {0.5945f, 0.9950f, 0.0039f}, {0.6784f, 0.9786f, 0.0027f}, {0.7621f, 0.9520f, 0.0021f},
+    {0.8425f, 0.9154f, 0.0018f}, {0.9163f, 0.8700f, 0.0017f}, {0.9786f, 0.8163f, 0.0014f},
+    {1.0263f, 0.7570f, 0.0011f}, {1.0567f, 0.6949f, 0.0010f}, {1.0622f, 0.6310f, 0.0008f},
+    {1.0456f, 0.5668f, 0.0006f}, {1.0026f, 0.5030f, 0.0003f}, {0.9384f, 0.4412f, 0.0002f},
+    {0.8544f, 0.3810f, 0.0002f}, {0.7514f, 0.3210f, 0.0001f}, {0.6424f, 0.2650f, 0.0000f},
+    {0.5419f, 0.2170f, 0.0000f}, {0.4479f, 0.1750f, 0.0000f}, {0.3608f, 0.1382f, 0.0000f},
+    {0.2835f, 0.1070f, 0.0000f}, {0.2187f, 0.0816f, 0.0000f}, {0.1649f, 0.0610f, 0.0000f},
+    {0.1212f, 0.0446f, 0.0000f}, {0.0874f, 0.0320f, 0.0000f}, {0.0636f, 0.0232f, 0.0000f},
+    {0.0468f, 0.0170f, 0.0000f}, {0.0329f, 0.0119f, 0.0000f}, {0.0227f, 0.0082f, 0.0000f},
+    {0.0158f, 0.0057f, 0.0000f}, {0.0114f, 0.0041f, 0.0000f}, {0.0081f, 0.0029f, 0.0000f},
+    {0.0058f, 0.0021f, 0.0000f}, {0.0041f, 0.0015f, 0.0000f}, {0.0029f, 0.0010f, 0.0000f},
+    {0.0020f, 0.0007f, 0.0000f}, {0.0014f, 0.0005f, 0.0000f}, {0.0010f, 0.0004f, 0.0000f},
+    {0.0007f, 0.0002f, 0.0000f}, {0.0005f, 0.0002f, 0.0000f}, {0.0003f, 0.0001f, 0.0000f},
+    {0.0002f, 0.0001f, 0.0000f}, {0.0002f, 0.0001f, 0.0000f}, {0.0001f, 0.0000f, 0.0000f},
+    {0.0001f, 0.0000f, 0.0000f}, {0.0001f, 0.0000f, 0.0000f}, {0.0000f, 0.0000f, 0.0000f}};
 
-ccl_device void svm_node_wavelength(KernelGlobals *kg, ShaderData *sd, float *stack, uint wavelength, uint color_out)
+ccl_device void svm_node_wavelength(
+    KernelGlobals *kg, ShaderData *sd, float *stack, uint wavelength, uint color_out)
 {
-	float lambda_nm = stack_load_float(stack, wavelength);
-	float ii = (lambda_nm-380.0f) * (1.0f/5.0f);  // scaled 0..80
-	int i = float_to_int(ii);
-	float3 color;
+  float lambda_nm = stack_load_float(stack, wavelength);
+  float ii = (lambda_nm - 380.0f) * (1.0f / 5.0f);  // scaled 0..80
+  int i = float_to_int(ii);
+  float3 color;
 
-	if(i < 0 || i >= 80) {
-		color = make_float3(0.0f, 0.0f, 0.0f);
-	}
-	else {
-		ii -= i;
-		ccl_constant float *c = cie_colour_match[i];
-		color = interp(make_float3(c[0], c[1], c[2]), make_float3(c[3], c[4], c[5]), ii);
-	}
+  if (i < 0 || i >= 80) {
+    color = make_float3(0.0f, 0.0f, 0.0f);
+  }
+  else {
+    ii -= i;
+    ccl_constant float *c = cie_colour_match[i];
+    color = interp(make_float3(c[0], c[1], c[2]), make_float3(c[3], c[4], c[5]), ii);
+  }
 
-	color = xyz_to_rgb(kg, color);
-	color *= 1.0f/2.52f;	// Empirical scale from lg to make all comps <= 1
+  color = xyz_to_rgb(kg, color);
+  color *= 1.0f / 2.52f;  // Empirical scale from lg to make all comps <= 1
 
-	/* Clamp to zero if values are smaller */
-	color = max(color, make_float3(0.0f, 0.0f, 0.0f));
+  /* Clamp to zero if values are smaller */
+  color = max(color, make_float3(0.0f, 0.0f, 0.0f));
 
-	stack_store_float3(stack, color_out, color);
+  stack_store_float3(stack, color_out, color);
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_wireframe.h b/intern/cycles/kernel/svm/svm_wireframe.h
index 35df9e8a0e7..55e61d0e8c7 100644
--- a/intern/cycles/kernel/svm/svm_wireframe.h
+++ b/intern/cycles/kernel/svm/svm_wireframe.h
@@ -34,103 +34,97 @@ CCL_NAMESPACE_BEGIN
 
 /* Wireframe Node */
 
-ccl_device_inline float wireframe(KernelGlobals *kg,
-                                  ShaderData *sd,
-                                  float size,
-                                  int pixel_size,
-                                  float3 *P)
+ccl_device_inline float wireframe(
+    KernelGlobals *kg, ShaderData *sd, float size, int pixel_size, float3 *P)
 {
 #ifdef __HAIR__
-	if(sd->prim != PRIM_NONE && sd->type & PRIMITIVE_ALL_TRIANGLE)
+  if (sd->prim != PRIM_NONE && sd->type & PRIMITIVE_ALL_TRIANGLE)
 #else
-	if(sd->prim != PRIM_NONE)
+  if (sd->prim != PRIM_NONE)
 #endif
-	{
-		float3 Co[3];
-		float pixelwidth = 1.0f;
+  {
+    float3 Co[3];
+    float pixelwidth = 1.0f;
 
-		/* Triangles */
-		int np = 3;
+    /* Triangles */
+    int np = 3;
 
-		if(sd->type & PRIMITIVE_TRIANGLE)
-			triangle_vertices(kg, sd->prim, Co);
-		else
-			motion_triangle_vertices(kg, sd->object, sd->prim, sd->time, Co);
+    if (sd->type & PRIMITIVE_TRIANGLE)
+      triangle_vertices(kg, sd->prim, Co);
+    else
+      motion_triangle_vertices(kg, sd->object, sd->prim, sd->time, Co);
 
-		if(!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
-			object_position_transform(kg, sd, &Co[0]);
-			object_position_transform(kg, sd, &Co[1]);
-			object_position_transform(kg, sd, &Co[2]);
-		}
+    if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
+      object_position_transform(kg, sd, &Co[0]);
+      object_position_transform(kg, sd, &Co[1]);
+      object_position_transform(kg, sd, &Co[2]);
+    }
 
-		if(pixel_size) {
-			// Project the derivatives of P to the viewing plane defined
-			// by I so we have a measure of how big is a pixel at this point
-			float pixelwidth_x = len(sd->dP.dx - dot(sd->dP.dx, sd->I) * sd->I);
-			float pixelwidth_y = len(sd->dP.dy - dot(sd->dP.dy, sd->I) * sd->I);
-			// Take the average of both axis' length
-			pixelwidth = (pixelwidth_x + pixelwidth_y) * 0.5f;
-		}
+    if (pixel_size) {
+      // Project the derivatives of P to the viewing plane defined
+      // by I so we have a measure of how big is a pixel at this point
+      float pixelwidth_x = len(sd->dP.dx - dot(sd->dP.dx, sd->I) * sd->I);
+      float pixelwidth_y = len(sd->dP.dy - dot(sd->dP.dy, sd->I) * sd->I);
+      // Take the average of both axis' length
+      pixelwidth = (pixelwidth_x + pixelwidth_y) * 0.5f;
+    }
 
-		// Use half the width as the neighbor face will render the
-		// other half. And take the square for fast comparison
-		pixelwidth *= 0.5f * size;
-		pixelwidth *= pixelwidth;
-		for(int i = 0; i < np; i++) {
-			int i2 = i ? i - 1 : np - 1;
-			float3 dir = *P - Co[i];
-			float3 edge = Co[i] - Co[i2];
-			float3 crs = cross(edge, dir);
-			// At this point dot(crs, crs) / dot(edge, edge) is
-			// the square of area / length(edge) == square of the
-			// distance to the edge.
-			if(dot(crs, crs) < (dot(edge, edge) * pixelwidth))
-				return 1.0f;
-		}
-	}
-	return 0.0f;
+    // Use half the width as the neighbor face will render the
+    // other half. And take the square for fast comparison
+    pixelwidth *= 0.5f * size;
+    pixelwidth *= pixelwidth;
+    for (int i = 0; i < np; i++) {
+      int i2 = i ? i - 1 : np - 1;
+      float3 dir = *P - Co[i];
+      float3 edge = Co[i] - Co[i2];
+      float3 crs = cross(edge, dir);
+      // At this point dot(crs, crs) / dot(edge, edge) is
+      // the square of area / length(edge) == square of the
+      // distance to the edge.
+      if (dot(crs, crs) < (dot(edge, edge) * pixelwidth))
+        return 1.0f;
+    }
+  }
+  return 0.0f;
 }
 
-ccl_device void svm_node_wireframe(KernelGlobals *kg,
-                                   ShaderData *sd,
-                                   float *stack,
-                                   uint4 node)
+ccl_device void svm_node_wireframe(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
 {
-	uint in_size = node.y;
-	uint out_fac = node.z;
-	uint use_pixel_size, bump_offset;
-	decode_node_uchar4(node.w, &use_pixel_size, &bump_offset, NULL, NULL);
+  uint in_size = node.y;
+  uint out_fac = node.z;
+  uint use_pixel_size, bump_offset;
+  decode_node_uchar4(node.w, &use_pixel_size, &bump_offset, NULL, NULL);
 
-	/* Input Data */
-	float size = stack_load_float(stack, in_size);
-	int pixel_size = (int)use_pixel_size;
+  /* Input Data */
+  float size = stack_load_float(stack, in_size);
+  int pixel_size = (int)use_pixel_size;
 
-	/* Calculate wireframe */
+  /* Calculate wireframe */
 #ifdef __SPLIT_KERNEL__
-	/* TODO(sergey): This is because sd is actually a global space,
-	 * which makes it difficult to re-use same wireframe() function.
-	 *
-	 * With OpenCL 2.0 it's possible to avoid this change, but for until
-	 * then we'll be living with such an exception.
-	 */
-	float3 P = sd->P;
-	float f = wireframe(kg, sd, size, pixel_size, &P);
+  /* TODO(sergey): This is because sd is actually a global space,
+   * which makes it difficult to re-use same wireframe() function.
+   *
+   * With OpenCL 2.0 it's possible to avoid this change, but for until
+   * then we'll be living with such an exception.
+   */
+  float3 P = sd->P;
+  float f = wireframe(kg, sd, size, pixel_size, &P);
 #else
-	float f = wireframe(kg, sd, size, pixel_size, &sd->P);
+  float f = wireframe(kg, sd, size, pixel_size, &sd->P);
 #endif
 
-	/* TODO(sergey): Think of faster way to calculate derivatives. */
-	if(bump_offset == NODE_BUMP_OFFSET_DX) {
-		float3 Px = sd->P - sd->dP.dx;
-		f += (f - wireframe(kg, sd, size, pixel_size, &Px)) / len(sd->dP.dx);
-	}
-	else if(bump_offset == NODE_BUMP_OFFSET_DY) {
-		float3 Py = sd->P - sd->dP.dy;
-		f += (f - wireframe(kg, sd, size, pixel_size, &Py)) / len(sd->dP.dy);
-	}
+  /* TODO(sergey): Think of faster way to calculate derivatives. */
+  if (bump_offset == NODE_BUMP_OFFSET_DX) {
+    float3 Px = sd->P - sd->dP.dx;
+    f += (f - wireframe(kg, sd, size, pixel_size, &Px)) / len(sd->dP.dx);
+  }
+  else if (bump_offset == NODE_BUMP_OFFSET_DY) {
+    float3 Py = sd->P - sd->dP.dy;
+    f += (f - wireframe(kg, sd, size, pixel_size, &Py)) / len(sd->dP.dy);
+  }
 
-	if(stack_valid(out_fac))
-		stack_store_float(stack, out_fac, f);
+  if (stack_valid(out_fac))
+    stack_store_float(stack, out_fac, f);
 }
 
 CCL_NAMESPACE_END
author	Campbell Barton <ideasman42@gmail.com>	2019-04-17 07:17:24 +0300
committer	Campbell Barton <ideasman42@gmail.com>	2019-04-17 07:21:24 +0300
commit	e12c08e8d170b7ca40f204a5b0423c23a9fbc2c1 (patch)
tree	8cf3453d12edb177a218ef8009357518ec6cab6a /intern/cycles/kernel
parent	b3dabc200a4b0399ec6b81f2ff2730d07b44fcaa (diff)